Skip to content

Commit 7507a73

Browse files
authored
Add functions for counting the results of a set operation (#116)
1 parent ce8313c commit 7507a73

2 files changed

Lines changed: 233 additions & 8 deletions

File tree

benches/benches/benches.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,110 @@ fn grow_and_insert(c: &mut Criterion) {
139139
});
140140
}
141141

142+
fn iter_union_count(c: &mut Criterion) {
143+
const N: usize = 1_000_000;
144+
let mut fb_a = FixedBitSet::with_capacity(N);
145+
let mut fb_b = FixedBitSet::with_capacity(N);
146+
147+
fb_a.insert_range(..);
148+
fb_b.insert_range(..);
149+
150+
c.bench_function("iter_union_count/1m", |b| {
151+
b.iter(|| black_box(fb_a.union(&fb_b).count()))
152+
});
153+
}
154+
155+
fn iter_intersect_count(c: &mut Criterion) {
156+
const N: usize = 1_000_000;
157+
let mut fb_a = FixedBitSet::with_capacity(N);
158+
let mut fb_b = FixedBitSet::with_capacity(N);
159+
160+
fb_a.insert_range(..);
161+
fb_b.insert_range(..);
162+
163+
c.bench_function("iter_intersection_count/1m", |b| {
164+
b.iter(|| black_box(fb_a.intersection(&fb_b).count()));
165+
});
166+
}
167+
168+
fn iter_difference_count(c: &mut Criterion) {
169+
const N: usize = 1_000_000;
170+
let mut fb_a = FixedBitSet::with_capacity(N);
171+
let mut fb_b = FixedBitSet::with_capacity(N);
172+
173+
fb_a.insert_range(..);
174+
fb_b.insert_range(..);
175+
176+
c.bench_function("iter_difference_count/1m", |b| {
177+
b.iter(|| black_box(fb_a.difference(&fb_b).count()));
178+
});
179+
}
180+
181+
fn iter_symmetric_difference_count(c: &mut Criterion) {
182+
const N: usize = 1_000_000;
183+
let mut fb_a = FixedBitSet::with_capacity(N);
184+
let mut fb_b = FixedBitSet::with_capacity(N);
185+
186+
fb_a.insert_range(..);
187+
fb_b.insert_range(..);
188+
189+
c.bench_function("iter_symmetric_difference_count/1m", |b| {
190+
b.iter(|| black_box(fb_a.symmetric_difference(&fb_b).count()));
191+
});
192+
}
193+
194+
fn union_count(c: &mut Criterion) {
195+
const N: usize = 1_000_000;
196+
let mut fb_a = FixedBitSet::with_capacity(N);
197+
let mut fb_b = FixedBitSet::with_capacity(N);
198+
199+
fb_a.insert_range(..);
200+
fb_b.insert_range(..);
201+
202+
c.bench_function("union_count/1m", |b| {
203+
b.iter(|| black_box(fb_a.union_count(&fb_b)))
204+
});
205+
}
206+
207+
fn intersect_count(c: &mut Criterion) {
208+
const N: usize = 1_000_000;
209+
let mut fb_a = FixedBitSet::with_capacity(N);
210+
let mut fb_b = FixedBitSet::with_capacity(N);
211+
212+
fb_a.insert_range(..);
213+
fb_b.insert_range(..);
214+
215+
c.bench_function("intersection_count/1m", |b| {
216+
b.iter(|| black_box(fb_a.intersection_count(&fb_b)));
217+
});
218+
}
219+
220+
fn difference_count(c: &mut Criterion) {
221+
const N: usize = 1_000_000;
222+
let mut fb_a = FixedBitSet::with_capacity(N);
223+
let mut fb_b = FixedBitSet::with_capacity(N);
224+
225+
fb_a.insert_range(..);
226+
fb_b.insert_range(..);
227+
228+
c.bench_function("difference_count/1m", |b| {
229+
b.iter(|| black_box(fb_a.difference_count(&fb_b)));
230+
});
231+
}
232+
233+
fn symmetric_difference_count(c: &mut Criterion) {
234+
const N: usize = 1_000_000;
235+
let mut fb_a = FixedBitSet::with_capacity(N);
236+
let mut fb_b = FixedBitSet::with_capacity(N);
237+
238+
fb_a.insert_range(..);
239+
fb_b.insert_range(..);
240+
241+
c.bench_function("symmetric_difference_count/1m", |b| {
242+
b.iter(|| black_box(fb_a.symmetric_difference_count(&fb_b)));
243+
});
244+
}
245+
142246
fn union_with(c: &mut Criterion) {
143247
const N: usize = 1_000_000;
144248
let mut fb_a = FixedBitSet::with_capacity(N);
@@ -201,6 +305,14 @@ criterion_group!(
201305
iter_ones_sparse,
202306
iter_ones_all_ones,
203307
iter_ones_all_ones_rev,
308+
iter_union_count,
309+
iter_intersect_count,
310+
iter_difference_count,
311+
iter_symmetric_difference_count,
312+
union_count,
313+
intersect_count,
314+
difference_count,
315+
symmetric_difference_count,
204316
insert_range,
205317
insert,
206318
intersect_with,

src/lib.rs

Lines changed: 121 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ impl FixedBitSet {
132132
blocks
133133
}
134134

135+
#[inline]
136+
fn batch_count_ones(blocks: impl IntoIterator<Item = Block>) -> usize {
137+
blocks.into_iter().map(|x| x.count_ones() as usize).sum()
138+
}
139+
135140
/// Grows the internal size of the bitset before inserting a bit
136141
///
137142
/// Unlike `insert`, this cannot panic, but may allocate if the bit is outside of the existing buffer's range.
@@ -428,18 +433,16 @@ impl FixedBitSet {
428433

429434
/// Count the number of set bits in the given bit range.
430435
///
436+
/// This function is potentially much faster than using `ones().count()`.
431437
/// Use `..` to count the whole content of the bitset.
432438
///
433439
/// **Panics** if the range extends past the end of the bitset.
434440
#[inline]
435441
pub fn count_ones<T: IndexRange>(&self, range: T) -> usize {
436-
Masks::new(range, self.length)
437-
.map(|(block, mask)| {
438-
// SAFETY: Masks cannot return a block index that is out of range.
439-
let value = unsafe { *self.get_unchecked(block) };
440-
(value & mask).count_ones() as usize
441-
})
442-
.sum()
442+
Self::batch_count_ones(Masks::new(range, self.length).map(|(block, mask)| {
443+
// SAFETY: Masks cannot return a block index that is out of range.
444+
unsafe { *self.get_unchecked(block) & mask }
445+
}))
443446
}
444447

445448
/// Sets every bit in the given range to the given state (`enabled`)
@@ -669,6 +672,73 @@ impl FixedBitSet {
669672
}
670673
}
671674

675+
/// Computes how many bits would be set in the union between two bitsets.
676+
///
677+
/// This is potentially much faster than using `union(other).count()`. Unlike
678+
/// other methods like using [`union_with`] followed by [`count_ones`], this
679+
/// does not mutate in place or require separate allocations.
680+
#[inline]
681+
pub fn union_count(&self, other: &FixedBitSet) -> usize {
682+
let me = self.as_slice();
683+
let other = other.as_slice();
684+
let mut count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x | *y)));
685+
if other.len() > me.len() {
686+
count += Self::batch_count_ones(other[me.len()..].iter().copied());
687+
} else if self.len() > other.len() {
688+
count += Self::batch_count_ones(me[other.len()..].iter().copied());
689+
}
690+
count
691+
}
692+
693+
/// Computes how many bits would be set in the intersection between two bitsets.
694+
///
695+
/// This is potentially much faster than using `intersection(other).count()`. Unlike
696+
/// other methods like using [`intersect_with`] followed by [`count_ones`], this
697+
/// does not mutate in place or require separate allocations.
698+
#[inline]
699+
pub fn intersection_count(&self, other: &FixedBitSet) -> usize {
700+
Self::batch_count_ones(
701+
self.as_slice()
702+
.iter()
703+
.zip(other.as_slice())
704+
.map(|(x, y)| (*x & *y)),
705+
)
706+
}
707+
708+
/// Computes how many bits would be set in the difference between two bitsets.
709+
///
710+
/// This is potentially much faster than using `difference(other).count()`. Unlike
711+
/// other methods like using [`difference_with`] followed by [`count_ones`], this
712+
/// does not mutate in place or require separate allocations.
713+
#[inline]
714+
pub fn difference_count(&self, other: &FixedBitSet) -> usize {
715+
Self::batch_count_ones(
716+
self.as_slice()
717+
.iter()
718+
.zip(other.as_slice().iter())
719+
.map(|(x, y)| (*x & !*y)),
720+
)
721+
}
722+
723+
/// Computes how many bits would be set in the symmetric difference between two bitsets.
724+
///
725+
/// This is potentially much faster than using `symmetric_difference(other).count()`. Unlike
726+
/// other methods like using [`symmetric_difference_with`] followed by [`count_ones`], this
727+
/// does not mutate in place or require separate allocations.
728+
#[inline]
729+
pub fn symmetric_difference_count(&self, other: &FixedBitSet) -> usize {
730+
let me = self.as_slice();
731+
let other = other.as_slice();
732+
let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x ^ *y)));
733+
if other.len() > me.len() {
734+
count + Self::batch_count_ones(other[me.len()..].iter().copied())
735+
} else if me.len() > other.len() {
736+
count + Self::batch_count_ones(me[other.len()..].iter().copied())
737+
} else {
738+
count
739+
}
740+
}
741+
672742
/// Returns `true` if `self` has no elements in common with `other`. This
673743
/// is equivalent to checking for an empty intersection.
674744
pub fn is_disjoint(&self, other: &FixedBitSet) -> bool {
@@ -1830,7 +1900,8 @@ mod tests {
18301900
let mut b = FixedBitSet::with_capacity(len);
18311901
a.set_range(..a_end, true);
18321902
b.set_range(b_start.., true);
1833-
1903+
let count = a.intersection_count(&b);
1904+
let iterator_count = a.intersection(&b).count();
18341905
let mut ab = a.intersection(&b).collect::<FixedBitSet>();
18351906

18361907
for i in 0..b_start {
@@ -1850,6 +1921,15 @@ mod tests {
18501921
ab, a,
18511922
"intersection and intersect_with produce the same results"
18521923
);
1924+
assert_eq!(
1925+
ab.count_ones(..),
1926+
count,
1927+
"intersection and intersection_count produce the same results"
1928+
);
1929+
assert_eq!(
1930+
count, iterator_count,
1931+
"intersection and intersection_count produce the same results"
1932+
);
18531933
}
18541934

18551935
#[test]
@@ -1862,6 +1942,8 @@ mod tests {
18621942
let mut b = FixedBitSet::with_capacity(b_len);
18631943
a.set_range(a_start.., true);
18641944
b.set_range(..b_end, true);
1945+
let count = a.union_count(&b);
1946+
let iterator_count = a.union(&b).count();
18651947
let ab = a.union(&b).collect::<FixedBitSet>();
18661948
for i in a_start..a_len {
18671949
assert!(ab.contains(i));
@@ -1875,6 +1957,15 @@ mod tests {
18751957

18761958
a.union_with(&b);
18771959
assert_eq!(ab, a, "union and union_with produce the same results");
1960+
assert_eq!(
1961+
count,
1962+
ab.count_ones(..),
1963+
"union and union_count produce the same results"
1964+
);
1965+
assert_eq!(
1966+
count, iterator_count,
1967+
"union and union_count produce the same results"
1968+
);
18781969
}
18791970

18801971
#[test]
@@ -1888,6 +1979,8 @@ mod tests {
18881979
let mut b = FixedBitSet::with_capacity(b_len);
18891980
a.set_range(a_start..a_end, true);
18901981
b.set_range(b_start..b_len, true);
1982+
let count = a.difference_count(&b);
1983+
let iterator_count = a.difference(&b).count();
18911984
let mut a_diff_b = a.difference(&b).collect::<FixedBitSet>();
18921985
for i in a_start..b_start {
18931986
assert!(a_diff_b.contains(i));
@@ -1903,6 +1996,15 @@ mod tests {
19031996
a_diff_b, a,
19041997
"difference and difference_with produce the same results"
19051998
);
1999+
assert_eq!(
2000+
a_diff_b.count_ones(..),
2001+
count,
2002+
"difference and difference_count produce the same results"
2003+
);
2004+
assert_eq!(
2005+
count, iterator_count,
2006+
"intersection and intersection_count produce the same results"
2007+
);
19062008
}
19072009

19082010
#[test]
@@ -1916,6 +2018,8 @@ mod tests {
19162018
let mut b = FixedBitSet::with_capacity(b_len);
19172019
a.set_range(a_start..a_end, true);
19182020
b.set_range(b_start..b_len, true);
2021+
let count = a.symmetric_difference_count(&b);
2022+
let iterator_count = a.symmetric_difference(&b).count();
19192023
let a_sym_diff_b = a.symmetric_difference(&b).collect::<FixedBitSet>();
19202024
for i in 0..a_start {
19212025
assert!(!a_sym_diff_b.contains(i));
@@ -1935,6 +2039,15 @@ mod tests {
19352039
a_sym_diff_b, a,
19362040
"symmetric_difference and _with produce the same results"
19372041
);
2042+
assert_eq!(
2043+
a_sym_diff_b.count_ones(..),
2044+
count,
2045+
"symmetric_difference and _count produce the same results"
2046+
);
2047+
assert_eq!(
2048+
count, iterator_count,
2049+
"symmetric_difference and _count produce the same results"
2050+
);
19382051
}
19392052

19402053
#[test]

0 commit comments

Comments
 (0)