read_fonts/collections/int_set/
bitset.rs

1//! A fast & efficient ordered set for unsigned integers.
2
3use super::bitpage::BitPage;
4use super::bitpage::RangeIter;
5use super::bitpage::PAGE_BITS;
6use std::cmp::Ordering;
7use std::hash::Hash;
8
9use std::ops::RangeInclusive;
10
11// log_2(PAGE_BITS)
12const PAGE_BITS_LOG_2: u32 = PAGE_BITS.ilog2();
13
14/// An ordered integer (u32) set.
15#[derive(Clone, Debug, Default)]
16#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
17pub(crate) struct BitSet {
18    // TODO(garretrieger): consider a "small array" type instead of Vec.
19    pages: Vec<BitPage>,
20    page_map: Vec<PageInfo>,
21    length: u64,
22}
23
24impl BitSet {
25    /// Add val as a member of this set.
26    ///
27    /// If the set did not previously contain this value, returns `true`.
28    pub(crate) fn insert(&mut self, val: u32) -> bool {
29        let page = self.ensure_page_for_mut(val);
30        let ret = page.insert(val);
31        self.length += ret as u64;
32        ret
33    }
34
35    /// Add all values in range as members of this set.
36    pub(crate) fn insert_range(&mut self, range: RangeInclusive<u32>) {
37        let start = *range.start();
38        let end = *range.end();
39        if start > end {
40            return;
41        }
42
43        let major_start = Self::get_major_value(start);
44        let major_end = Self::get_major_value(end);
45
46        let mut total_added = 0;
47
48        for major in major_start..=major_end {
49            let page_start = start.max(Self::major_start(major));
50            let page_end = end.min(Self::major_start(major) + (PAGE_BITS - 1));
51            let page = self.ensure_page_for_major_mut(major);
52            let pre_len = page.len();
53            page.insert_range(page_start, page_end);
54            let delta_len = page.len() - pre_len;
55            total_added += delta_len as u64;
56        }
57        self.length += total_added;
58    }
59
60    /// An alternate version of [`extend()`] which is optimized for inserting an unsorted
61    /// iterator of values.
62    ///
63    /// [`extend()`]: Self::extend
64    pub(crate) fn extend_unsorted<U: IntoIterator<Item = u32>>(&mut self, iter: U) {
65        self.length += iter
66            .into_iter()
67            .map(|val| {
68                let major_value = Self::get_major_value(val);
69                let page = self.ensure_page_for_major_mut(major_value);
70                page.insert(val) as u64
71            })
72            .sum::<u64>();
73    }
74
75    /// Remove val from this set.
76    ///
77    /// Returns `true` if the value was present.
78    pub(crate) fn remove(&mut self, val: u32) -> bool {
79        let maybe_page = self.page_for_mut(val);
80        if let Some(page) = maybe_page {
81            let ret = page.remove(val);
82            self.length -= ret as u64;
83            ret
84        } else {
85            false
86        }
87    }
88
89    // Remove all values in iter from this set.
90    pub(crate) fn remove_all<U: IntoIterator<Item = u32>>(&mut self, iter: U) {
91        let mut last_page_index: Option<usize> = None;
92        let mut last_major_value = u32::MAX;
93        let mut total_removed = 0;
94        for val in iter {
95            let major_value = Self::get_major_value(val);
96            if major_value != last_major_value {
97                last_page_index = self.page_index_for_major(major_value);
98                last_major_value = major_value;
99            };
100
101            let Some(page_index) = last_page_index else {
102                continue;
103            };
104
105            if let Some(page) = self.pages.get_mut(page_index) {
106                total_removed += page.remove(val) as u64;
107            }
108        }
109        self.length -= total_removed;
110    }
111
112    /// Removes all values in range as members of this set.
113    pub(crate) fn remove_range(&mut self, range: RangeInclusive<u32>) {
114        let start = *(range.start());
115        let end = *(range.end());
116        if start > end {
117            return;
118        }
119
120        let start_major = Self::get_major_value(start);
121        let end_major = Self::get_major_value(end);
122        let mut info_index = match self
123            .page_map
124            .binary_search_by(|probe| probe.major_value.cmp(&start_major))
125        {
126            Ok(info_index) => info_index,
127            Err(info_index) => info_index,
128        };
129
130        loop {
131            let Some(info) = self.page_map.get(info_index) else {
132                break;
133            };
134            let Some(page) = self.pages.get_mut(info.index as usize) else {
135                break;
136            };
137
138            if info.major_value > end_major {
139                break;
140            } else if info.major_value == start_major {
141                page.remove_range(start, Self::major_end(start_major).min(end));
142            } else if info.major_value == end_major {
143                page.remove_range(Self::major_start(end_major), end);
144                break;
145            } else {
146                page.clear();
147            }
148            info_index += 1;
149        }
150
151        self.recompute_length();
152    }
153
154    /// Returns true if val is a member of this set.
155    pub(crate) fn contains(&self, val: u32) -> bool {
156        self.page_for(val)
157            .map(|page| page.contains(val))
158            .unwrap_or(false)
159    }
160
161    pub(crate) const fn empty() -> BitSet {
162        BitSet {
163            pages: Vec::new(),
164            page_map: Vec::new(),
165            length: 0,
166        }
167    }
168
169    /// Remove all members from this set.
170    pub(crate) fn clear(&mut self) {
171        self.pages.clear();
172        self.page_map.clear();
173        self.length = 0;
174    }
175
176    /// Return true if there are no members in this set.
177    #[cfg(test)]
178    pub(crate) fn is_empty(&self) -> bool {
179        self.len() == 0
180    }
181
182    fn recompute_length(&mut self) {
183        self.length = self.pages.iter().map(|page| page.len() as u64).sum();
184    }
185
186    /// Returns the number of members in this set.
187    pub(crate) fn len(&self) -> u64 {
188        self.length
189    }
190
191    pub(crate) fn num_pages(&self) -> usize {
192        self.pages.len()
193    }
194
195    /// Sets the members of this set to the union of self and other.
196    pub(crate) fn union(&mut self, other: &BitSet) {
197        self.process(BitPage::union, other);
198    }
199
200    /// Sets the members of this set to the intersection of self and other.
201    pub(crate) fn intersect(&mut self, other: &BitSet) {
202        self.process(BitPage::intersect, other);
203    }
204
205    /// Sets the members of this set to self - other.
206    pub(crate) fn subtract(&mut self, other: &BitSet) {
207        self.process(BitPage::subtract, other);
208    }
209
210    /// Sets the members of this set to other - self.
211    pub(crate) fn reversed_subtract(&mut self, other: &BitSet) {
212        self.process(|a, b| BitPage::subtract(b, a), other);
213    }
214
215    pub(crate) fn iter(&self) -> impl DoubleEndedIterator<Item = u32> + '_ {
216        self.iter_non_empty_pages().flat_map(|(major, page)| {
217            let base = Self::major_start(major);
218            page.iter().map(move |v| base + v)
219        })
220    }
221
222    /// Iterator over the members of this set starting from value.
223    ///
224    /// So value is included in the iterator if it's in the set.
225    pub(crate) fn iter_from(&self, value: u32) -> impl Iterator<Item = u32> + '_ {
226        let major_value = Self::get_major_value(value);
227        let result = self
228            .page_map
229            .binary_search_by(|probe| probe.major_value.cmp(&major_value));
230
231        let (page_map_index, partial_first_page) = match result {
232            Ok(page_map_index) => (page_map_index, true),
233            Err(page_map_index) => (page_map_index, false),
234        };
235
236        let page = self
237            .page_map
238            .get(page_map_index)
239            .and_then(move |page_info| {
240                self.pages
241                    .get(page_info.index as usize)
242                    .map(|page| (page, page_info.major_value))
243            });
244
245        let init_it =
246            page.filter(|_| partial_first_page)
247                .into_iter()
248                .flat_map(move |(page, major)| {
249                    let base = Self::major_start(major);
250                    page.iter_from(value).map(move |v| base + v)
251                });
252
253        let follow_on_page_map_index = if partial_first_page {
254            page_map_index + 1
255        } else {
256            page_map_index
257        };
258
259        let follow_on_it = self.page_map[follow_on_page_map_index..]
260            .iter()
261            .flat_map(|info| {
262                self.pages
263                    .get(info.index as usize)
264                    .map(|page| (info.major_value, page))
265            })
266            .filter(|(_, page)| !page.is_empty())
267            .flat_map(|(major, page)| {
268                let base = Self::major_start(major);
269                page.iter().map(move |v| base + v)
270            });
271
272        init_it.chain(follow_on_it)
273    }
274
275    pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
276        BitSetRangeIter::new(self)
277    }
278
279    fn iter_pages(&self) -> impl DoubleEndedIterator<Item = (u32, &BitPage)> + '_ {
280        self.page_map.iter().flat_map(|info| {
281            self.pages
282                .get(info.index as usize)
283                .map(|page| (info.major_value, page))
284        })
285    }
286
287    fn iter_non_empty_pages(&self) -> impl DoubleEndedIterator<Item = (u32, &BitPage)> + '_ {
288        self.iter_pages().filter(|(_, page)| !page.is_empty())
289    }
290
291    /// Determine the passthrough behaviour of the operator.
292    ///
293    /// The passthrough behaviour is what happens to a page on one side of the operation if the other side is 0.
294    /// For example union passes through both left and right sides since it preserves the left or right side when
295    /// the other side is 0. Knowing this lets us optimize some cases when only one page is present on one side.
296    fn passthrough_behavior<Op>(op: &Op) -> (bool, bool)
297    where
298        Op: Fn(&BitPage, &BitPage) -> BitPage,
299    {
300        let mut one: BitPage = BitPage::new_zeroes();
301        one.insert(0);
302        let zero: BitPage = BitPage::new_zeroes();
303
304        let passthrough_left: bool = op(&one, &zero).contains(0);
305        let passthrough_right: bool = op(&zero, &one).contains(0);
306
307        (passthrough_left, passthrough_right)
308    }
309
310    fn process<Op>(&mut self, op: Op, other: &BitSet)
311    where
312        Op: Fn(&BitPage, &BitPage) -> BitPage,
313    {
314        let (passthrough_left, passthrough_right) = BitSet::passthrough_behavior(&op);
315
316        let mut len_a = self.pages.len();
317        let len_b = other.pages.len();
318        let mut idx_a = 0;
319        let mut idx_b = 0;
320        let mut count = 0;
321        let mut write_idx = 0;
322
323        // Step 1: Estimate the new size of this set (in number of pages) after processing, and remove left side
324        //         pages that won't be needed.
325        while idx_a < len_a && idx_b < len_b {
326            let a_major = self.page_map[idx_a].major_value;
327            let b_major = other.page_map[idx_b].major_value;
328
329            match a_major.cmp(&b_major) {
330                Ordering::Equal => {
331                    if !passthrough_left {
332                        // If we don't passthrough the left side, then the only case where we
333                        // keep a page from the left is when there is also a page at the same major
334                        // on the right side. In this case move page_map entries that we're keeping
335                        // on the left side set to the front of the page_map vector. Otherwise if
336                        // we do passthrough left, then we we keep all left hand side pages and this
337                        // isn't necessary.
338                        if write_idx < idx_a {
339                            self.page_map[write_idx] = self.page_map[idx_a];
340                        }
341                        write_idx += 1;
342                    }
343
344                    count += 1;
345                    idx_a += 1;
346                    idx_b += 1;
347                }
348                Ordering::Less => {
349                    if passthrough_left {
350                        count += 1;
351                    }
352                    idx_a += 1;
353                }
354                Ordering::Greater => {
355                    if passthrough_right {
356                        count += 1;
357                    }
358                    idx_b += 1;
359                }
360            }
361        }
362
363        if passthrough_left {
364            count += len_a - idx_a;
365        }
366
367        if passthrough_right {
368            count += len_b - idx_b;
369        }
370
371        // Step 2: compact and resize for the new estimated left side size.
372        let mut next_page = len_a;
373        if !passthrough_left {
374            len_a = write_idx;
375            next_page = write_idx;
376            self.compact(write_idx);
377        }
378
379        self.resize(count);
380        let new_count = count;
381
382        // Step 3: process and apply op in-place from the last to first page.
383        idx_a = len_a;
384        idx_b = len_b;
385        while idx_a > 0 && idx_b > 0 {
386            match self.page_map[idx_a - 1]
387                .major_value
388                .cmp(&other.page_map[idx_b - 1].major_value)
389            {
390                Ordering::Equal => {
391                    idx_a -= 1;
392                    idx_b -= 1;
393                    count -= 1;
394                    self.page_map[count] = self.page_map[idx_a];
395                    *self.page_for_index_mut(count).unwrap() = op(
396                        self.page_for_index(idx_a).unwrap(),
397                        other.page_for_index(idx_b).unwrap(),
398                    );
399                }
400                Ordering::Greater => {
401                    idx_a -= 1;
402                    if passthrough_left {
403                        count -= 1;
404                        self.page_map[count] = self.page_map[idx_a];
405                    }
406                }
407                Ordering::Less => {
408                    idx_b -= 1;
409                    if passthrough_right {
410                        count -= 1;
411                        self.page_map[count].major_value = other.page_map[idx_b].major_value;
412                        self.page_map[count].index = next_page as u32;
413                        next_page += 1;
414                        *self.page_for_index_mut(count).unwrap() =
415                            other.page_for_index(idx_b).unwrap().clone();
416                    }
417                }
418            }
419        }
420
421        // Step 4: there are only pages left on one side now, finish processing them if the appropriate passthrough is
422        //         enabled.
423        if passthrough_left {
424            while idx_a > 0 {
425                idx_a -= 1;
426                count -= 1;
427                self.page_map[count] = self.page_map[idx_a];
428            }
429        }
430
431        if passthrough_right {
432            while idx_b > 0 {
433                idx_b -= 1;
434                count -= 1;
435                self.page_map[count].major_value = other.page_map[idx_b].major_value;
436                self.page_map[count].index = next_page as u32;
437                next_page += 1;
438                *self.page_for_index_mut(count).unwrap() =
439                    other.page_for_index(idx_b).unwrap().clone();
440            }
441        }
442
443        self.resize(new_count);
444        self.recompute_length();
445    }
446
447    fn compact(&mut self, new_len: usize) {
448        let mut old_index_to_page_map_index = Vec::<usize>::with_capacity(self.pages.len());
449        old_index_to_page_map_index.resize(self.pages.len(), usize::MAX);
450
451        for i in 0usize..new_len {
452            old_index_to_page_map_index[self.page_map[i].index as usize] = i;
453        }
454
455        self.compact_pages(old_index_to_page_map_index);
456    }
457
458    fn compact_pages(&mut self, old_index_to_page_map_index: Vec<usize>) {
459        let mut write_index = 0;
460        for (i, page_map_index) in old_index_to_page_map_index
461            .iter()
462            .enumerate()
463            .take(self.pages.len())
464        {
465            if *page_map_index == usize::MAX {
466                continue;
467            }
468
469            if write_index < i {
470                self.pages[write_index] = self.pages[i].clone();
471            }
472
473            self.page_map[*page_map_index].index = write_index as u32;
474            write_index += 1;
475        }
476    }
477
478    fn resize(&mut self, new_len: usize) {
479        self.page_map.resize(
480            new_len,
481            PageInfo {
482                major_value: 0,
483                index: 0,
484            },
485        );
486        self.pages.resize(new_len, BitPage::new_zeroes());
487    }
488
489    /// Return the major value (top 23 bits) of the page associated with value.
490    const fn get_major_value(value: u32) -> u32 {
491        value >> PAGE_BITS_LOG_2
492    }
493
494    const fn major_start(major: u32) -> u32 {
495        major << PAGE_BITS_LOG_2
496    }
497
498    const fn major_end(major: u32) -> u32 {
499        // Note: (PAGE_BITS - 1) must be grouped to prevent overflow on addition for the largest page.
500        Self::major_start(major) + (PAGE_BITS - 1)
501    }
502
503    /// Returns the index in `self.pages` (if it exists) for the page with the same major as `major_value`.
504    fn page_index_for_major(&self, major_value: u32) -> Option<usize> {
505        self.page_map
506            .binary_search_by(|probe| probe.major_value.cmp(&major_value))
507            .ok()
508            .map(|info_idx| self.page_map[info_idx].index as usize)
509    }
510
511    /// Returns the index in `self.pages` for the page with the same major as `major_value`. Will create
512    /// the page if it does not yet exist.
513    fn ensure_page_index_for_major(&mut self, major_value: u32) -> usize {
514        match self
515            .page_map
516            .binary_search_by(|probe| probe.major_value.cmp(&major_value))
517        {
518            Ok(map_index) => self.page_map[map_index].index as usize,
519            Err(map_index_to_insert) => {
520                let page_index = self.pages.len();
521                self.pages.push(BitPage::new_zeroes());
522                let new_info = PageInfo {
523                    index: page_index as u32,
524                    major_value,
525                };
526                self.page_map.insert(map_index_to_insert, new_info);
527                page_index
528            }
529        }
530    }
531
532    /// Return a reference to the page that `value` resides in.
533    fn page_for(&self, value: u32) -> Option<&BitPage> {
534        let major_value = Self::get_major_value(value);
535        let pages_index = self.page_index_for_major(major_value)?;
536        self.pages.get(pages_index)
537    }
538
539    /// Return a mutable reference to the page that `value` resides in.
540    ///
541    /// Insert a new page if it doesn't exist.
542    fn page_for_mut(&mut self, value: u32) -> Option<&mut BitPage> {
543        let major_value = Self::get_major_value(value);
544        self.page_for_major_mut(major_value)
545    }
546
547    /// Return a mutable reference to the page with major value equal to `major_value`.
548    fn page_for_major_mut(&mut self, major_value: u32) -> Option<&mut BitPage> {
549        let page_index = self.page_index_for_major(major_value)?;
550        self.pages.get_mut(page_index)
551    }
552
553    /// Return a mutable reference to the page that `value` resides in.
554    ///
555    /// Insert a new page if it doesn't exist.
556    fn ensure_page_for_mut(&mut self, value: u32) -> &mut BitPage {
557        self.ensure_page_for_major_mut(Self::get_major_value(value))
558    }
559
560    /// Return a mutable reference to the page with major value equal to `major_value`.
561    /// Inserts a new page if it doesn't exist.
562    fn ensure_page_for_major_mut(&mut self, major_value: u32) -> &mut BitPage {
563        let page_index = self.ensure_page_index_for_major(major_value);
564        self.pages.get_mut(page_index).unwrap()
565    }
566
567    /// Return the mutable page at a given index
568    fn page_for_index_mut(&mut self, index: usize) -> Option<&mut BitPage> {
569        self.page_map
570            .get(index)
571            .and_then(|info| self.pages.get_mut(info.index as usize))
572    }
573
574    fn page_for_index(&self, index: usize) -> Option<&BitPage> {
575        self.page_map
576            .get(index)
577            .and_then(|info| self.pages.get(info.index as usize))
578    }
579}
580
581impl Extend<u32> for BitSet {
582    fn extend<U: IntoIterator<Item = u32>>(&mut self, iter: U) {
583        let mut builder = BitSetBuilder::start(self);
584        for val in iter {
585            builder.insert(val);
586        }
587        builder.finish();
588    }
589}
590
591/// This helper is used to construct [`BitSet`]'s from a stream of possibly sorted values.
592/// It remembers the last page index to reduce the amount of page lookups needed when inserting
593/// sorted data. If given unsorted values it will still work correctly, but may be slower then just
594/// repeatedly calling `insert()` on the bitset.
595pub(crate) struct BitSetBuilder<'a> {
596    pub(crate) set: &'a mut BitSet,
597    last_page_index: usize,
598    last_major_value: u32,
599}
600
601impl<'a> BitSetBuilder<'a> {
602    pub(crate) fn start(set: &'a mut BitSet) -> Self {
603        Self {
604            set,
605            last_page_index: usize::MAX,
606            last_major_value: u32::MAX,
607        }
608    }
609
610    pub(crate) fn insert(&mut self, val: u32) {
611        // TODO(garretrieger): additional optimization ideas:
612        // - Assuming data is sorted accumulate a single element mask and only commit it to the element
613        //   once the next value passes the end of the element.
614        let major_value = BitSet::get_major_value(val);
615        if major_value != self.last_major_value {
616            self.last_page_index = self.set.ensure_page_index_for_major(major_value);
617            self.last_major_value = major_value;
618        };
619        if let Some(page) = self.set.pages.get_mut(self.last_page_index) {
620            self.set.length += page.insert(val) as u64;
621        }
622    }
623
624    pub(crate) fn finish(&mut self) {
625        // we used to do some finalization and bookkeeping here, and we will
626        // want to again if we optimize the impl more.
627    }
628}
629
630#[derive(Clone, Copy, Debug, PartialEq, Eq)]
631#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
632struct PageInfo {
633    // index into pages vector of this page
634    index: u32,
635    /// the top 23 bits of values covered by this page
636    major_value: u32,
637}
638
639impl Hash for BitSet {
640    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
641        self.iter_non_empty_pages().for_each(|t| t.hash(state));
642    }
643}
644
645impl std::cmp::PartialEq for BitSet {
646    fn eq(&self, other: &Self) -> bool {
647        let mut this = self.iter_non_empty_pages();
648        let mut other = other.iter_non_empty_pages();
649
650        // Note: normally we would prefer to use zip, but we also
651        //       need to check that both iters have the same length.
652        loop {
653            match (this.next(), other.next()) {
654                (Some(a), Some(b)) if a == b => continue,
655                (None, None) => return true,
656                _ => return false,
657            }
658        }
659    }
660}
661
662impl std::cmp::Eq for BitSet {}
663
664impl std::cmp::PartialOrd for BitSet {
665    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
666        Some(self.cmp(other))
667    }
668}
669
670impl std::cmp::Ord for BitSet {
671    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
672        let this_it = self.iter();
673        let other_it = other.iter();
674
675        for (us, them) in this_it.zip(other_it) {
676            match us.cmp(&them) {
677                core::cmp::Ordering::Equal => continue,
678                other => return other,
679            }
680        }
681
682        // all items in iter are the same: is one collection longer?
683        self.len().cmp(&other.len())
684    }
685}
686
687struct BitSetRangeIter<'a> {
688    set: &'a BitSet,
689    page_info_index: usize,
690    page_iter: Option<RangeIter<'a>>,
691}
692
693impl<'a> BitSetRangeIter<'a> {
694    fn new(set: &'a BitSet) -> BitSetRangeIter<'a> {
695        BitSetRangeIter {
696            set,
697            page_info_index: 0,
698            page_iter: BitSetRangeIter::<'a>::page_iter(set, 0),
699        }
700    }
701
702    fn move_to_next_page(&mut self) -> bool {
703        self.page_info_index += 1;
704        self.reset_page_iter();
705        self.page_iter.is_some()
706    }
707
708    fn reset_page_iter(&mut self) {
709        self.page_iter = BitSetRangeIter::<'a>::page_iter(self.set, self.page_info_index);
710    }
711
712    fn page_iter(set: &'a BitSet, page_info_index: usize) -> Option<RangeIter<'a>> {
713        set.page_map
714            .get(page_info_index)
715            .map(|pi| pi.index as usize)
716            .and_then(|index| set.pages.get(index))
717            .map(|p| p.iter_ranges())
718    }
719
720    fn next_range(&mut self) -> Option<RangeInclusive<u32>> {
721        // TODO(garretrieger): don't recompute page start on each call.
722        let page = self.set.page_map.get(self.page_info_index)?;
723        let page_start = BitSet::major_start(page.major_value);
724        self.page_iter
725            .as_mut()?
726            .next()
727            .map(|r| (r.start() + page_start)..=(r.end() + page_start))
728    }
729}
730
731impl Iterator for BitSetRangeIter<'_> {
732    type Item = RangeInclusive<u32>;
733
734    fn next(&mut self) -> Option<Self::Item> {
735        self.page_iter.as_ref()?;
736        let mut current_range = self.next_range();
737        loop {
738            let page = self.set.page_map.get(self.page_info_index)?;
739            let page_end = BitSet::major_end(page.major_value);
740
741            let Some(range) = current_range.clone() else {
742                // The current page has no more ranges, but there may be more pages.
743                if !self.move_to_next_page() {
744                    return None;
745                }
746                current_range = self.next_range();
747                continue;
748            };
749
750            if *range.end() != page_end {
751                break;
752            }
753
754            // The range goes right to the end of the current page and may continue into it.
755            self.move_to_next_page();
756            let continuation = self.next_range();
757            let Some(continuation) = continuation else {
758                break;
759            };
760
761            if *continuation.start() == *range.end() + 1 {
762                current_range = Some(*range.start()..=*continuation.end());
763                continue;
764            }
765
766            // Continuation range does not touch the current range, ignore it and return what we have.
767            // Since we consumed an item from the new page iterator, reset it.
768            self.reset_page_iter();
769            break;
770        }
771
772        current_range
773    }
774}
775
776#[cfg(test)]
777mod test {
778    use super::*;
779    use std::collections::HashSet;
780
781    impl FromIterator<u32> for BitSet {
782        fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
783            let mut out = BitSet::empty();
784            out.extend(iter);
785            out
786        }
787    }
788
789    #[test]
790    fn len() {
791        let bitset = BitSet::empty();
792        assert_eq!(bitset.len(), 0);
793        assert!(bitset.is_empty());
794    }
795
796    #[test]
797    fn iter() {
798        let mut bitset = BitSet::empty();
799        bitset.insert(3);
800        bitset.insert(8);
801        bitset.insert(534);
802        bitset.insert(700);
803        bitset.insert(10000);
804        bitset.insert(10001);
805        bitset.insert(10002);
806
807        let v: Vec<u32> = bitset.iter().collect();
808        assert_eq!(v, vec![3, 8, 534, 700, 10000, 10001, 10002]);
809    }
810
811    fn check_iter_ranges(ranges: Vec<RangeInclusive<u32>>) {
812        let mut set = BitSet::empty();
813        for range in ranges.iter() {
814            set.insert_range(*range.start()..=*range.end());
815        }
816        let items: Vec<_> = set.iter_ranges().collect();
817        assert_eq!(items, ranges);
818    }
819
820    #[test]
821    fn iter_ranges() {
822        check_iter_ranges(vec![0..=0]);
823        check_iter_ranges(vec![4578..=4578]);
824        check_iter_ranges(vec![0..=10, 4578..=4583]);
825        check_iter_ranges(vec![0..=700]);
826        check_iter_ranges(vec![353..=737]);
827
828        check_iter_ranges(vec![u32::MAX..=u32::MAX]);
829        check_iter_ranges(vec![(u32::MAX - 10)..=u32::MAX]);
830        check_iter_ranges(vec![0..=5, (u32::MAX - 5)..=u32::MAX]);
831
832        check_iter_ranges(vec![0..=511, 513..=517]);
833        check_iter_ranges(vec![512..=1023, 1025..=1027]);
834
835        check_iter_ranges(vec![1792..=2650]);
836    }
837
838    #[test]
839    fn iter_ranges_zero_pages() {
840        let mut set = BitSet::empty();
841
842        set.insert(1000);
843        set.insert_range(300..=511);
844        set.remove(1000);
845
846        let items: Vec<_> = set.iter_ranges().collect();
847        assert_eq!(items, vec![300..=511]);
848    }
849
850    #[test]
851    fn iter_backwards() {
852        let mut bitset = BitSet::empty();
853
854        bitset.insert_range(1..=6);
855        {
856            let mut it = bitset.iter();
857            assert_eq!(Some(1), it.next());
858            assert_eq!(Some(6), it.next_back());
859            assert_eq!(Some(5), it.next_back());
860            assert_eq!(Some(2), it.next());
861            assert_eq!(Some(3), it.next());
862            assert_eq!(Some(4), it.next());
863            assert_eq!(None, it.next());
864            assert_eq!(None, it.next_back());
865        }
866
867        bitset.insert_range(700..=701);
868        {
869            let mut it = bitset.iter();
870            assert_eq!(Some(1), it.next());
871            assert_eq!(Some(701), it.next_back());
872            assert_eq!(Some(700), it.next_back());
873            assert_eq!(Some(6), it.next_back());
874            assert_eq!(Some(5), it.next_back());
875            assert_eq!(Some(2), it.next());
876            assert_eq!(Some(3), it.next());
877            assert_eq!(Some(4), it.next());
878            assert_eq!(None, it.next());
879            assert_eq!(None, it.next_back());
880        }
881
882        let v: Vec<u32> = bitset.iter().rev().collect();
883        assert_eq!(vec![701, 700, 6, 5, 4, 3, 2, 1], v);
884    }
885
886    #[test]
887    fn iter_from() {
888        let mut bitset = BitSet::empty();
889        bitset.extend([5, 7, 10, 1250, 1300, 3001]);
890
891        assert_eq!(
892            bitset.iter_from(0).collect::<Vec<u32>>(),
893            vec![5, 7, 10, 1250, 1300, 3001]
894        );
895
896        assert_eq!(
897            bitset.iter_from(4).collect::<Vec<u32>>(),
898            vec![5, 7, 10, 1250, 1300, 3001]
899        );
900        assert_eq!(
901            bitset.iter_from(5).collect::<Vec<u32>>(),
902            vec![5, 7, 10, 1250, 1300, 3001]
903        );
904        assert_eq!(
905            bitset.iter_from(6).collect::<Vec<u32>>(),
906            vec![7, 10, 1250, 1300, 3001]
907        );
908
909        assert_eq!(
910            bitset.iter_from(10).collect::<Vec<u32>>(),
911            vec![10, 1250, 1300, 3001]
912        );
913
914        assert_eq!(
915            bitset.iter_from(700).collect::<Vec<u32>>(),
916            vec![1250, 1300, 3001]
917        );
918
919        assert_eq!(
920            bitset.iter_from(1250).collect::<Vec<u32>>(),
921            vec![1250, 1300, 3001]
922        );
923        assert_eq!(
924            bitset.iter_from(1251).collect::<Vec<u32>>(),
925            vec![1300, 3001]
926        );
927
928        assert_eq!(bitset.iter_from(3000).collect::<Vec<u32>>(), vec![3001]);
929        assert_eq!(bitset.iter_from(3001).collect::<Vec<u32>>(), vec![3001]);
930        assert_eq!(bitset.iter_from(3002).count(), 0);
931        assert_eq!(bitset.iter_from(5000).count(), 0);
932        assert_eq!(bitset.iter_from(u32::MAX).count(), 0);
933
934        bitset.insert(u32::MAX);
935        assert_eq!(
936            bitset.iter_from(u32::MAX).collect::<Vec<u32>>(),
937            vec![u32::MAX]
938        );
939        assert_eq!(
940            bitset.iter_from(u32::MAX - 1).collect::<Vec<u32>>(),
941            vec![u32::MAX]
942        );
943
944        let mut bitset = BitSet::empty();
945        bitset.extend([510, 511, 512]);
946
947        assert_eq!(
948            bitset.iter_from(509).collect::<Vec<u32>>(),
949            vec![510, 511, 512]
950        );
951        assert_eq!(
952            bitset.iter_from(510).collect::<Vec<u32>>(),
953            vec![510, 511, 512]
954        );
955        assert_eq!(bitset.iter_from(511).collect::<Vec<u32>>(), vec![511, 512]);
956        assert_eq!(bitset.iter_from(512).collect::<Vec<u32>>(), vec![512]);
957        assert!(bitset.iter_from(513).collect::<Vec<u32>>().is_empty());
958    }
959
960    #[test]
961    fn extend() {
962        let values = [3, 8, 534, 700, 10000, 10001, 10002];
963        let values_unsorted = [10000, 3, 534, 700, 8, 10001, 10002];
964
965        let mut s1 = BitSet::empty();
966        let mut s2 = BitSet::empty();
967        let mut s3 = BitSet::empty();
968        let mut s4 = BitSet::empty();
969        assert_eq!(s1.len(), 0);
970
971        s1.extend(values.iter().copied());
972        s2.extend_unsorted(values.iter().copied());
973        s3.extend(values_unsorted.iter().copied());
974        s4.extend_unsorted(values_unsorted.iter().copied());
975
976        assert_eq!(s1.iter().collect::<Vec<u32>>(), values);
977        assert_eq!(s2.iter().collect::<Vec<u32>>(), values);
978        assert_eq!(s3.iter().collect::<Vec<u32>>(), values);
979        assert_eq!(s4.iter().collect::<Vec<u32>>(), values);
980
981        assert_eq!(s1.len(), 7);
982        assert_eq!(s2.len(), 7);
983        assert_eq!(s3.len(), 7);
984        assert_eq!(s4.len(), 7);
985    }
986
987    #[test]
988    fn insert_unordered() {
989        let mut bitset = BitSet::empty();
990
991        assert!(!bitset.contains(0));
992        assert!(!bitset.contains(768));
993        assert!(!bitset.contains(1678));
994
995        assert!(bitset.insert(0));
996        assert!(bitset.insert(1678));
997        assert!(bitset.insert(768));
998
999        assert!(bitset.contains(0));
1000        assert!(bitset.contains(768));
1001        assert!(bitset.contains(1678));
1002
1003        assert!(!bitset.contains(1));
1004        assert!(!bitset.contains(769));
1005        assert!(!bitset.contains(1679));
1006
1007        assert_eq!(bitset.len(), 3);
1008    }
1009
1010    #[test]
1011    fn remove() {
1012        let mut bitset = BitSet::empty();
1013
1014        assert!(bitset.insert(0));
1015        assert!(bitset.insert(511));
1016        assert!(bitset.insert(512));
1017        assert!(bitset.insert(1678));
1018        assert!(bitset.insert(768));
1019
1020        assert_eq!(bitset.len(), 5);
1021
1022        assert!(!bitset.remove(12));
1023        assert!(bitset.remove(511));
1024        assert!(bitset.remove(512));
1025        assert!(!bitset.remove(512));
1026
1027        assert_eq!(bitset.len(), 3);
1028        assert!(bitset.contains(0));
1029        assert!(!bitset.contains(511));
1030        assert!(!bitset.contains(512));
1031    }
1032
1033    #[test]
1034    fn remove_all() {
1035        let mut bitset = BitSet::empty();
1036        bitset.extend([5, 7, 11, 18, 620, 2000]);
1037
1038        assert_eq!(bitset.len(), 6);
1039
1040        bitset.remove_all([7, 11, 13, 18, 620]);
1041        assert_eq!(bitset.len(), 2);
1042        assert_eq!(bitset.iter().collect::<Vec<u32>>(), vec![5, 2000]);
1043    }
1044
1045    #[test]
1046    fn remove_range() {
1047        let mut bitset = BitSet::empty();
1048        bitset.extend([5, 7, 11, 18, 511, 620, 1023, 1024, 1200]);
1049        assert_eq!(bitset.len(), 9);
1050        bitset.remove_range(7..=620);
1051        assert_eq!(bitset.len(), 4);
1052        assert_eq!(
1053            bitset.iter().collect::<Vec<u32>>(),
1054            vec![5, 1023, 1024, 1200]
1055        );
1056
1057        let mut bitset = BitSet::empty();
1058        bitset.extend([5, 7, 11, 18, 511, 620, 1023, 1024, 1200]);
1059        bitset.remove_range(7..=1024);
1060        assert_eq!(bitset.len(), 2);
1061        assert_eq!(bitset.iter().collect::<Vec<u32>>(), vec![5, 1200]);
1062
1063        let mut bitset = BitSet::empty();
1064        bitset.extend([5, 7, 11, 18, 511, 620, 1023, 1024, 1200]);
1065        bitset.remove_range(2000..=2100);
1066        assert_eq!(bitset.len(), 9);
1067        assert_eq!(
1068            bitset.iter().collect::<Vec<u32>>(),
1069            vec![5, 7, 11, 18, 511, 620, 1023, 1024, 1200]
1070        );
1071
1072        // Remove all from one page
1073        let mut bitset = BitSet::empty();
1074        bitset.extend([1001, 1002, 1003, 1004]);
1075        bitset.remove_range(1002..=1003);
1076        assert!(bitset.contains(1001));
1077        assert!(!bitset.contains(1002));
1078        assert!(!bitset.contains(1003));
1079        assert!(bitset.contains(1004));
1080
1081        bitset.remove_range(100..=200);
1082        assert!(bitset.contains(1001));
1083        assert!(!bitset.contains(1002));
1084        assert!(!bitset.contains(1003));
1085        assert!(bitset.contains(1004));
1086
1087        bitset.remove_range(100..=1001);
1088        assert!(!bitset.contains(1001));
1089        assert!(!bitset.contains(1002));
1090        assert!(!bitset.contains(1003));
1091        assert!(bitset.contains(1004));
1092    }
1093
1094    #[test]
1095    fn remove_range_boundary() {
1096        let mut set = BitSet::empty();
1097
1098        set.remove_range(u32::MAX - 10..=u32::MAX);
1099        assert!(!set.contains(u32::MAX));
1100        set.insert_range(u32::MAX - 10..=u32::MAX);
1101        assert!(set.contains(u32::MAX));
1102        set.remove_range(u32::MAX - 10..=u32::MAX);
1103        assert!(!set.contains(u32::MAX));
1104
1105        set.remove_range(0..=10);
1106        assert!(!set.contains(0));
1107        set.insert_range(0..=10);
1108        assert!(set.contains(0));
1109        set.remove_range(0..=10);
1110        assert!(!set.contains(0));
1111    }
1112
1113    #[test]
1114    fn remove_to_empty_page() {
1115        let mut bitset = BitSet::empty();
1116
1117        bitset.insert(793);
1118        bitset.insert(43);
1119        bitset.remove(793);
1120
1121        assert!(bitset.contains(43));
1122        assert!(!bitset.contains(793));
1123        assert_eq!(bitset.len(), 1);
1124    }
1125
1126    #[test]
1127    fn insert_max_value() {
1128        let mut bitset = BitSet::empty();
1129        assert!(!bitset.contains(u32::MAX));
1130        assert!(bitset.insert(u32::MAX));
1131        assert!(bitset.contains(u32::MAX));
1132        assert!(!bitset.contains(u32::MAX - 1));
1133        assert_eq!(bitset.len(), 1);
1134    }
1135
1136    fn check_process<A, B, C, Op>(a: A, b: B, expected: C, op: Op)
1137    where
1138        A: IntoIterator<Item = u32>,
1139        B: IntoIterator<Item = u32>,
1140        C: IntoIterator<Item = u32>,
1141        Op: Fn(&mut BitSet, &BitSet),
1142    {
1143        let mut result = BitSet::from_iter(a);
1144        let b_set = BitSet::from_iter(b);
1145        let expected_set = BitSet::from_iter(expected);
1146        result.len();
1147
1148        op(&mut result, &b_set);
1149        assert_eq!(result, expected_set);
1150        assert_eq!(result.len(), expected_set.len());
1151    }
1152
1153    #[test]
1154    fn union() {
1155        check_process([], [5], [5], |a, b| a.union(b));
1156        check_process([128], [5], [128, 5], |a, b| a.union(b));
1157        check_process([128], [], [128], |a, b| a.union(b));
1158        check_process([1280], [5], [5, 1280], |a, b| a.union(b));
1159        check_process([5], [1280], [5, 1280], |a, b| a.union(b));
1160    }
1161
1162    #[test]
1163    fn intersect() {
1164        check_process([], [5], [], |a, b| a.intersect(b));
1165        check_process([5], [], [], |a, b| a.intersect(b));
1166        check_process([1, 5, 9], [5, 7], [5], |a, b| a.intersect(b));
1167        check_process([1, 1000, 2000], [1000], [1000], |a, b| a.intersect(b));
1168        check_process([1000], [1, 1000, 2000], [1000], |a, b| a.intersect(b));
1169        check_process([1, 1000, 2000], [1000, 5000], [1000], |a, b| a.intersect(b));
1170    }
1171
1172    #[test]
1173    fn subtract() {
1174        check_process([], [5], [], |a, b| a.subtract(b));
1175        check_process([5], [], [5], |a, b| a.subtract(b));
1176        check_process([5, 1000], [1000], [5], |a, b| a.subtract(b));
1177        check_process([5, 1000], [5], [1000], |a, b| a.subtract(b));
1178    }
1179
1180    #[test]
1181    fn reversed_subtract() {
1182        check_process([], [5], [5], |a, b| a.reversed_subtract(b));
1183        check_process([5], [], [], |a, b| a.reversed_subtract(b));
1184        check_process([1000], [5, 1000], [5], |a, b| a.reversed_subtract(b));
1185        check_process([5], [5, 1000], [1000], |a, b| a.reversed_subtract(b));
1186    }
1187
1188    fn set_for_range(first: u32, last: u32) -> BitSet {
1189        let mut set = BitSet::empty();
1190        for i in first..=last {
1191            set.insert(i);
1192        }
1193        set
1194    }
1195
1196    #[test]
1197    fn insert_range() {
1198        for range in [
1199            (0, 0),
1200            (0, 364),
1201            (0, 511),
1202            (512, 1023),
1203            (0, 1023),
1204            (364, 700),
1205            (364, 6000),
1206        ] {
1207            let mut set = BitSet::empty();
1208            set.len();
1209            set.insert_range(range.0..=range.1);
1210            assert_eq!(set, set_for_range(range.0, range.1), "{range:?}");
1211            assert_eq!(set.len(), (range.1 - range.0 + 1) as u64, "{range:?}");
1212        }
1213    }
1214
1215    #[test]
1216    fn insert_range_on_existing() {
1217        let mut set = BitSet::empty();
1218        set.insert(700);
1219        set.insert(2000);
1220        set.insert_range(32..=4000);
1221        assert_eq!(set, set_for_range(32, 4000));
1222        assert_eq!(set.len(), 4000 - 32 + 1);
1223    }
1224
1225    #[test]
1226    fn insert_range_max() {
1227        let mut set = BitSet::empty();
1228        set.insert_range(u32::MAX..=u32::MAX);
1229        assert!(set.contains(u32::MAX));
1230        assert_eq!(set.len(), 1);
1231    }
1232
1233    #[test]
1234    fn clear() {
1235        let mut bitset = BitSet::empty();
1236
1237        bitset.insert(13);
1238        bitset.insert(670);
1239        assert!(bitset.contains(13));
1240        assert!(bitset.contains(670));
1241
1242        bitset.clear();
1243        assert!(!bitset.contains(13));
1244        assert!(!bitset.contains(670));
1245        assert_eq!(bitset.len(), 0);
1246    }
1247
1248    #[test]
1249    #[allow(clippy::mutable_key_type)]
1250    fn hash_and_eq() {
1251        let mut bitset1 = BitSet::empty();
1252        let mut bitset2 = BitSet::empty();
1253        let mut bitset3 = BitSet::empty();
1254        let mut bitset4 = BitSet::empty();
1255
1256        bitset1.insert(43);
1257        bitset1.insert(793);
1258
1259        bitset2.insert(793);
1260        bitset2.insert(43);
1261        bitset2.len();
1262
1263        bitset3.insert(43);
1264        bitset3.insert(793);
1265        bitset3.insert(794);
1266
1267        bitset4.insert(0);
1268
1269        assert_eq!(BitSet::empty(), BitSet::empty());
1270        assert_eq!(bitset1, bitset2);
1271        assert_ne!(bitset1, bitset3);
1272        assert_ne!(bitset2, bitset3);
1273        assert_eq!(bitset4, bitset4);
1274
1275        let set = HashSet::from([bitset1]);
1276        assert!(set.contains(&bitset2));
1277        assert!(!set.contains(&bitset3));
1278    }
1279
1280    #[test]
1281    #[allow(clippy::mutable_key_type)]
1282    fn hash_and_eq_with_empty_pages() {
1283        let mut bitset1 = BitSet::empty();
1284        let mut bitset2 = BitSet::empty();
1285        let mut bitset3 = BitSet::empty();
1286
1287        bitset1.insert(43);
1288
1289        bitset2.insert(793);
1290        bitset2.insert(43);
1291        bitset2.remove(793);
1292
1293        bitset3.insert(43);
1294        bitset3.insert(793);
1295
1296        assert_eq!(bitset1, bitset2);
1297        assert_ne!(bitset1, bitset3);
1298
1299        let set = HashSet::from([bitset1]);
1300        assert!(set.contains(&bitset2));
1301    }
1302
1303    #[test]
1304    fn ordering() {
1305        macro_rules! assert_ord {
1306            ($lhs:expr, $rhs:expr, $ord:path) => {
1307                assert_eq!(
1308                    BitSet::from_iter($lhs).cmp(&BitSet::from_iter($rhs)),
1309                    $ord,
1310                    "{:?}, {:?}",
1311                    $lhs,
1312                    $rhs
1313                )
1314            };
1315        }
1316
1317        const EMPTY: [u32; 0] = [];
1318        assert_ord!(EMPTY, EMPTY, Ordering::Equal);
1319        assert_ord!(EMPTY, [0], Ordering::Less);
1320        assert_ord!([0], [0], Ordering::Equal);
1321        assert_ord!([0, 1, 2], [1, 2, 3], Ordering::Less);
1322        assert_ord!([0, 1, 4], [1, 2, 3], Ordering::Less);
1323        assert_ord!([1, 2, 3], [0, 2, 4], Ordering::Greater);
1324        assert_ord!([5, 4, 0], [1, 2, 3], Ordering::Less); // out of order
1325        assert_ord!([1, 2, 3], [1, 2, 3, 4], Ordering::Less); // out of order
1326        assert_ord!([2, 3, 4], [1, 2, 3, 4, 5], Ordering::Greater); // out of order
1327
1328        assert_ord!([1000, 2000, 3000], [1000, 2000, 3000, 4000], Ordering::Less); // out of order
1329        assert_ord!([1000, 1001,], [1000, 2000], Ordering::Less); // out of order
1330        assert_ord!(
1331            [2000, 3000, 4000],
1332            [1000, 2000, 3000, 4000, 5000],
1333            Ordering::Greater
1334        ); // out of order
1335    }
1336}