litemap/store/
vec_impl.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::*;
6use alloc::vec::Vec;
7
/// Function-pointer type that turns a borrowed `(K, V)` entry into a pair of
/// borrows `(&K, &V)`. Used to name the mapper inside the shared-iterator type.
type MapF<K, V> = fn(&(K, V)) -> (&K, &V);
9
/// Splits a borrowed `(K, V)` entry into separate borrows of its key and its value.
#[inline]
fn map_f<K, V>(input: &(K, V)) -> (&K, &V) {
    // Destructuring through the reference yields `&K` and `&V` directly.
    let (key, value) = input;
    (key, value)
}
14
/// Function-pointer type that turns a mutably borrowed `(K, V)` entry into
/// `(&K, &mut V)`: the key stays read-only while the value is writable.
/// Used to name the mapper inside the mutable-iterator type.
type MapFMut<K, V> = fn(&mut (K, V)) -> (&K, &mut V);
16
/// Splits a mutably borrowed `(K, V)` entry into a shared borrow of the key
/// and a mutable borrow of the value.
#[inline]
fn map_f_mut<K, V>(input: &mut (K, V)) -> (&K, &mut V) {
    let (key, value) = input;
    // Downgrade the key borrow to shared so callers cannot modify it.
    (&*key, value)
}
21
22impl<K, V> StoreConstEmpty<K, V> for Vec<(K, V)> {
23    const EMPTY: Vec<(K, V)> = Vec::new();
24}
25
26impl<K, V> Store<K, V> for Vec<(K, V)> {
27    #[inline]
28    fn lm_len(&self) -> usize {
29        self.as_slice().len()
30    }
31
32    #[inline]
33    fn lm_is_empty(&self) -> bool {
34        self.as_slice().is_empty()
35    }
36
37    #[inline]
38    fn lm_get(&self, index: usize) -> Option<(&K, &V)> {
39        self.as_slice().get(index).map(map_f)
40    }
41
42    #[inline]
43    fn lm_last(&self) -> Option<(&K, &V)> {
44        self.as_slice().last().map(map_f)
45    }
46
47    #[inline]
48    fn lm_binary_search_by<F>(&self, mut cmp: F) -> Result<usize, usize>
49    where
50        F: FnMut(&K) -> Ordering,
51    {
52        self.as_slice().binary_search_by(|(k, _)| cmp(k))
53    }
54}
55
56impl<K, V> StoreSlice<K, V> for Vec<(K, V)> {
57    type Slice = [(K, V)];
58
59    fn lm_get_range(&self, range: Range<usize>) -> Option<&Self::Slice> {
60        self.get(range)
61    }
62}
63
64impl<K, V> StoreMut<K, V> for Vec<(K, V)> {
65    #[inline]
66    fn lm_with_capacity(capacity: usize) -> Self {
67        Self::with_capacity(capacity)
68    }
69
70    #[inline]
71    fn lm_reserve(&mut self, additional: usize) {
72        self.reserve(additional)
73    }
74
75    #[inline]
76    fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> {
77        self.as_mut_slice().get_mut(index).map(map_f_mut)
78    }
79
80    #[inline]
81    fn lm_push(&mut self, key: K, value: V) {
82        self.push((key, value))
83    }
84
85    #[inline]
86    fn lm_insert(&mut self, index: usize, key: K, value: V) {
87        self.insert(index, (key, value))
88    }
89
90    #[inline]
91    fn lm_remove(&mut self, index: usize) -> (K, V) {
92        self.remove(index)
93    }
94
95    #[inline]
96    fn lm_clear(&mut self) {
97        self.clear()
98    }
99}
100
101impl<K: Ord, V> StoreBulkMut<K, V> for Vec<(K, V)> {
102    #[inline]
103    fn lm_retain<F>(&mut self, mut predicate: F)
104    where
105        F: FnMut(&K, &V) -> bool,
106    {
107        self.retain(|(k, v)| predicate(k, v))
108    }
109
110    /// Extends this store with items from an iterator.
111    ///
112    /// It uses a two-pass (sort + dedup) approach to avoid any potential quadratic costs.
113    ///
114    /// The asymptotic worst case complexity is O((n + m) log(n + m)), where `n`
115    /// is the number of elements already in `self` and `m` is the number of elements
116    /// in the iterator. The best case complexity is O(m), when the input iterator is
117    /// already sorted, keys aren't duplicated and all keys sort after the existing ones.
118    #[inline]
119    fn lm_extend<I>(&mut self, iter: I)
120    where
121        I: IntoIterator<Item = (K, V)>,
122        K: Ord,
123    {
124        // First N elements in self that are already sorted and not duplicated.
125        let mut sorted_len = self.len();
126        // Use Vec::extend as it has a specialized code for slice and trusted-len iterators.
127        self.extend(iter);
128        // `sorted_len` is the length of the sorted run before extension
129        // window slice `w` is guaranteed to have a length of 2.
130        #[allow(clippy::indexing_slicing)]
131        {
132            // Count new elements that are sorted and non-duplicated.
133            // Starting from the end of the existing sorted run, if any.
134            // Thus, start the slice at sorted_len.saturating_sub(1).
135            sorted_len += self[sorted_len.saturating_sub(1)..]
136                .windows(2)
137                .take_while(|w| w[0].0 < w[1].0)
138                .count();
139        }
140        // `windows(2)` only yields `slice len - 1` times, or none if the slice is empty.
141        // In other words, the first extended element of the slice won't be counted as sorted
142        // if self was initially empty (sorted_len == 0). We adjust this by adding 1 if the
143        // original slice was empty but became not empty after extend.
144        sorted_len += (sorted_len == 0 && !self.is_empty()) as usize;
145
146        // If everything was in order, we're done
147        if sorted_len >= self.len() {
148            return;
149        }
150
151        // Use stable sort to keep relative order of duplicates.
152        self.sort_by(|a, b| a.0.cmp(&b.0));
153        // Deduplicate by keeping the last element of the run in the first slice.
154        let (dedup, _merged_dup) = partition_dedup_by(self);
155        sorted_len = dedup.len();
156        self.truncate(sorted_len);
157    }
158}
159
/// Reorders `v` so that, for every run of consecutive entries with equal keys, only the
/// _last_ entry of the run stays in the front part; all other entries of the run are
/// moved to the back.
///
/// Returns two slices. The first contains no consecutive repeated keys.
/// The second contains all the duplicates in no specified order.
///
/// This is modelled on std::slice::partition_dedup_by (currently unstable), except that
/// the _last_ entry of each duplicate run is retained instead of the first.
#[inline]
#[allow(clippy::type_complexity)]
fn partition_dedup_by<K: Eq, V>(v: &mut [(K, V)]) -> (&mut [(K, V)], &mut [(K, V)]) {
    // The key comparison is user code and may panic, so the slice must stay valid at
    // every step. Elements are therefore only ever *swapped*, never moved out.
    // The whole pass is `O(n)`.
    //
    // `cursor` is the element being examined; `keep_end` is one past the last kept
    // element. Walk-through with (K, V) = (char, u8):
    //
    //   start               | a,0 | b,0 | b,1 | c,0 | d,0 | d,1 |   cursor=1 keep_end=1
    //   b != a, keep        | a,0 | b,0 | b,1 | c,0 | d,0 | d,1 |   cursor=2 keep_end=2
    //   b == b, replace     | a,0 | b,1 | b,0 | c,0 | d,0 | d,1 |   cursor=3 keep_end=2
    //   c != b, keep        | a,0 | b,1 | c,0 | b,0 | d,0 | d,1 |   cursor=4 keep_end=3
    //   d != c, keep        | a,0 | b,1 | c,0 | d,0 | b,0 | d,1 |   cursor=5 keep_end=4
    //   d == d, replace     | a,0 | b,1 | c,0 | d,1 | b,0 | d,0 |   cursor=6 keep_end=4
    //
    // The cursor has reached the end; split at `keep_end`.

    let total = v.len();
    if total < 2 {
        // Zero or one element: nothing can be a duplicate.
        return (v, &mut []);
    }

    // The first element is always kept, so both positions start at 1.
    let mut keep_end: usize = 1;

    for cursor in 1..total {
        // `cursor < total` by the loop bound, and `1 <= keep_end <= cursor` because
        // `keep_end` starts at 1 and grows at most once per iteration. Both indices
        // are therefore in bounds.
        #[allow(clippy::indexing_slicing)]
        let repeats_last_kept = v[cursor].0 == v[keep_end - 1].0;

        if repeats_last_kept {
            // Same key as the last kept entry: the newer entry takes its place and the
            // older one is pushed towards the tail.
            v.swap(cursor, keep_end - 1);
        } else {
            // New key: append it to the kept prefix (no move needed if already there).
            if keep_end != cursor {
                v.swap(cursor, keep_end);
            }
            keep_end += 1;
        }
    }

    v.split_at_mut(keep_end)
}
255
256impl<K: Ord, V> StoreFromIterable<K, V> for Vec<(K, V)> {
257    fn lm_sort_from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
258        let mut v = Self::new();
259        v.lm_extend(iter);
260        v
261    }
262}
263
264impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for Vec<(K, V)> {
265    type KeyValueIter = core::iter::Map<core::slice::Iter<'a, (K, V)>, MapF<K, V>>;
266
267    #[inline]
268    fn lm_iter(&'a self) -> Self::KeyValueIter {
269        self.as_slice().iter().map(map_f)
270    }
271}
272
273impl<'a, K: 'a, V: 'a> StoreIterableMut<'a, K, V> for Vec<(K, V)> {
274    type KeyValueIterMut = core::iter::Map<core::slice::IterMut<'a, (K, V)>, MapFMut<K, V>>;
275
276    #[inline]
277    fn lm_iter_mut(&'a mut self) -> Self::KeyValueIterMut {
278        self.as_mut_slice().iter_mut().map(map_f_mut)
279    }
280}
281
282impl<K, V> StoreIntoIterator<K, V> for Vec<(K, V)> {
283    type KeyValueIntoIter = alloc::vec::IntoIter<(K, V)>;
284
285    #[inline]
286    fn lm_into_iter(self) -> Self::KeyValueIntoIter {
287        IntoIterator::into_iter(self)
288    }
289
290    #[inline]
291    fn lm_extend_end(&mut self, other: Self) {
292        self.extend(other)
293    }
294
295    #[inline]
296    fn lm_extend_start(&mut self, other: Self) {
297        self.splice(0..0, other);
298    }
299}
300
// Opts `Vec<(K, V)>` into `StoreFromIterator`. The impl body is empty, so nothing is
// overridden here.
impl<K, V> StoreFromIterator<K, V> for Vec<(K, V)> {}
302
/// Runs the crate's full store check against the `Vec`-backed store.
#[test]
fn test_vec_impl() {
    type Backing = Vec<(u32, u64)>;
    crate::testing::check_store_full::<Backing>();
}