litemap/store/vec_impl.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::*;
6use alloc::vec::Vec;
7
/// Function-pointer type that turns a borrowed `(K, V)` pair into a pair of borrows.
type MapF<K, V> = fn(&(K, V)) -> (&K, &V);

/// Splits a reference to a `(key, value)` tuple into separate references to
/// the key and to the value.
#[inline]
fn map_f<K, V>(input: &(K, V)) -> (&K, &V) {
    let (key, value) = input;
    (key, value)
}
14
/// Function-pointer type that turns a mutably borrowed `(K, V)` pair into a
/// shared borrow of the key and a mutable borrow of the value.
type MapFMut<K, V> = fn(&mut (K, V)) -> (&K, &mut V);

/// Splits a mutable reference to a `(key, value)` tuple into a shared reference
/// to the key and a mutable reference to the value.
#[inline]
fn map_f_mut<K, V>(input: &mut (K, V)) -> (&K, &mut V) {
    let (key, value) = input;
    // Downgrade the key borrow to shared; only the value stays mutable.
    (&*key, value)
}
21
22impl<K, V> StoreConstEmpty<K, V> for Vec<(K, V)> {
23 const EMPTY: Vec<(K, V)> = Vec::new();
24}
25
26impl<K, V> Store<K, V> for Vec<(K, V)> {
27 #[inline]
28 fn lm_len(&self) -> usize {
29 self.as_slice().len()
30 }
31
32 #[inline]
33 fn lm_is_empty(&self) -> bool {
34 self.as_slice().is_empty()
35 }
36
37 #[inline]
38 fn lm_get(&self, index: usize) -> Option<(&K, &V)> {
39 self.as_slice().get(index).map(map_f)
40 }
41
42 #[inline]
43 fn lm_last(&self) -> Option<(&K, &V)> {
44 self.as_slice().last().map(map_f)
45 }
46
47 #[inline]
48 fn lm_binary_search_by<F>(&self, mut cmp: F) -> Result<usize, usize>
49 where
50 F: FnMut(&K) -> Ordering,
51 {
52 self.as_slice().binary_search_by(|(k, _)| cmp(k))
53 }
54}
55
56impl<K, V> StoreSlice<K, V> for Vec<(K, V)> {
57 type Slice = [(K, V)];
58
59 fn lm_get_range(&self, range: Range<usize>) -> Option<&Self::Slice> {
60 self.get(range)
61 }
62}
63
64impl<K, V> StoreMut<K, V> for Vec<(K, V)> {
65 #[inline]
66 fn lm_with_capacity(capacity: usize) -> Self {
67 Self::with_capacity(capacity)
68 }
69
70 #[inline]
71 fn lm_reserve(&mut self, additional: usize) {
72 self.reserve(additional)
73 }
74
75 #[inline]
76 fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> {
77 self.as_mut_slice().get_mut(index).map(map_f_mut)
78 }
79
80 #[inline]
81 fn lm_push(&mut self, key: K, value: V) {
82 self.push((key, value))
83 }
84
85 #[inline]
86 fn lm_insert(&mut self, index: usize, key: K, value: V) {
87 self.insert(index, (key, value))
88 }
89
90 #[inline]
91 fn lm_remove(&mut self, index: usize) -> (K, V) {
92 self.remove(index)
93 }
94
95 #[inline]
96 fn lm_clear(&mut self) {
97 self.clear()
98 }
99}
100
101impl<K: Ord, V> StoreBulkMut<K, V> for Vec<(K, V)> {
102 #[inline]
103 fn lm_retain<F>(&mut self, mut predicate: F)
104 where
105 F: FnMut(&K, &V) -> bool,
106 {
107 self.retain(|(k, v)| predicate(k, v))
108 }
109
110 /// Extends this store with items from an iterator.
111 ///
112 /// It uses a two-pass (sort + dedup) approach to avoid any potential quadratic costs.
113 ///
114 /// The asymptotic worst case complexity is O((n + m) log(n + m)), where `n`
115 /// is the number of elements already in `self` and `m` is the number of elements
116 /// in the iterator. The best case complexity is O(m), when the input iterator is
117 /// already sorted, keys aren't duplicated and all keys sort after the existing ones.
118 #[inline]
119 fn lm_extend<I>(&mut self, iter: I)
120 where
121 I: IntoIterator<Item = (K, V)>,
122 K: Ord,
123 {
124 // First N elements in self that are already sorted and not duplicated.
125 let mut sorted_len = self.len();
126 // Use Vec::extend as it has a specialized code for slice and trusted-len iterators.
127 self.extend(iter);
128 // `sorted_len` is the length of the sorted run before extension
129 // window slice `w` is guaranteed to have a length of 2.
130 #[allow(clippy::indexing_slicing)]
131 {
132 // Count new elements that are sorted and non-duplicated.
133 // Starting from the end of the existing sorted run, if any.
134 // Thus, start the slice at sorted_len.saturating_sub(1).
135 sorted_len += self[sorted_len.saturating_sub(1)..]
136 .windows(2)
137 .take_while(|w| w[0].0 < w[1].0)
138 .count();
139 }
140 // `windows(2)` only yields `slice len - 1` times, or none if the slice is empty.
141 // In other words, the first extended element of the slice won't be counted as sorted
142 // if self was initially empty (sorted_len == 0). We adjust this by adding 1 if the
143 // original slice was empty but became not empty after extend.
144 sorted_len += (sorted_len == 0 && !self.is_empty()) as usize;
145
146 // If everything was in order, we're done
147 if sorted_len >= self.len() {
148 return;
149 }
150
151 // Use stable sort to keep relative order of duplicates.
152 self.sort_by(|a, b| a.0.cmp(&b.0));
153 // Deduplicate by keeping the last element of the run in the first slice.
154 let (dedup, _merged_dup) = partition_dedup_by(self);
155 sorted_len = dedup.len();
156 self.truncate(sorted_len);
157 }
158}
159
/// Compacts runs of consecutive entries with equal keys: the _last_ entry of each
/// run stays in the front part of the slice, every other entry of the run is moved
/// to the back.
///
/// Returns two slices. The first contains no consecutive repeated keys.
/// The second contains all the displaced duplicates in no specified order.
///
/// This is based on std::slice::partition_dedup_by (currently unstable) but retains the
/// _last_ element of the duplicate run in the first slice (instead of first).
#[inline]
#[allow(clippy::type_complexity)]
fn partition_dedup_by<K: Eq, V>(v: &mut [(K, V)]) -> (&mut [(K, V)], &mut [(K, V)]) {
    // The key comparison is user code and may panic, so the slice must be in a
    // valid state at every step. Only whole-element swaps are used: kept entries
    // drift to the front, rejected ones to the back, and the slice is split at
    // the end. The whole pass is `O(n)`.
    //
    // Worked example with (K, V) = (char, u8); `r` is the index being read and
    // `w` is the length of the deduplicated prefix (the next write position):
    //
    //   r=1 w=1  | a,0 | b,0 | b,1 | c,0 | d,0 | d,1 |
    //     v[r] differs from v[w-1]; r == w so nothing moves; w becomes 2.
    //   r=2 w=2  | a,0 | b,0 | b,1 | c,0 | d,0 | d,1 |
    //     v[r] duplicates v[w-1]; swap v[r] with v[w-1].
    //   r=3 w=2  | a,0 | b,1 | b,0 | c,0 | d,0 | d,1 |
    //     v[r] differs from v[w-1]; swap v[r] with v[w]; w becomes 3.
    //   r=4 w=3  | a,0 | b,1 | c,0 | b,0 | d,0 | d,1 |
    //     v[r] differs from v[w-1]; swap v[r] with v[w]; w becomes 4.
    //   r=5 w=4  | a,0 | b,1 | c,0 | d,0 | b,0 | d,1 |
    //     v[r] duplicates v[w-1]; swap v[r] with v[w-1].
    //   r=6 w=4  | a,0 | b,1 | c,0 | d,1 | b,0 | d,0 |
    //     r == len: done, split at w.

    let len = v.len();
    if len <= 1 {
        return (v, &mut []);
    }

    // The first entry is trivially part of the deduplicated prefix.
    let mut kept: usize = 1;

    for next in 1..len {
        // Invariant: 1 <= kept <= next < len. `kept` grows by at most one per
        // iteration while `next` grows by exactly one, so both indices below
        // are always in bounds.
        #[allow(clippy::indexing_slicing)]
        let repeats_previous = v[next].0 == v[kept - 1].0;

        if repeats_previous {
            // Later entry wins: it replaces the current tail of the prefix and
            // the older one is parked at `next`, i.e. outside the prefix.
            v.swap(next, kept - 1);
        } else {
            // New key: move it to the end of the prefix unless it is already there.
            if kept != next {
                v.swap(next, kept);
            }
            kept += 1;
        }
    }

    v.split_at_mut(kept)
}
255
256impl<K: Ord, V> StoreFromIterable<K, V> for Vec<(K, V)> {
257 fn lm_sort_from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
258 let mut v = Self::new();
259 v.lm_extend(iter);
260 v
261 }
262}
263
264impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for Vec<(K, V)> {
265 type KeyValueIter = core::iter::Map<core::slice::Iter<'a, (K, V)>, MapF<K, V>>;
266
267 #[inline]
268 fn lm_iter(&'a self) -> Self::KeyValueIter {
269 self.as_slice().iter().map(map_f)
270 }
271}
272
273impl<'a, K: 'a, V: 'a> StoreIterableMut<'a, K, V> for Vec<(K, V)> {
274 type KeyValueIterMut = core::iter::Map<core::slice::IterMut<'a, (K, V)>, MapFMut<K, V>>;
275
276 #[inline]
277 fn lm_iter_mut(&'a mut self) -> Self::KeyValueIterMut {
278 self.as_mut_slice().iter_mut().map(map_f_mut)
279 }
280}
281
282impl<K, V> StoreIntoIterator<K, V> for Vec<(K, V)> {
283 type KeyValueIntoIter = alloc::vec::IntoIter<(K, V)>;
284
285 #[inline]
286 fn lm_into_iter(self) -> Self::KeyValueIntoIter {
287 IntoIterator::into_iter(self)
288 }
289
290 #[inline]
291 fn lm_extend_end(&mut self, other: Self) {
292 self.extend(other)
293 }
294
295 #[inline]
296 fn lm_extend_start(&mut self, other: Self) {
297 self.splice(0..0, other);
298 }
299}
300
// Empty impl: no items are defined here, so whatever `StoreFromIterator` provides
// for `Vec<(K, V)>` comes from the trait's own definition (not visible in this file).
impl<K, V> StoreFromIterator<K, V> for Vec<(K, V)> {}
302
/// Runs the crate's shared store test routine against the `Vec`-backed store.
#[test]
fn test_vec_impl() {
    type Backing = Vec<(u32, u64)>;
    crate::testing::check_store_full::<Backing>();
}