phf_generator/
lib.rs
1#![doc(html_root_url = "https://docs.rs/phf_generator/0.11")]
6use phf_shared::{HashKey, Hashes, PhfHash};
7use rand::distributions::Standard;
8use rand::rngs::SmallRng;
9use rand::{Rng, SeedableRng};
10
11const DEFAULT_LAMBDA: usize = 5;
12
13const FIXED_SEED: u64 = 1234567890;
14
15pub struct HashState {
16 pub key: HashKey,
17 pub disps: Vec<(u32, u32)>,
18 pub map: Vec<usize>,
19}
20
21pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
22 generate_hash_with_hash_fn(entries, phf_shared::hash)
23}
24
25pub fn generate_hash_with_hash_fn<T, F>(entries: &[T], hash_fn: F) -> HashState
26where
27 F: Fn(&T, &HashKey) -> Hashes,
28{
29 let mut generator = Generator::new(entries.len());
30
31 SmallRng::seed_from_u64(FIXED_SEED)
32 .sample_iter(Standard)
33 .find(|key| {
34 let hashes = entries.iter().map(|entry| hash_fn(entry, key));
35 generator.reset(hashes);
36
37 generator.try_generate_hash()
38 })
39 .map(|key| HashState {
40 key,
41 disps: generator.disps,
42 map: generator.map.into_iter().map(|i| i.unwrap()).collect(),
43 })
44 .expect("failed to solve PHF")
45}
46
47struct Bucket {
48 idx: usize,
49 keys: Vec<usize>,
50}
51
52struct Generator {
53 hashes: Vec<Hashes>,
54 buckets: Vec<Bucket>,
55 disps: Vec<(u32, u32)>,
56 map: Vec<Option<usize>>,
57 try_map: Vec<u64>,
58}
59
60impl Generator {
61 fn new(table_len: usize) -> Self {
62 let hashes = Vec::with_capacity(table_len);
63
64 let buckets_len = (table_len + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
65 let buckets: Vec<_> = (0..buckets_len)
66 .map(|i| Bucket {
67 idx: i,
68 keys: vec![],
69 })
70 .collect();
71 let disps = vec![(0u32, 0u32); buckets_len];
72
73 let map = vec![None; table_len];
74 let try_map = vec![0u64; table_len];
75
76 Self {
77 hashes,
78 buckets,
79 disps,
80 map,
81 try_map,
82 }
83 }
84
85 fn reset<I>(&mut self, hashes: I)
86 where
87 I: Iterator<Item = Hashes>,
88 {
89 self.buckets.iter_mut().for_each(|b| b.keys.clear());
90 self.buckets.sort_by_key(|b| b.idx);
91 self.disps.iter_mut().for_each(|d| *d = (0, 0));
92 self.map.iter_mut().for_each(|m| *m = None);
93 self.try_map.iter_mut().for_each(|m| *m = 0);
94
95 self.hashes.clear();
96 self.hashes.extend(hashes);
97 }
98
99 fn try_generate_hash(&mut self) -> bool {
100 let buckets_len = self.buckets.len() as u32;
101 for (i, hash) in self.hashes.iter().enumerate() {
102 self.buckets[(hash.g % buckets_len) as usize].keys.push(i);
103 }
104
105 self.buckets
107 .sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
108
109 let table_len = self.hashes.len();
110
111 let mut generation = 0u64;
119
120 let mut values_to_add = vec![];
124
125 'buckets: for bucket in &self.buckets {
126 for d1 in 0..(table_len as u32) {
127 'disps: for d2 in 0..(table_len as u32) {
128 values_to_add.clear();
129 generation += 1;
130
131 for &key in &bucket.keys {
132 let idx =
133 (phf_shared::displace(self.hashes[key].f1, self.hashes[key].f2, d1, d2)
134 % (table_len as u32)) as usize;
135 if self.map[idx].is_some() || self.try_map[idx] == generation {
136 continue 'disps;
137 }
138 self.try_map[idx] = generation;
139 values_to_add.push((idx, key));
140 }
141
142 self.disps[bucket.idx] = (d1, d2);
144 for &(idx, key) in &values_to_add {
145 self.map[idx] = Some(key);
146 }
147 continue 'buckets;
148 }
149 }
150
151 return false;
153 }
154 true
155 }
156}