phf_generator/
lib.rs

1//! See [the `phf` crate's documentation][phf] for details.
2//!
3//! [phf]: https://docs.rs/phf
4
5#![doc(html_root_url = "https://docs.rs/phf_generator/0.11")]
6use phf_shared::{HashKey, Hashes, PhfHash};
7use rand::distributions::Standard;
8use rand::rngs::SmallRng;
9use rand::{Rng, SeedableRng};
10
11const DEFAULT_LAMBDA: usize = 5;
12
13const FIXED_SEED: u64 = 1234567890;
14
15pub struct HashState {
16    pub key: HashKey,
17    pub disps: Vec<(u32, u32)>,
18    pub map: Vec<usize>,
19}
20
21pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
22    generate_hash_with_hash_fn(entries, phf_shared::hash)
23}
24
25pub fn generate_hash_with_hash_fn<T, F>(entries: &[T], hash_fn: F) -> HashState
26where
27    F: Fn(&T, &HashKey) -> Hashes,
28{
29    let mut generator = Generator::new(entries.len());
30
31    SmallRng::seed_from_u64(FIXED_SEED)
32        .sample_iter(Standard)
33        .find(|key| {
34            let hashes = entries.iter().map(|entry| hash_fn(entry, key));
35            generator.reset(hashes);
36
37            generator.try_generate_hash()
38        })
39        .map(|key| HashState {
40            key,
41            disps: generator.disps,
42            map: generator.map.into_iter().map(|i| i.unwrap()).collect(),
43        })
44        .expect("failed to solve PHF")
45}
46
47struct Bucket {
48    idx: usize,
49    keys: Vec<usize>,
50}
51
52struct Generator {
53    hashes: Vec<Hashes>,
54    buckets: Vec<Bucket>,
55    disps: Vec<(u32, u32)>,
56    map: Vec<Option<usize>>,
57    try_map: Vec<u64>,
58}
59
60impl Generator {
61    fn new(table_len: usize) -> Self {
62        let hashes = Vec::with_capacity(table_len);
63
64        let buckets_len = (table_len + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
65        let buckets: Vec<_> = (0..buckets_len)
66            .map(|i| Bucket {
67                idx: i,
68                keys: vec![],
69            })
70            .collect();
71        let disps = vec![(0u32, 0u32); buckets_len];
72
73        let map = vec![None; table_len];
74        let try_map = vec![0u64; table_len];
75
76        Self {
77            hashes,
78            buckets,
79            disps,
80            map,
81            try_map,
82        }
83    }
84
85    fn reset<I>(&mut self, hashes: I)
86    where
87        I: Iterator<Item = Hashes>,
88    {
89        self.buckets.iter_mut().for_each(|b| b.keys.clear());
90        self.buckets.sort_by_key(|b| b.idx);
91        self.disps.iter_mut().for_each(|d| *d = (0, 0));
92        self.map.iter_mut().for_each(|m| *m = None);
93        self.try_map.iter_mut().for_each(|m| *m = 0);
94
95        self.hashes.clear();
96        self.hashes.extend(hashes);
97    }
98
99    fn try_generate_hash(&mut self) -> bool {
100        let buckets_len = self.buckets.len() as u32;
101        for (i, hash) in self.hashes.iter().enumerate() {
102            self.buckets[(hash.g % buckets_len) as usize].keys.push(i);
103        }
104
105        // Sort descending
106        self.buckets
107            .sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
108
109        let table_len = self.hashes.len();
110
111        // store whether an element from the bucket being placed is
112        // located at a certain position, to allow for efficient overlap
113        // checks. It works by storing the generation in each cell and
114        // each new placement-attempt is a new generation, so you can tell
115        // if this is legitimately full by checking that the generations
116        // are equal. (A u64 is far too large to overflow in a reasonable
117        // time for current hardware.)
118        let mut generation = 0u64;
119
120        // the actual values corresponding to the markers above, as
121        // (index, key) pairs, for adding to the main map once we've
122        // chosen the right disps.
123        let mut values_to_add = vec![];
124
125        'buckets: for bucket in &self.buckets {
126            for d1 in 0..(table_len as u32) {
127                'disps: for d2 in 0..(table_len as u32) {
128                    values_to_add.clear();
129                    generation += 1;
130
131                    for &key in &bucket.keys {
132                        let idx =
133                            (phf_shared::displace(self.hashes[key].f1, self.hashes[key].f2, d1, d2)
134                                % (table_len as u32)) as usize;
135                        if self.map[idx].is_some() || self.try_map[idx] == generation {
136                            continue 'disps;
137                        }
138                        self.try_map[idx] = generation;
139                        values_to_add.push((idx, key));
140                    }
141
142                    // We've picked a good set of disps
143                    self.disps[bucket.idx] = (d1, d2);
144                    for &(idx, key) in &values_to_add {
145                        self.map[idx] = Some(key);
146                    }
147                    continue 'buckets;
148                }
149            }
150
151            // Unable to find displacements for a bucket
152            return false;
153        }
154        true
155    }
156}