fdeflate/
huffman.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY};

/// Return the next code, or if the codeword is already all ones (which is the final code), return
/// the same code again.
fn next_codeword(mut codeword: u16, table_size: u16) -> u16 {
    if codeword == table_size - 1 {
        return codeword;
    }

    let adv = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros();
    let bit = 1 << adv;
    codeword &= bit - 1;
    codeword |= bit;
    codeword
}

#[allow(clippy::needless_range_loop)]
pub fn build_table(
    lengths: &[u8],
    entries: &[u32],
    codes: &mut [u16],
    primary_table: &mut [u32],
    secondary_table: &mut Vec<u16>,
    is_distance_table: bool,
    double_literal: bool,
) -> bool {
    // Count the number of symbols with each code length.
    let mut histogram = [0; 16];
    for &length in lengths {
        histogram[length as usize] += 1;
    }

    // Determine the maximum code length.
    let mut max_length = 15;
    while max_length > 1 && histogram[max_length] == 0 {
        max_length -= 1;
    }

    // Handle zero and one symbol huffman codes (which are only allowed for distance codes).
    if is_distance_table {
        if max_length == 0 {
            primary_table.fill(0);
            secondary_table.clear();
            return true;
        } else if max_length == 1 && histogram[1] == 1 {
            let symbol = lengths.iter().position(|&l| l == 1).unwrap();
            codes[symbol] = 0;
            let entry = entries
                .get(symbol)
                .cloned()
                .unwrap_or((symbol as u32) << 16)
                | 1;
            for chunk in primary_table.chunks_mut(2) {
                chunk[0] = entry;
                chunk[1] = 0;
            }
            return true;
        }
    }

    // Sort symbols by code length. Given the histogram, we can determine the starting offset
    // for each code length.
    let mut offsets = [0; 16];
    let mut codespace_used = 0;
    offsets[1] = histogram[0];
    for i in 1..max_length {
        offsets[i + 1] = offsets[i] + histogram[i];
        codespace_used = (codespace_used << 1) + histogram[i];
    }
    codespace_used = (codespace_used << 1) + histogram[max_length];

    // Check that the provided lengths form a valid Huffman tree.
    if codespace_used != (1 << max_length) {
        return false;
    }

    // Sort the symbols by code length.
    let mut next_index = offsets;
    let mut sorted_symbols = [0; 288];
    for symbol in 0..lengths.len() {
        let length = lengths[symbol];
        sorted_symbols[next_index[length as usize]] = symbol;
        next_index[length as usize] += 1;
    }

    let mut codeword = 0u16;
    let mut i = histogram[0];

    // Populate the primary decoding table
    let primary_table_bits = primary_table.len().ilog2() as usize;
    let primary_table_mask = (1 << primary_table_bits) - 1;
    for length in 1..=primary_table_bits {
        let current_table_end = 1 << length;

        // Loop over all symbols with the current code length and set their table entries.
        for _ in 0..histogram[length] {
            let symbol = sorted_symbols[i];
            i += 1;

            primary_table[codeword as usize] = entries
                .get(symbol)
                .cloned()
                .unwrap_or((symbol as u32) << 16)
                | length as u32;

            codes[symbol] = codeword;
            codeword = next_codeword(codeword, current_table_end as u16);
        }

        if double_literal {
            for len1 in 1..(length - 1) {
                let len2 = length - len1;
                for sym1_index in offsets[len1]..next_index[len1] {
                    for sym2_index in offsets[len2]..next_index[len2] {
                        let sym1 = sorted_symbols[sym1_index];
                        let sym2 = sorted_symbols[sym2_index];
                        if sym1 < 256 && sym2 < 256 {
                            let codeword1 = codes[sym1];
                            let codeword2 = codes[sym2];
                            let codeword = codeword1 | (codeword2 << len1);
                            let entry = (sym1 as u32) << 16
                                | (sym2 as u32) << 24
                                | LITERAL_ENTRY
                                | (2 << 8);
                            primary_table[codeword as usize] = entry | (length as u32);
                        }
                    }
                }
            }
        }

        // If we aren't at the maximum table size, double the size of the table.
        if length < primary_table_bits {
            primary_table.copy_within(0..current_table_end, current_table_end);
        }
    }

    // Populate the secondary decoding table.
    secondary_table.clear();
    if max_length > primary_table_bits {
        let mut subtable_start = 0;
        let mut subtable_prefix = !0;
        for length in (primary_table_bits + 1)..=max_length {
            let subtable_size = 1 << (length - primary_table_bits);
            for _ in 0..histogram[length] {
                // If the codeword's prefix doesn't match the current subtable, create a new
                // subtable.
                if codeword & primary_table_mask != subtable_prefix {
                    subtable_prefix = codeword & primary_table_mask;
                    subtable_start = secondary_table.len();
                    primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16)
                        | EXCEPTIONAL_ENTRY
                        | SECONDARY_TABLE_ENTRY
                        | (subtable_size as u32 - 1);
                    secondary_table.resize(subtable_start + subtable_size, 0);
                }

                // Lookup the symbol.
                let symbol = sorted_symbols[i];
                i += 1;

                // Insert the symbol into the secondary table and advance to the next codeword.
                codes[symbol] = codeword;
                secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] =
                    ((symbol as u16) << 4) | (length as u16);
                codeword = next_codeword(codeword, 1 << length);
            }

            // If there are more codes with the same subtable prefix, extend the subtable.
            if length < max_length && codeword & primary_table_mask == subtable_prefix {
                secondary_table.extend_from_within(subtable_start..);
                let subtable_size = secondary_table.len() - subtable_start;
                primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16)
                    | EXCEPTIONAL_ENTRY
                    | SECONDARY_TABLE_ENTRY
                    | (subtable_size as u32 - 1);
            }
        }
    }

    true
}