tiff/encoder/compression/
packbits.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
use crate::{encoder::compression::*, tags::CompressionMethod};
use std::io::{BufWriter, Error, ErrorKind, Write};

/// Byte compressor implementing the PackBits algorithm.
///
/// PackBits is often ineffective on continuous tone images, including many
/// grayscale images. In such cases it is better to leave the image
/// uncompressed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Packbits;

impl Compression for Packbits {
    /// Compression method constant associated with this compressor.
    const COMPRESSION_METHOD: CompressionMethod = CompressionMethod::PackBits;

    /// Returns a copy of this compressor wrapped in [`Compressor::Packbits`].
    fn get_algorithm(&self) -> Compressor {
        let algorithm: Packbits = *self;
        Compressor::Packbits(algorithm)
    }
}

impl CompressionAlgorithm for Packbits {
    /// Compresses `bytes` with the PackBits scheme and writes the result to `writer`.
    ///
    /// The output is a sequence of packets, each introduced by one header byte:
    ///
    /// * a header of `n - 1` (`0..=127`) is followed by `n` literal bytes;
    /// * a header of `257 - n` (`129..=255`) is followed by one byte that stands for a
    ///   run of `n` equal bytes.
    ///
    /// A single packet never covers more than 128 input bytes. Output is collected in a
    /// [`BufWriter`] around `writer`, which is flushed before returning.
    ///
    /// Returns the number of compressed bytes produced.
    ///
    /// # Errors
    ///
    /// Returns an error of kind [`ErrorKind::WriteZero`] if `bytes` is empty, and
    /// propagates any error reported while writing to or flushing `writer`.
    fn write_to<W: Write>(&mut self, writer: &mut W, bytes: &[u8]) -> Result<u64, io::Error> {
        // Inspired by https://github.com/skirridsystems/packbits

        const MIN_REPT: u8 = 3; // Minimum run length worth splitting off from literal data
        const MAX_BYTES: u8 = 128; // Maximum number of bytes one header byte can describe

        // Header byte announcing `n` literal bytes.
        fn encode_diff(n: u8) -> u8 {
            n - 1
        }
        // Header byte announcing a run of `n` equal bytes (`257 - n`, truncated to a byte).
        fn encode_rept(n: u8) -> u8 {
            (257 - u16::from(n)) as u8
        }

        // Need at least one byte to compress; it also primes the compressor.
        let mut last_byte = match bytes.first() {
            Some(&byte) => byte,
            None => return Err(Error::new(ErrorKind::WriteZero, "write zero")),
        };

        let mut bufwriter = BufWriter::new(writer);
        let mut bytes_written = 0u64; // Number of compressed bytes produced so far

        let mut in_run = false; // Whether equal bytes are currently being counted
        let mut run_index = 0u8; // Distance into the pending bytes at which a run may start

        let mut bytes_pending = 1u8; // Bytes looked at but not yet output
        let mut pending_index = 0usize; // Index of the first pending byte

        for (curr_index, &curr_byte) in bytes.iter().enumerate().skip(1) {
            bytes_pending += 1;

            if in_run {
                if curr_byte != last_byte || bytes_pending > MAX_BYTES {
                    // The run ended, or is as long as one packet allows. Emit it without
                    // the current byte, which becomes the first pending byte.
                    bufwriter.write_all(&[encode_rept(bytes_pending - 1), last_byte])?;
                    bytes_written += 2;

                    bytes_pending = 1;
                    pending_index = curr_index;
                    run_index = 0;
                    in_run = false;
                }
            } else if bytes_pending > MAX_BYTES {
                // We have as much differing data as we can output in one chunk.
                // Output MAX_BYTES leaving one byte.
                let chunk_len = usize::from(MAX_BYTES);
                bufwriter.write_all(&[encode_diff(MAX_BYTES)])?;
                bufwriter.write_all(&bytes[pending_index..pending_index + chunk_len])?;
                bytes_written += 1 + u64::from(MAX_BYTES);

                pending_index += chunk_len;
                bytes_pending -= MAX_BYTES;
                run_index = bytes_pending - 1; // A run could start here
            } else if curr_byte == last_byte {
                // A pair of equal bytes opens a run immediately when nothing literal is
                // pending in front of it; otherwise only once it is MIN_REPT bytes long.
                if bytes_pending - run_index >= MIN_REPT || run_index == 0 {
                    if run_index != 0 {
                        // Flush the differing data that precedes the run.
                        let literal_len = usize::from(run_index);
                        bufwriter.write_all(&[encode_diff(run_index)])?;
                        bufwriter.write_all(&bytes[pending_index..pending_index + literal_len])?;
                        bytes_written += 1 + u64::from(run_index);
                    }
                    bytes_pending -= run_index; // Length of the run so far
                    in_run = true;
                }
            } else {
                run_index = bytes_pending - 1; // A run could start here
            }
            last_byte = curr_byte;
        }

        // Output the remainder
        if in_run {
            bufwriter.write_all(&[encode_rept(bytes_pending), last_byte])?;
            bytes_written += 2;
        } else {
            let literal_len = usize::from(bytes_pending);
            bufwriter.write_all(&[encode_diff(bytes_pending)])?;
            bufwriter.write_all(&bytes[pending_index..pending_index + literal_len])?;
            bytes_written += 1 + u64::from(bytes_pending);
        }

        bufwriter.flush()?;
        Ok(bytes_written)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::encoder::compression::tests::TEST_DATA;
    use std::io::Cursor;

    /// Runs `input` through [`Packbits`] and returns the compressed bytes.
    fn pack(input: &[u8]) -> Vec<u8> {
        let mut packed = Vec::<u8>::new();
        let mut sink = Cursor::new(&mut packed);
        Packbits.write_to(&mut sink, input).unwrap();
        packed
    }

    /// A lone byte becomes a one-byte literal packet.
    #[test]
    fn test_packbits_single_byte() {
        const UNCOMPRESSED_DATA: [u8; 1] = [0x3F];
        const EXPECTED_COMPRESSED_DATA: [u8; 2] = [0x00, 0x3F];

        assert_eq!(pack(&UNCOMPRESSED_DATA), EXPECTED_COMPRESSED_DATA);
    }

    /// A run embedded in literal text is emitted as its own run packet.
    #[test]
    fn test_packbits_rept() {
        const UNCOMPRESSED_DATA: &[u8] =
            b"This strrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrring hangs.";
        const EXPECTED_COMPRESSED_DATA: &[u8] = b"\x06This st\xD1r\x09ing hangs.";

        assert_eq!(pack(UNCOMPRESSED_DATA), EXPECTED_COMPRESSED_DATA);
    }

    /// Runs and literal stretches longer than one packet are split across packets.
    #[test]
    fn test_packbits_large_rept_nonrept() {
        let mut data = Vec::<u8>::new();
        data.extend_from_slice(b"This st");
        data.extend(std::iter::repeat(b'r').take(158));
        data.extend_from_slice(b"ing hangs.");
        data.extend(0u8..158);

        const EXPECTED_COMPRESSED_DATA: [u8; 182] = [
            0x06, 0x54, 0x68, 0x69, 0x73, 0x20, 0x73, 0x74, 0x81, 0x72, 0xE3, 0x72, 0x7F, 0x69,
            0x6E, 0x67, 0x20, 0x68, 0x61, 0x6E, 0x67, 0x73, 0x2E, 0x00, 0x01, 0x02, 0x03, 0x04,
            0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12,
            0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E,
            0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C,
            0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
            0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
            0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74,
            0x75, 0x27, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
            0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D,
        ];

        assert_eq!(pack(data.as_slice()), EXPECTED_COMPRESSED_DATA);
    }

    /// The shared test string has no usable runs and is stored as one literal packet.
    #[test]
    fn test_packbits() {
        const EXPECTED_COMPRESSED_DATA: &[u8] =
            b"\x3CThis is a string for checking various compression algorithms.";

        assert_eq!(pack(TEST_DATA), EXPECTED_COMPRESSED_DATA);
    }
}