weezl/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
//! # LZW decoder and encoder
//!
//! This crates provides an `Encoder` and a `Decoder` in their respective modules. The code words
//! are written from and to bit byte slices (or streams) where it is possible to write either the
//! most or least significant bits first. The maximum possible code size is 12 bits, the smallest
//! available code size is 2 bits.
//!
//! ## Example
//!
//! These two code blocks show the compression and corresponding decompression. Note that you must
//! use the same arguments to `Encoder` and `Decoder`, otherwise the decoding might fail or produce
//! bad results.
//!
#![cfg_attr(feature = "std", doc = "```")]
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! use weezl::{BitOrder, encode::Encoder};
//!
//! let data = b"Hello, world";
//! let compressed = Encoder::new(BitOrder::Msb, 9)
//!     .encode(data)
//!     .unwrap();
//! ```
//!
#![cfg_attr(feature = "std", doc = "```")]
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! use weezl::{BitOrder, decode::Decoder};
//! # let compressed = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10".to_vec();
//! # let data = b"Hello, world";
//!
//! let decompressed = Decoder::new(BitOrder::Msb, 9)
//!     .decode(&compressed)
//!     .unwrap();
//! assert_eq!(decompressed, data);
//! ```
//!
//! ## LZW Details
//!
//! The de- and encoder expect the LZW stream to start with a clear code and end with an
//! end code which are defined as follows:
//!
//!  * `CLEAR_CODE == 1 << min_code_size`
//!  * `END_CODE   == CLEAR_CODE + 1`
//!
//! For optimal performance, all buffers and input and output slices should be as large as possible
//! and at least 2048 bytes long. This extends to input streams which should have similarly sized
//! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be
//! precise). Since there are no ways to handle allocation errors it is not recommended to operate
//! it on 16-bit targets.
//!
//! ## Allocations and standard library
//!
//! The main algorithm can be used in `no_std` as well, although it requires an allocator. This
//! restriction might be lifted at a later stage. For this you should deactivate the `std` feature.
//! The main interfaces stay intact but the `into_stream` combinator is no available.
#![cfg_attr(not(feature = "std"), no_std)]
#![forbid(unsafe_code)]
#![forbid(missing_docs)]

#[cfg(all(feature = "alloc", not(feature = "std")))]
extern crate alloc;
#[cfg(all(feature = "alloc", feature = "std"))]
use std as alloc;

pub(crate) const MAX_CODESIZE: u8 = 12;
pub(crate) const MAX_ENTRIES: usize = 1 << MAX_CODESIZE as usize;

/// Alias for a LZW code point
pub(crate) type Code = u16;

/// A default buffer size for encoding/decoding buffer.
///
/// Note that this is larger than the default size for buffers (usually 4K) since each code word
/// can expand to multiple bytes. Expanding one buffer would yield multiple and require a costly
/// break in the decoding loop. Note that the decoded size can be up to quadratic in code block.
pub(crate) const STREAM_BUF_SIZE: usize = 1 << 24;

/// The order of bits in bytes.
#[derive(Clone, Copy, Debug)]
pub enum BitOrder {
    /// The most significant bit is processed first.
    Msb,
    /// The least significant bit is processed first.
    Lsb,
}

/// An owned or borrowed buffer for stream operations.
#[cfg(feature = "alloc")]
pub(crate) enum StreamBuf<'d> {
    Borrowed(&'d mut [u8]),
    Owned(crate::alloc::vec::Vec<u8>),
}

#[cold]
fn assert_decode_size(size: u8) {
    assert!(
        size <= MAX_CODESIZE,
        "Maximum code size 12 required, got {}",
        size
    );
}

#[cold]
fn assert_encode_size(size: u8) {
    assert!(size >= 2, "Minimum code size 2 required, got {}", size);
    assert!(
        size <= MAX_CODESIZE,
        "Maximum code size 12 required, got {}",
        size
    );
}

#[cfg(feature = "alloc")]
pub mod decode;
#[cfg(feature = "alloc")]
pub mod encode;
mod error;

#[cfg(feature = "std")]
pub use self::error::StreamResult;
pub use self::error::{BufferResult, LzwError, LzwStatus};

#[cfg(all(test, feature = "alloc"))]
mod tests {
    use crate::decode::Decoder;
    use crate::encode::Encoder;

    #[cfg(feature = "std")]
    use crate::{decode, encode};

    #[test]
    fn stable_send() {
        fn must_be_send<T: Send + 'static>() {}
        must_be_send::<Decoder>();
        must_be_send::<Encoder>();

        #[cfg(feature = "std")]
        fn _send_and_lt<'lt, T: Send + 'lt>() {}

        // Check that the inference `W: Send + 'd` => `IntoStream: Send + 'd` works.
        #[cfg(feature = "std")]
        fn _all_send_writer<'d, W: std::io::Write + Send + 'd>() {
            _send_and_lt::<'d, decode::IntoStream<'d, W>>();
            _send_and_lt::<'d, encode::IntoStream<'d, W>>();
        }
    }
}