yazi/
lib.rs

1//! Yet another zlib implementation.
2//!
//! This crate is an implementation of the RFC 1951 DEFLATE specification with
//! support for the RFC 1950 zlib wrapper. There are many fine options for such in the
5//! Rust ecosystem, but I was looking for one that was small and relatively
6//! simple with reasonable performance/compression ratio and support for heap-free
7//! compression/decompression scenarios. This crate aims to tick those boxes
8//! while also providing composable streaming support based on the standard I/O
9//! mechanisms.
10//!
11//! See the quick start guide below for basic usage or jump to the [compression](#compression)
12//! or [decompression](#decompression) section for more detail.
13//!
14//! # Quick Start
15//!
16//! So you've got some bytes, they all fit in memory, you don't need to reuse allocations,
17//! and you just want to compress or decompress them. This section is for you.
18//!
19//! Cargo.toml:
20//! ```toml
21//! [dependencies]
22//! yazi = "0.1.4"
23//! ```
24//!
25//! The [`compress`] and [`decompress`] functions are provided for the most common use cases:
26//! ```
27//! use yazi::*;
28//! // Your source data.
29//! let data = &(0..=255).cycle().take(8192).collect::<Vec<u8>>()[..];
30//! // Compress it into a Vec<u8> with a zlib wrapper using the default compression level.
31//! let compressed = compress(data, Format::Zlib, CompressionLevel::Default).unwrap();
32//! // Decompress it into a Vec<u8>.
33//! let (decompressed, checksum) = decompress(&compressed, Format::Zlib).unwrap();
34//! // Verify the checksum.
35//! assert_eq!(Adler32::from_buf(&decompressed).finish(), checksum.unwrap());
36//! // Verify that the decompressed data matches the original.
37//! assert_eq!(&decompressed[..], data);
38//! ```
39//!
40//! Read on for more detailed usage.
41//!
42//! # Compression
43//!
44//! To compress data, you'll need to create an instance of the [`Encoder`] struct.
45//! The [`new`](Encoder::new) method can be used to construct an encoder on the
46//! stack, but the internal buffers are large (~300k) and may cause a stack overflow
47//! so it is advisable to use the [`boxed`](Encoder::boxed) method to allocate
48//! the encoder on the heap.
49//!
50//! Newly constructed encoders are configured to output a raw DEFLATE bitstream using a
51//! medium compression level and a default strategy. Call [`set_format`](Encoder::set_format)
52//! to change the output [`Format`]. Raw DEFLATE and zlib are supported. The
53//! [`set_level`](Encoder::set_level) method allows you to choose the preferred
54//! [`CompressionLevel`] from a set of basic options or a specific level between 1 and 10.
55//! The [`CompressionStrategy`] can be changed with the [`set_strategy`](Encoder::set_strategy)
56//! method. This allows you to, for example, force the encoder to output only static blocks.
57//!
58//! To create an encoder that outputs a zlib bitstream and spends some extra time to potentially
59//! produce a result with a higher compression ratio:
60//! ```
61//! use yazi::{CompressionLevel, Encoder, Format};
62//! let mut encoder = Encoder::boxed();
63//! encoder.set_format(Format::Zlib);
64//! encoder.set_level(CompressionLevel::BestSize);
65//! ```
66//!
67//! The encoder itself does not provide any functionality. It simply stores state and
68//! configuration. To actually compress data, you'll need an [`EncoderStream`]. A stream
69//! is a binding between an encoder and some specific output that will receive the
70//! compressed data. This design allows an encoder to be reused with different types
71//! of outputs without paying the allocation and initialization cost each time.
72//!
73//! Streaming supports outputs of the following forms:
74//! - Fixed buffers, created with the [`stream_into_buf`](Encoder::stream_into_buf) method.
75//! - Vectors, created with the [`stream_into_vec`](Encoder::stream_into_vec) method.
76//! - Any type that implements [`std::io::Write`], created with the generic
77//!   [`stream`](Encoder::stream) method.
78//!
79//! Once you have an [`EncoderStream`], simply call [`write`](EncoderStream::write) one
80//! or more times, feeding your raw data into the stream. If available, you can submit
81//! the entire input buffer at once, or in arbitrarily sized chunks down to a single
82//! byte. After all data has been written, call [`finish`](EncoderStream::finish) on
83//! the stream which will consume it, flush all remaining input and output, and
84//! finalize the operation. The finish method returns a [`Result`] containing the
85//! total number of compressed bytes written to the output on success, or an
86//! [`Error`] describing the problem on failure.
87//!
88//! Let's write a function that compresses some arbitrary bytes into a vector:
89//! ```
90//! fn compress_bytes(buf: &[u8]) -> Result<Vec<u8>, yazi::Error> {
91//!     use yazi::Encoder;
92//!     let mut encoder = Encoder::boxed();
93//!     let mut vec = Vec::new();
94//!     let mut stream = encoder.stream_into_vec(&mut vec);
95//!     stream.write(buf)?;
96//!     stream.finish()?;
97//!     Ok(vec)
98//! }
99//! ```
100//!
101//! Now let's do something a bit more interesting, and given two paths, compress
102//! one file into another:
103//! ```
104//! fn compress_file(source: &str, dest: &str) -> Result<u64, yazi::Error> {
105//!     use yazi::Encoder;
106//!     use std::fs::File;
107//!     use std::io::{copy, BufWriter};
108//!     let mut encoder = Encoder::boxed();
//!     // yazi does not perform any internal buffering beyond what is necessary
//!     // for correctness, so wrap the destination file in a BufWriter to avoid
//!     // issuing many small writes.
111//!     let mut target = BufWriter::new(File::create(dest)?);
112//!     let mut stream = encoder.stream(&mut target);
113//!     copy(&mut File::open(source)?, &mut stream)?;
114//!     stream.finish()
115//! }
116//! ```
117//!
118//! Here, we can see that [`EncoderStream`] also implements [`std::io::Write`], so we
119//! can pass it directly to [`std::io::copy`]. This allows streams to be composable
120//! with the standard I/O facilities and other libraries that support those interfaces.
121//!
122//! # Decompression
123//!
124//! If you've already read the section on compression, the API for decompression
125//! is essentially identical with the types replaced by [`Decoder`] and [`DecoderStream`].
126//! The documentation is copied here almost verbatim for the sake of completeness and for
127//! those who might have skipped directly to this section.
128//!
129//! To decompress data, you'll need to create an instance of the [`Decoder`] struct.
130//! The [`new`](Decoder::new) method can be used to construct a decoder on the stack,
131//! and unlike encoders, the decoder struct is relatively small (~10k) and generally
132//! safe to stack allocate. You can create a decoder on the heap with the
133//! [`boxed`](Decoder::boxed) method if you prefer.
134//!
135//! Newly constructed decoders are configured to decompress a raw DEFLATE bitstream. Call
136//! [`set_format`](Decoder::set_format) to change the input [`Format`]. Raw DEFLATE and
137//! zlib are supported. No other configuration is necessary for decompression.
138//!
139//! To create a decoder that decompresses a zlib bitstream:
140//! ```
141//! use yazi::{Decoder, Format};
142//! let mut decoder = Decoder::new();
143//! decoder.set_format(Format::Zlib);
144//! ```
145//!
146//! The decoder itself does not provide any functionality. It simply stores state and
147//! configuration. To actually decompress data, you'll need a
148//! [`DecoderStream`]. A stream is a binding between a
149//! decoder and some specific output that will receive the decompressed data. This
150//! design allows a decoder to be reused with different types of outputs without paying the
151//! allocation and initialization cost each time.
152//!
153//! Streaming supports outputs of the following forms:
154//! - Fixed buffers, created with the [`stream_into_buf`](Decoder::stream_into_buf) method.
155//! - Vectors, created with the [`stream_into_vec`](Decoder::stream_into_vec) method.
156//! - Any type that implements [`std::io::Write`], created with the generic
157//!   [`stream`](Decoder::stream) method.
158//!
159//! Once you have a [`DecoderStream`], simply call [`write`](DecoderStream::write) one or
160//! more times, feeding your compressed data into the stream. If available, you can submit
161//! the entire input buffer at once, or in arbitrarily sized chunks down to a single byte.
162//! After all data has been written, call [`finish`](DecoderStream::finish) on the stream
163//! which will consume it, flush all remaining input and output, and finalize the operation.
164//! The finish method returns a [`Result`] containing the total number of decompressed bytes
165//! written to the output along with an optional Adler-32 checksum (if the stream was
166//! zlib-encoded) on success, or an [`Error`] describing the problem on failure.
167//!
168//! Let's write a function that decompresses a zlib bitstream into a vector and verifies
169//! the checksum:
170//! ```
171//! fn decompress_zlib(buf: &[u8]) -> Result<Vec<u8>, yazi::Error> {
172//!     use yazi::{Adler32, Decoder, Error, Format};
173//!     let mut decoder = Decoder::new();
174//!     decoder.set_format(Format::Zlib);
175//!     let mut vec = Vec::new();
176//!     let mut stream = decoder.stream_into_vec(&mut vec);
177//!     stream.write(buf)?;
178//!     // checksum is an Option<u32>
179//!     let (_, checksum) = stream.finish()?;
180//!     if Adler32::from_buf(&vec).finish() != checksum.unwrap() {
181//!         return Err(Error::InvalidBitstream);
182//!     }
183//!     Ok(vec)
184//! }
185//! ```
186//!
187//! Now let's do something a bit more interesting, and given two paths, decompress
188//! one file into another:
189//! ```
190//! fn decompress_file(source: &str, dest: &str) -> Result<(u64, Option<u32>), yazi::Error> {
191//!     use yazi::Decoder;
192//!     use std::fs::File;
193//!     use std::io::{copy, BufWriter};
194//!     let mut decoder = Decoder::new();
//!     // yazi does not perform any internal buffering beyond what is necessary
//!     // for correctness, so wrap the destination file in a BufWriter to avoid
//!     // issuing many small writes.
197//!     let mut target = BufWriter::new(File::create(dest)?);
198//!     let mut stream = decoder.stream(&mut target);
199//!     copy(&mut File::open(source)?, &mut stream)?;
200//!     stream.finish()
201//! }
202//! ```
203//!
204//! Here, we can see that [`DecoderStream`] also implements [`std::io::Write`], so we can
205//! pass it directly to [`std::io::copy`]. This allows streams to be composable with the
206//! standard I/O facilities and other libraries that support those interfaces.
207//!
208//! # Implementation Notes
209//!
210//! The compressor is based heavily on both [miniz](https://github.com/richgel999/miniz)
211//! by Rich Geldreich and [miniz_oxide](https://github.com/Frommi/miniz_oxide)
212//! by Frommi. The available compression levels and strategies are the same and
213//! it should produce an identical bitstream for a given set of options. The
214//! decompressor is based on the techniques in [libdeflate](https://github.com/ebiggers/libdeflate)
215//! by Eric Biggers.
216
217#![cfg_attr(not(feature = "std"), no_std)]
218
219extern crate alloc;
220
221mod decode;
222mod encode;
223
224#[cfg(feature = "std")]
225use std::io;
226
227pub use decode::{decompress, Decoder, DecoderStream};
228pub use encode::{compress, CompressionLevel, CompressionStrategy, Encoder, EncoderStream};
229
/// Defines the format for a compressed bitstream.
///
/// The format is selected on an encoder or decoder through its `set_format`
/// method. The type is a plain, field-less enum, so it is cheap to copy and
/// can be compared, hashed, and used as a key in maps or sets.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum Format {
    /// Raw DEFLATE data.
    Raw,
    /// Zlib header with an Adler-32 footer.
    Zlib,
}
238
/// Errors that may occur during compression or decompression.
///
/// The type implements `core::fmt::Display` so it can be shown to users, and,
/// when the `std` feature is enabled, `std::error::Error` so it can be boxed
/// or wrapped by generic error-handling code.
#[derive(Debug)]
pub enum Error {
    /// Not enough input was provided.
    Underflow,
    /// The bitstream was corrupt.
    InvalidBitstream,
    /// Output buffer was too small.
    Overflow,
    /// Attempt to write into a finished stream.
    Finished,
    /// A system I/O error.
    ///
    /// Only available with the `std` feature enabled.
    #[cfg(feature = "std")]
    Io(io::Error),
}

impl core::fmt::Display for Error {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Underflow => f.write_str("not enough input was provided"),
            Self::InvalidBitstream => f.write_str("the bitstream was corrupt"),
            Self::Overflow => f.write_str("output buffer was too small"),
            Self::Finished => f.write_str("attempt to write into a finished stream"),
            #[cfg(feature = "std")]
            Self::Io(error) => write!(f, "I/O error: {}", error),
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Error {
    /// Exposes the wrapped I/O error as the underlying cause; every other
    /// variant has no further source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Io(error) => Some(error),
            _ => None,
        }
    }
}
256
/// Wraps a system I/O error in the [`Error::Io`] variant, which lets `?` be
/// applied to I/O results inside functions that return this crate's [`Error`].
#[cfg(feature = "std")]
impl From<io::Error> for Error {
    fn from(source: io::Error) -> Error {
        Error::Io(source)
    }
}
263
/// Rolling Adler-32 checksum.
///
/// The 32-bit state packs two 16-bit running sums. The low half is one plus
/// the sum of all bytes seen so far; the high half is the sum of every
/// intermediate value of the low half. Both are kept reduced modulo 65521.
///
/// Data may be fed in any number of [`update`](Adler32::update) calls; the
/// result is the same as hashing the concatenated input in one go.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Adler32(u32);

impl Adler32 {
    /// Modulus applied to both running sums (the largest prime below 2^16).
    const MODULUS: u32 = 65521;

    /// Number of bytes accumulated between modulo reductions.
    ///
    /// Starting from sums below [`Self::MODULUS`], 5550 bytes of `0xFF` drive
    /// the high sum to at most 4,291,727,895, which still fits in a `u32`, so
    /// the reduction can be deferred to once per chunk without overflow.
    const CHUNK_LEN: usize = 5550;

    /// Creates a new checksum initialized to the default value.
    ///
    /// The default state is `1`, i.e. the checksum of empty input.
    pub fn new() -> Self {
        Self(1)
    }

    /// Creates a checksum from a buffer.
    ///
    /// Equivalent to calling [`new`](Adler32::new) followed by a single
    /// [`update`](Adler32::update) with `buf`.
    pub fn from_buf(buf: &[u8]) -> Self {
        let mut checksum = Self::new();
        checksum.update(buf);
        checksum
    }

    /// Updates the checksum with bytes provided by the specified buffer.
    ///
    /// An empty buffer leaves the state unchanged.
    pub fn update(&mut self, buf: &[u8]) {
        // Unpack the two 16-bit sums from the combined state.
        let mut s1 = self.0 & 0xFFFF;
        let mut s2 = (self.0 >> 16) & 0xFFFF;
        for chunk in buf.chunks(Self::CHUNK_LEN) {
            for &byte in chunk {
                s1 += u32::from(byte);
                s2 += s1;
            }
            // Deferred reduction: once per chunk rather than once per byte.
            s1 %= Self::MODULUS;
            s2 %= Self::MODULUS;
        }
        self.0 = (s2 << 16) | s1;
    }

    /// Returns the checksum.
    pub fn finish(self) -> u32 {
        self.0
    }
}

impl Default for Adler32 {
    /// Returns the same initial state as [`Adler32::new`].
    fn default() -> Self {
        Self::new()
    }
}
307
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    #[cfg(target_family = "wasm")]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    /// Builds a deterministic sample made of 4096 alphabet fragments: the
    /// second half of the alphabet, the full alphabet, or the reversed
    /// alphabet, chosen from the iteration index.
    fn generate_bytes() -> Vec<u8> {
        const BYTES: &[u8; 26] = b"abcdefghijklmnopqrstuvwxyz";
        let mut sample = Vec::new();
        for i in 0..4096 {
            match (i % 3 == 0, i & 1 != 0) {
                (true, _) => sample.extend_from_slice(&BYTES[13..]),
                (false, true) => sample.extend_from_slice(BYTES),
                (false, false) => sample.extend(BYTES.iter().rev()),
            }
        }
        sample
    }

    /// Feeds each piece to a vector-backed encoder stream with one `write`
    /// call per piece, finishes the stream, and returns the compressed bytes.
    fn deflate<'a>(encoder: &mut Encoder, pieces: impl Iterator<Item = &'a [u8]>) -> Vec<u8> {
        let mut compressed = Vec::new();
        let mut stream = encoder.stream_into_vec(&mut compressed);
        for piece in pieces {
            stream.write(piece).unwrap();
        }
        stream.finish().unwrap();
        compressed
    }

    /// Feeds each piece to a vector-backed decoder stream with one `write`
    /// call per piece, finishes the stream, and returns the decompressed
    /// bytes together with the checksum reported by `finish`.
    fn inflate<'a>(
        decoder: &mut Decoder,
        pieces: impl Iterator<Item = &'a [u8]>,
    ) -> (Vec<u8>, Option<u32>) {
        let mut decompressed = Vec::new();
        let mut stream = decoder.stream_into_vec(&mut decompressed);
        for piece in pieces {
            stream.write(piece).unwrap();
        }
        let (_, checksum) = stream.finish().unwrap();
        (decompressed, checksum)
    }

    /// Round trip with default settings, whole buffer in a single write.
    #[test]
    fn compress_decompress() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        let compressed = deflate(&mut encoder, core::iter::once(buf.as_slice()));
        let mut decoder = Decoder::new();
        let (decompressed, _) = inflate(&mut decoder, core::iter::once(compressed.as_slice()));
        assert_eq!(buf, decompressed);
    }

    /// Round trip through the zlib wrapper, also verifying the reported
    /// Adler-32 checksum against one computed over the decompressed data.
    #[test]
    fn compress_decompress_zlib() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        encoder.set_format(Format::Zlib);
        let compressed = deflate(&mut encoder, core::iter::once(buf.as_slice()));
        let mut decoder = Decoder::new();
        decoder.set_format(Format::Zlib);
        let (decompressed, checksum) =
            inflate(&mut decoder, core::iter::once(compressed.as_slice()));
        assert_eq!(buf, decompressed);
        let mut adler = Adler32::new();
        adler.update(&decompressed);
        assert_eq!(adler.finish(), checksum.unwrap());
    }

    /// Round trip with the encoder set to the static compression strategy.
    #[test]
    fn compress_decompress_static() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        encoder.set_strategy(CompressionStrategy::Static);
        let compressed = deflate(&mut encoder, core::iter::once(buf.as_slice()));
        let mut decoder = Decoder::new();
        let (decompressed, _) = inflate(&mut decoder, core::iter::once(compressed.as_slice()));
        assert_eq!(buf, decompressed);
    }

    /// Round trip with the encoder set to `CompressionLevel::None`.
    #[test]
    fn compress_decompress_raw() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        encoder.set_level(CompressionLevel::None);
        let compressed = deflate(&mut encoder, core::iter::once(buf.as_slice()));
        let mut decoder = Decoder::new();
        let (decompressed, _) = inflate(&mut decoder, core::iter::once(compressed.as_slice()));
        assert_eq!(buf, decompressed);
    }

    /// Round trip where both directions are fed one byte per write.
    #[test]
    fn compress_decompress_streaming_1byte() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        let compressed = deflate(&mut encoder, buf.chunks(1));
        let mut decoder = Decoder::new();
        let (decompressed, _) = inflate(&mut decoder, compressed.chunks(1));
        assert_eq!(buf, decompressed);
    }

    /// Round trip where both directions are fed in 64-byte chunks.
    #[test]
    fn compress_decompress_streaming_64bytes() {
        let buf = generate_bytes();
        let mut encoder = Encoder::boxed();
        let compressed = deflate(&mut encoder, buf.chunks(64));
        let mut decoder = Decoder::new();
        let (decompressed, _) = inflate(&mut decoder, compressed.chunks(64));
        assert_eq!(buf, decompressed);
    }
}