svgtypes/
stream.rs

1// Copyright 2018 the SVG Types Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::Error;
5
/// Byte classification helpers for the XML/CSS subset handled by the parser.
///
/// Every predicate inspects a single byte and never allocates.
pub(crate) trait ByteExt {
    /// Returns `true` for a numeric sign: `+` or `-`.
    fn is_sign(&self) -> bool;

    /// Returns `true` for a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` for a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` for a whitespace byte.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` for a byte allowed in an ASCII identifier.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Form feed and vertical tab are deliberately not part of this set.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
56
/// Character classification used when parsing CSS-like identifiers.
trait CharExt {
    /// Returns `true` if the char may start a name: `_`, an ASCII letter,
    /// a non-ASCII char or an escape.
    fn is_name_start(&self) -> bool;

    /// Returns `true` if the char may continue a name: everything accepted by
    /// `is_name_start` plus ASCII digits and `-`.
    fn is_name_char(&self) -> bool;

    /// Returns `true` if the char is outside of the ASCII range (`>= U+0080`).
    fn is_non_ascii(&self) -> bool;

    /// Returns `true` if the char is an escape. Currently always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // CSS Syntax defines a non-ASCII code point as any code point with
        // a value equal to or greater than U+0080. The previous threshold
        // (`> 237`) rejected common letters such as U+00E9 (e with acute).
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: escape sequences are not supported yet, so no single char
        // is ever treated as an escape.
        false
    }
}
92
/// A streaming text parsing interface.
///
/// A `Stream` is a cheap, copyable cursor over a borrowed string slice.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    /// The whole input the stream walks over.
    text: &'a str,
    /// Current cursor position as a byte offset into `text`.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream over `text` with the cursor at the very beginning.
    #[inline]
    fn from(text: &'a str) -> Self {
        Self { pos: 0, text }
    }
}
106
107impl<'a> Stream<'a> {
108    /// Returns the current position in bytes.
109    #[inline]
110    pub fn pos(&self) -> usize {
111        self.pos
112    }
113
114    /// Calculates the current position in chars.
115    pub fn calc_char_pos(&self) -> usize {
116        self.calc_char_pos_at(self.pos)
117    }
118
119    /// Calculates the current position in chars.
120    pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
121        let mut pos = 1;
122        for (idx, _) in self.text.char_indices() {
123            if idx >= byte_pos {
124                break;
125            }
126
127            pos += 1;
128        }
129
130        pos
131    }
132
133    /// Sets current position equal to the end.
134    ///
135    /// Used to indicate end of parsing on error.
136    #[inline]
137    pub fn jump_to_end(&mut self) {
138        self.pos = self.text.len();
139    }
140
141    /// Checks if the stream is reached the end.
142    ///
143    /// Any [`pos()`] value larger than original text length indicates stream end.
144    ///
145    /// Accessing stream after reaching end via safe methods will produce
146    /// an `UnexpectedEndOfStream` error.
147    ///
148    /// Accessing stream after reaching end via *_unchecked methods will produce
149    /// a Rust's bound checking error.
150    ///
151    /// [`pos()`]: #method.pos
152    #[inline]
153    pub fn at_end(&self) -> bool {
154        self.pos >= self.text.len()
155    }
156
157    /// Returns a byte from a current stream position.
158    ///
159    /// # Errors
160    ///
161    /// - `UnexpectedEndOfStream`
162    #[inline]
163    pub fn curr_byte(&self) -> Result<u8, Error> {
164        if self.at_end() {
165            return Err(Error::UnexpectedEndOfStream);
166        }
167
168        Ok(self.curr_byte_unchecked())
169    }
170
171    #[inline]
172    pub fn chars(&self) -> std::str::Chars<'a> {
173        self.text[self.pos..].chars()
174    }
175
176    /// Returns a byte from a current stream position.
177    ///
178    /// # Panics
179    ///
180    /// - if the current position is after the end of the data
181    #[inline]
182    pub fn curr_byte_unchecked(&self) -> u8 {
183        self.text.as_bytes()[self.pos]
184    }
185
186    /// Checks that current byte is equal to provided.
187    ///
188    /// Returns `false` if no bytes left.
189    #[inline]
190    pub fn is_curr_byte_eq(&self, c: u8) -> bool {
191        if !self.at_end() {
192            self.curr_byte_unchecked() == c
193        } else {
194            false
195        }
196    }
197
198    /// Returns a next byte from a current stream position.
199    ///
200    /// # Errors
201    ///
202    /// - `UnexpectedEndOfStream`
203    #[inline]
204    pub fn next_byte(&self) -> Result<u8, Error> {
205        if self.pos + 1 >= self.text.len() {
206            return Err(Error::UnexpectedEndOfStream);
207        }
208
209        Ok(self.text.as_bytes()[self.pos + 1])
210    }
211
212    /// Advances by `n` bytes.
213    #[inline]
214    pub fn advance(&mut self, n: usize) {
215        debug_assert!(self.pos + n <= self.text.len());
216        self.pos += n;
217    }
218
219    /// Skips whitespaces.
220    ///
221    /// Accepted values: `' ' \n \r \t`.
222    pub fn skip_spaces(&mut self) {
223        while !self.at_end() && self.curr_byte_unchecked().is_space() {
224            self.advance(1);
225        }
226    }
227
228    /// Checks that the stream starts with a selected text.
229    ///
230    /// We are using `&[u8]` instead of `&str` for performance reasons.
231    #[inline]
232    pub fn starts_with(&self, text: &[u8]) -> bool {
233        self.text.as_bytes()[self.pos..].starts_with(text)
234    }
235
236    /// Consumes current byte if it's equal to the provided byte.
237    ///
238    /// # Errors
239    ///
240    /// - `InvalidChar`
241    /// - `UnexpectedEndOfStream`
242    pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
243        if self.curr_byte()? != c {
244            return Err(Error::InvalidChar(
245                vec![self.curr_byte_unchecked(), c],
246                self.calc_char_pos(),
247            ));
248        }
249
250        self.advance(1);
251        Ok(())
252    }
253
254    /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
255    ///
256    /// # Errors
257    ///
258    /// - `InvalidIdent`
259    pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
260        let start = self.pos();
261
262        if self.curr_byte() == Ok(b'-') {
263            self.advance(1);
264        }
265
266        let mut iter = self.chars();
267        if let Some(c) = iter.next() {
268            if c.is_name_start() {
269                self.advance(c.len_utf8());
270            } else {
271                return Err(Error::InvalidIdent);
272            }
273        }
274
275        for c in iter {
276            if c.is_name_char() {
277                self.advance(c.len_utf8());
278            } else {
279                break;
280            }
281        }
282
283        if start == self.pos() {
284            return Err(Error::InvalidIdent);
285        }
286
287        let name = self.slice_back(start);
288        Ok(name)
289    }
290
291    /// Consumes a single ident consisting of ASCII characters, if available.
292    pub fn consume_ascii_ident(&mut self) -> &'a str {
293        let start = self.pos;
294        self.skip_bytes(|_, c| c.is_ascii_ident());
295        self.slice_back(start)
296    }
297
298    /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
299    ///
300    /// # Errors
301    ///
302    /// - `UnexpectedEndOfStream`
303    /// - `InvalidValue`
304    pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
305        // Check for opening quote.
306        let quote = self.curr_byte()?;
307
308        if quote != b'\'' && quote != b'"' {
309            return Err(Error::InvalidValue);
310        }
311
312        let mut prev = quote;
313        self.advance(1);
314
315        let start = self.pos();
316
317        while !self.at_end() {
318            let curr = self.curr_byte_unchecked();
319
320            // Advance until the closing quote.
321            if curr == quote {
322                // Check for escaped quote.
323                if prev != b'\\' {
324                    break;
325                }
326            }
327
328            prev = curr;
329            self.advance(1);
330        }
331
332        let value = self.slice_back(start);
333
334        // Check for closing quote.
335        self.consume_byte(quote)?;
336
337        Ok(value)
338    }
339
340    /// Consumes selected string.
341    ///
342    /// # Errors
343    ///
344    /// - `InvalidChar`
345    /// - `UnexpectedEndOfStream`
346    pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
347        if self.at_end() {
348            return Err(Error::UnexpectedEndOfStream);
349        }
350
351        if !self.starts_with(text) {
352            let len = std::cmp::min(text.len(), self.text.len() - self.pos);
353            // Collect chars and do not slice a string,
354            // because the `len` can be on the char boundary.
355            // Which lead to a panic.
356            let actual = self.text[self.pos..].chars().take(len).collect();
357
358            // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
359            let expected = std::str::from_utf8(text).unwrap().to_owned();
360
361            return Err(Error::InvalidString(
362                vec![actual, expected],
363                self.calc_char_pos(),
364            ));
365        }
366
367        self.advance(text.len());
368        Ok(())
369    }
370
371    /// Consumes bytes by the predicate and returns them.
372    ///
373    /// The result can be empty.
374    pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
375    where
376        F: Fn(&Stream<'_>, u8) -> bool,
377    {
378        let start = self.pos();
379        self.skip_bytes(f);
380        self.slice_back(start)
381    }
382
383    /// Consumes bytes by the predicate.
384    pub fn skip_bytes<F>(&mut self, f: F)
385    where
386        F: Fn(&Stream<'_>, u8) -> bool,
387    {
388        while !self.at_end() {
389            let c = self.curr_byte_unchecked();
390            if f(self, c) {
391                self.advance(1);
392            } else {
393                break;
394            }
395        }
396    }
397
398    /// Slices data from `pos` to the current position.
399    #[inline]
400    pub fn slice_back(&self, pos: usize) -> &'a str {
401        &self.text[pos..self.pos]
402    }
403
404    /// Slices data from the current position to the end.
405    #[inline]
406    pub fn slice_tail(&self) -> &'a str {
407        &self.text[self.pos..]
408    }
409
410    /// Parses number or percent from the stream.
411    ///
412    /// Percent value will be normalized.
413    pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
414        self.skip_spaces();
415
416        let n = self.parse_number()?;
417        if self.starts_with(b"%") {
418            self.advance(1);
419            Ok(n / 100.0)
420        } else {
421            Ok(n)
422        }
423    }
424
425    /// Parses number or percent from a list of numbers and/or percents.
426    pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
427        if self.at_end() {
428            return Err(Error::UnexpectedEndOfStream);
429        }
430
431        let l = self.parse_number_or_percent()?;
432        self.skip_spaces();
433        self.parse_list_separator();
434        Ok(l)
435    }
436
437    /// Skips digits.
438    pub fn skip_digits(&mut self) {
439        self.skip_bytes(|_, c| c.is_digit());
440    }
441
442    #[inline]
443    pub(crate) fn parse_list_separator(&mut self) {
444        if self.is_curr_byte_eq(b',') {
445            self.advance(1);
446        }
447    }
448}