wayland_scanner/
token.rs

// `byte`, `next_chr`, `parse_lit_str`, `parse_lit_str_cooked` and `parse_lit_str_raw` are adapted
// from syn:
// https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1062-L1167
// and
// https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1327-L1388
6
/// Returns the byte of `s` at position `idx`, falling back to `b'\0'` when
/// `idx` lies at or beyond the end of the input.
fn byte(s: &str, idx: usize) -> u8 {
    s.as_bytes().get(idx).copied().unwrap_or(0)
}
16
/// Returns the first character of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    match s.chars().next() {
        Some(first) => first,
        None => '\0',
    }
}
20
21// Returns (content, suffix).
22fn parse_lit_str(s: &str) -> String {
23    match byte(s, 0) {
24        b'"' => parse_lit_str_cooked(s),
25        b'r' => parse_lit_str_raw(s),
26        _ => unreachable!(),
27    }
28}
29
30// Clippy false positive
31// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
32#[allow(clippy::needless_continue)]
33fn parse_lit_str_cooked(mut s: &str) -> String {
34    assert_eq!(byte(s, 0), b'"');
35    s = &s[1..];
36
37    let mut content = String::new();
38    'outer: loop {
39        let ch = match byte(s, 0) {
40            b'"' => break,
41            b'\\' => {
42                let b = byte(s, 1);
43                s = &s[2..];
44                match b {
45                    b'x' => {
46                        let (byte, rest) = backslash_x(s);
47                        s = rest;
48                        assert!(byte <= 0x80, "Invalid \\x byte in string literal");
49                        char::from_u32(u32::from(byte)).unwrap()
50                    }
51                    b'u' => {
52                        let (chr, rest) = backslash_u(s);
53                        s = rest;
54                        chr
55                    }
56                    b'n' => '\n',
57                    b'r' => '\r',
58                    b't' => '\t',
59                    b'\\' => '\\',
60                    b'0' => '\0',
61                    b'\'' => '\'',
62                    b'"' => '"',
63                    b'\r' | b'\n' => loop {
64                        let ch = next_chr(s);
65                        if ch.is_whitespace() {
66                            s = &s[ch.len_utf8()..];
67                        } else {
68                            continue 'outer;
69                        }
70                    },
71                    b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
72                }
73            }
74            b'\r' => {
75                assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
76                s = &s[2..];
77                '\n'
78            }
79            _ => {
80                let ch = next_chr(s);
81                s = &s[ch.len_utf8()..];
82                ch
83            }
84        };
85        content.push(ch);
86    }
87
88    assert!(s.starts_with('"'));
89    content
90}
91
92fn parse_lit_str_raw(mut s: &str) -> String {
93    assert_eq!(byte(s, 0), b'r');
94    s = &s[1..];
95
96    let mut pounds = 0;
97    while byte(s, pounds) == b'#' {
98        pounds += 1;
99    }
100    assert_eq!(byte(s, pounds), b'"');
101    let close = s.rfind('"').unwrap();
102    for end in s[close + 1..close + 1 + pounds].bytes() {
103        assert_eq!(end, b'#');
104    }
105
106    s[pounds + 1..close].to_owned()
107}
108
109fn backslash_x(s: &str) -> (u8, &str) {
110    let mut ch = 0;
111    let b0 = byte(s, 0);
112    let b1 = byte(s, 1);
113    ch += 0x10
114        * match b0 {
115            b'0'..=b'9' => b0 - b'0',
116            b'a'..=b'f' => 10 + (b0 - b'a'),
117            b'A'..=b'F' => 10 + (b0 - b'A'),
118            _ => panic!("unexpected non-hex character after \\x"),
119        };
120    ch += match b1 {
121        b'0'..=b'9' => b1 - b'0',
122        b'a'..=b'f' => 10 + (b1 - b'a'),
123        b'A'..=b'F' => 10 + (b1 - b'A'),
124        _ => panic!("unexpected non-hex character after \\x"),
125    };
126    (ch, &s[2..])
127}
128
129fn backslash_u(mut s: &str) -> (char, &str) {
130    if byte(s, 0) != b'{' {
131        panic!("{}", "expected { after \\u");
132    }
133    s = &s[1..];
134
135    let mut ch = 0;
136    let mut digits = 0;
137    loop {
138        let b = byte(s, 0);
139        let digit = match b {
140            b'0'..=b'9' => b - b'0',
141            b'a'..=b'f' => 10 + b - b'a',
142            b'A'..=b'F' => 10 + b - b'A',
143            b'_' if digits > 0 => {
144                s = &s[1..];
145                continue;
146            }
147            b'}' if digits == 0 => panic!("invalid empty unicode escape"),
148            b'}' => break,
149            _ => panic!("unexpected non-hex character after \\u"),
150        };
151        if digits == 6 {
152            panic!("overlong unicode escape (must have at most 6 hex digits)");
153        }
154        ch *= 0x10;
155        ch += u32::from(digit);
156        digits += 1;
157        s = &s[1..];
158    }
159    assert!(byte(s, 0) == b'}');
160    s = &s[1..];
161
162    if let Some(ch) = char::from_u32(ch) {
163        (ch, s)
164    } else {
165        panic!("character code {:x} is not a valid unicode character", ch);
166    }
167}
168
169// End of code adapted from syn
170
171pub fn parse_lit_str_token(mut stream: proc_macro::TokenStream) -> String {
172    loop {
173        let mut iter = stream.into_iter();
174        let token = iter.next().expect("expected string argument");
175        assert!(iter.next().is_none(), "unexpected trailing token");
176        let literal = match token {
177            proc_macro::TokenTree::Literal(literal) => literal,
178            proc_macro::TokenTree::Group(group) => {
179                stream = group.stream();
180                continue;
181            }
182            _ => panic!("expected string argument found `{:?}`", token),
183        };
184        return parse_lit_str(&literal.to_string());
185    }
186}