toml_edit/parser/
trivia.rs
1use std::ops::RangeInclusive;
2
3use winnow::combinator::alt;
4use winnow::combinator::eof;
5use winnow::combinator::opt;
6use winnow::combinator::repeat;
7use winnow::combinator::terminated;
8use winnow::prelude::*;
9use winnow::token::one_of;
10use winnow::token::take_while;
11
12use crate::parser::prelude::*;
13
/// Reinterprets `bytes` as a `&str`, validating the UTF-8 only in builds with
/// debug assertions enabled.
///
/// When `debug_assertions` is on, the bytes are checked with
/// [`std::str::from_utf8`] and a failure panics with `safety_justification` as
/// the message. Otherwise the bytes are converted without any check.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if !cfg!(debug_assertions) {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8.
        return unsafe { std::str::from_utf8_unchecked(bytes) };
    }
    // Debug builds verify the caller's claim instead of trusting it.
    std::str::from_utf8(bytes).expect(safety_justification)
}
25
/// Whitespace bytes accepted by the parsers in this module: space (`0x20`) and
/// horizontal tab (`0x09`).
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
29
30pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
32 take_while(0.., WSCHAR)
33 .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
34 .parse_next(input)
35}
36
/// Every byte with the high bit set, i.e. any byte outside the ASCII range.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;

/// Bytes accepted inside a comment body: horizontal tab, printable ASCII
/// (`0x20..=0x7E`), and any [`NON_ASCII`] byte. Line feed and carriage return
/// are not part of the set.
pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
    (b'\t', b' '..=b'~', NON_ASCII);

/// Byte that opens a comment.
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
49
50pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
52 (COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
53 .recognize()
54 .parse_next(input)
55}
56
57pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
60 alt((
61 one_of(LF).value(b'\n'),
62 (one_of(CR), one_of(LF)).value(b'\n'),
63 ))
64 .parse_next(input)
65}
/// Line feed byte (`\n`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return byte (`\r`).
pub(crate) const CR: u8 = 0x0D;
68
69pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
71 repeat(
72 0..,
73 alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
74 )
75 .map(|()| ())
76 .recognize()
77 .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
78 .parse_next(input)
79}
80
81pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
83 (newline, ws_newline)
84 .recognize()
85 .map(|b| unsafe {
86 from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
87 })
88 .parse_next(input)
89}
90
91pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
94 repeat(
95 0..,
96 alt((
97 repeat(
98 1..,
99 alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))),
100 )
101 .map(|()| ()),
102 comment.value(()),
103 )),
104 )
105 .map(|()| ())
106 .recognize()
107 .parse_next(input)
108}
109
110pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
113 alt((newline.value("\n"), eof.value(""))).parse_next(input)
114}
115
116pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> {
119 terminated((ws, opt(comment)).span(), line_ending).parse_next(input)
120}
121
#[cfg(test)]
mod test {
    use super::*;

    /// Every input consists solely of whitespace, line endings and comments,
    /// so `ws_comment_newline` must consume it entirely and return it unchanged.
    #[test]
    fn trivia() {
        let inputs = [
            "",
            r#" "#,
            r#"
"#,
            r#"
# comment

# comment2


"#,
            r#"
 "#,
            r#"# comment
# comment2


 "#,
        ];
        for input in inputs {
            // The offending input is reported in the failure message itself
            // rather than through a leftover `dbg!` call.
            match ws_comment_newline.parse(new_input(input)) {
                Ok(parsed) => assert_eq!(parsed, input.as_bytes(), "input: {:?}", input),
                Err(err) => panic!("failed to parse {:?}: {:?}", input, err),
            }
        }
    }
}