simplecss/
selector.rs

1// Copyright 2019 the SimpleCSS Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use alloc::{vec, vec::Vec};
5use core::fmt;
6
7use log::warn;
8
9use crate::stream::Stream;
10use crate::Error;
11
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`
    Exists,
    /// `[attr=value]`
    Matches(&'a str),
    /// `[attr~=value]`
    Contains(&'a str),
    /// `[attr|=value]`
    StartsWith(&'a str),
}

impl AttributeOperator<'_> {
    /// Checks whether an attribute `value` satisfies the operator.
    ///
    /// - `Exists` accepts any value.
    /// - `Matches(v)` requires `value` to be exactly `v`.
    /// - `Contains(v)` treats `value` as a list of words separated by CSS
    ///   whitespace (space, tab, line feed, carriage return, form feed) and
    ///   requires one of the words to be exactly `v`. An empty `v` therefore
    ///   never matches.
    /// - `StartsWith(v)` requires `value` to be exactly `v`, or to begin with
    ///   `v` immediately followed by `-`.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            // Split on every ASCII whitespace character, not only on U+0020,
            // so that values like "a\tb" or "a\nb" are handled as word lists.
            AttributeOperator::Contains(v) => {
                value.split_ascii_whitespace().any(|word| word == v)
            }
            AttributeOperator::StartsWith(v) => {
                // exactly `v` or beginning with `v` immediately followed by `-`
                if value == v {
                    true
                } else if value.starts_with(v) {
                    // `get` yields `None` when the next character is multi-byte,
                    // which correctly counts as "not a dash".
                    value.get(v.len()..v.len() + 1) == Some("-")
                } else {
                    false
                }
            }
        }
    }
}
45
/// A pseudo-class.
#[derive(Clone, Copy, PartialEq, Debug)]
#[allow(missing_docs)]
pub enum PseudoClass<'a> {
    FirstChild,
    Link,
    Visited,
    Hover,
    Active,
    Focus,
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class in its CSS form, without the leading `:`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only `lang` carries an argument; every other variant is a fixed keyword.
        let keyword = match *self {
            PseudoClass::Lang(lang) => return write!(f, "lang({})", lang),
            PseudoClass::FirstChild => "first-child",
            PseudoClass::Link => "link",
            PseudoClass::Visited => "visited",
            PseudoClass::Hover => "hover",
            PseudoClass::Active => "active",
            PseudoClass::Focus => "focus",
        };

        f.write_str(keyword)
    }
}
72
/// A trait to query an element node metadata.
///
/// [`Selector::matches`] uses these methods both to test a single element and
/// to walk to its parent and previous sibling when resolving combinators.
pub trait Element: Sized {
    /// Returns the parent element, if any.
    ///
    /// Used for the descendant (`a b`) and child (`a > b`) combinators.
    fn parent_element(&self) -> Option<Self>;

    /// Returns the previous sibling element, if any.
    ///
    /// Used for the adjacent sibling (`a + b`) combinator.
    fn prev_sibling_element(&self) -> Option<Self>;

    /// Checks that the element has a specified local name.
    ///
    /// Called for type selectors such as `div`.
    fn has_local_name(&self, name: &str) -> bool;

    /// Checks that the element has a specified attribute and that its value
    /// satisfies `operator`.
    ///
    /// Class and ID selectors are routed here as well: `.foo` is queried as
    /// the `class` attribute with [`AttributeOperator::Contains`] and `#foo`
    /// as the `id` attribute with [`AttributeOperator::Matches`].
    fn attribute_matches(&self, local_name: &str, operator: AttributeOperator<'_>) -> bool;

    /// Checks that the element matches a specified pseudo-class.
    fn pseudo_class_matches(&self, class: PseudoClass<'_>) -> bool;
}
90
/// The element-name part of a simple selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SimpleSelectorType<'a> {
    /// A type selector such as `div`; checked via `Element::has_local_name`.
    Type(&'a str),
    /// The universal selector `*`. Also used when a selector starts directly
    /// with a subselector (e.g. `.class`), in which case any name is accepted.
    Universal,
}
96
/// An additional condition attached to a simple selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SubSelector<'a> {
    /// An attribute name and the operator its value must satisfy.
    ///
    /// `.class` and `#id` selectors are stored in this form too, under the
    /// `class` and `id` attribute names.
    Attribute(&'a str, AttributeOperator<'a>),
    /// A pseudo-class such as `:first-child`.
    PseudoClass(PseudoClass<'a>),
}
102
/// A type/universal selector together with all of its subselectors,
/// e.g. `div.class[attr]:hover`.
#[derive(Clone, Debug)]
struct SimpleSelector<'a> {
    /// The element-name condition.
    kind: SimpleSelectorType<'a>,
    /// Conditions that must all hold for the element to match.
    subselectors: Vec<SubSelector<'a>>,
}
108
/// Describes how a component relates to the component before it.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Combinator {
    /// No combinator; expected on the first component of a selector.
    None,
    /// `a b`: some ancestor must match the previous component.
    Descendant,
    /// `a > b`: the parent must match the previous component.
    Child,
    /// `a + b`: the previous sibling must match the previous component.
    AdjacentSibling,
}
116
/// One step of a selector: a simple selector plus the combinator linking it
/// to the previous step.
#[derive(Clone, Debug)]
struct Component<'a> {
    /// The combinator that precedes the selector.
    combinator: Combinator,
    /// The simple selector that an element must satisfy at this step.
    selector: SimpleSelector<'a>,
}
123
/// A selector.
///
/// Borrows all of its names and values from the text it was parsed from.
#[derive(Clone, Debug)]
pub struct Selector<'a> {
    /// Components in source order; the last one is the component that the
    /// tested element itself has to satisfy.
    components: Vec<Component<'a>>,
}
129
130impl<'a> Selector<'a> {
131    /// Parses a selector from a string.
132    ///
133    /// Will log any errors as a warnings.
134    ///
135    /// Parsing will be stopped at EOF, `,` or `{`.
136    pub fn parse(text: &'a str) -> Option<Self> {
137        parse(text).0
138    }
139
140    /// Compute the selector's specificity.
141    ///
142    /// Cf. <https://www.w3.org/TR/selectors/#specificity>.
143    pub fn specificity(&self) -> [u8; 3] {
144        let mut spec = [0u8; 3];
145
146        for selector in self.components.iter().map(|c| &c.selector) {
147            if matches!(selector.kind, SimpleSelectorType::Type(_)) {
148                spec[2] = spec[2].saturating_add(1);
149            }
150
151            for sub in &selector.subselectors {
152                match sub {
153                    SubSelector::Attribute("id", _) => spec[0] = spec[0].saturating_add(1),
154                    _ => spec[1] = spec[1].saturating_add(1),
155                }
156            }
157        }
158
159        spec
160    }
161
162    /// Checks that the provided element matches the current selector.
163    pub fn matches<E: Element>(&self, element: &E) -> bool {
164        assert!(!self.components.is_empty(), "selector must not be empty");
165        assert_eq!(
166            self.components[0].combinator,
167            Combinator::None,
168            "the first component must not have a combinator"
169        );
170
171        self.matches_impl(self.components.len() - 1, element)
172    }
173
174    fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool {
175        let component = &self.components[idx];
176
177        if !match_selector(&component.selector, element) {
178            return false;
179        }
180
181        match component.combinator {
182            Combinator::Descendant => {
183                let mut parent = element.parent_element();
184                while let Some(e) = parent {
185                    if self.matches_impl(idx - 1, &e) {
186                        return true;
187                    }
188
189                    parent = e.parent_element();
190                }
191
192                false
193            }
194            Combinator::Child => {
195                if let Some(parent) = element.parent_element() {
196                    if self.matches_impl(idx - 1, &parent) {
197                        return true;
198                    }
199                }
200
201                false
202            }
203            Combinator::AdjacentSibling => {
204                if let Some(prev) = element.prev_sibling_element() {
205                    if self.matches_impl(idx - 1, &prev) {
206                        return true;
207                    }
208                }
209
210                false
211            }
212            Combinator::None => true,
213        }
214    }
215}
216
217fn match_selector<E: Element>(selector: &SimpleSelector<'_>, element: &E) -> bool {
218    if let SimpleSelectorType::Type(ident) = selector.kind {
219        if !element.has_local_name(ident) {
220            return false;
221        }
222    }
223
224    for sub in &selector.subselectors {
225        match sub {
226            SubSelector::Attribute(name, operator) => {
227                if !element.attribute_matches(name, *operator) {
228                    return false;
229                }
230            }
231            SubSelector::PseudoClass(class) => {
232                if !element.pseudo_class_matches(*class) {
233                    return false;
234                }
235            }
236        }
237    }
238
239    true
240}
241
242pub(crate) fn parse(text: &str) -> (Option<Selector<'_>>, usize) {
243    let mut components: Vec<Component<'_>> = Vec::new();
244    let mut combinator = Combinator::None;
245
246    let mut tokenizer = SelectorTokenizer::from(text);
247    for token in &mut tokenizer {
248        let mut add_sub = |sub| {
249            if combinator == Combinator::None && !components.is_empty() {
250                if let Some(ref mut component) = components.last_mut() {
251                    component.selector.subselectors.push(sub);
252                }
253            } else {
254                components.push(Component {
255                    selector: SimpleSelector {
256                        kind: SimpleSelectorType::Universal,
257                        subselectors: vec![sub],
258                    },
259                    combinator,
260                });
261
262                combinator = Combinator::None;
263            }
264        };
265
266        let token = match token {
267            Ok(t) => t,
268            Err(e) => {
269                warn!("Selector parsing failed cause {}.", e);
270                return (None, tokenizer.stream.pos());
271            }
272        };
273
274        match token {
275            SelectorToken::UniversalSelector => {
276                components.push(Component {
277                    selector: SimpleSelector {
278                        kind: SimpleSelectorType::Universal,
279                        subselectors: Vec::new(),
280                    },
281                    combinator,
282                });
283
284                combinator = Combinator::None;
285            }
286            SelectorToken::TypeSelector(ident) => {
287                components.push(Component {
288                    selector: SimpleSelector {
289                        kind: SimpleSelectorType::Type(ident),
290                        subselectors: Vec::new(),
291                    },
292                    combinator,
293                });
294
295                combinator = Combinator::None;
296            }
297            SelectorToken::ClassSelector(ident) => {
298                add_sub(SubSelector::Attribute(
299                    "class",
300                    AttributeOperator::Contains(ident),
301                ));
302            }
303            SelectorToken::IdSelector(id) => {
304                add_sub(SubSelector::Attribute("id", AttributeOperator::Matches(id)));
305            }
306            SelectorToken::AttributeSelector(name, op) => {
307                add_sub(SubSelector::Attribute(name, op));
308            }
309            SelectorToken::PseudoClass(ident) => {
310                let class = match ident {
311                    "first-child" => PseudoClass::FirstChild,
312                    "link" => PseudoClass::Link,
313                    "visited" => PseudoClass::Visited,
314                    "hover" => PseudoClass::Hover,
315                    "active" => PseudoClass::Active,
316                    "focus" => PseudoClass::Focus,
317                    _ => {
318                        warn!("':{}' is not supported. Selector skipped.", ident);
319                        return (None, tokenizer.stream.pos());
320                    }
321                };
322
323                // TODO: duplicates
324                // TODO: order
325
326                add_sub(SubSelector::PseudoClass(class));
327            }
328            SelectorToken::LangPseudoClass(lang) => {
329                add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang)));
330            }
331            SelectorToken::DescendantCombinator => {
332                combinator = Combinator::Descendant;
333            }
334            SelectorToken::ChildCombinator => {
335                combinator = Combinator::Child;
336            }
337            SelectorToken::AdjacentCombinator => {
338                combinator = Combinator::AdjacentSibling;
339            }
340        }
341    }
342
343    if components.is_empty() {
344        (None, tokenizer.stream.pos())
345    } else if components[0].combinator != Combinator::None {
346        debug_assert_eq!(
347            components[0].combinator,
348            Combinator::None,
349            "the first component must not have a combinator"
350        );
351
352        (None, tokenizer.stream.pos())
353    } else {
354        (Some(Selector { components }), tokenizer.stream.pos())
355    }
356}
357
358impl fmt::Display for Selector<'_> {
359    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
360        for component in &self.components {
361            match component.combinator {
362                Combinator::Descendant => write!(f, " ")?,
363                Combinator::Child => write!(f, " > ")?,
364                Combinator::AdjacentSibling => write!(f, " + ")?,
365                Combinator::None => {}
366            }
367
368            match component.selector.kind {
369                SimpleSelectorType::Universal => write!(f, "*")?,
370                SimpleSelectorType::Type(ident) => write!(f, "{}", ident)?,
371            };
372
373            for sel in &component.selector.subselectors {
374                match sel {
375                    SubSelector::Attribute(name, operator) => {
376                        match operator {
377                            AttributeOperator::Exists => {
378                                write!(f, "[{}]", name)?;
379                            }
380                            AttributeOperator::Matches(value) => {
381                                write!(f, "[{}='{}']", name, value)?;
382                            }
383                            AttributeOperator::Contains(value) => {
384                                write!(f, "[{}~='{}']", name, value)?;
385                            }
386                            AttributeOperator::StartsWith(value) => {
387                                write!(f, "[{}|='{}']", name, value)?;
388                            }
389                        };
390                    }
391                    SubSelector::PseudoClass(class) => write!(f, ":{}", class)?,
392                }
393            }
394        }
395
396        Ok(())
397    }
398}
399
/// A selector token.
///
/// Produced by [`SelectorTokenizer`]; string payloads borrow from the
/// tokenized text.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum SelectorToken<'a> {
    /// `*`
    UniversalSelector,

    /// `div`
    TypeSelector(&'a str),

    /// `.class` (the name is stored without the leading `.`)
    ClassSelector(&'a str),

    /// `#id` (the name is stored without the leading `#`)
    IdSelector(&'a str),

    /// `[color=red]` (the attribute name and its operator)
    AttributeSelector(&'a str, AttributeOperator<'a>),

    /// `:first-child` (the name is stored without the leading `:`)
    PseudoClass(&'a str),

    /// `:lang(en)` (only the trimmed argument is stored)
    LangPseudoClass(&'a str),

    /// `a b`
    DescendantCombinator,

    /// `a > b`
    ChildCombinator,

    /// `a + b`
    AdjacentCombinator,
}
433
/// A selector tokenizer.
///
/// Yields `Result<SelectorToken, Error>` items; after an error, or once `,` or
/// `{` is reached, no further tokens are produced.
///
/// # Example
///
/// ```
/// use simplecss::{SelectorTokenizer, SelectorToken};
///
/// let mut t = SelectorTokenizer::from("div > p:first-child");
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("div"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::ChildCombinator);
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("p"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::PseudoClass("first-child"));
/// assert!(t.next().is_none());
/// ```
pub struct SelectorTokenizer<'a> {
    /// The input being tokenized.
    stream: Stream<'a>,
    /// `true` while a selector is expected next: at the very start and right
    /// after a combinator token has been produced.
    after_combinator: bool,
    /// Set once tokenization must stop: after an error, or when `,`, `{` or a
    /// `/` that does not open a comment is reached.
    finished: bool,
}
453
454impl<'a> From<&'a str> for SelectorTokenizer<'a> {
455    fn from(text: &'a str) -> Self {
456        SelectorTokenizer {
457            stream: Stream::from(text),
458            after_combinator: true,
459            finished: false,
460        }
461    }
462}
463
464impl<'a> Iterator for SelectorTokenizer<'a> {
465    type Item = Result<SelectorToken<'a>, Error>;
466
467    fn next(&mut self) -> Option<Self::Item> {
468        if self.finished || self.stream.at_end() {
469            if self.after_combinator {
470                self.after_combinator = false;
471                return Some(Err(Error::SelectorMissing));
472            }
473
474            return None;
475        }
476
477        macro_rules! try2 {
478            ($e:expr) => {
479                match $e {
480                    Ok(v) => v,
481                    Err(e) => {
482                        self.finished = true;
483                        return Some(Err(e));
484                    }
485                }
486            };
487        }
488
489        match self.stream.curr_byte_unchecked() {
490            b'*' => {
491                if !self.after_combinator {
492                    self.finished = true;
493                    return Some(Err(Error::UnexpectedSelector));
494                }
495
496                self.after_combinator = false;
497                self.stream.advance(1);
498                Some(Ok(SelectorToken::UniversalSelector))
499            }
500            b'#' => {
501                self.after_combinator = false;
502                self.stream.advance(1);
503                let ident = try2!(self.stream.consume_ident());
504                Some(Ok(SelectorToken::IdSelector(ident)))
505            }
506            b'.' => {
507                self.after_combinator = false;
508                self.stream.advance(1);
509                let ident = try2!(self.stream.consume_ident());
510                Some(Ok(SelectorToken::ClassSelector(ident)))
511            }
512            b'[' => {
513                self.after_combinator = false;
514                self.stream.advance(1);
515                let ident = try2!(self.stream.consume_ident());
516
517                let op = match try2!(self.stream.curr_byte()) {
518                    b']' => AttributeOperator::Exists,
519                    b'=' => {
520                        self.stream.advance(1);
521                        let value = try2!(self.stream.consume_string());
522                        AttributeOperator::Matches(value)
523                    }
524                    b'~' => {
525                        self.stream.advance(1);
526                        try2!(self.stream.consume_byte(b'='));
527                        let value = try2!(self.stream.consume_string());
528                        AttributeOperator::Contains(value)
529                    }
530                    b'|' => {
531                        self.stream.advance(1);
532                        try2!(self.stream.consume_byte(b'='));
533                        let value = try2!(self.stream.consume_string());
534                        AttributeOperator::StartsWith(value)
535                    }
536                    _ => {
537                        self.finished = true;
538                        return Some(Err(Error::InvalidAttributeSelector));
539                    }
540                };
541
542                try2!(self.stream.consume_byte(b']'));
543
544                Some(Ok(SelectorToken::AttributeSelector(ident, op)))
545            }
546            b':' => {
547                self.after_combinator = false;
548                self.stream.advance(1);
549                let ident = try2!(self.stream.consume_ident());
550
551                if ident == "lang" {
552                    try2!(self.stream.consume_byte(b'('));
553                    let lang = self.stream.consume_bytes(|c| c != b')').trim();
554                    try2!(self.stream.consume_byte(b')'));
555
556                    if lang.is_empty() {
557                        self.finished = true;
558                        return Some(Err(Error::InvalidLanguagePseudoClass));
559                    }
560
561                    Some(Ok(SelectorToken::LangPseudoClass(lang)))
562                } else {
563                    Some(Ok(SelectorToken::PseudoClass(ident)))
564                }
565            }
566            b'>' => {
567                if self.after_combinator {
568                    self.after_combinator = false;
569                    self.finished = true;
570                    return Some(Err(Error::UnexpectedCombinator));
571                }
572
573                self.stream.advance(1);
574                self.after_combinator = true;
575                Some(Ok(SelectorToken::ChildCombinator))
576            }
577            b'+' => {
578                if self.after_combinator {
579                    self.after_combinator = false;
580                    self.finished = true;
581                    return Some(Err(Error::UnexpectedCombinator));
582                }
583
584                self.stream.advance(1);
585                self.after_combinator = true;
586                Some(Ok(SelectorToken::AdjacentCombinator))
587            }
588            b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
589                self.stream.skip_spaces();
590
591                if self.after_combinator {
592                    return self.next();
593                }
594
595                while self.stream.curr_byte() == Ok(b'/') {
596                    try2!(self.stream.skip_comment());
597                    self.stream.skip_spaces();
598                }
599
600                match self.stream.curr_byte() {
601                    Ok(b'>') | Ok(b'+') | Ok(b',') | Ok(b'{') | Err(_) => self.next(),
602                    _ => {
603                        if self.after_combinator {
604                            self.after_combinator = false;
605                            self.finished = true;
606                            return Some(Err(Error::UnexpectedSelector));
607                        }
608
609                        self.after_combinator = true;
610                        Some(Ok(SelectorToken::DescendantCombinator))
611                    }
612                }
613            }
614            b'/' => {
615                if self.stream.next_byte() == Ok(b'*') {
616                    try2!(self.stream.skip_comment());
617                } else {
618                    self.finished = true;
619                }
620
621                self.next()
622            }
623            b',' | b'{' => {
624                self.finished = true;
625                self.next()
626            }
627            _ => {
628                let ident = try2!(self.stream.consume_ident());
629
630                if !self.after_combinator {
631                    self.finished = true;
632                    return Some(Err(Error::UnexpectedSelector));
633                }
634
635                self.after_combinator = false;
636                Some(Ok(SelectorToken::TypeSelector(ident)))
637            }
638        }
639    }
640}