usvg/parser/svgtree/
parse.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5use std::collections::HashMap;
6
7use roxmltree::Error;
8use simplecss::Declaration;
9use svgtypes::FontShorthand;
10
11use super::{AId, Attribute, Document, EId, NodeData, NodeId, NodeKind, ShortRange};
12
13const SVG_NS: &str = "http://www.w3.org/2000/svg";
14const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
15const XML_NAMESPACE_NS: &str = "http://www.w3.org/XML/1998/namespace";
16
17impl<'input> Document<'input> {
18    /// Parses a [`Document`] from a [`roxmltree::Document`].
19    pub fn parse_tree(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
20        parse(xml)
21    }
22
23    pub(crate) fn append(&mut self, parent_id: NodeId, kind: NodeKind) -> NodeId {
24        let new_child_id = NodeId::from(self.nodes.len());
25        self.nodes.push(NodeData {
26            parent: Some(parent_id),
27            next_sibling: None,
28            children: None,
29            kind,
30        });
31
32        let last_child_id = self.nodes[parent_id.get_usize()].children.map(|(_, id)| id);
33
34        if let Some(id) = last_child_id {
35            self.nodes[id.get_usize()].next_sibling = Some(new_child_id);
36        }
37
38        self.nodes[parent_id.get_usize()].children = Some(
39            if let Some((first_child_id, _)) = self.nodes[parent_id.get_usize()].children {
40                (first_child_id, new_child_id)
41            } else {
42                (new_child_id, new_child_id)
43            },
44        );
45
46        new_child_id
47    }
48
49    fn append_attribute(&mut self, name: AId, value: roxmltree::StringStorage<'input>) {
50        self.attrs.push(Attribute { name, value });
51    }
52}
53
54fn parse<'input>(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
55    let mut doc = Document {
56        nodes: Vec::new(),
57        attrs: Vec::new(),
58        links: HashMap::new(),
59    };
60
61    // build a map of id -> node for resolve_href
62    let mut id_map = HashMap::new();
63    for node in xml.descendants() {
64        if let Some(id) = node.attribute("id") {
65            if !id_map.contains_key(id) {
66                id_map.insert(id, node);
67            }
68        }
69    }
70
71    // Add a root node.
72    doc.nodes.push(NodeData {
73        parent: None,
74        next_sibling: None,
75        children: None,
76        kind: NodeKind::Root,
77    });
78
79    let style_sheet = resolve_css(xml);
80
81    parse_xml_node_children(
82        xml.root(),
83        xml.root(),
84        doc.root().id,
85        &style_sheet,
86        false,
87        0,
88        &mut doc,
89        &id_map,
90    )?;
91
92    // Check that the root element is `svg`.
93    match doc.root().first_element_child() {
94        Some(child) => {
95            if child.tag_name() != Some(EId::Svg) {
96                return Err(roxmltree::Error::NoRootNode);
97            }
98        }
99        None => return Err(roxmltree::Error::NoRootNode),
100    }
101
102    // Collect all elements with `id` attribute.
103    let mut links = HashMap::new();
104    for node in doc.descendants() {
105        if let Some(id) = node.attribute::<&str>(AId::Id) {
106            links.insert(id.to_string(), node.id);
107        }
108    }
109    doc.links = links;
110
111    fix_recursive_patterns(&mut doc);
112    fix_recursive_links(EId::ClipPath, AId::ClipPath, &mut doc);
113    fix_recursive_links(EId::Mask, AId::Mask, &mut doc);
114    fix_recursive_links(EId::Filter, AId::Filter, &mut doc);
115    fix_recursive_fe_image(&mut doc);
116
117    Ok(doc)
118}
119
120pub(crate) fn parse_tag_name(node: roxmltree::Node) -> Option<EId> {
121    if !node.is_element() {
122        return None;
123    }
124
125    if node.tag_name().namespace() != Some(SVG_NS) {
126        return None;
127    }
128
129    EId::from_str(node.tag_name().name())
130}
131
132fn parse_xml_node_children<'input>(
133    parent: roxmltree::Node<'_, 'input>,
134    origin: roxmltree::Node,
135    parent_id: NodeId,
136    style_sheet: &simplecss::StyleSheet,
137    ignore_ids: bool,
138    depth: u32,
139    doc: &mut Document<'input>,
140    id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
141) -> Result<(), Error> {
142    for node in parent.children() {
143        parse_xml_node(
144            node,
145            origin,
146            parent_id,
147            style_sheet,
148            ignore_ids,
149            depth,
150            doc,
151            id_map,
152        )?;
153    }
154
155    Ok(())
156}
157
158fn parse_xml_node<'input>(
159    node: roxmltree::Node<'_, 'input>,
160    origin: roxmltree::Node,
161    parent_id: NodeId,
162    style_sheet: &simplecss::StyleSheet,
163    ignore_ids: bool,
164    depth: u32,
165    doc: &mut Document<'input>,
166    id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
167) -> Result<(), Error> {
168    if depth > 1024 {
169        return Err(Error::NodesLimitReached);
170    }
171
172    let mut tag_name = match parse_tag_name(node) {
173        Some(id) => id,
174        None => return Ok(()),
175    };
176
177    if tag_name == EId::Style {
178        return Ok(());
179    }
180
181    // TODO: remove?
182    // Treat links as groups.
183    if tag_name == EId::A {
184        tag_name = EId::G;
185    }
186
187    let node_id = parse_svg_element(node, parent_id, tag_name, style_sheet, ignore_ids, doc)?;
188    if tag_name == EId::Text {
189        super::text::parse_svg_text_element(node, node_id, style_sheet, doc)?;
190    } else if tag_name == EId::Use {
191        parse_svg_use_element(node, origin, node_id, style_sheet, depth + 1, doc, id_map)?;
192    } else {
193        parse_xml_node_children(
194            node,
195            origin,
196            node_id,
197            style_sheet,
198            ignore_ids,
199            depth + 1,
200            doc,
201            id_map,
202        )?;
203    }
204
205    Ok(())
206}
207
208pub(crate) fn parse_svg_element<'input>(
209    xml_node: roxmltree::Node<'_, 'input>,
210    parent_id: NodeId,
211    tag_name: EId,
212    style_sheet: &simplecss::StyleSheet,
213    ignore_ids: bool,
214    doc: &mut Document<'input>,
215) -> Result<NodeId, Error> {
216    let attrs_start_idx = doc.attrs.len();
217
218    // Copy presentational attributes first.
219    for attr in xml_node.attributes() {
220        match attr.namespace() {
221            None | Some(SVG_NS) | Some(XLINK_NS) | Some(XML_NAMESPACE_NS) => {}
222            _ => continue,
223        }
224
225        let aid = match AId::from_str(attr.name()) {
226            Some(v) => v,
227            None => continue,
228        };
229
230        // During a `use` resolving, all `id` attributes must be ignored.
231        // Otherwise we will get elements with duplicated id's.
232        if ignore_ids && aid == AId::Id {
233            continue;
234        }
235
236        // For some reason those properties are allowed only inside a `style` attribute and CSS.
237        if matches!(aid, AId::MixBlendMode | AId::Isolation | AId::FontKerning) {
238            continue;
239        }
240
241        append_attribute(parent_id, tag_name, aid, attr.value_storage().clone(), doc);
242    }
243
244    let mut insert_attribute = |aid, value: &str| {
245        // Check that attribute already exists.
246        let idx = doc.attrs[attrs_start_idx..]
247            .iter_mut()
248            .position(|a| a.name == aid);
249
250        // Append an attribute as usual.
251        let added = append_attribute(
252            parent_id,
253            tag_name,
254            aid,
255            roxmltree::StringStorage::new_owned(value),
256            doc,
257        );
258
259        // Check that attribute was actually added, because it could be skipped.
260        if added {
261            if let Some(idx) = idx {
262                // Swap the last attribute with an existing one.
263                let last_idx = doc.attrs.len() - 1;
264                doc.attrs.swap(attrs_start_idx + idx, last_idx);
265                // Remove last.
266                doc.attrs.pop();
267            }
268        }
269    };
270
271    let mut write_declaration = |declaration: &Declaration| {
272        // TODO: perform XML attribute normalization
273        if declaration.name == "marker" {
274            insert_attribute(AId::MarkerStart, declaration.value);
275            insert_attribute(AId::MarkerMid, declaration.value);
276            insert_attribute(AId::MarkerEnd, declaration.value);
277        } else if declaration.name == "font" {
278            if let Ok(shorthand) = FontShorthand::from_str(declaration.value) {
279                // First we need to reset all values to their default.
280                insert_attribute(AId::FontStyle, "normal");
281                insert_attribute(AId::FontVariant, "normal");
282                insert_attribute(AId::FontWeight, "normal");
283                insert_attribute(AId::FontStretch, "normal");
284                insert_attribute(AId::LineHeight, "normal");
285                insert_attribute(AId::FontSizeAdjust, "none");
286                insert_attribute(AId::FontKerning, "auto");
287                insert_attribute(AId::FontVariantCaps, "normal");
288                insert_attribute(AId::FontVariantLigatures, "normal");
289                insert_attribute(AId::FontVariantNumeric, "normal");
290                insert_attribute(AId::FontVariantEastAsian, "normal");
291                insert_attribute(AId::FontVariantPosition, "normal");
292
293                // Then, we set the properties that have been declared.
294                shorthand
295                    .font_stretch
296                    .map(|s| insert_attribute(AId::FontStretch, s));
297                shorthand
298                    .font_weight
299                    .map(|s| insert_attribute(AId::FontWeight, s));
300                shorthand
301                    .font_variant
302                    .map(|s| insert_attribute(AId::FontVariant, s));
303                shorthand
304                    .font_style
305                    .map(|s| insert_attribute(AId::FontStyle, s));
306                insert_attribute(AId::FontSize, shorthand.font_size);
307                insert_attribute(AId::FontFamily, shorthand.font_family);
308            } else {
309                log::warn!(
310                    "Failed to parse {} value: '{}'",
311                    AId::Font,
312                    declaration.value
313                );
314            }
315        } else if let Some(aid) = AId::from_str(declaration.name) {
316            // Parse only the presentation attributes.
317            if aid.is_presentation() {
318                insert_attribute(aid, declaration.value);
319            }
320        }
321    };
322
323    // Apply CSS.
324    for rule in &style_sheet.rules {
325        if rule.selector.matches(&XmlNode(xml_node)) {
326            for declaration in &rule.declarations {
327                write_declaration(declaration);
328            }
329        }
330    }
331
332    // Split a `style` attribute.
333    if let Some(value) = xml_node.attribute("style") {
334        for declaration in simplecss::DeclarationTokenizer::from(value) {
335            write_declaration(&declaration);
336        }
337    }
338
339    if doc.nodes.len() > 1_000_000 {
340        return Err(Error::NodesLimitReached);
341    }
342
343    let node_id = doc.append(
344        parent_id,
345        NodeKind::Element {
346            tag_name,
347            attributes: ShortRange::new(attrs_start_idx as u32, doc.attrs.len() as u32),
348        },
349    );
350
351    Ok(node_id)
352}
353
354fn append_attribute<'input>(
355    parent_id: NodeId,
356    tag_name: EId,
357    aid: AId,
358    value: roxmltree::StringStorage<'input>,
359    doc: &mut Document<'input>,
360) -> bool {
361    match aid {
362        // The `style` attribute will be split into attributes, so we don't need it.
363        AId::Style |
364        // No need to copy a `class` attribute since CSS were already resolved.
365        AId::Class => return false,
366        _ => {}
367    }
368
369    // Ignore `xlink:href` on `tspan` (which was originally `tref` or `a`),
370    // because we will convert `tref` into `tspan` anyway.
371    if tag_name == EId::Tspan && aid == AId::Href {
372        return false;
373    }
374
375    if aid.allows_inherit_value() && &*value == "inherit" {
376        return resolve_inherit(parent_id, aid, doc);
377    }
378
379    doc.append_attribute(aid, value);
380    true
381}
382
383fn resolve_inherit(parent_id: NodeId, aid: AId, doc: &mut Document) -> bool {
384    if aid.is_inheritable() {
385        // Inheritable attributes can inherit a value from an any ancestor.
386        let node_id = doc
387            .get(parent_id)
388            .ancestors()
389            .find(|n| n.has_attribute(aid))
390            .map(|n| n.id);
391        if let Some(node_id) = node_id {
392            if let Some(attr) = doc
393                .get(node_id)
394                .attributes()
395                .iter()
396                .find(|a| a.name == aid)
397                .cloned()
398            {
399                doc.attrs.push(Attribute {
400                    name: aid,
401                    value: attr.value,
402                });
403
404                return true;
405            }
406        }
407    } else {
408        // Non-inheritable attributes can inherit a value only from a direct parent.
409        if let Some(attr) = doc
410            .get(parent_id)
411            .attributes()
412            .iter()
413            .find(|a| a.name == aid)
414            .cloned()
415        {
416            doc.attrs.push(Attribute {
417                name: aid,
418                value: attr.value,
419            });
420
421            return true;
422        }
423    }
424
425    // Fallback to a default value if possible.
426    let value = match aid {
427        AId::ImageRendering | AId::ShapeRendering | AId::TextRendering => "auto",
428
429        AId::ClipPath
430        | AId::Filter
431        | AId::MarkerEnd
432        | AId::MarkerMid
433        | AId::MarkerStart
434        | AId::Mask
435        | AId::Stroke
436        | AId::StrokeDasharray
437        | AId::TextDecoration => "none",
438
439        AId::FontStretch
440        | AId::FontStyle
441        | AId::FontVariant
442        | AId::FontWeight
443        | AId::LetterSpacing
444        | AId::WordSpacing => "normal",
445
446        AId::Fill | AId::FloodColor | AId::StopColor => "black",
447
448        AId::FillOpacity
449        | AId::FloodOpacity
450        | AId::Opacity
451        | AId::StopOpacity
452        | AId::StrokeOpacity => "1",
453
454        AId::ClipRule | AId::FillRule => "nonzero",
455
456        AId::BaselineShift => "baseline",
457        AId::ColorInterpolationFilters => "linearRGB",
458        AId::Direction => "ltr",
459        AId::Display => "inline",
460        AId::FontSize => "medium",
461        AId::Overflow => "visible",
462        AId::StrokeDashoffset => "0",
463        AId::StrokeLinecap => "butt",
464        AId::StrokeLinejoin => "miter",
465        AId::StrokeMiterlimit => "4",
466        AId::StrokeWidth => "1",
467        AId::TextAnchor => "start",
468        AId::Visibility => "visible",
469        AId::WritingMode => "lr-tb",
470        _ => return false,
471    };
472
473    doc.append_attribute(aid, roxmltree::StringStorage::Borrowed(value));
474    true
475}
476
477fn resolve_href<'a, 'input: 'a>(
478    node: roxmltree::Node<'a, 'input>,
479    id_map: &HashMap<&str, roxmltree::Node<'a, 'input>>,
480) -> Option<roxmltree::Node<'a, 'input>> {
481    let link_value = node
482        .attribute((XLINK_NS, "href"))
483        .or_else(|| node.attribute("href"))?;
484
485    let link_id = svgtypes::IRI::from_str(link_value).ok()?.0;
486
487    id_map.get(link_id).copied()
488}
489
490fn parse_svg_use_element<'input>(
491    node: roxmltree::Node<'_, 'input>,
492    origin: roxmltree::Node,
493    parent_id: NodeId,
494    style_sheet: &simplecss::StyleSheet,
495    depth: u32,
496    doc: &mut Document<'input>,
497    id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
498) -> Result<(), Error> {
499    let link = match resolve_href(node, id_map) {
500        Some(v) => v,
501        None => return Ok(()),
502    };
503
504    if link == node || link == origin {
505        log::warn!(
506            "Recursive 'use' detected. '{}' will be skipped.",
507            node.attribute((SVG_NS, "id")).unwrap_or_default()
508        );
509        return Ok(());
510    }
511
512    // Make sure we're linked to an SVG element.
513    if parse_tag_name(link).is_none() {
514        return Ok(());
515    }
516
517    // Check that none of the linked node's children reference current `use` node
518    // via other `use` node.
519    //
520    // Example:
521    // <g id="g1">
522    //     <use xlink:href="#use1" id="use2"/>
523    // </g>
524    // <use xlink:href="#g1" id="use1"/>
525    //
526    // `use2` should be removed.
527    //
528    // Also, child should not reference its parent:
529    // <g id="g1">
530    //     <use xlink:href="#g1" id="use1"/>
531    // </g>
532    //
533    // `use1` should be removed.
534    let mut is_recursive = false;
535    for link_child in link
536        .descendants()
537        .skip(1)
538        .filter(|n| n.has_tag_name((SVG_NS, "use")))
539    {
540        if let Some(link2) = resolve_href(link_child, id_map) {
541            if link2 == node || link2 == link {
542                is_recursive = true;
543                break;
544            }
545        }
546    }
547
548    if is_recursive {
549        log::warn!(
550            "Recursive 'use' detected. '{}' will be skipped.",
551            node.attribute((SVG_NS, "id")).unwrap_or_default()
552        );
553        return Ok(());
554    }
555
556    parse_xml_node(
557        link,
558        node,
559        parent_id,
560        style_sheet,
561        true,
562        depth + 1,
563        doc,
564        id_map,
565    )
566}
567
568fn resolve_css<'a>(xml: &'a roxmltree::Document<'a>) -> simplecss::StyleSheet<'a> {
569    let mut sheet = simplecss::StyleSheet::new();
570
571    for node in xml.descendants().filter(|n| n.has_tag_name("style")) {
572        match node.attribute("type") {
573            Some("text/css") => {}
574            Some(_) => continue,
575            None => {}
576        }
577
578        let text = match node.text() {
579            Some(v) => v,
580            None => continue,
581        };
582
583        sheet.parse_more(text);
584    }
585
586    sheet
587}
588
589struct XmlNode<'a, 'input: 'a>(roxmltree::Node<'a, 'input>);
590
591impl simplecss::Element for XmlNode<'_, '_> {
592    fn parent_element(&self) -> Option<Self> {
593        self.0.parent_element().map(XmlNode)
594    }
595
596    fn prev_sibling_element(&self) -> Option<Self> {
597        self.0.prev_sibling_element().map(XmlNode)
598    }
599
600    fn has_local_name(&self, local_name: &str) -> bool {
601        self.0.tag_name().name() == local_name
602    }
603
604    fn attribute_matches(&self, local_name: &str, operator: simplecss::AttributeOperator) -> bool {
605        match self.0.attribute(local_name) {
606            Some(value) => operator.matches(value),
607            None => false,
608        }
609    }
610
611    fn pseudo_class_matches(&self, class: simplecss::PseudoClass) -> bool {
612        match class {
613            simplecss::PseudoClass::FirstChild => self.prev_sibling_element().is_none(),
614            // TODO: lang
615            _ => false, // Since we are querying a static SVG we can ignore other pseudo-classes.
616        }
617    }
618}
619
620fn fix_recursive_patterns(doc: &mut Document) {
621    while let Some(node_id) = find_recursive_pattern(AId::Fill, doc) {
622        let idx = doc.get(node_id).attribute_id(AId::Fill).unwrap();
623        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
624    }
625
626    while let Some(node_id) = find_recursive_pattern(AId::Stroke, doc) {
627        let idx = doc.get(node_id).attribute_id(AId::Stroke).unwrap();
628        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
629    }
630}
631
632fn find_recursive_pattern(aid: AId, doc: &mut Document) -> Option<NodeId> {
633    for pattern_node in doc
634        .root()
635        .descendants()
636        .filter(|n| n.tag_name() == Some(EId::Pattern))
637    {
638        for node in pattern_node.descendants() {
639            let value = match node.attribute(aid) {
640                Some(v) => v,
641                None => continue,
642            };
643
644            if let Ok(svgtypes::Paint::FuncIRI(link_id, _)) = svgtypes::Paint::from_str(value) {
645                if link_id == pattern_node.element_id() {
646                    // If a pattern child has a link to the pattern itself
647                    // then we have to replace it with `none`.
648                    // Otherwise we will get endless loop/recursion and stack overflow.
649                    return Some(node.id);
650                } else {
651                    // Check that linked node children doesn't link this pattern.
652                    if let Some(linked_node) = doc.element_by_id(link_id) {
653                        for node2 in linked_node.descendants() {
654                            let value2 = match node2.attribute(aid) {
655                                Some(v) => v,
656                                None => continue,
657                            };
658
659                            if let Ok(svgtypes::Paint::FuncIRI(link_id2, _)) =
660                                svgtypes::Paint::from_str(value2)
661                            {
662                                if link_id2 == pattern_node.element_id() {
663                                    return Some(node2.id);
664                                }
665                            }
666                        }
667                    }
668                }
669            }
670        }
671    }
672
673    None
674}
675
676fn fix_recursive_links(eid: EId, aid: AId, doc: &mut Document) {
677    while let Some(node_id) = find_recursive_link(eid, aid, doc) {
678        let idx = doc.get(node_id).attribute_id(aid).unwrap();
679        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
680    }
681}
682
683fn find_recursive_link(eid: EId, aid: AId, doc: &Document) -> Option<NodeId> {
684    for node in doc
685        .root()
686        .descendants()
687        .filter(|n| n.tag_name() == Some(eid))
688    {
689        for child in node.descendants() {
690            if let Some(link) = child.node_attribute(aid) {
691                if link == node {
692                    // If an element child has a link to the element itself
693                    // then we have to replace it with `none`.
694                    // Otherwise we will get endless loop/recursion and stack overflow.
695                    return Some(child.id);
696                } else {
697                    // Check that linked node children doesn't link this element.
698                    for node2 in link.descendants() {
699                        if let Some(link2) = node2.node_attribute(aid) {
700                            if link2 == node {
701                                return Some(node2.id);
702                            }
703                        }
704                    }
705                }
706            }
707        }
708    }
709
710    None
711}
712
713/// Detects cases like:
714///
715/// ```xml
716/// <filter id="filter1">
717///   <feImage xlink:href="#rect1"/>
718/// </filter>
719/// <rect id="rect1" x="36" y="36" width="120" height="120" fill="green" filter="url(#filter1)"/>
720/// ```
721fn fix_recursive_fe_image(doc: &mut Document) {
722    let mut ids = Vec::new();
723    for fe_node in doc
724        .root()
725        .descendants()
726        .filter(|n| n.tag_name() == Some(EId::FeImage))
727    {
728        if let Some(link) = fe_node.node_attribute(AId::Href) {
729            if let Some(filter_uri) = link.attribute::<&str>(AId::Filter) {
730                let filter_id = fe_node.parent().unwrap().element_id();
731                for func in svgtypes::FilterValueListParser::from(filter_uri).flatten() {
732                    if let svgtypes::FilterValue::Url(url) = func {
733                        if url == filter_id {
734                            ids.push(link.id);
735                        }
736                    }
737                }
738            }
739        }
740    }
741
742    for id in ids {
743        let idx = doc.get(id).attribute_id(AId::Filter).unwrap();
744        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
745    }
746}