usvg/parser/svgtree/
text.rs

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5#![allow(clippy::comparison_chain)]
6
7use roxmltree::Error;
8
9use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
10
11const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
12
13pub(crate) fn parse_svg_text_element<'input>(
14    parent: roxmltree::Node<'_, 'input>,
15    parent_id: NodeId,
16    style_sheet: &simplecss::StyleSheet,
17    doc: &mut Document<'input>,
18) -> Result<(), Error> {
19    debug_assert_eq!(parent.tag_name().name(), "text");
20
21    let space = if doc.get(parent_id).has_attribute(AId::Space) {
22        get_xmlspace(doc, parent_id, XmlSpace::Default)
23    } else {
24        if let Some(node) = doc
25            .get(parent_id)
26            .ancestors()
27            .find(|n| n.has_attribute(AId::Space))
28        {
29            get_xmlspace(doc, node.id, XmlSpace::Default)
30        } else {
31            XmlSpace::Default
32        }
33    };
34
35    parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
36
37    trim_text_nodes(parent_id, space, doc);
38    Ok(())
39}
40
41fn parse_svg_text_element_impl<'input>(
42    parent: roxmltree::Node<'_, 'input>,
43    parent_id: NodeId,
44    style_sheet: &simplecss::StyleSheet,
45    space: XmlSpace,
46    doc: &mut Document<'input>,
47) -> Result<(), Error> {
48    for node in parent.children() {
49        if node.is_text() {
50            let text = trim_text(node.text().unwrap(), space);
51            doc.append(parent_id, NodeKind::Text(text));
52            continue;
53        }
54
55        let mut tag_name = match super::parse::parse_tag_name(node) {
56            Some(v) => v,
57            None => continue,
58        };
59
60        if tag_name == EId::A {
61            // Treat links as simple text.
62            tag_name = EId::Tspan;
63        }
64
65        if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) {
66            continue;
67        }
68
69        // `textPath` must be a direct `text` child.
70        if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
71            continue;
72        }
73
74        // We are converting `tref` into `tspan` to simplify later use.
75        let mut is_tref = false;
76        if tag_name == EId::Tref {
77            tag_name = EId::Tspan;
78            is_tref = true;
79        }
80
81        let node_id =
82            super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?;
83        let space = get_xmlspace(doc, node_id, space);
84
85        if is_tref {
86            let link_value = node
87                .attribute((XLINK_NS, "href"))
88                .or_else(|| node.attribute("href"));
89
90            if let Some(href) = link_value {
91                if let Some(text) = resolve_tref_text(node.document(), href) {
92                    let text = trim_text(&text, space);
93                    doc.append(node_id, NodeKind::Text(text));
94                }
95            }
96        } else {
97            parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
98        }
99    }
100
101    Ok(())
102}
103
104fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
105    let id = svgtypes::IRI::from_str(href).ok()?.0;
106
107    // Find linked element in the original tree.
108    let node = xml.descendants().find(|n| n.attribute("id") == Some(id))?;
109
110    // `tref` should be linked to an SVG element.
111    super::parse::parse_tag_name(node)?;
112
113    // 'All character data within the referenced element, including character data enclosed
114    // within additional markup, will be rendered.'
115    //
116    // So we don't care about attributes and everything. Just collecting text nodes data.
117    //
118    // Note: we have to filter nodes by `is_text()` first since `text()` will look up
119    // for text nodes in element children therefore we will get duplicates.
120    let text: String = node
121        .descendants()
122        .filter(|n| n.is_text())
123        .filter_map(|n| n.text())
124        .collect();
125    if text.is_empty() {
126        None
127    } else {
128        Some(text)
129    }
130}
131
/// The `xml:space` mode that applies to a piece of text.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum XmlSpace {
    /// Runs of spaces are collapsed and boundary spaces are trimmed.
    Default,
    /// Spaces are kept as they are.
    Preserve,
}
137
138fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
139    match doc.get(node_id).attribute(AId::Space) {
140        Some("preserve") => XmlSpace::Preserve,
141        Some(_) => XmlSpace::Default,
142        _ => default,
143    }
144}
145
/// Removal of exactly one boundary space from a string.
///
/// Both methods expect the caller to have verified that the boundary
/// character really is a space; this is only checked in debug builds.
trait StrTrim {
    /// Removes the leading space.
    fn remove_first_space(&mut self);

    /// Removes the trailing space.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert_eq!(self.chars().next().unwrap(), ' ');
        // A space is a single byte, so cutting the first byte is enough.
        self.replace_range(..1, "");
    }

    fn remove_last_space(&mut self) {
        debug_assert_eq!(self.chars().next_back().unwrap(), ' ');
        self.pop();
    }
}
162
163/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
164///
165/// This function handles:
166/// - 'xml:space' processing
167/// - tabs and newlines removing/replacing
168/// - spaces trimming
169fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
170    let mut nodes = Vec::new(); // TODO: allocate only once
171    collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);
172
173    // `trim` method has already collapsed all spaces into a single one,
174    // so we have to check only for one leading or trailing space.
175
176    if nodes.len() == 1 {
177        // Process element with a single text node child.
178
179        let node_id = nodes[0].0;
180
181        if xmlspace == XmlSpace::Default {
182            if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
183                match text.len() {
184                    0 => {} // An empty string. Do nothing.
185                    1 => {
186                        // If string has only one character and it's a space - clear this string.
187                        if text.as_bytes()[0] == b' ' {
188                            text.clear();
189                        }
190                    }
191                    _ => {
192                        // 'text' has at least 2 bytes, so indexing is safe.
193                        let c1 = text.as_bytes()[0];
194                        let c2 = text.as_bytes()[text.len() - 1];
195
196                        if c1 == b' ' {
197                            text.remove_first_space();
198                        }
199
200                        if c2 == b' ' {
201                            text.remove_last_space();
202                        }
203                    }
204                }
205            }
206        } else {
207            // Do nothing when xml:space=preserve.
208        }
209    } else if nodes.len() > 1 {
210        // Process element with many text node children.
211
212        // We manage all text nodes as a single text node
213        // and trying to remove duplicated spaces across nodes.
214        //
215        // For example    '<text>Text <tspan> text </tspan> text</text>'
216        // is the same is '<text>Text <tspan>text</tspan> text</text>'
217
218        let mut i = 0;
219        let len = nodes.len() - 1;
220        let mut last_non_empty: Option<NodeId> = None;
221        while i < len {
222            // Process pairs.
223            let (mut node1_id, depth1) = nodes[i];
224            let (node2_id, depth2) = nodes[i + 1];
225
226            if doc.get(node1_id).text().is_empty() {
227                if let Some(n) = last_non_empty {
228                    node1_id = n;
229                }
230            }
231
232            // Parent of the text node is always an element node and always exist,
233            // so unwrap is safe.
234            let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
235            let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
236
237            // >text<..>text<
238            //  1  2    3  4
239            let (c1, c2, c3, c4) = {
240                let text1 = doc.get(node1_id).text();
241                let text2 = doc.get(node2_id).text();
242
243                let bytes1 = text1.as_bytes();
244                let bytes2 = text2.as_bytes();
245
246                let c1 = bytes1.first().cloned();
247                let c2 = bytes1.last().cloned();
248                let c3 = bytes2.first().cloned();
249                let c4 = bytes2.last().cloned();
250
251                (c1, c2, c3, c4)
252            };
253
254            // NOTE: xml:space processing is mostly an undefined behavior,
255            // because everyone do it differently.
256            // We're mimicking the Chrome behavior.
257
258            // Remove space from the second text node if both nodes has bound spaces.
259            // From: '<text>Text <tspan> text</tspan></text>'
260            // To:   '<text>Text <tspan>text</tspan></text>'
261            //
262            // See text-tspan-02-b.svg for details.
263            if depth1 < depth2 {
264                if c3 == Some(b' ') {
265                    if xmlspace2 == XmlSpace::Default {
266                        if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
267                            text.remove_first_space();
268                        }
269                    }
270                }
271            } else {
272                if c2 == Some(b' ') && c2 == c3 {
273                    if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
274                        if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
275                            text.remove_last_space();
276                        }
277                    } else {
278                        if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
279                            if let NodeKind::Text(ref mut text) =
280                                doc.nodes[node2_id.get_usize()].kind
281                            {
282                                text.remove_first_space();
283                            }
284                        }
285                    }
286                }
287            }
288
289            let is_first = i == 0;
290            let is_last = i == len - 1;
291
292            if is_first
293                && c1 == Some(b' ')
294                && xmlspace1 == XmlSpace::Default
295                && !doc.get(node1_id).text().is_empty()
296            {
297                // Remove a leading space from a first text node.
298                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
299                    text.remove_first_space();
300                }
301            } else if is_last
302                && c4 == Some(b' ')
303                && !doc.get(node2_id).text().is_empty()
304                && xmlspace2 == XmlSpace::Default
305            {
306                // Remove a trailing space from a last text node.
307                // Also check that 'text2' is not empty already.
308                if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
309                    text.remove_last_space();
310                }
311            }
312
313            if is_last
314                && c2 == Some(b' ')
315                && !doc.get(node1_id).text().is_empty()
316                && doc.get(node2_id).text().is_empty()
317                && doc.get(node1_id).text().ends_with(' ')
318            {
319                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
320                    text.remove_last_space();
321                }
322            }
323
324            if !doc.get(node1_id).text().trim().is_empty() {
325                last_non_empty = Some(node1_id);
326            }
327
328            i += 1;
329        }
330    }
331
332    // TODO: find a way to remove all empty text nodes
333}
334
335fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
336    for child in parent.children() {
337        if child.is_text() {
338            nodes.push((child.id, depth));
339        } else if child.is_element() {
340            collect_text_nodes(child, depth + 1, nodes);
341        }
342    }
343}
344
345fn trim_text(text: &str, space: XmlSpace) -> String {
346    let mut s = String::with_capacity(text.len());
347
348    let mut prev = '0';
349    for c in text.chars() {
350        // \r, \n and \t should be converted into spaces.
351        let c = match c {
352            '\r' | '\n' | '\t' => ' ',
353            _ => c,
354        };
355
356        // Skip continuous spaces.
357        if space == XmlSpace::Default && c == ' ' && c == prev {
358            continue;
359        }
360
361        prev = c;
362
363        s.push(c);
364    }
365
366    s
367}