idna/
deprecated.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Deprecated API for [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)

#![allow(deprecated)]

use alloc::borrow::Cow;
use alloc::string::String;

use crate::uts46::*;
use crate::Errors;

/// Applies preprocessing equivalent to UTS 46 transitional processing
/// when `transitional` is `true`. When `transitional` is `false`, the
/// input is handed back untouched so that call sites need not branch.
///
/// The mappings performed are: `ß` and `ẞ` become `ss`, `ς` becomes `σ`,
/// and U+200C / U+200D are dropped. The input is returned borrowed unless
/// at least one of those characters is present.
///
/// The output of this function is to be passed to [`Uts46::process`].
fn map_transitional(domain: &str, transitional: bool) -> Cow<'_, str> {
    if !transitional {
        return Cow::Borrowed(domain);
    }
    let needs_mapping = |c: char| matches!(c, 'ß' | 'ẞ' | 'ς' | '\u{200C}' | '\u{200D}');
    // Byte offset of the first character that has to be rewritten;
    // everything in front of it can be copied verbatim.
    let split = match domain.find(needs_mapping) {
        Some(offset) => offset,
        // Nothing to rewrite, so there is no reason to allocate.
        None => return Cow::Borrowed(domain),
    };
    let (untouched, rest) = domain.split_at(split);
    let mut mapped = String::with_capacity(domain.len());
    mapped.push_str(untouched);
    for ch in rest.chars() {
        match ch {
            'ß' | 'ẞ' => mapped.push_str("ss"),
            'ς' => mapped.push('σ'),
            '\u{200C}' | '\u{200D}' => {}
            other => mapped.push(other),
        }
    }
    Cow::Owned(mapped)
}

/// Deprecated. Use the crate-top-level functions or [`Uts46`].
///
/// Holds a [`Config`] and offers `to_ascii` / `to_unicode` methods that
/// write their result into a caller-supplied `String`.
#[derive(Default)]
#[deprecated]
pub struct Idna {
    /// Options consulted by `to_ascii` and `to_unicode`.
    config: Config,
}

impl Idna {
    pub fn new(config: Config) -> Self {
        Self { config }
    }

    /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
    #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
    pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
        let mapped = map_transitional(domain, self.config.transitional_processing);
        match Uts46::new().process(
            mapped.as_bytes(),
            self.config.deny_list(),
            self.config.hyphens(),
            ErrorPolicy::FailFast, // Old code did not appear to expect the output to be useful in the error case.
            |_, _, _| false,
            out,
            None,
        ) {
            Ok(ProcessingSuccess::Passthrough) => {
                if self.config.verify_dns_length && !verify_dns_length(&mapped, true) {
                    return Err(crate::Errors::default());
                }
                out.push_str(&mapped);
                Ok(())
            }
            Ok(ProcessingSuccess::WroteToSink) => {
                if self.config.verify_dns_length && !verify_dns_length(out, true) {
                    return Err(crate::Errors::default());
                }
                Ok(())
            }
            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
            Err(ProcessingError::SinkError) => unreachable!(),
        }
    }

    /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
    #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
    pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
        let mapped = map_transitional(domain, self.config.transitional_processing);
        match Uts46::new().process(
            mapped.as_bytes(),
            self.config.deny_list(),
            self.config.hyphens(),
            ErrorPolicy::MarkErrors,
            |_, _, _| true,
            out,
            None,
        ) {
            Ok(ProcessingSuccess::Passthrough) => {
                out.push_str(&mapped);
                Ok(())
            }
            Ok(ProcessingSuccess::WroteToSink) => Ok(()),
            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
            Err(ProcessingError::SinkError) => unreachable!(),
        }
    }
}

/// Deprecated configuration API.
///
/// Values are built by chaining the setter methods, each of which takes
/// the configuration by value and returns the updated copy.
#[derive(Clone, Copy)]
#[must_use]
#[deprecated]
pub struct Config {
    /// `true` selects the STD3 ASCII deny list; `false` selects no deny list.
    use_std3_ascii_rules: bool,
    /// Whether the input is preprocessed with the transitional mappings.
    transitional_processing: bool,
    /// Whether `to_ascii` additionally performs the DNS length check.
    verify_dns_length: bool,
    /// Whether hyphens in the first and last positions are rejected.
    check_hyphens: bool,
}

/// The defaults are that of _beStrict=false_ in the [WHATWG URL Standard](https://url.spec.whatwg.org/#idna)
impl Default for Config {
    fn default() -> Self {
        Config {
            use_std3_ascii_rules: false,
            transitional_processing: false,
            check_hyphens: false,
            // Only use for to_ascii, not to_unicode
            verify_dns_length: false,
        }
    }
}

impl Config {
    /// Whether to enforce STD3 or WHATWG URL Standard ASCII deny list.
    ///
    /// `true` for STD3, `false` for no deny list.
    ///
    /// Note that `true` rejects pseudo-hosts used by various TXT record-based protocols.
    #[inline]
    pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
        self.use_std3_ascii_rules = value;
        self
    }

    /// Whether to enable (deprecated) transitional processing.
    ///
    /// Note that Firefox, Safari, and Chrome do not use transitional
    /// processing.
    #[inline]
    pub fn transitional_processing(mut self, value: bool) -> Self {
        self.transitional_processing = value;
        self
    }

    /// Whether the _VerifyDNSLength_ operation should be performed
    /// by `to_ascii`.
    ///
    /// For compatibility with previous behavior, even when set to `true`,
    /// the trailing root label dot is allowed contrary to the spec.
    #[inline]
    pub fn verify_dns_length(mut self, value: bool) -> Self {
        self.verify_dns_length = value;
        self
    }

    /// Whether to enforce STD3 rules for hyphen placement.
    ///
    /// `true` to deny hyphens in the first and last positions.
    /// `false` to not enforce hyphen placement.
    ///
    /// Note that for backward compatibility this is not the same as
    /// UTS 46 _CheckHyphens_, which also disallows hyphens in the
    /// third and fourth positions.
    ///
    /// Note that `true` rejects real-world names, including some GitHub user pages.
    #[inline]
    pub fn check_hyphens(mut self, value: bool) -> Self {
        self.check_hyphens = value;
        self
    }

    /// Obsolete method retained to ease migration. The argument must be `false`.
    ///
    /// The configuration is returned unchanged.
    ///
    /// # Panics
    ///
    /// If the argument is `true`.
    #[inline]
    pub fn use_idna_2008_rules(self, value: bool) -> Self {
        assert!(!value, "IDNA 2008 rules are no longer supported");
        self
    }

    /// Compute the deny list: STD3 when `use_std3_ascii_rules` is set,
    /// the empty list otherwise.
    fn deny_list(&self) -> AsciiDenyList {
        if self.use_std3_ascii_rules {
            AsciiDenyList::STD3
        } else {
            AsciiDenyList::EMPTY
        }
    }

    /// Compute the hyphen mode: first/last checking when `check_hyphens`
    /// is set, no restriction otherwise.
    fn hyphens(&self) -> Hyphens {
        if self.check_hyphens {
            Hyphens::CheckFirstLast
        } else {
            Hyphens::Allow
        }
    }

    /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
    ///
    /// Convenience wrapper that runs [`Idna::to_ascii`] with this
    /// configuration and returns the output as a fresh `String`.
    ///
    /// # Errors
    ///
    /// Propagates the error reported by [`Idna::to_ascii`]; the partially
    /// written output is discarded in that case.
    pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
        let mut result = String::with_capacity(domain.len());
        let mut codec = Idna::new(self);
        codec.to_ascii(domain, &mut result).map(|()| result)
    }

    /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
    ///
    /// Convenience wrapper that runs [`Idna::to_unicode`] with this
    /// configuration. The output string is returned alongside the
    /// success-or-error status, so it is available even when an error
    /// is reported.
    pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
        let mut codec = Idna::new(self);
        let mut out = String::with_capacity(domain.len());
        let result = codec.to_unicode(domain, &mut out);
        (out, result)
    }
}