// idna/lib.rs
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This Rust crate implements IDNA
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
//!
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
//!
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
//!
//! > Initially, domain names were restricted to ASCII characters.
//! > A system was introduced in 2003 for internationalized domain names (IDN).
//! > This system is called Internationalizing Domain Names for Applications,
//! > or IDNA2003 for short.
//! > This mechanism supports IDNs by means of a client software transformation
//! > into a format known as Punycode.
//! > A revision of IDNA was approved in 2010 (IDNA2008).
//! > This revision has a number of incompatibilities with IDNA2003.
//! >
//! > The incompatibilities force implementers of client software,
//! > such as browsers and emailers,
//! > to face difficult choices during the transition period
//! > as registries shift from IDNA2003 to IDNA2008.
//! > This document specifies a mechanism
//! > that minimizes the impact of this transition for client software,
//! > allowing client software to access domains that are valid under either system.
#![no_std]
// For forwards compatibility
#[cfg(feature = "std")]
extern crate std;
extern crate alloc;
#[cfg(not(feature = "alloc"))]
compile_error!("the `alloc` feature must be enabled");
// Avoid a breaking change if in the future there's a use case for
// having a Bring-Your-Own-ICU4X-Data constructor for `Uts46` and
// not also having compiled data in the binary.
#[cfg(not(feature = "compiled_data"))]
compile_error!("the `compiled_data` feature must be enabled");
use alloc::borrow::Cow;
use alloc::string::String;
pub use uts46::AsciiDenyList;
use uts46::Uts46;
mod deprecated;
pub mod punycode;
pub mod uts46;
#[allow(deprecated)]
pub use crate::deprecated::{Config, Idna};
/// Marker type signalling that UTS #46 processing encountered one or more errors.
///
/// The struct carries no fields; it is `#[non_exhaustive]`, so code outside this
/// crate cannot construct it with a struct literal and should obtain it from the
/// crate's APIs (or via [`Default`]).
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct Errors {}

/// Lets an `Errors` value be converted directly into a failed `Result<(), Errors>`.
impl From<Errors> for Result<(), Errors> {
    /// Wraps the given errors in `Err`; the conversion never produces `Ok`.
    fn from(errors: Errors) -> Self {
        Result::Err(errors)
    }
}

// With the `std` feature the trait is named through `std`; otherwise through `core`.
#[cfg(feature = "std")]
impl std::error::Error for Errors {}

#[cfg(not(feature = "std"))]
impl core::error::Error for Errors {}

/// `Display` output is exactly the `Debug` output, produced with the same formatter.
impl core::fmt::Display for Errors {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <Self as core::fmt::Debug>::fmt(self, f)
    }
}
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
/// in the variant that returns a `Cow`.
///
/// This is the function most applications should use in preference to its siblings,
/// and most applications should pass [`AsciiDenyList::URL`] as `ascii_deny_list`.
/// With [`AsciiDenyList::URL`], the
/// [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
/// check is performed in addition to the
/// [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
///
/// The result is the ASCII representation of a domain name: characters are normalized
/// (upper-case to lower-case and other kinds of equivalence) and Punycode is used
/// as necessary.
///
/// The underlying `Uts46::to_ascii` call is made with `uts46::Hyphens::Allow` and
/// `uts46::DnsLength::Ignore`.
///
/// If you have a `&str` instead of `&[u8]`, call `.as_bytes()` on it before passing
/// it to this function. Doing so is still preferable to using the sibling functions
/// that take `&str`.
///
/// # Errors
///
/// This process may fail, in which case [`Errors`] is returned.
pub fn domain_to_ascii_cow(
    domain: &[u8],
    ascii_deny_list: AsciiDenyList,
) -> Result<Cow<'_, str>, Errors> {
    let processor = Uts46::new();
    let hyphens = uts46::Hyphens::Allow;
    let dns_length = uts46::DnsLength::Ignore;
    processor.to_ascii(domain, ascii_deny_list, hyphens, dns_length)
}
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
/// in the variant that returns a `String` and applies no ASCII deny list
/// (i.e. _UseSTD3ASCIIRules=false_).
///
/// This function is kept for backward compatibility; consider using
/// [`domain_to_ascii_cow`] instead. It delegates to that function with
/// [`AsciiDenyList::EMPTY`] and converts the result into an owned `String`.
///
/// The result is the ASCII representation of a domain name: characters are normalized
/// (upper-case to lower-case and other kinds of equivalence) and Punycode is used
/// as necessary.
///
/// # Errors
///
/// This process may fail, in which case [`Errors`] is returned.
pub fn domain_to_ascii(domain: &str) -> Result<String, Errors> {
    let ascii = domain_to_ascii_cow(domain.as_bytes(), AsciiDenyList::EMPTY)?;
    Ok(ascii.into_owned())
}
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm
/// with the `beStrict` flag set.
///
/// The underlying `Uts46::to_ascii` call is made with `uts46::AsciiDenyList::STD3`,
/// `uts46::Hyphens::Check` and `uts46::DnsLength::Verify`, and the result is
/// converted into an owned `String`.
///
/// Be aware that this rejects various real-world names, including:
/// * YouTube CDN nodes
/// * Some GitHub user pages
/// * Pseudo-hosts used by various TXT record-based protocols.
///
/// # Errors
///
/// Returns [`Errors`] when processing fails.
pub fn domain_to_ascii_strict(domain: &str) -> Result<String, Errors> {
    let strict_ascii = Uts46::new().to_ascii(
        domain.as_bytes(),
        uts46::AsciiDenyList::STD3,
        uts46::Hyphens::Check,
        uts46::DnsLength::Verify,
    )?;
    Ok(strict_ascii.into_owned())
}
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm,
/// in the variant that returns a `String` and applies no ASCII deny list
/// (i.e. _UseSTD3ASCIIRules=false_).
///
/// This function is kept for backward compatibility; consider using
/// [`Uts46::to_user_interface`] or [`Uts46::to_unicode`] instead.
///
/// The first item of the returned tuple is the Unicode representation of the domain
/// name: characters are normalized (upper-case to lower-case and other kinds of
/// equivalence) and Punycode is decoded as necessary.
///
/// When the second item of the tuple signals an error, the first item marks the
/// errors with REPLACEMENT CHARACTERs so that they can be illustrated to the user.
/// In that case the first item must not be used in a network protocol.
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), Errors>) {
    let processor = Uts46::new();
    let (unicode, outcome) = processor.to_unicode(
        domain.as_bytes(),
        uts46::AsciiDenyList::EMPTY,
        uts46::Hyphens::Allow,
    );
    let owned = unicode.into_owned();
    (owned, outcome)
}