sys_locale/unix.rs
1use std::{env, ffi::OsStr};
2
/// Name of the `LANGUAGE` variable: a colon-separated list of locales, consulted first by `_get`.
const LANGUAGE: &str = "LANGUAGE";
/// Name of the `LC_ALL` variable: a single locale, consulted second by `_get`.
const LC_ALL: &str = "LC_ALL";
/// Name of the `LC_MESSAGES` variable: a single locale, consulted third by `_get`.
const LC_MESSAGES: &str = "LC_MESSAGES";
/// Name of the `LANG` variable: a single locale, consulted last by `_get`.
const LANG: &str = "LANG";
7
/// Read-only view of environment variables.
///
/// Locale detection goes through this trait instead of calling [`std::env`]
/// directly, so tests can supply their own variables without mutating the
/// environment of the running process.
///
/// Use [`StdEnv`] to query [`std::env`].
trait EnvAccess {
    /// Returns the value stored under `key`, or `None` when the
    /// implementation has no usable value for it.
    ///
    /// See also [`std::env::var`].
    fn get(&self, key: impl AsRef<OsStr>) -> Option<String>;
}
16
17/// Proxy to [std::env]
18struct StdEnv;
19impl EnvAccess for StdEnv {
20 fn get(&self, key: impl AsRef<OsStr>) -> Option<String> {
21 env::var(key).ok()
22 }
23}
24
/// Returns the locales configured in the environment of the current process.
///
/// Thin wrapper that runs `_get` against [`StdEnv`], i.e. the real
/// [`std::env`]; see `_get` for which variables are read and in what order.
pub(crate) fn get() -> impl Iterator<Item = String> {
    _get(&StdEnv)
}
28
29/// Retrieves a list of unique locales by checking specific environment variables
30/// in a predefined order: LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.
31///
32/// The function first checks the `LANGUAGE` environment variable, which can contain
33/// one or more locales separated by a colon (`:`). It then splits these values,
34/// converts them from [POSIX](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html)
35/// to [BCP 47](https://www.ietf.org/rfc/bcp/bcp47.html) format, and adds them to the list of locales
36/// if they are not already included.
37///
38/// Next, the function checks the `LC_ALL`, `LC_MESSAGES`, and `LANG` environment
39/// variables. Each of these variables contains a single locale. If a locale is found,
40/// and it's not empty, it is converted to BCP 47 format and added to the list if
41/// it is not already included.
42///
43/// For more information check this issue: https://github.com/1Password/sys-locale/issues/14.
44///
45/// The function ensures that locales are returned in the order of precedence
46/// and without duplicates. The final list of locales is returned as an iterator.
47///
48/// # Returns
49///
50/// An iterator over the unique locales found in the environment variables.
51///
52/// # Environment Variables Checked
53///
54/// 1. `LANGUAGE` - Can contain multiple locales, each separated by a colon (`:`), highest priority.
55/// 2. `LC_ALL` - Contains a single locale, high priority.
56/// 3. `LC_MESSAGES` - Contains a single locale, medium priority.
57/// 4. `LANG` - Contains a single locale, low priority.
58///
59/// # Example
60///
61/// ```ignore
62/// let locales: Vec<String> = _get(&env).collect();
63/// for locale in locales {
64/// println!("User's preferred locales: {}", locale);
65/// }
66/// ```
67fn _get(env: &impl EnvAccess) -> impl Iterator<Item = String> {
68 let mut locales = Vec::new();
69
70 // LANGUAGE contains one or multiple locales separated by colon (':')
71 if let Some(val) = env.get(LANGUAGE).filter(|val| !val.is_empty()) {
72 for part in val.split(':') {
73 let locale = posix_to_bcp47(part);
74 if !locales.contains(&locale) {
75 locales.push(locale);
76 }
77 }
78 }
79
80 // LC_ALL, LC_MESSAGES and LANG contain one locale
81 for variable in [LC_ALL, LC_MESSAGES, LANG] {
82 if let Some(val) = env.get(variable).filter(|val| !val.is_empty()) {
83 let locale = posix_to_bcp47(&val);
84 if !locales.contains(&locale) {
85 locales.push(locale);
86 }
87 }
88 }
89
90 locales.into_iter()
91}
92
/// Turns a POSIX locale identifier into a BCP 47 language tag.
///
/// Only the part of `locale` in front of the first `.` (codeset) or `@`
/// (modifier) is kept; within that part every underscore (`_`) becomes a
/// hyphen (`-`). Input that contains none of these characters, such as a
/// string that is already a BCP 47 tag, comes back unchanged.
///
/// Useful links:
/// - [The Open Group Base Specifications Issue 8 - 7. Locale](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html)
/// - [The Open Group Base Specifications Issue 8 - 8. Environment Variables](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html)
/// - [BCP 47 specification](https://www.ietf.org/rfc/bcp/bcp47.html)
///
/// # Examples
///
/// ```ignore
/// assert_eq!(posix_to_bcp47("en-US"), "en-US"); // already BCP 47, untouched
/// assert_eq!(posix_to_bcp47("en_US"), "en-US");
/// assert_eq!(posix_to_bcp47("ru_RU.UTF-8"), "ru-RU");
/// assert_eq!(posix_to_bcp47("fr_FR@dict"), "fr-FR");
/// assert_eq!(posix_to_bcp47("de_DE.UTF-8@euro"), "de-DE");
/// ```
///
/// # TODO
///
/// 1. Implement POSIX to BCP 47 modifier conversion (see https://github.com/1Password/sys-locale/issues/32).
/// 2. Optimize to avoid creating a new buffer (see https://github.com/1Password/sys-locale/pull/33).
fn posix_to_bcp47(locale: &str) -> String {
    // `split` always yields at least one (possibly empty) piece, so the
    // fallback is never actually taken.
    let language_and_region = locale
        .split(|c: char| matches!(c, '.' | '@'))
        .next()
        .unwrap_or("");

    language_and_region.replace('_', "-")
}
136
#[cfg(test)]
mod tests {
    use super::{EnvAccess, _get, posix_to_bcp47, LANG, LANGUAGE, LC_ALL, LC_MESSAGES};
    use std::{
        collections::HashMap,
        ffi::{OsStr, OsString},
    };

    /// In-memory stand-in for the process environment.
    type MockEnv = HashMap<OsString, String>;

    impl EnvAccess for MockEnv {
        fn get(&self, key: impl AsRef<OsStr>) -> Option<String> {
            HashMap::get(self, key.as_ref()).cloned()
        }
    }

    // The same French locale spelled in every supported notation.
    const BCP_47: &str = "fr-FR";
    const POSIX: &str = "fr_FR";
    const POSIX_ENC: &str = "fr_FR.UTF-8";
    const POSIX_MOD: &str = "fr_FR@euro";
    const POSIX_ENC_MOD: &str = "fr_FR.UTF-8@euro";

    /// Every notation of the locale must convert to the same BCP 47 tag.
    #[test]
    fn parse_identifier() {
        for input in [BCP_47, POSIX, POSIX_ENC, POSIX_MOD, POSIX_ENC_MOD] {
            assert_eq!(posix_to_bcp47(input), BCP_47);
        }
    }

    /// Checks precedence, de-duplication and conversion of `_get`.
    #[test]
    fn env_get() {
        /// Stores `values` under `LANGUAGE`, `LC_ALL`, `LC_MESSAGES` and
        /// `LANG` (in that order) and asserts that `_get` yields `expected`.
        fn check(env: &mut MockEnv, values: [&str; 4], expected: &[&str]) {
            let names = [LANGUAGE, LC_ALL, LC_MESSAGES, LANG];
            for (name, value) in names.iter().zip(values.iter()) {
                env.insert(OsString::from(*name), String::from(*value));
            }
            let actual: Vec<String> = _get(&*env).collect();
            assert_eq!(actual, expected);
        }

        let mut env = MockEnv::new();

        // No variable set at all
        assert_eq!(_get(&env).next(), None);

        // Empty
        check(&mut env, ["", "", "", ""], &[]);

        // Constants
        check(&mut env, [POSIX_ENC_MOD, POSIX_ENC, POSIX_MOD, POSIX], &[BCP_47]);

        // Only one variable
        check(&mut env, ["en_US", "", "", ""], &["en-US"]);
        check(&mut env, ["", "en_US", "", ""], &["en-US"]);
        check(&mut env, ["", "", "en_US", ""], &["en-US"]);
        check(&mut env, ["", "", "", "en_US"], &["en-US"]);

        // Duplicates
        check(&mut env, ["en_US", "en_US", "en_US", "en_US"], &["en-US"]);
        check(
            &mut env,
            ["en_US", "en_US", "ru_RU", "en_US"],
            &["en-US", "ru-RU"],
        );
        check(
            &mut env,
            ["en_US", "ru_RU", "ru_RU", "en_US"],
            &["en-US", "ru-RU"],
        );
        check(
            &mut env,
            ["en_US", "es_ES", "ru_RU", "en_US"],
            &["en-US", "es-ES", "ru-RU"],
        );
        check(
            &mut env,
            ["en_US:ru_RU:es_ES:en_US", "es_ES", "ru_RU", "en_US"],
            &["en-US", "ru-RU", "es-ES"],
        );

        // Duplicates with different case
        check(
            &mut env,
            ["en_US:fr_fr", "EN_US", "fR_Fr", "En_US"],
            &["en-US", "fr-fr", "EN-US", "fR-Fr", "En-US"],
        );

        // More complicated cases
        check(
            &mut env,
            ["ru_RU:ru:en_US:en", "ru_RU.UTF-8", "ru_RU.UTF-8", "ru_RU.UTF-8"],
            &["ru-RU", "ru", "en-US", "en"],
        );
        check(
            &mut env,
            [
                "fr_FR.UTF-8@euro:fr_FR.UTF-8:fr_FR:fr:en_US.UTF-8:en_US:en",
                "es_ES.UTF-8@euro",
                "fr_FR.UTF-8@euro",
                "fr_FR.UTF-8@euro",
            ],
            &["fr-FR", "fr", "en-US", "en", "es-ES"],
        );
        check(
            &mut env,
            ["", "es_ES.UTF-8@euro", "fr_FR.UTF-8@euro", "fr_FR.UTF-8@euro"],
            &["es-ES", "fr-FR"],
        );
        check(
            &mut env,
            ["fr_FR@euro", "fr_FR.UTF-8", "en_US.UTF-8", "en_US.UTF-8@dict"],
            &["fr-FR", "en-US"],
        );

        // Already BCP 47
        check(&mut env, [BCP_47, BCP_47, BCP_47, POSIX], &[BCP_47]);
        check(
            &mut env,
            ["fr-FR", "es-ES", "de-DE", "en-US"],
            &["fr-FR", "es-ES", "de-DE", "en-US"],
        );
    }
}