From 9db5c0a836c65640ffa11397bcfb55db0ed69fc6 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Mon, 22 Sep 2025 19:14:43 -0700 Subject: [PATCH 1/2] Refactor env_preferences to host_info --- Cargo.lock | 292 +++++++++- Cargo.toml | 2 +- utils/env_preferences/README.md | 14 - utils/env_preferences/src/apple.rs | 170 ------ utils/env_preferences/src/error.rs | 98 ---- utils/env_preferences/src/lib.rs | 90 --- utils/env_preferences/src/parse/aliases.rs | 52 -- utils/env_preferences/src/parse/apple.rs | 23 - utils/env_preferences/src/parse/mod.rs | 13 - utils/env_preferences/src/parse/posix.rs | 312 ----------- utils/env_preferences/src/parse/windows.rs | 83 --- utils/env_preferences/src/posix.rs | 119 ---- utils/env_preferences/src/windows.rs | 41 -- utils/env_preferences/tests/parse/mod.rs | 6 - utils/env_preferences/tests/parse/posix.rs | 298 ---------- utils/env_preferences/tests/parse/windows.rs | 60 -- utils/env_preferences/tests/test.rs | 218 -------- .../{env_preferences => host_info}/Cargo.toml | 31 +- utils/{env_preferences => host_info}/LICENSE | 0 utils/host_info/README.md | 288 ++++++++++ utils/host_info/examples/dt_format.rs | 24 + utils/host_info/examples/dt_format_locale.rs | 33 ++ utils/host_info/examples/get_data.rs | 31 + utils/host_info/src/backends/android.rs | 33 ++ utils/host_info/src/backends/linux.rs | 101 ++++ utils/host_info/src/backends/macos.rs | 519 +++++++++++++++++ utils/host_info/src/backends/mod.rs | 280 +++++++++ utils/host_info/src/backends/unavailable.rs | 14 + utils/host_info/src/backends/windows.rs | 182 ++++++ utils/host_info/src/error.rs | 64 +++ utils/host_info/src/host_info.rs | 243 ++++++++ utils/host_info/src/lib.rs | 301 ++++++++++ utils/host_info/src/locale/mod.rs | 14 + utils/host_info/src/locale/posix.rs | 529 ++++++++++++++++++ utils/host_info/src/locale/windows.rs | 161 ++++++ utils/host_info/src/posix.rs | 235 ++++++++ .../tests/datasets/mod.rs | 10 +- .../tests/datasets/posix.txt | 0 .../tests/datasets/windows.txt 
| 0 39 files changed, 3362 insertions(+), 1622 deletions(-) delete mode 100644 utils/env_preferences/README.md delete mode 100644 utils/env_preferences/src/apple.rs delete mode 100644 utils/env_preferences/src/error.rs delete mode 100644 utils/env_preferences/src/lib.rs delete mode 100644 utils/env_preferences/src/parse/aliases.rs delete mode 100644 utils/env_preferences/src/parse/apple.rs delete mode 100644 utils/env_preferences/src/parse/mod.rs delete mode 100644 utils/env_preferences/src/parse/posix.rs delete mode 100644 utils/env_preferences/src/parse/windows.rs delete mode 100644 utils/env_preferences/src/posix.rs delete mode 100644 utils/env_preferences/src/windows.rs delete mode 100644 utils/env_preferences/tests/parse/mod.rs delete mode 100644 utils/env_preferences/tests/parse/posix.rs delete mode 100644 utils/env_preferences/tests/parse/windows.rs delete mode 100644 utils/env_preferences/tests/test.rs rename utils/{env_preferences => host_info}/Cargo.toml (51%) rename utils/{env_preferences => host_info}/LICENSE (100%) create mode 100644 utils/host_info/README.md create mode 100644 utils/host_info/examples/dt_format.rs create mode 100644 utils/host_info/examples/dt_format_locale.rs create mode 100644 utils/host_info/examples/get_data.rs create mode 100644 utils/host_info/src/backends/android.rs create mode 100644 utils/host_info/src/backends/linux.rs create mode 100644 utils/host_info/src/backends/macos.rs create mode 100644 utils/host_info/src/backends/mod.rs create mode 100644 utils/host_info/src/backends/unavailable.rs create mode 100644 utils/host_info/src/backends/windows.rs create mode 100644 utils/host_info/src/error.rs create mode 100644 utils/host_info/src/host_info.rs create mode 100644 utils/host_info/src/lib.rs create mode 100644 utils/host_info/src/locale/mod.rs create mode 100644 utils/host_info/src/locale/posix.rs create mode 100644 utils/host_info/src/locale/windows.rs create mode 100644 utils/host_info/src/posix.rs rename 
utils/{env_preferences => host_info}/tests/datasets/mod.rs (82%) rename utils/{env_preferences => host_info}/tests/datasets/posix.txt (100%) rename utils/{env_preferences => host_info}/tests/datasets/windows.txt (100%) diff --git a/Cargo.lock b/Cargo.lock index 8153242fa04..8263648e4a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -384,6 +384,16 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfg-expr" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a2c5f3bf25ec225351aa1c8e230d04d880d3bd89dea133537dafad4ae291e5c" +dependencies = [ + "smallvec", + "target-lexicon", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -522,6 +532,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -859,18 +879,6 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" -[[package]] -name = "env_preferences" -version = "0.1.0" -dependencies = [ - "core-foundation-sys", - "displaydoc", - "icu_locale_core", - "libc", - "windows", - "windows-core 0.60.1", -] - [[package]] name = "equivalent" version = "1.0.2" @@ -966,6 +974,69 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getopts" version = "0.2.24" @@ -1006,6 +1077,91 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "gio" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed68efc12b748a771be2dccc49480d8584004382967c98323245fc3c38b74a42" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys", + "glib", + "libc", + "pin-project-lite", + 
"smallvec", +] + +[[package]] +name = "gio-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171ed2f6dd927abbe108cfd9eebff2052c335013f5879d55bab0dc1dee19b706" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", + "windows-sys 0.59.0", +] + +[[package]] +name = "glib" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f2cbc4577536c849335878552f42086bfd25a8dcd6f54a18655cf818b20c8f" +dependencies = [ + "bitflags", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys", + "glib-macros", + "glib-sys", + "gobject-sys", + "libc", + "memchr", + "smallvec", +] + +[[package]] +name = "glib-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55eda916eecdae426d78d274a17b48137acdca6fba89621bd3705f2835bc719f" +dependencies = [ + "heck 0.5.0", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "glib-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09d3d0fddf7239521674e57b0465dfbd844632fec54f059f7f56112e3f927e1" +dependencies = [ + "libc", + "system-deps", +] + +[[package]] +name = "gobject-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "538e41d8776173ec107e7b0f2aceced60abc368d7e1d81c1f0e2ecd35f59080d" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + [[package]] name = "half" version = "2.4.1" @@ -1463,6 +1619,23 @@ dependencies = [ "icu_provider", ] +[[package]] +name = "icu_host_info" +version = "0.1.0" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "displaydoc", + "gio", + "icu", + "icu_calendar", + "icu_datetime", + "icu_locale_core", + "icu_time", + "libc", + "windows", +] + [[package]] name = "icu_list" version = "2.0.0" @@ -2305,6 +2478,24 @@ dependencies = 
[ "siphasher", ] +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -2387,6 +2578,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit 0.23.5", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -2920,6 +3120,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + [[package]] name = "smallvec" version = "1.15.1" @@ -2984,6 +3190,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", + "rustversion", "syn 2.0.108", ] @@ -3036,6 +3243,19 @@ dependencies = [ "syn 2.0.108", ] +[[package]] +name = "system-deps" +version = "7.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "550b2c61a9c30b85ca1f6ef0afcd2befcb12e73b1d31ef0526423bc7b6a99d7f" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + [[package]] name = "tap" version = "1.0.1" @@ -3052,6 +3272,12 @@ 
dependencies = [ "libc", ] +[[package]] +name = "target-lexicon" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" + [[package]] name = "thiserror" version = "2.0.17" @@ -3159,8 +3385,8 @@ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", - "toml_datetime", - "toml_edit", + "toml_datetime 0.6.9", + "toml_edit 0.22.26", ] [[package]] @@ -3172,6 +3398,15 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a197c0ec7d131bfc6f7e82c8442ba1595aeab35da7adbf05b6b73cd06a16b6be" +dependencies = [ + "serde_core", +] + [[package]] name = "toml_edit" version = "0.22.27" @@ -3181,11 +3416,32 @@ dependencies = [ "indexmap", "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.9", "toml_write", "winnow", ] +[[package]] +name = "toml_edit" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2ad0b7ae9cfeef5605163839cb9221f453399f15cfb5c10be9885fcf56611f9" +dependencies = [ + "indexmap", + "toml_datetime 0.7.1", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b551886f449aa90d4fe2bdaa9f4a2577ad2dde302c61ecf262d80b116db95c10" +dependencies = [ + "winnow", +] + [[package]] name = "toml_write" version = "0.1.2" @@ -3330,6 +3586,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + [[package]] name = 
"version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index 8493198ea22..d218658e4ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ members = [ "utils/ixdtf", "utils/litemap", "utils/resb", - "utils/env_preferences", + "utils/host_info", "utils/tinystr", "utils/tzif", "utils/potential_utf", diff --git a/utils/env_preferences/README.md b/utils/env_preferences/README.md deleted file mode 100644 index 5b932c9f374..00000000000 --- a/utils/env_preferences/README.md +++ /dev/null @@ -1,14 +0,0 @@ - - -# env_preferences - -`env_preferences` is a crate to retrieve system locale and preferences for -Apple, Linux & Windows systems. - -It provides functionality to fetch preferred locales from the user's operating -system and parse them lossily to an ICU4X [`Locale`](icu_locale_core::Locale). - -It also retrieves preferences for [`Calendar`](https://crates.io/crates/icu_calendar) -& [`TimeZone`](https://crates.io/crates/icu_time) - - diff --git a/utils/env_preferences/src/apple.rs b/utils/env_preferences/src/apple.rs deleted file mode 100644 index 597223d588b..00000000000 --- a/utils/env_preferences/src/apple.rs +++ /dev/null @@ -1,170 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use core_foundation_sys::{ - array::{CFArrayGetCount, CFArrayGetValueAtIndex}, - base::{CFIndex, CFRelease, CFRetain}, - calendar::{CFCalendarCopyCurrent, CFCalendarCopyLocale, CFCalendarGetIdentifier}, - locale::{CFLocaleCopyPreferredLanguages, CFLocaleGetIdentifier}, - string::{ - kCFStringEncodingUTF8, CFStringGetCString, CFStringGetCStringPtr, CFStringGetLength, - CFStringRef, - }, - timezone, -}; -use libc::c_char; -use std::ffi::{CStr, CString}; - -use crate::RetrievalError; - -/// Helps to get string, it tries to get the string directly from the pointer itself, in case it is unable to retrieve -/// the string (c_str_ptr is NULL) a buffer is created of size `length + 1` and we perform manual allocations to get -/// the string -fn get_string(ptr: CFStringRef) -> Result { - // SAFETY: The call to `CFStringGetCStringPtr` because the reference of string we are accessing is not `NULL` - // Returns pointer in O(1) without any memory allocation. This can return NULL so we are handling it by directly - // copying it using `CFStringGetCString` - let c_str_ptr: *const c_char = unsafe { CFStringGetCStringPtr(ptr, kCFStringEncodingUTF8) }; - - if !c_str_ptr.is_null() { - // SAFETY: A valid `NULL` terminator is present which is a requirement of `from_ptr` - let lang_rust_str = unsafe { CStr::from_ptr(c_str_ptr) }.to_str()?; - Ok(lang_rust_str.to_string()) - } else { - // `c_str_ptr` is null, i.e. `CFStringGetCStringPtr` couldn't give desired output, trying with - // manual allocations - // SAFETY: It returns length of the string, from above conditional statement we ensure - // that the `lang_ptr` is not NULL thus making it safe to call - let length = unsafe { CFStringGetLength(ptr) as usize }; - - let mut c_str_buf: Vec = vec![0; length + 1]; - - // SAFETY: Safety is ensured by following points - // 1. `lang_ptr` is not NULL, checked through conditional statement - // 2. 
`c_str_buf` is large enough and in scope after this call - unsafe { - CFStringGetCString( - ptr, - c_str_buf.as_mut_ptr() as *mut c_char, - c_str_buf.len() as CFIndex, - kCFStringEncodingUTF8, - ); - } - - let c_string = CString::from_vec_with_nul(c_str_buf)?; - c_string - .into_string() - .map_err(|e| RetrievalError::ConversionError(e.utf8_error())) - } -} - -/// Retrieves system locales for Apple operating systems, in the order preferred by the -/// user, it consumes [`CFLocaleCopyPreferredLanguages`](https://developer.apple.com/documentation/corefoundation/1542887-cflocalecopypreferredlanguages) -/// to copy the languages preferred by the user. -pub fn get_raw_locales() -> Result, RetrievalError> { - let mut languages: Vec = Vec::new(); - - // SAFETY: The call to `CFLocaleCopyPreferredLanguages` returns an immutable reference to `CFArray` which is owned by us - // https://developer.apple.com/documentation/corefoundation/cfarrayref. It is ensured that `locale_carr_ref` is not mutated - // Immutablility ensures that nothing is overridden during it's scope - let locale_carr_ref = unsafe { CFLocaleCopyPreferredLanguages() }; - - if !locale_carr_ref.is_null() { - // SAFETY: The call to `CFArrayGetCount` is only made when is `locale_carr_ref` is not `NULL` - let count = unsafe { CFArrayGetCount(locale_carr_ref as _) }; - - for i in 0..count { - // SAFETY: The call to `CFArrayGetValueAtIndex` is safe because we are iterating from 0 to count (`CFArrayGetCount`) which - // in itself will always be greater than 0 and less than count ensuring we will not get "out of bounds" error - let lang_ptr = unsafe { CFArrayGetValueAtIndex(locale_carr_ref, i) }; - - if !lang_ptr.is_null() { - let locale_str = get_string(lang_ptr as CFStringRef)?; - languages.push(locale_str); - } else { - return Err(RetrievalError::NullLocale); - } - } - } else { - // No need to release memory for `locale_carr_ref` since it is NULL - return Err(RetrievalError::NullLocale); - } - // Release for memory 
- unsafe { - CFRelease(locale_carr_ref as _); - } - - Ok(languages) -} - -/// Gets the list calendar type and it's corresponding locale. It returns a Vec<(String, String)> -/// The first element is the locale of the calendar, second is the calendar identifier -pub fn get_system_calendars() -> Result, RetrievalError> { - let mut calendars = Vec::new(); - let calendar_locale_str: String; - let mut calendar_identifier_str = String::new(); - - // SAFETY: The call to `CFCalendarCopyCurrent` returns a calendar object owned by us - // This calendar object is used extract locale and type of calendar (identifier) - let calendar = unsafe { CFCalendarCopyCurrent() }; - - if !calendar.is_null() { - // SAFETY: Retaining the calendar object when not `NULL` - // It is released when all actions are completed - unsafe { CFRetain(calendar as _) }; - - // SAFETY: Retrieves `CFLocale` object for the calendar, the `if` statement ensures we don't - // pass in a `NULL` references - let locale = unsafe { CFCalendarCopyLocale(calendar as _) }; - - // SAFETY: Retrieves `CFString` (identifier) for the calendar, the `if` statement ensures - // we don't pass in a `NULL` references - let identifier = unsafe { CFCalendarGetIdentifier(calendar as _) }; - - if !locale.is_null() { - // SAFETY: Retain the locale object, released when we extracted the string - unsafe { CFRetain(locale as _) }; - - // SAFETY: Retrieves `CFString` (identifier) for the calendar, the `if` statement ensures - // we don't pass in a `NULL` reference - let locale_identifier = unsafe { CFLocaleGetIdentifier(locale) }; - calendar_locale_str = get_string(locale_identifier as CFStringRef)?; - - // SAFETY: Releases the locale object which was retained - unsafe { CFRelease(locale as _) }; - } else { - return Err(RetrievalError::NullLocale); - } - - if !identifier.is_null() { - calendar_identifier_str = get_string(identifier as CFStringRef)?; - } - // SAFETY: Release the calendar when done to avoid memory leaks - unsafe { 
CFRelease(calendar as _) }; - - calendars.push((calendar_locale_str, calendar_identifier_str)); - } else { - return Err(RetrievalError::NullCalendar); - } - - Ok(calendars) -} - -/// Get the current time zone of the system -pub fn get_system_time_zone() -> Result { - // SAFETY: Returns the time zone currently used by the system - // Returns an immutable reference to TimeZone object owned by us - let timezone = unsafe { timezone::CFTimeZoneCopySystem() }; - - if !timezone.is_null() { - // SAFETY: Extracts name of time zone from the TimeZone object, reference to timezone - // is guaranteed to be not NULL - let cf_string = unsafe { timezone::CFTimeZoneGetName(timezone) }; - - if !cf_string.is_null() { - return get_string(cf_string); - } - } - Err(RetrievalError::NullTimeZone) -} diff --git a/utils/env_preferences/src/error.rs b/utils/env_preferences/src/error.rs deleted file mode 100644 index ebae643a357..00000000000 --- a/utils/env_preferences/src/error.rs +++ /dev/null @@ -1,98 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use displaydoc::Display; -use std::{ffi::FromVecWithNulError, str::Utf8Error}; - -use crate::parse::posix::PosixParseError; - -/// An error encountered while retrieving the system locale -#[derive(Debug, PartialEq)] -pub enum RetrievalError { - /// Error converting into `&CStr` to `&str` - ConversionError(Utf8Error), - - /// Error creating a `CString` from a buffer with a null terminator - FromVecWithNulError(FromVecWithNulError), - - /// Unable to retrieve the calendar - NullCalendar, - - /// Unable to retrieve the locale - NullLocale, - - /// Unable to retrieve TimeZone - NullTimeZone, - - /// UnknownCategory when retrieving locale for linux - #[cfg(any(doc, target_os = "linux"))] - UnknownCategory, - - /// Error handling for windows system - #[cfg(target_os = "windows")] - Windows(windows::core::Error), - - Other(String), -} - -#[cfg(target_os = "windows")] -impl From for RetrievalError { - fn from(input: windows::core::Error) -> Self { - Self::Windows(input) - } -} - -impl From for RetrievalError { - fn from(input: Utf8Error) -> Self { - Self::ConversionError(input) - } -} - -impl From for RetrievalError { - fn from(input: FromVecWithNulError) -> Self { - Self::FromVecWithNulError(input) - } -} - -/// An error encountered while either retrieving or parsing a system locale -#[derive(Display, Debug, PartialEq)] -pub enum ParseError { - #[displaydoc("Locale failed native parsing logic: {0}")] - Posix(PosixParseError), - #[displaydoc("Unable to parse ICU4X locale: {0}")] - Icu(icu_locale_core::ParseError), -} - -impl From for ParseError { - fn from(value: PosixParseError) -> Self { - Self::Posix(value) - } -} - -impl From for ParseError { - fn from(value: icu_locale_core::ParseError) -> Self { - Self::Icu(value) - } -} - -/// An error encountered while either retrieving or parsing a system locale -#[derive(Display, Debug)] -pub enum LocaleError { - #[displaydoc("Unable to retrieve locales: {0:?}")] - Retrieval(RetrievalError), - #[displaydoc("Unable to parse 
locale: {0}")] - Parse(ParseError), -} - -impl From for LocaleError { - fn from(value: RetrievalError) -> Self { - Self::Retrieval(value) - } -} - -impl From for LocaleError { - fn from(value: ParseError) -> Self { - Self::Parse(value) - } -} diff --git a/utils/env_preferences/src/lib.rs b/utils/env_preferences/src/lib.rs deleted file mode 100644 index 2d45c051d55..00000000000 --- a/utils/env_preferences/src/lib.rs +++ /dev/null @@ -1,90 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! # env_preferences -//! -//! `env_preferences` is a crate to retrieve system locale and preferences for -//! Apple, Linux & Windows systems. -//! -//! It provides functionality to fetch preferred locales from the user's operating -//! system and parse them lossily to an ICU4X [`Locale`](icu_locale_core::Locale). -//! -//! It also retrieves preferences for [`Calendar`](https://crates.io/crates/icu_calendar) -//! & [`TimeZone`](https://crates.io/crates/icu_time) - -mod error; -pub mod parse; - -pub use error::{LocaleError, ParseError, RetrievalError}; - -// doc -use core_foundation_sys as _; -#[cfg(target_os = "windows")] -use libc as _; - -#[cfg(any(doc, target_os = "macos"))] -pub mod apple; -#[cfg(any(doc, target_os = "linux"))] -pub mod posix; -#[cfg(any(doc, target_os = "windows"))] -pub mod windows; -#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] -compile_error!( - "Unsupported target OS. 
Supported operating systems are Apple, Linux & Windows as of now" -); - -#[cfg(target_os = "macos")] -use apple as system; -#[cfg(target_os = "linux")] -use posix as system; -#[cfg(target_os = "windows")] -use windows as system; - -#[cfg(target_os = "macos")] -use parse::apple::AppleLocale as SystemLocale; -#[cfg(target_os = "linux")] -use parse::posix::PosixLocale as SystemLocale; -#[cfg(target_os = "windows")] -use parse::windows::WindowsLocale as SystemLocale; - -/// List the user's available locales as the platform-provided [`String`]s, ordered by preference. -/// -///
-/// -/// The output of this function is platform-dependent and **is not guaranteed** to be a valid -/// BCP-47 identifier. To get a list of parsed locales, see [`get_locales_lossy()`]. -/// -///
-/// -/// Specific information can be found at the platform's implementation: -/// - [`apple::get_raw_locales()`] -/// - [`posix::get_raw_locales()`] -/// - [`windows::get_raw_locales()`] -pub fn get_raw_locales() -> Result, RetrievalError> { - system::get_raw_locales() -} - -/// List the user's available locales as ICU4X [`Locale`](icu_locale_core::Locale)s, ordered by preference. -/// -/// This performs a best-effort conversion that may lose some (or all!) data in certain cases. -/// For getting a list of raw system locales, see [`get_raw_locales()`]. -/// -/// Specific information can be found at the platform's implementation: -/// - [`parse::apple::AppleLocale`] -/// - [`parse::posix::PosixLocale`] -/// - [`parse::windows::WindowsLocale`] -pub fn get_locales_lossy() -> Result, LocaleError> { - let raw_locales = get_raw_locales()?; - let system_locales = raw_locales - .iter() - .map(String::as_str) - .map(SystemLocale::try_from_str) - .collect::, ParseError>>()?; - - system_locales - .iter() - .map(SystemLocale::try_convert_lossy) - .map(|result| result.map_err(LocaleError::from)) - .collect::, LocaleError>>() -} diff --git a/utils/env_preferences/src/parse/aliases.rs b/utils/env_preferences/src/parse/aliases.rs deleted file mode 100644 index 86880a7b1d7..00000000000 --- a/utils/env_preferences/src/parse/aliases.rs +++ /dev/null @@ -1,52 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Platform-specific conversion from locale strings to BCP-47 identifiers. - -/// Strip any Windows "Sort Order Identifier" and return a matching CLDR collation value. 
-/// -/// The full table is available at: -/// -pub fn strip_windows_collation_suffix_lossy( - lcid: &str, -) -> (&str, Option) { - use icu_locale_core::extensions::unicode::value; - - // All known LCIDs containing an underscore are used for a collation suffix - if let Some((prefix, suffix)) = lcid.split_once('_') { - let collation_value = match suffix { - "phoneb" => value!("phonebk"), - "pronun" => value!("zhuyin"), - "radstr" => value!("unihan"), - "stroke" => value!("stroke"), - "tradnl" => value!("trad"), - // Strip the suffix on LCIDs with an underscore but no (known) matching CLDR data - _ => return (prefix, None), - }; - - // Return the LCID with the stripped prefix, and the matching CLDR collation key - (prefix, Some(collation_value)) - } else { - // No underscore found, return the LCID as-is - (lcid, None) - } -} - -/// Find a BCP-47 identifier from a list of known Windows aliases. -pub fn find_windows_language_alias_lossy( - lcid: &str, -) -> Option { - use icu_locale_core::langid; - - match lcid { - "zh-yue-HK" => Some(langid!("yue-HK")), - // LCID with no (known) matching CLDR data: "math alphanumeric sorting" - // This would be `x-IV_mathan`, but the collation suffix may already be stripped by - // `strip_windows_collation_suffix_lossy`. For some reason, `LocaleEnumProcEx` also uses - // `x-IV-mathan`, so that is included here too. - // https://learn.microsoft.com/en-us/windows/win32/api/winnls/nc-winnls-locale_enumprocex - "x-IV" | "x-IV_mathan" | "x-IV-mathan" => Some(langid!("und")), - _ => None, - } -} diff --git a/utils/env_preferences/src/parse/apple.rs b/utils/env_preferences/src/parse/apple.rs deleted file mode 100644 index 275fed332b9..00000000000 --- a/utils/env_preferences/src/parse/apple.rs +++ /dev/null @@ -1,23 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use icu_locale_core::Locale; - -use crate::ParseError; - -pub struct AppleLocale<'src> { - src: &'src str, -} - -impl<'src> AppleLocale<'src> { - pub fn try_from_str(src: &'src str) -> Result { - Ok(Self { src }) - } - - pub fn try_convert_lossy(&self) -> Result { - let locale = Locale::try_from_str(self.src)?; - - Ok(locale) - } -} diff --git a/utils/env_preferences/src/parse/mod.rs b/utils/env_preferences/src/parse/mod.rs deleted file mode 100644 index 4e4a7a206b0..00000000000 --- a/utils/env_preferences/src/parse/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Parsing functionality for various popular operating systems - -// Re-export all alias functions -mod aliases; -pub use aliases::*; - -pub mod apple; -pub mod posix; -pub mod windows; diff --git a/utils/env_preferences/src/parse/posix.rs b/utils/env_preferences/src/parse/posix.rs deleted file mode 100644 index 01bb131ab23..00000000000 --- a/utils/env_preferences/src/parse/posix.rs +++ /dev/null @@ -1,312 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Parsing functionality for POSIX locale identifiers. -//! For more information, see [`PosixLocale`]. -//! -//! # Usage example -//! ``` -//! # use icu_locale_core::locale; -//! # use env_preferences::parse::posix::PosixLocale; -//! # use env_preferences::LocaleError; -//! # fn main() -> Result<(), LocaleError> { -//! let posix_locale = PosixLocale::try_from_str("en_US")?; -//! assert_eq!(posix_locale.try_convert_lossy()?, locale!("en-US")); -//! # Ok(()) -//! # } -//! 
``` - -use displaydoc::Display; -use icu_locale_core::extensions::unicode::{key, value}; -use icu_locale_core::extensions::Extensions; -use icu_locale_core::subtags::{language, script, variant, Language, Region, Variants}; -use icu_locale_core::{locale, LanguageIdentifier, Locale}; - -use crate::ParseError; - -#[derive(Display, Debug, PartialEq)] -/// An error while parsing a POSIX locale identifier -pub enum PosixParseError { - #[displaydoc("Empty locale")] - EmptyLocale, - #[displaydoc("Empty section beginning at offset {offset}")] - EmptySection { offset: usize }, - #[displaydoc("Invalid character at offset {offset}")] - InvalidCharacter { offset: usize }, - #[displaydoc("Invalid locale")] - InvalidLocale, - #[displaydoc("Delimiter repeated at offsets {first_offset} and {second_offset}")] - RepeatedDelimiter { - first_offset: usize, - second_offset: usize, - }, - #[displaydoc("Delimiters found out-of-order at offsets {first_offset} and {second_offset}")] - UnorderedDelimiter { - first_offset: usize, - second_offset: usize, - }, -} - -#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] -enum Delimiter { - Territory, - Codeset, - Modifier, -} - -impl Delimiter { - /// Find any optional sections, returning an error if the delimiters are invalid - pub fn try_find_sections(src: &str) -> Result, PosixParseError> { - // Find the offset and delimiter of each optional section - let optional_sections = src - .chars() - .enumerate() - .flat_map(|(index, character)| match character { - '_' => Some((index, Self::Territory)), - '.' 
=> Some((index, Self::Codeset)), - '@' => Some((index, Self::Modifier)), - _ => None, - }) - .collect::>(); - - // Find any errors in the arrangement of delimiters - for (index, (first_offset, first_delimiter)) in optional_sections.iter().enumerate() { - // Find any repeated delimiters - if let Some((second_offset, _second_delimiter)) = optional_sections - .iter() - // Check all delimiters past this index - .skip(index + 1) - .find(|(_second_offset, second_delimiter)| first_delimiter == second_delimiter) - { - return Err(PosixParseError::RepeatedDelimiter { - first_offset: *first_offset, - second_offset: *second_offset, - }); - } - - // Find any delimiters that have been invalidated by a delimiter that should appear after it - // For example "en.utf8_US" is invalid because codeset appears before territory - if let Some((second_offset, second_delimiter)) = optional_sections.get(index + 1) { - if first_delimiter > second_delimiter { - return Err(PosixParseError::UnorderedDelimiter { - first_offset: *first_offset, - second_offset: *second_offset, - }); - } - } - } - - Ok(optional_sections) - } -} - -#[derive(Debug)] -/// A parsed and validated POSIX locale identifier. -pub struct PosixLocale<'src> { - language: &'src str, - territory: Option<&'src str>, - codeset: Option<&'src str>, - // TODO: is it possible to have multiple modifiers? - modifier: Option<&'src str>, -} - -impl<'src> PosixLocale<'src> { - /// Attempt to parse a POSIX locale. - /// - /// Locales are expected to be in the format `language[_territory][.codeset][@modifier]`; - /// only the language section is mandatory, all other sections are optional. 
- /// For example: - /// - All sections: `en_US.utf8@euro` - /// - Only required sections: `en` - /// - /// See section 8.2 of the POSIX spec for more details: - /// - pub fn try_from_str(src: &'src str) -> Result { - // These cases are implementation-defined and can be ignored: - // - Empty locales - if src.is_empty() { - return Err(ParseError::Posix(PosixParseError::EmptyLocale)); - } - // - Any locale containing '/' - if let Some(offset) = src.find('/') { - return Err(ParseError::Posix(PosixParseError::InvalidCharacter { - offset, - })); - } - // - Locales consisting of "." or ".." - if src == "." || src == ".." { - return Err(ParseError::Posix(PosixParseError::InvalidLocale)); - } - - // Find any optional sections, and return any delimiter-related errors - let optional_sections = Delimiter::try_find_sections(src)?; - - // The language field continues until the start of the first optional section, if one exists - let language = match optional_sections.first() { - Some((offset, _delimiter)) => &src[..*offset], - None => src, - }; - - // Make sure the language itself is non-empty - if language.is_empty() { - return Err(ParseError::Posix(PosixParseError::EmptySection { - offset: 0, - })); - } - - let mut locale = Self { - language, - territory: None, - codeset: None, - modifier: None, - }; - - for (index, (start_offset, delimiter)) in optional_sections.iter().enumerate() { - // Find the offset of the next section, or end of the string if none exist - let end_offset = optional_sections - .get(index + 1) - .map(|(next_offset, _next_delimiter)| *next_offset) - .unwrap_or(src.len()); - - // Make sure this section is non-empty (more characters than just the delimiter) - if start_offset + 1 >= end_offset { - return Err(ParseError::Posix(PosixParseError::EmptySection { - offset: *start_offset, - })); - } - - // Write the section to the appropriate field - let section_value = Some(&src[start_offset + 1..end_offset]); - match delimiter { - Delimiter::Territory => 
locale.territory = section_value, - Delimiter::Codeset => locale.codeset = section_value, - Delimiter::Modifier => locale.modifier = section_value, - } - } - - Ok(locale) - } - - /// Attempt to convert a POSIX locale into a valid BCP-47 identifier. - /// - /// This is a best-effort conversion process, and there are valid - /// POSIX locales that will return an error or silently ignore data. - /// In particular, the codeset section is always ignored, and only some common modifiers are handled - /// (unknown modifiers will be silently ignored). - /// - /// # Examples - /// - /// ## Parsing behaviour - /// ``` - /// # use icu_locale_core::locale; - /// # use env_preferences::parse::posix::PosixLocale; - /// # use env_preferences::LocaleError; - /// # fn main() -> Result<(), LocaleError> { - /// // Locales will always include the `posix` variant - /// assert_eq!( - /// PosixLocale::try_from_str("en_US")?.try_convert_lossy()?, - /// locale!("en-US") - /// ); - /// // The codeset field will be ignored - /// assert_eq!( - /// PosixLocale::try_from_str("en_US.iso88591")?.try_convert_lossy()?, - /// locale!("en-US") - /// ); - /// // Any unknown modifiers will be ignored - /// assert_eq!( - /// PosixLocale::try_from_str("en_US@unknown")?.try_convert_lossy()?, - /// locale!("en-US") - /// ); - /// # Ok(()) - /// # } - /// ``` - /// - /// ## Edge cases - /// ``` - /// # use icu_locale_core::locale; - /// # use env_preferences::parse::posix::PosixLocale; - /// # use env_preferences::LocaleError; - /// # fn main() -> Result<(), LocaleError> { - /// // The default "C"/"POSIX" locale will be converted to "en-US-posix" - /// assert_eq!( - /// PosixLocale::try_from_str("C")?.try_convert_lossy()?, - /// locale!("en-US-posix") - /// ); - /// assert_eq!( - /// PosixLocale::try_from_str("POSIX")?.try_convert_lossy()?, - /// locale!("en-US-posix") - /// ); - /// - /// // Known script modifiers will be converted to the matching CLDR keys - /// assert_eq!( - /// 
PosixLocale::try_from_str("uz_UZ@cyrillic")?.try_convert_lossy()?, - /// locale!("uz-Cyrl-UZ") - /// ); - /// assert_eq!( - /// PosixLocale::try_from_str("ks_IN@devanagari")?.try_convert_lossy()?, - /// locale!("ks-Deva-IN") - /// ); - /// assert_eq!( - /// PosixLocale::try_from_str("be_BY@latin")?.try_convert_lossy()?, - /// locale!("be-Latn-BY") - /// ); - /// - /// // Other known modifiers are handled accordingly - /// assert_eq!( - /// PosixLocale::try_from_str("en_US@euro")?.try_convert_lossy()?, - /// locale!("en-US-u-cu-eur") - /// ); - /// assert_eq!( - /// PosixLocale::try_from_str("aa_ER@saaho")?.try_convert_lossy()?, - /// locale!("ssy-ER") - /// ); - /// # Ok(()) - /// # } - /// ``` - pub fn try_convert_lossy(&self) -> Result { - // The default "C"/"POSIX" locale should map to "en-US-posix", - // which is the default behaviour in ICU4C: - // https://github.com/unicode-org/icu/blob/795d7ac82c4b29cf721d0ad62c0b178347d453bf/icu4c/source/common/putil.cpp#L1738 - if self.language == "C" || self.language == "POSIX" { - return Ok(locale!("en-US-posix")); - } - - let mut extensions = Extensions::new(); - let mut script = None; - let mut variant = None; - - // Parse the language/region - let mut language = Language::try_from_str(self.language)?; - let region = self.territory.map(Region::try_from_str).transpose()?; - - if let Some(modifier) = self.modifier { - match modifier.to_ascii_lowercase().as_str() { - "euro" => { - extensions.unicode.keywords.set(key!("cu"), value!("eur")); - } - // Known script modifiers - "cyrillic" => script = Some(script!("Cyrl")), - "devanagari" => script = Some(script!("Deva")), - "latin" => script = Some(script!("Latn")), - // Saaho seems to be the only "legacy variant" that appears as a modifier: - // https://www.unicode.org/reports/tr35/#table-legacy-variant-mappings - "saaho" => language = language!("ssy"), - "valencia" => variant = Some(variant!("valencia")), - // Some modifiers are known but can't be expressed as a BCP-47 
identifier - // e.g. "@abegede", "@iqtelif" - _ => (), - } - } - - Ok(Locale { - id: LanguageIdentifier { - language, - region, - script, - variants: variant.map_or_else(Variants::new, Variants::from_variant), - }, - extensions, - }) - } -} diff --git a/utils/env_preferences/src/parse/windows.rs b/utils/env_preferences/src/parse/windows.rs deleted file mode 100644 index 4569bb8fafa..00000000000 --- a/utils/env_preferences/src/parse/windows.rs +++ /dev/null @@ -1,83 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Parsing functionality for Windows LCIDs. -//! For more information, see [`WindowsLocale`]. -//! -//! # Usage example -//! ``` -//! # use icu_locale_core::locale; -//! # use env_preferences::parse::windows::WindowsLocale; -//! # use env_preferences::LocaleError; -//! # fn main() -> Result<(), LocaleError> { -//! let windows_locale = WindowsLocale::try_from_str("en-US")?; -//! assert_eq!(windows_locale.try_convert_lossy()?, locale!("en-US")); -//! # Ok(()) -//! # } -//! 
``` - -use icu_locale_core::extensions::unicode::{key, Keywords, Unicode}; -use icu_locale_core::extensions::Extensions; -use icu_locale_core::{LanguageIdentifier, Locale}; - -use super::aliases::{find_windows_language_alias_lossy, strip_windows_collation_suffix_lossy}; -use crate::ParseError; - -pub struct WindowsLocale<'src> { - src: &'src str, -} - -impl<'src> WindowsLocale<'src> { - pub fn try_from_str(src: &'src str) -> Result { - Ok(Self { src }) - } - - /// ## Edge cases - /// ``` - /// # use icu_locale_core::locale; - /// # use env_preferences::parse::windows::WindowsLocale; - /// # use env_preferences::LocaleError; - /// # fn main() -> Result<(), LocaleError> { - /// // Known invalid values are converted to a matching BCP-47 identifier - /// assert_eq!( - /// WindowsLocale::try_from_str("zh-yue-HK")?.try_convert_lossy()?, - /// locale!("yue-HK") - /// ); - /// - /// // Known collation suffixes and converted to `-u-co-VALUE` extension syntax - /// assert_eq!( - /// WindowsLocale::try_from_str("de-DE_phoneb")?.try_convert_lossy()?, - /// locale!("de-DE-u-co-phonebk") - /// ); - /// assert_eq!( - /// WindowsLocale::try_from_str("zh-TW_pronun")?.try_convert_lossy()?, - /// locale!("zh-TW-u-co-zhuyin") - /// ); - /// # Ok(()) - /// # } - /// ``` - pub fn try_convert_lossy(&self) -> Result { - let (lcid, collation_value) = strip_windows_collation_suffix_lossy(self.src); - let keywords = match collation_value { - // Add the -u-co-VALUE extension to the locale - Some(collation_value) => Keywords::new_single(key!("co"), collation_value), - // No collation value found, use default keywords - None => Keywords::new(), - }; - - // Use a matching alias if found - let language = match find_windows_language_alias_lossy(lcid) { - Some(locale) => locale, - None => LanguageIdentifier::try_from_str(lcid)?, - }; - - Ok(Locale { - id: language, - extensions: Extensions::from_unicode(Unicode { - keywords, - ..Unicode::new() - }), - }) - } -} diff --git 
a/utils/env_preferences/src/posix.rs b/utils/env_preferences/src/posix.rs deleted file mode 100644 index 4b6f7626af6..00000000000 --- a/utils/env_preferences/src/posix.rs +++ /dev/null @@ -1,119 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use libc::{setlocale, LC_ALL, LC_TIME}; -use std::collections::HashMap; -use std::ffi::CStr; -use std::ptr; -use std::str::FromStr; - -use crate::RetrievalError; - -#[derive(Hash, Eq, PartialEq, Debug)] -pub enum LocaleCategory { - Character, - Number, - Time, - Collate, - Monetary, - Messages, - Paper, - Name, - Address, - Telephone, - Measurement, - Identification, - All, -} - -impl FromStr for LocaleCategory { - type Err = RetrievalError; - - fn from_str(s: &str) -> Result { - match s { - "LC_CTYPE" => Ok(Self::Character), - "LC_NUMERIC" => Ok(Self::Number), - "LC_TIME" => Ok(Self::Time), - "LC_COLLATE" => Ok(Self::Collate), - "LC_MONETARY" => Ok(Self::Monetary), - "LC_MESSAGES" => Ok(Self::Messages), - "LC_PAPER" => Ok(Self::Paper), - "LC_NAME" => Ok(Self::Name), - "LC_ADDRESS" => Ok(Self::Address), - "LC_TELEPHONE" => Ok(Self::Telephone), - "LC_MEASUREMENT" => Ok(Self::Measurement), - "LC_IDENTIFICATION" => Ok(Self::Identification), - "LC_ALL" => Ok(Self::All), - _ => Err(RetrievalError::UnknownCategory), - } - } -} - -/// Use [`get_raw_locale_categories`] to find a list of the user's preferred locales -pub fn get_raw_locales() -> Result, RetrievalError> { - let mut categories = get_raw_locale_categories()?; - let mut locales = Vec::with_capacity(categories.len()); - - // Add LC_ALL if it exists - if let Some(primary_locale) = categories.remove(&LocaleCategory::All) { - locales.push(primary_locale); - } - - // Add any remaining locales that were explicitly set - locales.extend(categories.into_values()); - Ok(locales) -} - -// TODO: Add a function to return 
all the locales POSIX categories explicitly - -/// Retrieves locale for `LC_ALL` POSIX category. Also returns other categories if any are explicitly -/// set in the thread -pub fn get_raw_locale_categories() -> Result, RetrievalError> { - let mut locale_map = HashMap::new(); - - // SAFETY: Safety is ensured because we pass a `NULL` pointer and retrieve the locale there is - // no subsequent calls for `setlocale` which could change the locale of this particular thread - let locales_ptr = unsafe { setlocale(LC_ALL, ptr::null()) }; - - if locales_ptr.is_null() { - return Err(RetrievalError::NullLocale); - } - - // SAFETY: A valid `NULL` terminator is present which is a requirement of `from_ptr` - let locales_str = unsafe { CStr::from_ptr(locales_ptr) }.to_str()?; - let locale_pairs = locales_str.split(';'); - for locale_pair in locale_pairs { - let mut parts = locale_pair.split('='); - if let Some(value) = parts.next() { - if let Some(key) = parts.next() { - if let Ok(category) = LocaleCategory::from_str(value) { - locale_map.insert(category, key.to_string()); - } - } else { - // Handle case where only a single locale - locale_map.insert(LocaleCategory::All, value.to_string()); - } - } - } - Ok(locale_map) -} - -/// Get the system calendar locale (LC_TIME). 
-/// -/// This only returns the calendar locale, `gnome-calendar` is the default calendar in linux -/// The locale returned is for `Gregorian` calendar -/// Related issue: `` -pub fn get_system_calendars() -> Result { - // SAFETY: Safety is ensured because we pass a `NULL` pointer and retrieve the locale there is - // no subsequent calls for `setlocale` which could change the locale of this particular thread - let locale_ptr = unsafe { setlocale(LC_TIME, ptr::null()) }; - - if !locale_ptr.is_null() { - // SAFETY: A valid `NULL` terminator is present which is a requirement of `from_ptr` - let rust_str = unsafe { CStr::from_ptr(locale_ptr) }.to_str()?; - let calendar_locale = rust_str.to_string(); - return Ok(calendar_locale); - } - Err(RetrievalError::NullLocale) -} diff --git a/utils/env_preferences/src/windows.rs b/utils/env_preferences/src/windows.rs deleted file mode 100644 index 6b76ae4df82..00000000000 --- a/utils/env_preferences/src/windows.rs +++ /dev/null @@ -1,41 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use crate::RetrievalError; - -/// Retrieves languages preferred by the user , it consumes [`GlobalizationPreferences::Languages`](https://learn.microsoft.com/en-us/uwp/api/windows.system.userprofile.globalizationpreferences.languages?view=winrt-26100) -pub fn get_raw_locales() -> Result, RetrievalError> { - let mut locale_vec_str: Vec = Vec::new(); - let locale = windows::System::UserProfile::GlobalizationPreferences::Languages()?; - - for i in 0..locale.Size()? { - let hstring = locale.GetAt(i)?; - let string = hstring.to_string_lossy(); - locale_vec_str.push(string); - } - Ok(locale_vec_str) -} - -/// Gets the list calendar type and it's corresponding locale. 
It returns a Vec<(String, String)> -/// The first element is the locale of the calendar, second is the calendar identifier -pub fn get_system_calendars() -> Result, RetrievalError> { - let calendar = windows::Globalization::Calendar::new()?; - let system_calendar = windows::Globalization::Calendar::GetCalendarSystem(&calendar)?; - let calendar_type: String = system_calendar.to_string(); - let locale_list: Vec = get_raw_locales()?; - - let result: Vec<(String, String)> = locale_list - .into_iter() - .map(|locale| (locale, calendar_type.clone())) - .collect(); - - Ok(result) -} - -/// Get the current time zone of the system -pub fn get_system_time_zone() -> Result { - let calendar = windows::Globalization::Calendar::new()?; - let timezone = calendar.GetTimeZone()?; - Ok(timezone.to_string_lossy()) -} diff --git a/utils/env_preferences/tests/parse/mod.rs b/utils/env_preferences/tests/parse/mod.rs deleted file mode 100644 index 767795cfe78..00000000000 --- a/utils/env_preferences/tests/parse/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -mod posix; -mod windows; diff --git a/utils/env_preferences/tests/parse/posix.rs b/utils/env_preferences/tests/parse/posix.rs deleted file mode 100644 index 842546b5af6..00000000000 --- a/utils/env_preferences/tests/parse/posix.rs +++ /dev/null @@ -1,298 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use env_preferences::parse::posix::PosixLocale; -use icu_locale_core::Locale; - -fn expect_success(src: &str, expected: &str) { - let posix_locale = PosixLocale::try_from_str(src).expect(src); - let converted_locale = posix_locale.try_convert_lossy().expect(src); - - let expected_locale = Locale::try_from_str(expected).expect(src); - assert_eq!(converted_locale, expected_locale, "POSIX locale: `{src}`"); -} - -#[test] -fn default_locale() { - expect_success("C", "en-US-posix"); - expect_success("POSIX", "en-US-posix"); -} - -#[test] -fn region() { - expect_success("en_US", "en-US"); - expect_success("ne_NP", "ne-NP"); - expect_success("zh_TW", "zh-TW"); -} - -#[test] -fn codeset_ignored() { - expect_success("lv_LV.iso885913", "lv-LV"); - expect_success("hy_AM.armscii8", "hy-AM"); -} - -#[test] -fn modifier() { - // Currency - expect_success("it_IT@euro", "it-IT-u-cu-eur"); - - // Script - expect_success("uz_UZ@cyrillic", "uz-Cyrl-UZ"); - expect_success("sd_IN@devanagari", "sd-Deva-IN"); - expect_success("sr_RS@latin", "sr-Latn-RS"); - - // Language - expect_success("aa_ER@saaho", "ssy-ER"); - - // Variant - expect_success("ca_ES@valencia", "ca-ES-valencia"); -} - -mod error { - mod parse { - use env_preferences::parse::posix::{PosixLocale, PosixParseError}; - use env_preferences::ParseError; - - fn expect_error(src: &str, posix_error: PosixParseError) { - let result = PosixLocale::try_from_str(src); - let expected = ParseError::Posix(posix_error); - - match result { - Ok(invalid_locale) => { - panic!("Expected the error `{expected:?}`, got the locale `{invalid_locale:?}` from input of `{src}`") - } - Err(error) => { - assert_eq!(error, expected, "Comparing expected output of `{src}`") - } - } - } - - #[test] - fn empty_locale() { - expect_error("", PosixParseError::EmptyLocale); - } - - #[test] - fn empty_section() { - // Single, empty optional section - expect_error("en_", PosixParseError::EmptySection { offset: 2 }); - expect_error("en.", 
PosixParseError::EmptySection { offset: 2 }); - expect_error("en@", PosixParseError::EmptySection { offset: 2 }); - - // Multiple optional sections, one empty - expect_error("en_.utf8@euro", PosixParseError::EmptySection { offset: 2 }); - expect_error("en_US.@euro", PosixParseError::EmptySection { offset: 5 }); - expect_error("en_US.utf8@", PosixParseError::EmptySection { offset: 10 }); - - // Single delimiter (excluding "." as that should return `PosixParseError::InvalidLocale` instead) - expect_error("_", PosixParseError::EmptySection { offset: 0 }); - expect_error("@", PosixParseError::EmptySection { offset: 0 }); - - // All delimiters - expect_error("_.@", PosixParseError::EmptySection { offset: 0 }); - } - - #[test] - fn invalid_character() { - const SAMPLE_LOCALES: [&str; 2] = [ - "en", // No optional fields - "en_US.utf8@euro", // All optional fields - ]; - - for locale in SAMPLE_LOCALES { - // Insert an invalid character ('/') at every position along the sample locale - for offset in 0..=locale.len() { - let (left, right) = locale.split_at(offset); - let invalid_locale = format!("{left}/{right}"); - expect_error( - &invalid_locale, - PosixParseError::InvalidCharacter { offset }, - ); - } - } - - // Test a single '/' character - expect_error("/", PosixParseError::InvalidCharacter { offset: 0 }); - } - - #[test] - fn invalid_locale() { - expect_error(".", PosixParseError::InvalidLocale); - expect_error("..", PosixParseError::InvalidLocale); - } - - #[test] - fn repeated_delimiter() { - // Repeated delimiter at the end of locale - expect_error( - "en_US.utf8@euro_US", - PosixParseError::RepeatedDelimiter { - first_offset: 2, - second_offset: 15, - }, - ); - expect_error( - "en_US.utf8@euro.utf8", - PosixParseError::RepeatedDelimiter { - first_offset: 5, - second_offset: 15, - }, - ); - expect_error( - "en_US.utf8@euro@euro", - PosixParseError::RepeatedDelimiter { - first_offset: 10, - second_offset: 15, - }, - ); - - // Multiple repeated delimiters - 
expect_error( - "en.utf8.utf8.utf8", - PosixParseError::RepeatedDelimiter { - first_offset: 2, - second_offset: 7, - }, - ); - - // Consecutive repeated delimiters - expect_error( - "en__US.utf8@euro", - PosixParseError::RepeatedDelimiter { - first_offset: 2, - second_offset: 3, - }, - ); - expect_error( - "en_US..utf8@euro", - PosixParseError::RepeatedDelimiter { - first_offset: 5, - second_offset: 6, - }, - ); - expect_error( - "en_US.utf8@@euro", - PosixParseError::RepeatedDelimiter { - first_offset: 10, - second_offset: 11, - }, - ); - } - - #[test] - fn unordered_delimiter() { - expect_error( - "en_US@euro.utf8", - PosixParseError::UnorderedDelimiter { - first_offset: 5, - second_offset: 10, - }, - ); - expect_error( - "en.utf8_US@euro", - PosixParseError::UnorderedDelimiter { - first_offset: 2, - second_offset: 7, - }, - ); - expect_error( - "en.utf8@euro_US", - PosixParseError::UnorderedDelimiter { - first_offset: 7, - second_offset: 12, - }, - ); - expect_error( - "en@euro_US.utf8", - PosixParseError::UnorderedDelimiter { - first_offset: 2, - second_offset: 7, - }, - ); - expect_error( - "en@euro.utf8_US", - PosixParseError::UnorderedDelimiter { - first_offset: 2, - second_offset: 7, - }, - ); - } - - #[test] - fn offset() { - // Empty section - let src = "en_.utf8@euro"; - match PosixLocale::try_from_str(src) { - Err(ParseError::Posix(PosixParseError::EmptySection { offset })) => { - assert_eq!(&src[offset..offset + 1], "_"); - } - _ => unreachable!(), - } - - // Invalid character - let src = "en_U/S"; - match PosixLocale::try_from_str(src) { - Err(ParseError::Posix(PosixParseError::InvalidCharacter { offset })) => { - assert_eq!(&src[offset..offset + 1], "/"); - } - _ => unreachable!(), - } - - // Repeated delimiter - let src = "en_US.utf8@euro_US"; - match PosixLocale::try_from_str(src) { - Err(ParseError::Posix(PosixParseError::RepeatedDelimiter { - first_offset, - second_offset, - })) => { - assert_eq!(&src[first_offset..first_offset + 1], "_"); - 
assert_eq!(&src[second_offset..second_offset + 1], "_"); - } - _ => unreachable!(), - } - - // Unordered delimiter - let src = "en_US@euro.utf8"; - match PosixLocale::try_from_str(src) { - Err(ParseError::Posix(PosixParseError::UnorderedDelimiter { - first_offset, - second_offset, - })) => { - assert_eq!(&src[first_offset..first_offset + 1], "@"); - assert_eq!(&src[second_offset..second_offset + 1], "."); - } - _ => unreachable!(), - } - } - } - - mod conversion { - use env_preferences::parse::posix::PosixLocale; - - fn expect_error(src: &str, icu_error: icu_locale_core::ParseError) { - let result = PosixLocale::try_from_str(src) - .expect(src) - .try_convert_lossy(); - let expected = env_preferences::ParseError::Icu(icu_error); - match result { - Ok(invalid_locale) => { - panic!("Expected the error `{icu_error:?}`, got the locale `{invalid_locale:?}` from input of `{src}`") - } - Err(error) => { - assert_eq!(error, expected, "Comparing expected output of `{src}`") - } - } - } - - #[test] - fn invalid_language() { - expect_error("invalid", icu_locale_core::ParseError::InvalidLanguage); - } - - #[test] - fn invalid_region() { - expect_error("en_invalid", icu_locale_core::ParseError::InvalidSubtag); - } - } -} diff --git a/utils/env_preferences/tests/parse/windows.rs b/utils/env_preferences/tests/parse/windows.rs deleted file mode 100644 index 925c608dacb..00000000000 --- a/utils/env_preferences/tests/parse/windows.rs +++ /dev/null @@ -1,60 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use env_preferences::parse::windows::WindowsLocale; -use icu_locale_core::Locale; - -fn expect_success(src: &str, expected: &str) { - let windows_locale = WindowsLocale::try_from_str(src).expect(src); - let locale = windows_locale.try_convert_lossy().expect(src); - - assert_eq!( - locale, - Locale::try_from_str(expected).unwrap(), - "Case: {src}" - ); -} - -#[test] -fn collation() { - /// All MS-LCID collation entries with a known matching CLDR collation value - const CASES: [(&str, &str); 12] = [ - ("de-DE_phoneb", "de-DE-u-co-phonebk"), - ("es-ES_tradnl", "es-ES-u-co-trad"), - ("ja-JP_radstr", "ja-JP-u-co-unihan"), - ("zh-CN_phoneb", "zh-CN-u-co-phonebk"), - ("zh-CN_stroke", "zh-CN-u-co-stroke"), - ("zh-HK_radstr", "zh-HK-u-co-unihan"), - ("zh-MO_radstr", "zh-MO-u-co-unihan"), - ("zh-MO_stroke", "zh-MO-u-co-stroke"), - ("zh-SG_phoneb", "zh-SG-u-co-phonebk"), - ("zh-SG_stroke", "zh-SG-u-co-stroke"), - ("zh-TW_pronun", "zh-TW-u-co-zhuyin"), - ("zh-TW_radstr", "zh-TW-u-co-unihan"), - ]; - - for (src, expected) in CASES { - expect_success(src, expected); - } -} - -#[test] -fn collation_strip_known_invalid() { - // All MS-LCID collation entries with NO known matching CLDR collation value - expect_success("hu-HU_tchncl", "hu-HU"); - expect_success("ka-GE_modern", "ka-GE"); -} - -#[test] -fn collation_strip_unknown() { - expect_success("en-US_unknown", "en-US"); - expect_success("en-US_unknown_multiple_underscores", "en-US"); - expect_success("en-US_unknown-with-hyphens", "en-US"); -} - -#[test] -fn alias() { - expect_success("zh-yue-HK", "yue-HK"); - expect_success("x-IV-mathan", "und"); -} diff --git a/utils/env_preferences/tests/test.rs b/utils/env_preferences/tests/test.rs deleted file mode 100644 index f598bfbd97d..00000000000 --- a/utils/env_preferences/tests/test.rs +++ /dev/null @@ -1,218 +0,0 @@ -// This file is part of ICU4X. 
For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -mod datasets; -mod parse; - -#[cfg(target_os = "linux")] -#[cfg(test)] -mod linux_tests { - use env_preferences::posix::{get_raw_locale_categories, get_system_calendars, LocaleCategory}; - use env_preferences::RetrievalError; - use icu_locale_core::Locale; - use libc::setlocale; - - // Testing fetching of locale, as `get_locales` fetches the locales for category - // `LC_ALL`. For this category this should return non empty - #[test] - fn test_get_raw_locale_categories() { - let locale_res = get_raw_locale_categories().unwrap(); - assert!( - !locale_res.is_empty(), - "Empty hashmap for locales retrieved" - ); - for locale in locale_res.into_values() { - assert!(locale.is_ascii(), "Invalid form of locale retrieved") - } - } - - #[test] - fn test_converting_locales() { - let locale_res: std::collections::HashMap = - get_raw_locale_categories().unwrap(); - for locale in locale_res.into_values() { - let parts: Vec<&str> = locale.split('.').collect(); - - // Skipping "C" and those ending with "UTF-8", as they cannot be converted - // into the locale - if !parts.contains(&"C") && (parts.len() > 1 && parts[parts.len() - 1] != "UTF-8") { - let mut locale_converted: Locale = locale.parse().unwrap(); - locale_converted.extensions.unicode.clear(); - assert_eq!(locale_converted, locale.parse().unwrap()); - } - } - } - - // This test contains unsafe code, the idea is to manually set a locale for `LC_TIME`, - // compare the result from `get_locales` and `get_system_calendar` they must be equal - #[test] - fn test_calendar() { - // Using "C" locale since it is the default, using any other locale like `en_IN` or `en_US` - // may work on some system and may not others depending on the availability - let test_calendar_locale = "C"; - let locale_cstr = - 
std::ffi::CString::new(test_calendar_locale).expect("CString::new failed"); - - // SAFETY: This call is safe because any subsequent call to `setlocale` we pass a `NULL` locale - // to retrieve locale which does not sets the locale. The test locale `locale_cstr` is a CString - // nul terminated string for which we have the ownership - let tr = unsafe { setlocale(libc::LC_TIME, locale_cstr.as_ptr()) }; - - if tr.is_null() { - panic!("{:?}", RetrievalError::NullLocale); - } - - let calendar_locale = get_system_calendars().unwrap(); - assert_eq!(test_calendar_locale.to_string(), calendar_locale); - } -} - -#[cfg(target_os = "macos")] -#[cfg(test)] -mod macos_test { - use env_preferences::apple::{get_raw_locales, get_system_calendars, get_system_time_zone}; - use icu_locale_core::Locale; - - #[test] - fn test_get_raw_locales() { - let locales_res = get_raw_locales(); - match locales_res { - Ok(locales) => { - for locale in locales { - assert!(!locale.is_empty(), "Empty locale retrieved"); - assert!(locale.is_ascii(), "Invalid form of locale retrieved"); - } - } - Err(e) => { - panic!("{e:?}") - } - } - } - - #[test] - fn test_converting_locales() { - let locales = get_raw_locales().unwrap(); - for locale in locales { - let _loc: Locale = locale.parse().unwrap(); - } - } - - #[test] - fn test_calendar() { - let calendar_res = get_system_calendars().unwrap(); - for calendar in calendar_res { - assert!(!calendar.0.is_empty(), "Couldn't retreive calendar locale"); - assert!(calendar.0.is_ascii(), "Calendar locale form is not valid"); - assert!(!calendar.1.is_empty(), "Couldn't retreive calendar"); - assert!( - calendar.1.is_ascii(), - "Calendar identifier form is not valid" - ); - } - } - - #[test] - fn test_time_zone() { - let time_zone = get_system_time_zone().unwrap(); - assert!(!time_zone.is_empty(), "Couldn't retreive time_zone"); - } -} - -#[cfg(target_os = "windows")] -#[cfg(test)] -mod windows_test { - use env_preferences::parse::windows::WindowsLocale; - use 
env_preferences::windows::{get_raw_locales, get_system_calendars, get_system_time_zone}; - use icu_locale_core::Locale; - use std::sync::{LazyLock, Mutex}; - use windows::Win32::{ - Foundation::LPARAM, - Globalization::{EnumSystemLocalesEx, LOCALE_ALL}, - }; - use windows_core::{BOOL, PCWSTR}; - - // Since [`EnumSystemLocalesEx`] iterates using a callback with no obvious (safe) way to return data, - // store them in this static instead. Since this is only a single test with roughly 1,000 items, - // it shouldn't be much of a concern. - static LOCALES: LazyLock>> = LazyLock::new(|| Mutex::new(Vec::new())); - - /// Callback provided to the [`EnumSystemLocalesEx`] to enumerate over locales. - unsafe extern "system" fn callback( - locale_name: PCWSTR, - _locale_flags: u32, - _callback_parameter: LPARAM, - ) -> BOOL { - // SAFETY: caller is the [`EnumSystemLocalesEx`] function, which guarantees a valid null-terminated string - let locale_name = unsafe { locale_name.to_string() }.unwrap(); - - // Skip empty locale 0x007F, marked as "Reserved for invariant locale behavior" - // Source: MS-LCID version 16.0, page 13 (section 2.2 under "Language ID" table) - if !locale_name.is_empty() { - LOCALES.lock().unwrap().push(locale_name); - } - - // Tell [`EnumSystemLocalesEx`] to continue enumeration - BOOL::from(true) - } - - /// Enumerate over all Windows locales, and make sure [`WindowsLocale`] can parse it without any (direct) errors. 
- #[test] - fn system_locales() -> windows_core::Result<()> { - // Find the list of supported locales, using the [`EnumSystemLocalesEx`] API: - // https://learn.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-enumsystemlocalesex - // SAFETY: a valid function pointer is provided and lpReserved is set to NULL/None as required - unsafe { - EnumSystemLocalesEx(Some(callback), LOCALE_ALL, LPARAM::default(), None)?; - } - - // Get the list of locales which the callback has been modifying - let locales = LOCALES.lock().unwrap(); - - // Make sure [`WindowsLocale`] can parse without any obvious issues - for locale in locales.iter() { - let windows_locale = WindowsLocale::try_from_str(locale).expect(locale); - windows_locale.try_convert_lossy().expect(locale); - } - - Ok(()) - } - - #[test] - fn test_get_raw_locales() { - let locales = get_raw_locales().unwrap(); - for locale in locales { - assert!(!locale.is_empty(), "Empty locale retrieved"); - assert!(locale.is_ascii(), "Invalid form of locale retrieved"); - } - } - - #[test] - fn test_converting_locales() { - let locales = get_raw_locales().unwrap(); - for locale in locales { - let _converted_locale: Locale = locale.parse().unwrap(); - } - } - - #[test] - fn test_calendar() { - let calendars = get_system_calendars().unwrap(); - for calendar in calendars { - assert!(!calendar.0.is_empty(), "Calendar locale is empty"); - assert!(calendar.0.is_ascii(), "Calendar locale form is not valid"); - assert!(!calendar.1.is_empty(), "Calendar identifier is empty"); - assert!( - calendar.1.is_ascii(), - "Calendar identifier form is not valid" - ); - } - } - - #[test] - fn test_time_zone() { - let time_zone = get_system_time_zone().unwrap(); - assert!(!time_zone.is_empty(), "Couldn't retreive time_zone"); - assert!(time_zone.is_ascii(), "Invalid TimeZone format"); - } -} diff --git a/utils/env_preferences/Cargo.toml b/utils/host_info/Cargo.toml similarity index 51% rename from utils/env_preferences/Cargo.toml rename to 
utils/host_info/Cargo.toml index db70c7be2b9..0ba88c85a89 100644 --- a/utils/env_preferences/Cargo.toml +++ b/utils/host_info/Cargo.toml @@ -3,7 +3,7 @@ # (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). [package] -name = "env_preferences" +name = "icu_host_info" version = "0.1.0" publish = false @@ -17,11 +17,17 @@ categories.workspace = true include.workspace = true [dependencies] -core-foundation-sys = "0.8.6" displaydoc = { workspace = true } icu_locale_core = { workspace = true, features = ["alloc"] } -libc = "0.2.155" +icu_datetime = { workspace = true, optional = true } +libc = "0.2.175" +[target.'cfg(target_os = "linux")'.dependencies] +gio = { version = "0.21.2", optional = true } + +[target.'cfg(target_os = "macos")'.dependencies] +core-foundation-sys = { version = "0.8.6" } +core-foundation = { version = "0.10.1" } [target.'cfg(target_os = "windows")'.dependencies.windows] version = "0.60.0" @@ -36,5 +42,20 @@ features = [ "Win32_Globalization", ] -[target.'cfg(target_os = "windows")'.dev-dependencies] -windows-core = "0.60.1" +[dev-dependencies] +icu = { path = "../../components/icu", default-features = false } +icu_datetime = { workspace = true, features = ["compiled_data"] } +icu_calendar = { workspace = true } +icu_time = { workspace = true, features = ["compiled_data"] } + +[features] +default = [] +datetime = ["dep:icu_datetime"] +gnome = ["dep:gio"] + +[[example]] +name = "dt_format" +required-features = ["datetime"] + +[package.metadata.cargo-all-features] +skip_optional_dependencies = true diff --git a/utils/env_preferences/LICENSE b/utils/host_info/LICENSE similarity index 100% rename from utils/env_preferences/LICENSE rename to utils/host_info/LICENSE diff --git a/utils/host_info/README.md b/utils/host_info/README.md new file mode 100644 index 00000000000..02921aba250 --- /dev/null +++ b/utils/host_info/README.md @@ -0,0 +1,288 @@ +# icu_host_info 
[![crates.io](https://img.shields.io/crates/v/icu_host_info)](https://crates.io/crates/icu_host_info) + + + +## host_info + +`host_info` is a library providing functionality to retrieve regional preferences +from host environments - primarily the operating system the program is running in. + +The library is designed to bind the different host environment preferences architectures +to ICU4X model. + +## Example + +```rust +use icu_host_info::HostInfo; +use icu::calendar::Date; +use icu::datetime::{fieldsets, DateTimeFormatter}; + +let date = Date::try_new_gregorian(2025, 10, 10) + .expect("Failed to create date"); + +// requires feature `datetime` +let prefs = HostInfo::datetime_preferences() + .expect("Failed to retrieve host info"); + +let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) + .expect("Failed to create datetime formatter."); + +let formatted_dt = dtf.format(&date); + +assert_eq!(formatted_dt.to_string(), "October 10, 2025"); +``` + +## Feature Matrix + +The library intends to provide means to retrieve regional preferences +to ICU4X preferences with a focus on Unicode Extensions, but allow for +propagation of preferences offered by the host environments which may +not have a representation in Unicode Extensions (for example: date format pattern). 
+ +Legend: +- ✅ = OS + `host_info` support +- ⚠️ = OS supports, `host_info` doesn't +- ❌ = OS doesn't support + +| Feature | Android | iOS | Linux (1) | macOS | Windows | +|---------------------| :-----: | :-: | :------------------: | :---: | :-----: | +| Requested Locales | ✅ | ✅ | ✅ | ✅ | ✅ | +| Calendar | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +| Region | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +| Hour cycle | ⚠️ | ⚠️ | ✅ | ✅ | ⚠️ | +| Measurement System | ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | +| Measurement Override| ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | +| First Day of week | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +| Collation | ⚠️ | ⚠️ | ⚠️ | ✅ | ❌ | +| Date format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +| Number format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | + +(1) In the case of Linux, different desktop environments (DEs) such as GNOME and KDE are covered together. + +## Integrating preferences into ICU4X formatters + +The library provides three ways of injecting retrieved values into formatters: + +### 1. Preference Bag + +For most common components, such as `DateTimeFormatter`, the library exposes +a direct getter that retrieves a `Preferences` struct for that component. +This getter is located behind a feature flag to allow for control over which dependencies are being +pulled. + +#### Example + +```rust +use icu_host_info::HostInfo; +use icu::datetime::{fieldsets, DateTimeFormatter}; + +// requires feature `datetime` +let prefs = HostInfo::datetime_preferences() + .expect("Failed to retrieve host info"); + +let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) + .expect("Failed to create datetime formatter."); +``` + +### 2. 
Locale + +For all components that `HostInfo` does not have a special preference getter for, +and for cases where the user prefers to avoid pulling extra dependencies at the cost +of narrowing down the retrieved values to just the ones encoded in Unicode Extensions, +the library provides an ergonomic getter: + +#### Example + +```rust +use icu_host_info::HostInfo; +use icu::{ + datetime::{fieldsets, DateTimeFormatter}, + locale::Locale, +}; + +let mut locale = HostInfo::requested_locales() + .expect("Failed to retrieve locales") + .first() + .cloned() + .unwrap_or(Locale::UNKNOWN); + +locale.extensions.unicode = HostInfo::unicode_extensions() + .expect("Failed to retrieve host info"); + +let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) + .expect("Failed to create datetime formatter."); +``` + +Notice that the regional preferences encoded in Unicode Extensions +are retrieved separately from the list of requested locales. +There are two reasons for this design: +- The user has to decide whether the regional preferences apply to all locales, or just the first one. +- The locale negotiation may result in a different locale being selected. + +### 3. Individual Preferences + +For each preference the library also attempts to provide a direct getter +allowing the user to retrieve just that preference and use it as they see fit. + +#### Example + +```rust +use icu_host_info::HostInfo; +use icu::locale::preferences::extensions::unicode::keywords::HourCycle; + +let hour_cycle: Option<HourCycle> = HostInfo::hour_cycle() + .expect("Failed to retrieve hour_cycle preference"); +``` + +## Locale Negotiation + +Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU4X to +perform a negotiation between requested locales, and locales for which the data is available in the system. 
+The output of `HostInfo` will be utilized in that negotiation allowing the deployment to 1) select +the most appropriate locales for the given user and target modality, 2) apply regional preferences onto that +locale. + +The need to allow `HostInfo` to be pluggable into locale negotiation and multi source merging (see next section) +guided many design choices in this library. This section will be extended once locale negotiation is implemented. + +## Multi Source Merging + +In simple systems the user will most often use ICU4X to format +some information in a selected locale, and use this library to augment +the formatting with regional preferences set by the user in the host environment. + +In more complex systems, the user may also want to introduce a second source of regional preferences +and mix the values set in the host environment with those set in the program itself. + +For example, a web browser may offer some regional preferences set in the browser +itself, or even set separately for some contexts of the browser. + +In those cases, the deployment requires merging of the preferences. +ICU4X exposes an `extend` method on both the `Preferences` and `Unicode` extensions structs. + +This allows the system to retrieve [`HostInfo`] Preferences or `Unicode`, and the application's +equivalent, and merge them. 
+ +### `Preferences` Example + +```rust +use icu_host_info::HostInfo; +use icu::datetime::{fieldsets, DateTimeFormatter}; + +let app_prefs = app.datetime_preferences(); + +// requires feature `datetime` +let mut combined_prefs = HostInfo::datetime_preferences() + .expect("Failed to retrieve host info"); + +combined_prefs.extend(app_prefs); + +let dtf = DateTimeFormatter::try_new(combined_prefs, fieldsets::YMD::long()) + .expect("Failed to create datetime formatter."); +``` + +### `Unicode` Extensions Example + +```rust +use icu_host_info::HostInfo; +use icu::{ + datetime::{fieldsets, DateTimeFormatter}, + locale::locale, +}; + +let mut locale = locale!("fr-CA"); + +let app_ue = app.unicode_extensions(); + +let mut combined_ue = HostInfo::unicode_extensions() + .expect("Failed to retrieve host info"); + +combined_ue.extend(app_ue); + +locale.extensions.unicode = combined_ue; + +let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) + .expect("Failed to create datetime formatter."); +``` + +## Design Decisions + +The library operates on the boundary between a diverse set of host +environments and the uniform ICU4X design derived from Unicode LDML. +It requires a number of design tradeoffs that had to be made in +order to achieve the uniformity and scale over time as the host +platforms' designs evolve. + +### Host Environment + +The library is designed to handle retrieval of data from the direct host +environment. This usually means an operating system, but it can mean a +virtual environment, sandbox or runtime. +In such a case it is the responsibility of the execution logic +setting up such an environment to ensure propagation of customer preferences. + +### Lossy Results + +The library makes a best effort to retrieve the values +that can be directly used in ICU4X. As the operating systems, +runtimes and ICU4X evolve, there's always a risk of a mismatch. +This library makes a design decision to be lossy-by-default. 
+ +Any value that cannot be directly mapped onto a valid value is ignored +and indistinguishable in the ergonomic API from a missing value. + +Similarly, the API does not distinguish between missing binding logic and an unknown value. +The assumption is that users of this library are aiming to respect user choices +encoded in host environment regional preferences, but are not in a position +to act differently on a failed attempt to retrieve them than on a missing value. +Therefore errors in this library are very rare and only related to catastrophic +cases like memory corruption or OS API error propagation. + +### Normalized vs Raw values + +The main API of this library - [`HostInfo`] - provides methods that return normalized +values, often directly taken from `icu::locale_core::preferences`. +Per-host backends provide additional trait implementations that return +raw values, allowing the user to handle or introspect those values manually. +When using `HostInfo`, the library makes a best effort to normalize and parse +those raw values into canonical Unicode ICU4X representation, often discarding +unknown values and values that fail to parse. + +Those raw backends are not exposed in the generated documentation. + +#### Example + +```rust +use icu_host_info::backends::{ + RawHostInfoBackend, + macos::MacOSHostInfoBackend, +}; + +let raw_cal: Option<String> = MacOSHostInfoBackend::raw_calendar() + .expect("Failed to retrieve raw calendar"); +``` + +### Minimize defaults + +The library attempts to use host APIs in a way that allows distinguishing between +preference values that represent defaults for a given locale, from ones manually set +by the user. +In some cases, the host API does not allow for that distinction, which may result +in overly expressive locales such as `en-US-u-ca-gregory` (`gregory` being already a default calendar for en-US). 
+ +This, like other aspects of the library, operates on a best-effort basis and may be further improved in future +releases as better bindings become available. + +#### Host API Design Guidance + +A note for host API designers - it is useful for foundational libraries such as this one when hosts expose APIs that enable us +to distinguish regional preference values derived by the host from the defaults of a locale from cases +when the value is explicitly set by the user. +This distinction allows ICU4X to better serve locale negotiation scenarios where a locale other than the first one may be used +and the deployment should respect whether the user set a given preference explicitly or left it to the per-locale default. + + + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/utils/host_info/examples/dt_format.rs b/utils/host_info/examples/dt_format.rs new file mode 100644 index 00000000000..80e664a6c47 --- /dev/null +++ b/utils/host_info/examples/dt_format.rs @@ -0,0 +1,24 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_calendar::Date; +use icu_datetime::{fieldsets, input::Time, DateTimeFormatter}; +use icu_host_info::HostInfo; +use icu_time::DateTime; + +fn main() { + let prefs = HostInfo::datetime_preferences().expect("Failed to retrieve host info"); + let dtf = DateTimeFormatter::try_new( + prefs, + fieldsets::YMDT::long().with_alignment(icu_datetime::options::Alignment::Column), + ) + .expect("Failed to create datetime formatter."); + + let date = Date::try_new_gregorian(2020, 10, 10).unwrap(); + let time = Time::try_new(18, 56, 0, 0).unwrap(); + + let formatted_dt = dtf.format(&DateTime { date, time }); + + println!("Today is: {formatted_dt}"); +} diff --git a/utils/host_info/examples/dt_format_locale.rs b/utils/host_info/examples/dt_format_locale.rs new file mode 100644 index 00000000000..9eeea15b1f4 --- /dev/null +++ b/utils/host_info/examples/dt_format_locale.rs @@ -0,0 +1,33 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_calendar::Date; +use icu_datetime::{fieldsets, input::Time, DateTimeFormatter}; +use icu_host_info::HostInfo; +use icu_locale_core::Locale; +use icu_time::DateTime; + +fn main() { + let mut locale = HostInfo::requested_locales() + .unwrap() + .first() + .cloned() + .unwrap_or(Locale::UNKNOWN); + + locale.extensions.unicode = + HostInfo::unicode_extensions().expect("Failed to retrieve host info"); + + let dtf = DateTimeFormatter::try_new( + locale.into(), + fieldsets::YMDT::long().with_alignment(icu_datetime::options::Alignment::Column), + ) + .expect("Failed to create datetime formatter."); + + let date = Date::try_new_gregorian(2025, 10, 10).unwrap(); + let time = Time::try_new(18, 56, 0, 0).unwrap(); + + let formatted_dt = dtf.format(&DateTime { date, time }); + + println!("Today is: {formatted_dt}"); +} diff --git a/utils/host_info/examples/get_data.rs b/utils/host_info/examples/get_data.rs new file mode 100644 index 00000000000..6a47f3e3885 --- /dev/null +++ b/utils/host_info/examples/get_data.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_host_info::HostInfo; + +fn main() { + println!("resolved backend: {:?}", HostInfo::resolved_backend()); + println!("-----"); + println!("requested locales: {:?}", HostInfo::requested_locales()); + println!("calendar: {:?}", HostInfo::calendar()); + println!("region: {:?}", HostInfo::region()); + println!("hour_cycle: {:?}", HostInfo::hour_cycle()); + println!("measurement_system: {:?}", HostInfo::measurement_system()); + println!( + "measurement_unit_override: {:?}", + HostInfo::measurement_unit_override() + ); + println!("first_day: {:?}", HostInfo::first_day_of_week()); + println!("collation: {:?}", HostInfo::collation()); + println!("-----"); + println!( + "unicode_extensions: {:?}", + HostInfo::unicode_extensions().unwrap().to_string() + ); + #[cfg(feature = "datetime")] + println!( + "datetimeformatter_preferences: {:#?}", + HostInfo::datetime_preferences() + ); +} diff --git a/utils/host_info/src/backends/android.rs b/utils/host_info/src/backends/android.rs new file mode 100644 index 00000000000..2b21df3a3ed --- /dev/null +++ b/utils/host_info/src/backends/android.rs @@ -0,0 +1,33 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::{ + backends::{HostInfoBackend, RawHostInfoBackend}, + error::HostInfoError, + posix::{raw_locale_categories, LocaleCategory}, +}; + +pub struct AndroidHostInfoBackend; + +impl HostInfoBackend for AndroidHostInfoBackend {} + +impl RawHostInfoBackend for AndroidHostInfoBackend { + fn raw_requested_locales() -> Result, HostInfoError> { + let mut categories = raw_locale_categories()?; + let mut locales = Vec::with_capacity(categories.len()); + + // Add LC_ALL if it exists + if let Some(primary_locale) = categories.remove(&LocaleCategory::All) { + locales.push(primary_locale); + } + + // Add any remaining locales that were explicitly set + for s in categories.into_values() { + if !locales.contains(&s) { + locales.push(s); + } + } + Ok(locales) + } +} diff --git a/utils/host_info/src/backends/linux.rs b/utils/host_info/src/backends/linux.rs new file mode 100644 index 00000000000..c6be3ffafc5 --- /dev/null +++ b/utils/host_info/src/backends/linux.rs @@ -0,0 +1,101 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::{ + backends::{HostInfoBackend, RawHostInfoBackend}, + error::HostInfoError, + locale::PosixLocale, +}; +use icu_locale_core::{preferences::extensions::unicode::keywords::HourCycle, Locale}; + +pub struct LinuxHostInfoBackend; + +impl HostInfoBackend for LinuxHostInfoBackend { + #[cfg(feature = "datetime")] + fn datetime_preferences() -> Result { + use crate::posix::{raw_locale_categories, LocaleCategory}; + + let mut categories = raw_locale_categories()?; + + let mut locale = Locale::UNKNOWN; + if let Some(lc_time) = categories.remove(&LocaleCategory::Time) { + if let Ok(loc) = PosixLocale::try_from_str(&lc_time) { + if let Ok(loc) = Locale::try_from(loc) { + locale = loc; + } + } + } else { + if let Some(lc_all) = categories.remove(&LocaleCategory::All) { + if let Ok(loc) = PosixLocale::try_from_str(&lc_all) { + if let Ok(loc) = Locale::try_from(loc) { + locale = loc; + } + } + } + } + + let mut result = icu_datetime::DateTimeFormatterPreferences::from(locale); + result.numbering_system = None; + result.hour_cycle = Self::hour_cycle()?; + result.calendar_algorithm = Self::calendar()?; + Ok(result) + } + + fn requested_locales() -> Result, HostInfoError> { + Ok(Self::raw_requested_locales()? 
+ .into_iter() + .filter_map(|s| { + PosixLocale::try_from_str(&s) + .ok() + .and_then(|posix_locale| Locale::try_from(posix_locale).ok()) + }) + .collect()) + } + + fn hour_cycle() -> Result, HostInfoError> { + #[cfg(feature = "gnome")] + if let Some(hc) = gnome_clock_format_hc() { + return Ok(Some(hc)); + } + Ok(None) + } +} + +impl RawHostInfoBackend for LinuxHostInfoBackend { + fn raw_requested_locales() -> Result, HostInfoError> { + // 1) LANGUAGE: colon-separated, ordered + if let Ok(s) = std::env::var("LANGUAGE") { + let v: Vec = s + .split(':') + .filter(|x| !x.is_empty()) + .map(|s| s.to_string()) + .collect(); + if !v.is_empty() { + return Ok(v); + } + } + + // 2) Fallbacks in POSIX/gettext precedence order (LC_ALL overrides every LC_* category): LC_ALL > LC_MESSAGES > LANG + for k in ["LC_ALL", "LC_MESSAGES", "LANG"] { + if let Ok(s) = std::env::var(k) { + if !s.is_empty() { + return Ok(vec![s]); + } + } + } + + Ok(vec![]) + } +} + +#[cfg(feature = "gnome")] +fn gnome_clock_format_hc() -> Option { + use gio::prelude::*; + let s = gio::SettingsSchemaSource::default()?.lookup("org.gnome.desktop.interface", true).map(|_| gio::Settings::new("org.gnome.desktop.interface"))?; // `Settings::new` aborts the process when the schema is not installed, so look it up first + match s.string("clock-format").as_str() { + "12h" => Some(HourCycle::H12), + "24h" => Some(HourCycle::H23), + _ => None, + } +} diff --git a/utils/host_info/src/backends/macos.rs b/utils/host_info/src/backends/macos.rs new file mode 100644 index 00000000000..a30905abd37 --- /dev/null +++ b/utils/host_info/src/backends/macos.rs @@ -0,0 +1,519 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core_foundation::{ + array::CFArray, + base::{kCFAllocatorDefault, CFGetTypeID, CFTypeRef, TCFType}, + dictionary::{ + CFDictionaryGetCount, CFDictionaryGetKeysAndValues, CFDictionaryGetTypeID, CFDictionaryRef, + }, + number::{kCFNumberSInt32Type, CFNumberGetTypeID, CFNumberGetValue}, + string::{CFString, CFStringGetMaximumSizeForEncoding}, +}; +use core_foundation_sys::{ + base::CFRelease, + calendar::{CFCalendarCopyCurrent, CFCalendarGetIdentifier}, + date_formatter::CFDateFormatterCreateDateFormatFromTemplate, + locale::{ + kCFLocaleCountryCode, kCFLocaleMeasurementSystem, CFLocaleCopyCurrent, + CFLocaleCopyPreferredLanguages, CFLocaleGetValue, CFLocaleRef, + }, + preferences::{kCFPreferencesAnyHost, kCFPreferencesCurrentUser, CFPreferencesCopyValue}, + string::{kCFStringEncodingUTF8, CFStringGetCString, CFStringGetCStringPtr, CFStringRef}, +}; +use icu_locale_core::{ + extensions::unicode, + preferences::extensions::unicode::keywords::{ + CalendarAlgorithm, CollationType, HourCycle, MeasurementSystem, + }, + subtags::Language, +}; +use std::os::raw::c_char; +use std::{ffi::CStr, str::FromStr}; + +use crate::{ + backends::{HostInfoBackend, RawHostInfoBackend}, + error::HostInfoError, +}; + +pub struct MacOSHostInfoBackend; + +impl HostInfoBackend for MacOSHostInfoBackend { + fn calendar() -> Result, HostInfoError> { + Ok(Self::raw_calendar()? + .and_then(|raw| { + let canonical = match raw.as_str() { + "gregorian" => "gregory", + r => r, + }; + unicode::Value::from_str(canonical).ok() + }) + .and_then(|value| CalendarAlgorithm::try_from(&value).ok())) + } + + fn hour_cycle() -> Result, HostInfoError> { + with_current_locale(|locale| { + let template = CFString::new("j"); // hour-cycle probe + + // SAFETY: All parameters are valid - kCFAllocatorDefault is a system constant, + // template is a valid CFStringRef, locale is non-null, and 0 is a valid options value. 
+ let format = unsafe { + CFDateFormatterCreateDateFormatFromTemplate( + kCFAllocatorDefault, + template.as_concrete_TypeRef(), + 0, + locale, + ) + }; + + if format.is_null() { + return None; + } + + // SAFETY: format is non-null and owned by us, so we use wrap_under_create_rule. + // This properly handles ownership and will release the format when dropped. + let format_string = unsafe { CFString::wrap_under_create_rule(format) }; + + // Detect hour cycle from the first hour symbol in the pattern; it is not always the first character (some locales lead with the day period, e.g. `a h`) + match format_string.to_string().chars().find(|&c| matches!(c, 'K' | 'h' | 'H')) { + Some('K') => Some(HourCycle::H11), + Some('h') => Some(HourCycle::H12), + Some('H') => Some(HourCycle::H23), + _ => None, + } + }) + } + + fn measurement_system() -> Result, HostInfoError> { + Ok( + Self::raw_measurement_system()?.and_then(|raw| match raw.as_str() { + "U.S." => Some(MeasurementSystem::USSystem), + "U.K." => Some(MeasurementSystem::UKSystem), + "Metric" => Some(MeasurementSystem::Metric), + _ => None, + }), + ) + } + + fn collation() -> Result, HostInfoError> { + Ok(Self::raw_collation()?.and_then(|(lang, col)| { + if let Ok(val) = unicode::Value::from_str(&col) { + if let Ok(col) = CollationType::try_from(&val) { + let lang = Language::try_from_str(lang.as_str()).unwrap_or(Language::UNKNOWN); + Some((lang, col)) + } else { + None + } + } else { + None + } + })) + } +} + +impl RawHostInfoBackend for MacOSHostInfoBackend { + /// Retrieves system locales for Apple operating systems, in the order preferred by the + /// user, using [`CFLocaleCopyPreferredLanguages`](https://developer.apple.com/documentation/corefoundation/1542887-cflocalecopypreferredlanguages). + fn raw_requested_locales() -> Result, HostInfoError> { + // SAFETY: CFLocaleCopyPreferredLanguages returns an owned CFArrayRef that we must release. + // The function is documented to return NULL only in exceptional circumstances. 
+ let arr_ref = unsafe { CFLocaleCopyPreferredLanguages() }; + if arr_ref.is_null() { + return Ok(vec![]); + } + let arr = unsafe { CFArray::::wrap_under_create_rule(arr_ref as _) }; + + // Use iterator combinators for more idiomatic Rust + let out = (0..arr.len()) + .filter_map(|i| arr.get(i)) + .map(|v| v.to_string()) + .collect(); + Ok(out) + } + + /// Gets the current system calendar identifier. + fn raw_calendar() -> Result, HostInfoError> { + /// RAII wrapper for CFCalendarRef + struct CFCalendarWrapper(core_foundation_sys::calendar::CFCalendarRef); + + impl CFCalendarWrapper { + fn new() -> Option { + // SAFETY: CFCalendarCopyCurrent returns an owned CFCalendarRef that we must release. + let cal = unsafe { CFCalendarCopyCurrent() }; + if cal.is_null() { + None + } else { + Some(CFCalendarWrapper(cal)) + } + } + + fn get_identifier(&self) -> Option { + // SAFETY: self.0 is non-null. CFCalendarGetIdentifier expects a CFCalendarRef + // cast to the appropriate type, and returns a borrowed CFStringRef that doesn't + // need to be released. + let identifier = unsafe { CFCalendarGetIdentifier(self.0 as _) }; + cfstring_to_string(identifier as CFStringRef) + } + } + + impl Drop for CFCalendarWrapper { + fn drop(&mut self) { + // SAFETY: We own the calendar reference and must release it. + unsafe { CFRelease(self.0 as _) }; + } + } + + let calendar = CFCalendarWrapper::new(); + Ok(calendar.and_then(|cal| cal.get_identifier())) + } + + fn raw_region() -> Result, HostInfoError> { + with_current_locale(|locale| { + // SAFETY: locale is non-null and kCFLocaleCountryCode is a valid key. + // CFLocaleGetValue returns a borrowed reference. + let value = unsafe { CFLocaleGetValue(locale, kCFLocaleCountryCode) }; + + if value.is_null() { + return None; + } + + // SAFETY: We use wrap_under_get_rule because the value is borrowed, not owned. 
+ let cf_string = unsafe { CFString::wrap_under_get_rule(value as CFStringRef) }; + Some(cf_string.to_string()) + }) + } + + fn raw_measurement_system() -> Result, HostInfoError> { + with_current_locale(|locale| { + // SAFETY: locale is non-null and kCFLocaleMeasurementSystem is a valid key. + // CFLocaleGetValue returns a borrowed reference. + let value = unsafe { CFLocaleGetValue(locale, kCFLocaleMeasurementSystem) }; + + if value.is_null() { + return None; + } + + // SAFETY: We use wrap_under_get_rule because the value is borrowed, not owned. + let cf_string = unsafe { CFString::wrap_under_get_rule(value as CFStringRef) }; + Some(cf_string.to_string()) + }) + } + + fn raw_measurement_unit_override() -> Result, HostInfoError> { + unsafe { + let key = CFString::new("AppleTemperatureUnit"); + let domain = CFString::new(".GlobalPreferences"); + let v = CFPreferencesCopyValue( + key.as_concrete_TypeRef(), + domain.as_concrete_TypeRef(), + kCFPreferencesCurrentUser, + kCFPreferencesAnyHost, + ); + if v.is_null() { + return Ok(None); + } + // Create Rule: `v` is owned by us; wrap it so it is released on drop, and only accept an actual CFString. + let owned = core_foundation::base::CFType::wrap_under_create_rule(v); + Ok(owned.downcast_into::<CFString>().map(|s| s.to_string())) + } + } + + fn raw_first_day_of_week() -> Result, HostInfoError> { + unsafe { + let key = CFString::new("AppleFirstWeekday"); + let domain = CFString::new(".GlobalPreferences"); + let val: CFTypeRef = CFPreferencesCopyValue( + key.as_concrete_TypeRef(), + domain.as_concrete_TypeRef(), + kCFPreferencesCurrentUser, + kCFPreferencesAnyHost, + ); + if val.is_null() { + return Ok(None); + } + let _owned = core_foundation::base::CFType::wrap_under_create_rule(val); // Create Rule: releases `val` on every return path + if CFGetTypeID(val) != CFDictionaryGetTypeID() { + return Ok(None); + } + + // take the first value in the dictionary + let dict = val as CFDictionaryRef; + let count = CFDictionaryGetCount(dict); + if count == 0 { + // `val` is released when `_owned` goes out of scope + return Ok(None); + } + let mut keys: Vec = vec![std::ptr::null_mut(); count as usize]; + let mut vals: Vec = vec![std::ptr::null_mut(); count as usize]; + CFDictionaryGetKeysAndValues(dict, keys.as_mut_ptr() 
/// Parse macOS "AppleCollationOrder" style locale strings into `(language, collation)`.
///
/// Accepted forms:
/// - legacy keyword syntax, with the `collation` keyword in any position:
///   `zh@collation=stroke`, `zh-Hant@collation=zhuyin;foo=bar`,
///   `zh@calendar=chinese;collation=stroke`
/// - BCP 47 Unicode extension: `zh-u-co-pinyin`
///
/// Both components are returned lower-cased. Returns `None` if no collation is
/// present, the collation value is malformed, or the language subtag is invalid.
pub fn parse_mac_collation_locale(input: &str) -> Option<(String, String)> {
    /// Lower-cases a collation type and accepts it only if it is non-empty and
    /// consists of ASCII letters, digits (e.g. "big5han", "gb2312") and hyphens
    /// (e.g. "radical-stroke").
    fn norm_co(s: &str) -> Option<String> {
        let t = s.to_ascii_lowercase();
        let valid = !t.is_empty()
            && t.chars()
                .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-');
        valid.then_some(t)
    }

    if input.is_empty() {
        return None;
    }

    // 1) Split off the legacy "@key=value;..." keyword list, if any.
    let (head, keywords) = match input.split_once('@') {
        Some((head, tail)) => (head, Some(tail)),
        None => (input, None),
    };

    // 2) The language is the first subtag of the head: 2-8 ASCII letters.
    let lang = head
        .split(['-', '_'])
        .next()
        .map(str::to_ascii_lowercase)
        .filter(|s| (2..=8).contains(&s.len()) && s.chars().all(|c| c.is_ascii_alphabetic()))?;

    // 3a) Legacy keywords: look for `collation=` anywhere in the list, not only first.
    if let Some(keywords) = keywords {
        let legacy = keywords
            .split([';', '@']) // keyword separator, or a stray '@'
            .filter_map(|pair| pair.split_once('='))
            .find(|(name, _)| name.eq_ignore_ascii_case("collation"))
            .and_then(|(_, value)| norm_co(value));
        if let Some(co) = legacy {
            return Some((lang, co));
        }
        // No usable collation keyword: fall through to the -u- extension.
    }

    // 3b) BCP 47 Unicode extension: scan the subtags following "-u-" for the "co" key.
    let lower = head.to_ascii_lowercase();
    let (_, extension) = lower.split_once("-u-")?;
    let mut subtags = extension.split('-');
    while let Some(subtag) = subtags.next() {
        if subtag.len() == 1 {
            // A singleton starts another extension (or private use); -u- is over.
            break;
        }
        if subtag == "co" {
            // A keyword type is 3-8 characters; a 2-character subtag here is the
            // next key, meaning "co" carries no value.
            return subtags
                .next()
                .filter(|value| (3..=8).contains(&value.len()))
                .and_then(norm_co)
                .map(|co| (lang, co));
        }
    }

    None
}
+fn with_current_locale(f: F) -> Result, HostInfoError> +where + F: FnOnce(CFLocaleRef) -> Option, +{ + let locale = CFLocaleWrapper::new(); + Ok(locale.and_then(|loc| f(loc.as_ref()))) +} + +/// Converts a CFStringRef to a Rust String. +/// Returns None if the CFStringRef is null or conversion fails. +fn cfstring_to_string(cf_string: CFStringRef) -> Option { + if cf_string.is_null() { + return None; + } + + // SAFETY: cf_string is non-null as verified above. + unsafe { + // Try the fast path first - get direct pointer to UTF-8 data + let direct_ptr = CFStringGetCStringPtr(cf_string, kCFStringEncodingUTF8); + if !direct_ptr.is_null() { + // SAFETY: CFStringGetCStringPtr returned non-null, so it points to valid UTF-8 data. + return CStr::from_ptr(direct_ptr as *const c_char) + .to_str() + .ok() + .map(str::to_owned); + } + + // Fall back to copying the string data + let length = core_foundation_sys::string::CFStringGetLength(cf_string); + let max_size = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8) + 1; + + // Use stack buffer for small strings to avoid heap allocation + const STACK_BUFFER_SIZE: usize = 256; + let mut stack_buffer = [0u8; STACK_BUFFER_SIZE]; + + if max_size <= STACK_BUFFER_SIZE as isize { + // SAFETY: stack_buffer has sufficient size, cf_string is non-null, + // and kCFStringEncodingUTF8 is a valid encoding. + let success = CFStringGetCString( + cf_string, + stack_buffer.as_mut_ptr() as *mut i8, + STACK_BUFFER_SIZE as isize, + kCFStringEncodingUTF8, + ); + + if success != 0 { + // SAFETY: CFStringGetCString succeeded, so buffer contains valid UTF-8 C string. 
+ return CStr::from_ptr(stack_buffer.as_ptr() as *const c_char) + .to_str() + .ok() + .map(str::to_owned); + } + } else { + // Use heap allocation for larger strings + let mut heap_buffer = vec![0u8; max_size as usize]; + + // SAFETY: heap_buffer has the required size as calculated by CFStringGetMaximumSizeForEncoding, + // cf_string is non-null, and kCFStringEncodingUTF8 is a valid encoding. + let success = CFStringGetCString( + cf_string, + heap_buffer.as_mut_ptr() as *mut i8, + max_size, + kCFStringEncodingUTF8, + ); + + if success != 0 { + // SAFETY: CFStringGetCString succeeded, so buffer contains valid UTF-8 C string. + return CStr::from_ptr(heap_buffer.as_ptr() as *const c_char) + .to_str() + .ok() + .map(str::to_owned); + } + } + + None + } +} + +#[cfg(test)] +mod tests { + use crate::backends::{macos::MacOSHostInfoBackend, RawHostInfoBackend}; + use icu_locale_core::Locale; + + #[test] + fn test_get_raw_locales() { + let locales_res = MacOSHostInfoBackend::raw_requested_locales(); + match locales_res { + Ok(locales) => { + for locale in locales { + assert!(!locale.is_empty(), "Empty locale retrieved"); + assert!(locale.is_ascii(), "Invalid form of locale retrieved"); + } + } + Err(e) => { + panic!("{e:?}") + } + } + } + + #[test] + fn test_converting_locales() { + let locales = MacOSHostInfoBackend::raw_requested_locales().unwrap(); + for locale in locales { + let _loc: Locale = locale.parse().unwrap(); + } + } + + #[test] + fn test_calendar() { + let calendar = MacOSHostInfoBackend::raw_calendar().unwrap(); + assert!(calendar.is_some(), "Couldn't retrieve calendar"); + assert!( + calendar.unwrap().is_ascii(), + "Calendar identifier form is not valid" + ); + } +} diff --git a/utils/host_info/src/backends/mod.rs b/utils/host_info/src/backends/mod.rs new file mode 100644 index 00000000000..a11d5fc4b79 --- /dev/null +++ b/utils/host_info/src/backends/mod.rs @@ -0,0 +1,280 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Per-host implementations for `HostInfo`. +//! +//! This module contains traits implemented for per-host backends. +//! +//! When compiling for any given host architecture, the developer +//! may access the per-backend implementation via `icu_host_info::backends::{arch}::{Arch}HostInfoBackend`. +//! +//! # RawHostInfoBackend +//! +//! This trait provides low level implementation of per-host bindings to retrieve regional preferences in their +//! original form. +//! +//! # HostInfoBackend +//! +//! This trait provides high level implementation of per-host bindings to convert raw values into their ICU4X +//! types. +use std::str::FromStr; + +use icu_locale_core::{ + extensions::unicode::{self, key, Unicode, Value}, + preferences::extensions::unicode::keywords::{ + CalendarAlgorithm, CollationType, FirstDay, HourCycle, MeasurementSystem, + MeasurementUnitOverride, + }, + subtags::{Language, Region}, + Locale, +}; + +use crate::error::HostInfoError; + +#[cfg(target_os = "android")] +#[doc(hidden)] +pub mod android; + +#[cfg(target_os = "ios")] +#[doc(hidden)] +pub mod macos; + +#[cfg(target_os = "linux")] +#[doc(hidden)] +pub mod linux; + +#[cfg(target_os = "macos")] +#[doc(hidden)] +pub mod macos; + +#[cfg(target_os = "windows")] +#[doc(hidden)] +pub mod windows; + +#[cfg(not(any( + target_os = "android", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "windows" +)))] +#[doc(hidden)] +mod unavailable; + +/// High level implementation of per-host bindings to convert raw values into their ICU4X types. +pub trait HostInfoBackend: RawHostInfoBackend { + /// The implementation should attempt to collect all relevant regional preferences available in the given + /// host environment into a unicode extensions bag. 
+ fn unicode_extensions() -> Result { + let mut result = Unicode::new(); + if let Some(calendar) = Self::calendar()? { + result.keywords.set(key!("ca"), calendar.into()); + } + if let Some(hc) = Self::hour_cycle()? { + result.keywords.set(key!("hc"), hc.into()); + } + if let Some(ms) = Self::measurement_system()? { + result.keywords.set(key!("ms"), ms.into()); + } + if let Some(mu) = Self::measurement_unit_override()? { + result.keywords.set(key!("mu"), mu.into()); + } + if let Some(fw) = Self::first_day_of_week()? { + result.keywords.set(key!("fw"), fw.into()); + } + if let Some((_lang, co)) = Self::collation()? { + result.keywords.set(key!("co"), co.into()); + } + if let Some(rg) = Self::region()? { + let mut rg_str = rg.to_string(); + rg_str.push_str("zzzz"); + if let Ok(value) = Value::try_from_str(&rg_str) { + result.keywords.set(key!("rg"), value); + } + } + Ok(result) + } + + /// The implementation should attempt to retrieve date/time related regional preferences and collect + /// them into `DateTimeFormatterPreferences` bag. + #[cfg(feature = "datetime")] + fn datetime_preferences() -> Result { + use icu_locale_core::Locale; + + let requested_locales = Self::requested_locales()?; + let requested_locale = requested_locales + .first() + .cloned() + .unwrap_or(Locale::UNKNOWN); + let mut result = icu_datetime::DateTimeFormatterPreferences::from(requested_locale); + result.numbering_system = None; + result.hour_cycle = Self::hour_cycle()?; + result.calendar_algorithm = Self::calendar()?; + Ok(result) + } + + /// The implementation should attempt to retrieve requested locales set by the user in the host system. + fn requested_locales() -> Result, HostInfoError> { + Ok(Self::raw_requested_locales()? + .into_iter() + .filter_map(|s| Locale::try_from_str(&s).ok()) + .collect()) + } + + /// The implementation should attempt to retrieve calendar set by the user in the host system. + fn calendar() -> Result, HostInfoError> { + Ok(Self::raw_calendar()? 
+ .and_then(|raw| unicode::Value::from_str(&raw).ok()) + .and_then(|value| CalendarAlgorithm::try_from(&value).ok())) + } + + /// The implementation should attempt to retrieve region set by the user in the host system. + fn region() -> Result, HostInfoError> { + Ok(Self::raw_region()?.and_then(|raw| Region::try_from_str(&raw).ok())) + } + + /// The implementation should attempt to retrieve hour_cycle set by the user in the host system. + fn hour_cycle() -> Result, HostInfoError> { + Ok(Self::raw_hour_cycle()? + .and_then(|raw| unicode::Value::from_str(&raw).ok()) + .and_then(|value| HourCycle::try_from(&value).ok())) + } + + /// The implementation should attempt to retrieve measurement system set by the user in the host system. + fn measurement_system() -> Result, HostInfoError> { + Ok(Self::raw_measurement_system()? + .and_then(|raw| unicode::Value::from_str(&raw).ok()) + .and_then(|value| MeasurementSystem::try_from(&value).ok())) + } + + /// The implementation should attempt to retrieve measurement unit override set by the user in the host system. + fn measurement_unit_override() -> Result, HostInfoError> { + Ok(Self::raw_measurement_unit_override()? + .and_then(|raw| unicode::Value::from_str(&raw).ok()) + .and_then(|value| MeasurementUnitOverride::try_from(&value).ok())) + } + + /// The implementation should attempt to retrieve first day of week set by the user in the host system. + fn first_day_of_week() -> Result, HostInfoError> { + Ok(Self::raw_first_day_of_week()? + .and_then(|raw| unicode::Value::from_str(&raw).ok()) + .and_then(|value| FirstDay::try_from(&value).ok())) + } + + /// The implementation should attempt to retrieve collation set by the user in the host system. 
+ fn collation() -> Result, HostInfoError> { + Ok(Self::raw_collation()?.and_then(|(raw_lang, raw_col)| { + unicode::Value::from_str(&raw_col) + .ok() + .and_then(|col| CollationType::try_from(&col).ok()) + .and_then(|col| Language::from_str(&raw_lang).ok().map(|lang| (lang, col))) + })) + } +} + +/// Low level implementation of per-host bindings to retrieve regional preferences in their original form. +/// +/// As per library design, the implementations should attempt to return `None` in scenarios where user +/// did not explicitly set a value for any of the preferences. +/// For example, if the user set `en-US` as their preferred locale, and did not manually set `HourCycle` +/// to any value, the host API may return hour cycle default value for en-US. +/// If possible, the implementation should attempt to distinguish between explicity set value that matches +/// default for a given locale, from lack of explicit value set. +/// +/// If that is not possible, the API should return the value retrieved from the system for each field getter. +/// +/// The goal is to avoid constructing a `en-US-hc-h12` locale in a scenario where the user set their locale to `en-US` +/// but did not explicitly define hour cycle preference, and the `h12` value is just a default for `en-US`. +/// This becomes impactful when locale negotiation results in the system picking one of the fallback locales, and +/// needs to determine if it should follow its regional preferences, or take some from the host system. +/// For example, if the user set `["en-US", "de-DE"]` as their requested locales, and the host API returns `h12` for +/// the hour cycle getter, it may be problematic to not know if this is explicit preference of the user, or default +/// for `en-US`. As a result, it may become challenging to decide if `h12` should be used even if `de-DE` is being negotiated +/// as the locale for the given application. 
+pub trait RawHostInfoBackend { + /// Attempt to retrieve a list of locales set in the host regional preferences as requested by the user. + /// + /// The list is ordered and should contain locales explicitly requested by the user, with an empty + /// list being a valid response in case no locale has been set by the user, or the backend cannot retrieve any. + fn raw_requested_locales() -> Result, HostInfoError> { + Ok(vec![]) + } + + /// Attempt to retrieve calendar system set in the host regional preferences by the user. + fn raw_calendar() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve region set in the host regional preferences by the user. + fn raw_region() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve hour cycle set in the host regional preferences by the user. + fn raw_hour_cycle() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve measurement system set in the host regional preferences by the user. + fn raw_measurement_system() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve measurement unut override set in the host regional preferences by the user. + /// + /// This should retrieve `temperature` unit. + fn raw_measurement_unit_override() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve first day of week option set in the host regional preferences by the user. + fn raw_first_day_of_week() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve collation set in the host regional preferences by the user. + fn raw_collation() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve customized date format set in the host regional preferences by the user. + fn raw_date_format() -> Result, HostInfoError> { + Ok(None) + } + + /// Attempt to retrieve customized number format set in the host regional preferences by the user. 
+ fn raw_number_format() -> Result, HostInfoError> { + Ok(None) + } +} + +#[cfg(target_os = "android")] +#[doc(hidden)] +pub(crate) type Impl = android::AndroidHostInfoBackend; + +#[cfg(target_os = "ios")] +#[doc(hidden)] +pub(crate) type Impl = macos::MacOSHostInfoBackend; + +#[cfg(target_os = "linux")] +#[doc(hidden)] +pub(crate) type Impl = linux::LinuxHostInfoBackend; + +#[cfg(target_os = "macos")] +#[doc(hidden)] +pub(crate) type Impl = macos::MacOSHostInfoBackend; + +#[cfg(target_os = "windows")] +#[doc(hidden)] +pub(crate) type Impl = windows::WindowsHostInfoBackend; + +#[cfg(not(any( + target_os = "android", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "windows" +)))] +#[doc(hidden)] +pub(crate) type Impl = unavailable::UnavailableHostInfoBackend; diff --git a/utils/host_info/src/backends/unavailable.rs b/utils/host_info/src/backends/unavailable.rs new file mode 100644 index 00000000000..c7ee94ec84f --- /dev/null +++ b/utils/host_info/src/backends/unavailable.rs @@ -0,0 +1,14 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{ + backends::{HostInfoBackend, RawHostInfoBackend}, + error::HostInfoError, +}; + +pub struct UnavailableHostInfoBackend; + +impl HostInfoBackend for UnavailableHostInfoBackend {} + +impl RawHostInfoBackend for UnavailableHostInfoBackend {} diff --git a/utils/host_info/src/backends/windows.rs b/utils/host_info/src/backends/windows.rs new file mode 100644 index 00000000000..cfef10fc488 --- /dev/null +++ b/utils/host_info/src/backends/windows.rs @@ -0,0 +1,182 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::{ + backends::{HostInfoBackend, RawHostInfoBackend}, + error::HostInfoError, + locale::WindowsLocale, +}; +use icu_locale_core::{ + extensions::unicode, preferences::extensions::unicode::keywords::CalendarAlgorithm, Locale, +}; +use std::str::FromStr; + +pub struct WindowsHostInfoBackend; + +impl HostInfoBackend for WindowsHostInfoBackend { + fn requested_locales() -> Result, HostInfoError> { + Ok(Self::raw_requested_locales()? + .into_iter() + .filter_map(|s| { + WindowsLocale::try_from_str(&s) + .map_err(|_| HostInfoError::HostLocaleError) + .and_then(|wl| Locale::try_from(wl).map_err(Into::into)) + .ok() + }) + .collect()) + } + + fn calendar() -> Result, HostInfoError> { + Ok(Self::raw_calendar()? + .and_then(|raw| { + let canonical = match raw.as_str() { + "GregorianCalendar" => "gregory", + "JapaneseCalendar" => "japanese", + "TaiwanCalendar" => "roc", + "KoreanCalendar" => "dangi", + "HebrewCalendar" => "hebrew", + "HijriCalendar" => "islamic", + "UmmAlQuraCalendar" => "islamic-umalqura", + "PersianCalendar" => "persian", + "ThaiCalendar" => "buddhist", + "JulianCalendar" => "julian", + r => r, + }; + unicode::Value::from_str(canonical).ok() + }) + .and_then(|value| CalendarAlgorithm::try_from(&value).ok())) + } +} + +impl RawHostInfoBackend for WindowsHostInfoBackend { + fn raw_region() -> Result, HostInfoError> { + let region = + windows::System::UserProfile::GlobalizationPreferences::HomeGeographicRegion()?; + let s = region.to_string_lossy(); + if s.is_empty() { + Ok(None) + } else { + Ok(Some(s)) + } + } + + fn raw_requested_locales() -> Result, HostInfoError> { + let locale = windows::System::UserProfile::GlobalizationPreferences::Languages()?; + let len = locale.Size()?; + + let mut locale_vec_str: Vec = Vec::with_capacity(len as usize); + + for i in 0..len { + let hstring = locale.GetAt(i)?; + let string = hstring.to_string_lossy(); + locale_vec_str.push(string); + } + Ok(locale_vec_str) + } + + fn raw_calendar() -> Result, 
HostInfoError> { + let calendar = ::windows::Globalization::Calendar::new()?; + let system_calendar = ::windows::Globalization::Calendar::GetCalendarSystem(&calendar)?; + let calendar_type: String = system_calendar.to_string(); + Ok(Some(calendar_type)) + } + + fn raw_first_day_of_week() -> Result, HostInfoError> { + Ok( + match ::windows::System::UserProfile::GlobalizationPreferences::WeekStartsOn()?.0 { + 0 => Some("sun".to_string()), + 1 => Some("mon".to_string()), + 2 => Some("tue".to_string()), + 3 => Some("wed".to_string()), + 4 => Some("thu".to_string()), + 5 => Some("fri".to_string()), + 6 => Some("sat".to_string()), + _ => None, + }, + ) + } +} + +#[cfg(test)] +mod tests { + use crate::backends::{windows::WindowsHostInfoBackend, RawHostInfoBackend}; + use crate::locale::WindowsLocale; + use icu_locale_core::Locale; + use std::sync::{LazyLock, Mutex}; + use windows::core::{BOOL, PCWSTR}; + use windows::Win32::{ + Foundation::LPARAM, + Globalization::{EnumSystemLocalesEx, LOCALE_ALL}, + }; + + // Since [`EnumSystemLocalesEx`] iterates using a callback with no obvious (safe) way to return data, + // store them in this static instead. Since this is only a single test with roughly 1,000 items, + // it shouldn't be much of a concern. + static LOCALES: LazyLock>> = LazyLock::new(|| Mutex::new(Vec::new())); + + /// Callback provided to the [`EnumSystemLocalesEx`] to enumerate over locales. 
+ unsafe extern "system" fn callback( + locale_name: PCWSTR, + _locale_flags: u32, + _callback_parameter: LPARAM, + ) -> BOOL { + // SAFETY: caller is the [`EnumSystemLocalesEx`] function, which guarantees a valid null-terminated string + let locale_name = unsafe { locale_name.to_string() }.unwrap(); + + // Skip empty locale 0x007F, marked as "Reserved for invariant locale behavior" + // Source: MS-LCID version 16.0, page 13 (section 2.2 under "Language ID" table) + if !locale_name.is_empty() { + LOCALES.lock().unwrap().push(locale_name); + } + + // Tell [`EnumSystemLocalesEx`] to continue enumeration + BOOL::from(true) + } + + /// Enumerate over all Windows locales, and make sure [`WindowsLocale`] can parse it without any (direct) errors. + #[test] + fn system_locales() -> windows::core::Result<()> { + // Find the list of supported locales, using the [`EnumSystemLocalesEx`] API: + // https://learn.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-enumsystemlocalesex + // SAFETY: a valid function pointer is provided and lpReserved is set to NULL/None as required + unsafe { + EnumSystemLocalesEx(Some(callback), LOCALE_ALL, LPARAM::default(), None)?; + } + + // Get the list of locales which the callback has been modifying + let locales = LOCALES.lock().unwrap(); + + // Make sure [`WindowsLocale`] can parse without any obvious issues + for locale in locales.iter() { + let windows_locale = WindowsLocale::try_from_str(locale).expect(locale); + Locale::try_from(windows_locale).expect(locale); + } + + Ok(()) + } + + #[test] + fn test_get_raw_requested_locales() { + let locales = WindowsHostInfoBackend::raw_requested_locales().unwrap(); + for locale in locales { + assert!(!locale.is_empty(), "Empty locale retrieved"); + assert!(locale.is_ascii(), "Invalid form of locale retrieved"); + } + } + + #[test] + fn test_converting_locales() { + let locales = WindowsHostInfoBackend::raw_requested_locales().unwrap(); + for locale in locales { + let _converted_locale: Locale = 
locale.parse().unwrap(); + } + } + + #[test] + fn test_calendar() { + let calendar = WindowsHostInfoBackend::raw_calendar().unwrap().unwrap(); + assert!(!calendar.is_empty(), "Calendar identifier is empty"); + assert!(calendar.is_ascii(), "Calendar identifier form is not valid"); + } +} diff --git a/utils/host_info/src/error.rs b/utils/host_info/src/error.rs new file mode 100644 index 00000000000..ce11a04d6b8 --- /dev/null +++ b/utils/host_info/src/error.rs @@ -0,0 +1,64 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use displaydoc::Display; +use icu_locale_core::ParseError; +use std::error::Error; +use std::{ffi::FromVecWithNulError, str::Utf8Error}; + +/// An error encountered while retrieving the host information +#[derive(Debug, Display)] +pub enum HostInfoError { + #[displaydoc("Error converting into `&CStr` to `&str`")] + Conversion(Utf8Error), + + #[displaydoc("Error creating a `CString` from a buffer with a null terminator")] + FromVecWithNul(FromVecWithNulError), + + #[displaydoc("No backend matching backend have been identified")] + UnavailableBackend, + + #[displaydoc("Unknown category when retrieving locale category for linux")] + UnknownCategory, + + #[cfg(target_os = "windows")] + #[displaydoc("Windows error: {0}")] + Windows(windows::core::Error), + + #[displaydoc("Host locale parsing error")] + HostLocaleError, + + #[displaydoc("Failed to parse region")] + UnknownRegion, + + #[displaydoc("Failed to parse a locale: {0}")] + LocaleParse(ParseError), +} + +impl Error for HostInfoError {} + +impl From for HostInfoError { + fn from(input: Utf8Error) -> Self { + Self::Conversion(input) + } +} + +impl From for HostInfoError { + fn from(input: FromVecWithNulError) -> Self { + Self::FromVecWithNul(input) + } +} + +#[cfg(target_os = "windows")] +impl From for HostInfoError { + fn from(input: 
windows::core::Error) -> Self { + Self::Windows(input) + } +} + +impl From for HostInfoError { + fn from(input: ParseError) -> Self { + Self::LocaleParse(input) + } +} diff --git a/utils/host_info/src/host_info.rs b/utils/host_info/src/host_info.rs new file mode 100644 index 00000000000..815957f8d2e --- /dev/null +++ b/utils/host_info/src/host_info.rs @@ -0,0 +1,243 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use icu_locale_core::{ + extensions::unicode::Unicode, + preferences::extensions::unicode::keywords::{ + CalendarAlgorithm, CollationType, FirstDay, HourCycle, MeasurementSystem, + MeasurementUnitOverride, + }, + subtags::{Language, Region}, + Locale, +}; + +use crate::{ + backends::{self, HostInfoBackend}, + error::HostInfoError, +}; + +use super::HostKind; + +pub const RESOLVED_BACKEND: Option = { + #[cfg(target_os = "android")] + { + Some(HostKind::Android) + } + #[cfg(target_os = "ios")] + { + Some(HostKind::Ios) + } + #[cfg(target_os = "linux")] + { + Some(HostKind::Linux) + } + #[cfg(target_os = "macos")] + { + Some(HostKind::MacOS) + } + #[cfg(target_os = "windows")] + { + Some(HostKind::Windows) + } + #[cfg(not(any( + target_os = "android", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "windows" + )))] + { + None + } +}; + +/// Provides getters for common regional preferences from the host environment. 
+/// +/// # Example +/// +/// ```ignore +/// use icu_host_info::HostInfo; +/// use icu::calendar::Date; +/// use icu::datetime::{fieldsets, DateTimeFormatter}; +/// +/// let date = Date::try_new_gregorian(2025, 10, 10) +/// .expect("Failed to create date"); +/// +/// // requires feature `datetime` +/// let prefs = HostInfo::datetime_preferences() +/// .expect("Failed to retrieve host info"); +/// +/// let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) +/// .expect("Failed to create datetime formatter."); +/// +/// let formatted_dt = dtf.format(&date); +/// +/// assert_eq!(formatted_dt.to_string(), "October 10, 2025"); +/// ``` +pub struct HostInfo; + +impl HostInfo { + /// Retrieves `Unicode` extensions struct populated from host regional preferences. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let ue = HostInfo::unicode_extensions() + /// .expect("Failed to retrieve host info"); + /// ``` + pub fn unicode_extensions() -> Result { + backends::Impl::unicode_extensions() + } + + /// Retrieves `Preferences` object for `DateTimeFormatter`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let ue = HostInfo::datetime_preferences() + /// .expect("Failed to retrieve datetime preferences"); + /// ``` + #[cfg(feature = "datetime")] + pub fn datetime_preferences( + ) -> Result { + backends::Impl::datetime_preferences() + } + + /// Retrieves an ordered list of locales set as requested by the user in the host + /// environment regional preferences. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let locales = HostInfo::requested_locales() + /// .expect("Failed to retrieve requested locales"); + /// ``` + pub fn requested_locales() -> Result, HostInfoError> { + backends::Impl::requested_locales() + } + + /// Retrieves a calendar preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `ca`. 
+ /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::calendar() + /// .expect("Failed to retrieve calendar"); + /// ``` + pub fn calendar() -> Result, HostInfoError> { + backends::Impl::calendar() + } + + /// Retrieves a region set in the host environment regional preferences. + /// + /// That region may be already populated into `requested_locales` or not, depending + /// on the host. + /// In `::unicode_extensions()` this field is being encoded as `rg`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::region() + /// .expect("Failed to retrieve region"); + /// ``` + pub fn region() -> Result, HostInfoError> { + backends::Impl::region() + } + + /// Retrieves an hour_cycle preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `hc`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::hour_cycle() + /// .expect("Failed to retrieve hour cycle"); + /// ``` + pub fn hour_cycle() -> Result, HostInfoError> { + backends::Impl::hour_cycle() + } + + /// Retrieves a measurement system preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `ms`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::calendar() + /// .expect("Failed to retrieve calendar"); + /// ``` + pub fn measurement_system() -> Result, HostInfoError> { + backends::Impl::measurement_system() + } + + /// Retrieves a first day of week preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `fd`. 
+ /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::first_day_of_week() + /// .expect("Failed to retrieve first day of week"); + /// ``` + pub fn first_day_of_week() -> Result, HostInfoError> { + backends::Impl::first_day_of_week() + } + + /// Retrieves a collation preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `co`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::collation() + /// .expect("Failed to retrieve collation"); + /// ``` + pub fn collation() -> Result, HostInfoError> { + backends::Impl::collation() + } + + /// Retrieves measurement unit override preference. + /// + /// In `::unicode_extensions()` this field is being encoded as `mu`. + /// + /// # Example + /// + /// ``` + /// use icu_host_info::HostInfo; + /// + /// let region = HostInfo::measurement_unit_override() + /// .expect("Failed to retrieve measurement unit override"); + /// ``` + pub fn measurement_unit_override() -> Result, HostInfoError> { + backends::Impl::measurement_unit_override() + } + + pub fn resolved_backend() -> Option { + RESOLVED_BACKEND + } +} diff --git a/utils/host_info/src/lib.rs b/utils/host_info/src/lib.rs new file mode 100644 index 00000000000..9aae370f107 --- /dev/null +++ b/utils/host_info/src/lib.rs @@ -0,0 +1,301 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! # host_info +//! +//! `host_info` is a library providing functionality to retrieve regional preferences +//! from host environments - primarily the operating system the program is running in. +//! +//! The library is designed to bind the different host environment preferences architectures +//! to ICU4X model. +//! +//! # Example +//! +//! ```ignore +//! use icu_host_info::HostInfo; +//! 
use icu::calendar::Date; +//! use icu::datetime::{fieldsets, DateTimeFormatter}; +//! +//! let date = Date::try_new_gregorian(2025, 10, 10) +//! .expect("Failed to create date"); +//! +//! // requires feature `datetime` +//! let prefs = HostInfo::datetime_preferences() +//! .expect("Failed to retrieve host info"); +//! +//! let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) +//! .expect("Failed to create datetime formatter."); +//! +//! let formatted_dt = dtf.format(&date); +//! +//! assert_eq!(formatted_dt.to_string(), "October 10, 2025"); +//! ``` +//! +//! # Feature Matrix +//! +//! The library intends to provide means to map host regional preferences +//! to ICU4X preferences with a focus on Unicode Extensions, but allows for +//! propagation of preferences offered by the host environments which may +//! not have a representation in Unicode Extensions (for example: date format pattern). +//! +//! Legend: +//! - ✅ = OS + `host_info` support +//! - ⚠️ = OS supports, `host_info` doesn't +//! - ❌ = OS doesn't support +//! +//! | Feature | Android | iOS | Linux (1) | macOS | Windows | +//! |---------------------| :-----: | :-: | :------------------: | :---: | :-----: | +//! | Requested Locales | ✅ | ✅ | ✅ | ✅ | ✅ | +//! | Calendar | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +//! | Region | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +//! | Hour cycle | ⚠️ | ⚠️ | ✅ | ✅ | ⚠️ | +//! | Measurement System | ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | +//! | Measurement Override| ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | +//! | First Day of week | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | +//! | Collation | ⚠️ | ⚠️ | ⚠️ | ✅ | ❌ | +//! | Date format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +//! | Number format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +//! +//! (1) In the case of Linux, different desktop environments (DEs) such as GNOME and KDE are supported together. +//! +//! # Integrating preferences into ICU4X formatters +//! +//! The library provides three ways of injecting retrieved values into formatters: +//! +//! ## 1. Preference Bag +//! +//! 
For most common components, such as `DateTimeFormatter`, the library exposes +//! a direct getter that retrieves a `Preferences` struct for that component. +//! This getter is located behind a flag to allow for control over which dependencies are being +//! pulled. +//! +//! ### Example +//! +//! ```ignore +//! use icu_host_info::HostInfo; +//! use icu::datetime::{fieldsets, DateTimeFormatter}; +//! +//! // requires feature `datetime` +//! let prefs = HostInfo::datetime_preferences() +//! .expect("Failed to retrieve host info"); +//! +//! let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) +//! .expect("Failed to create datetime formatter."); +//! ``` +//! +//! ## 2. Locale +//! +//! For all components that `HostInfo` does not have special preference getter for, +//! and for cases where the user prefers to avoid pulling extra dependencies at the cost +//! of narrowing down the retrieved values to just ones encoded in Unicode Extensions, +//! the library provides an ergonomic getter: +//! +//! ### Example +//! +//! ``` +//! use icu_host_info::HostInfo; +//! use icu::{ +//! datetime::{fieldsets, DateTimeFormatter}, +//! locale::Locale, +//! }; +//! +//! let mut locale = HostInfo::requested_locales() +//! .expect("Failed to retrieve locales") +//! .first() +//! .cloned() +//! .unwrap_or(Locale::UNKNOWN); +//! +//! locale.extensions.unicode = HostInfo::unicode_extensions() +//! .expect("Failed to retrieve host info"); +//! +//! let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) +//! .expect("Failed to create datetime formatter."); +//! ``` +//! +//! Notice that the regional preferences encoded in Unicode Extensions +//! are retrieved separately from the list of requested locales. +//! There are two reasons for this design: +//! - The user has to decide whether the regional preferences apply onto all locales, or just the first one +//! - The locale negotiation may result in a different locale being selected. +//! +//! ## 3. 
Individual Preferences +//! +//! For each preference the library also attempts to provide a direct getter +//! allowing the user to retrieve just that preference and use it as they see fit. +//! +//! ### Example +//! +//! ``` +//! use icu_host_info::HostInfo; +//! use icu::locale::preferences::extensions::unicode::keywords::HourCycle; +//! +//! let hour_cycle: Option<HourCycle> = HostInfo::hour_cycle() +//! .expect("Failed to retrieve hour_cycle preference"); +//! ``` +//! +//! # Locale Negotiation +//! +//! Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU4X to +//! perform a negotiation between requested locales, and locales for which the data is available in the system. +//! The output of `HostInfo` will be utilized in that negotiation allowing the deployment to 1) select +//! the most appropriate locales for the given user and target modality, 2) apply regional preferences onto that +//! locale. +//! +//! The need to allow `HostInfo` to be pluggable into locale negotiation and multi source merging (see next section) +//! guided many design choices in this library. This section will be extended once locale negotiation is implemented. +//! +//! # Multi Source Merging +//! +//! In simple systems the user will most often use ICU4X to format +//! some information in a selected locale, and use this library to augment +//! the formatting with regional preferences set by the user in the host environment. +//! +//! In more complex systems, the user may also want to introduce a second source of regional preferences +//! and mix the values set in the host environment with those set in the program itself. +//! +//! For example, a web browser may offer some regional preferences set in the browser +//! itself, or even set separately for some contexts of the browser. +//! +//! In those cases, the deployment requires merging of the preferences. +//! ICU4X exposes an `extend` method on both the `Preferences` and `Unicode` extensions structs. +//! 
+//! This allows the system to retrieve [`HostInfo`] Preferences or `Unicode`, and the application's +//! equivalent, and merge them. +//! +//! ## `Preferences` Example +//! +//! ```ignore +//! use icu_host_info::HostInfo; +//! use icu::datetime::{fieldsets, DateTimeFormatter}; +//! +//! let app_prefs = app.datetime_preferences(); +//! +//! // requires feature `datetime` +//! let mut combined_prefs = HostInfo::datetime_preferences() +//! .expect("Failed to retrieve host info"); +//! +//! combined_prefs.extend(app_prefs); +//! +//! let dtf = DateTimeFormatter::try_new(combined_prefs, fieldsets::YMD::long()) +//! .expect("Failed to create datetime formatter."); +//! ``` +//! +//! ## `Unicode` Extensions Example +//! +//! ```ignore +//! use icu_host_info::HostInfo; +//! use icu::{ +//! datetime::{fieldsets, DateTimeFormatter}, +//! locale::locale, +//! }; +//! +//! let mut locale = locale!("fr-CA"); +//! +//! let app_ue = app.unicode_extensions(); +//! +//! let mut combined_ue = HostInfo::unicode_extensions() +//! .expect("Failed to retrieve host info"); +//! +//! combined_ue.extend(app_ue); +//! +//! locale.extensions.unicode = combined_ue; +//! +//! let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) +//! .expect("Failed to create datetime formatter."); +//! ``` +//! +//! # Design Decisions +//! +//! The library operates on the boundary between a diverse set of host +//! environments and the uniform ICU4X design derived from Unicode LDML. +//! It requires a number of design tradeoffs that had to be made in +//! order to achieve uniformity and to scale over time as the design of the host +//! platforms evolves. +//! +//! ## Host Environment +//! +//! The library is designed to handle retrieval of data from the direct host +//! environment. This usually means an operating system, but it can mean a +//! virtual environment, sandbox or runtime. +//! In such a case it is the responsibility of the execution logic +//! 
setting up such an environment to ensure propagation of customer preferences. +//! +//! ## Lossy Results +//! +//! The library makes a best effort to retrieve the values +//! that can be directly used in ICU4X. As the operating systems, +//! runtimes and ICU4X evolve, there's always a risk of a mismatch. +//! This library makes a design decision to be lossy-by-default. +//! +//! Any value that cannot be directly mapped onto a valid value is ignored +//! and indistinguishable in the ergonomic API from a missing value. +//! +//! Similarly, the API does not distinguish between missing binding logic and an unknown value. +//! The assumption is that users of this library are aiming to respect user choices +//! encoded in host environment regional preferences, but are not in a position +//! to act differently on a failed attempt to retrieve them than on a missing attempt. +//! Therefore errors in this library are very rare and only related to catastrophic +//! cases like memory corruption or OS API error propagation. +//! +//! ## Normalized vs Raw values +//! +//! The main API of this library - [`HostInfo`] - provides methods that return normalized +//! values, often directly taken from `icu::locale_core::preferences`. +//! Per-host backends provide an additional trait implementation that returns +//! raw values, allowing the user to handle or introspect those values manually. +//! When using `HostInfo`, the library makes a best effort to normalize and parse +//! those raw values into canonical Unicode ICU4X representation, often discarding +//! unknown values and values that fail to parse. +//! +//! Those raw backends are not exposed in the generated documentation. +//! +//! ### Example +//! +//! ```ignore +//! use icu_host_info::backends::{ +//! RawHostInfoBackend, +//! macos::MacOSHostInfoBackend, +//! }; +//! +//! let raw_cal: Option = MacOSHostInfoBackend::raw_calendar() +//! .expect("Failed to retrieve raw calendar"); +//! ``` +//! +//! ## Minimize defaults +//! 
+//! The library attempts to use host APIs in a way that allows distinguishing +//! preference values that represent defaults for a given locale from ones manually set +//! by the user. +//! In some cases, the host API does not allow for that distinction, which may result +//! in overly expressive locales such as `en-US-u-ca-gregory` (`gregory` being already a default calendar for en-US). +//! +//! This, like other aspects of the library, operates on a best-effort basis and may be further improved in future +//! releases as better bindings become available. +//! +//! ### Host API Design Guidance +//! +//! A note for host API designers - it is useful for foundational libraries such as this one when hosts expose APIs that enable us +//! to distinguish regional preference values derived by the host from the defaults of a locale from cases +//! when the value is explicitly set by the user. +//! This distinction allows ICU4X to better serve in locale negotiation scenarios where an other-than-first locale may be used +//! and the deployment should respect whether the user set a given preference explicitly or left it to the per-locale default. +//! +pub mod backends; +mod error; +mod host_info; +pub mod locale; +mod posix; + +pub use host_info::HostInfo; + +/// Enumeration of known hosts. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +#[non_exhaustive] +pub enum HostKind { + Android, + Ios, + Linux, + MacOS, + Windows, +} diff --git a/utils/host_info/src/locale/mod.rs b/utils/host_info/src/locale/mod.rs new file mode 100644 index 00000000000..d5110a85e61 --- /dev/null +++ b/utils/host_info/src/locale/mod.rs @@ -0,0 +1,14 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Host-specific Locale representations. +//! +//! Some popular host environments provide a custom definition of a `Locale`. +//! 
This module contains APIs allowing for encoding of those variants and their conversion +//! to ICU4X Locale. +pub mod posix; +pub mod windows; + +pub use posix::PosixLocale; +pub use windows::WindowsLocale; diff --git a/utils/host_info/src/locale/posix.rs b/utils/host_info/src/locale/posix.rs new file mode 100644 index 00000000000..c94cde00fc2 --- /dev/null +++ b/utils/host_info/src/locale/posix.rs @@ -0,0 +1,529 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Parsing functionality for POSIX locale identifiers. +//! For more information, see [`PosixLocale`]. +//! +//! # Usage example +//! ``` +//! use icu_locale_core::{Locale, locale}; +//! use icu_host_info::locale::{posix::PosixParseError, PosixLocale}; +//! +//! # fn main() -> Result<(), PosixParseError> { +//! let posix_locale = PosixLocale::try_from_str("en_US.utf8@euro").unwrap(); +//! +//! assert_eq!(Locale::try_from(posix_locale), Ok(locale!("en-US-u-cu-eur"))); +//! # Ok(()) +//! # } +//! 
``` + +use displaydoc::Display; +use icu_locale_core::extensions::unicode::{key, value}; +use icu_locale_core::extensions::Extensions; +use icu_locale_core::subtags::{language, script, variant, Language, Region, Variants}; +use icu_locale_core::{locale, LanguageIdentifier, Locale, ParseError}; + +#[derive(Display, Debug, PartialEq)] +/// An error while parsing a POSIX locale identifier +pub enum PosixParseError { + #[displaydoc("Empty locale")] + EmptyLocale, + #[displaydoc("Empty section beginning at offset {offset}")] + EmptySection { offset: usize }, + #[displaydoc("Invalid character at offset {offset}")] + InvalidCharacter { offset: usize }, + #[displaydoc("Invalid locale")] + InvalidLocale, + #[displaydoc("Delimiter repeated at offsets {first_offset} and {second_offset}")] + RepeatedDelimiter { + first_offset: usize, + second_offset: usize, + }, + #[displaydoc("Delimiters found out-of-order at offsets {first_offset} and {second_offset}")] + UnorderedDelimiter { + first_offset: usize, + second_offset: usize, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] +enum Delimiter { + Territory, + Codeset, + Modifier, +} + +impl Delimiter { + /// Find any optional sections, returning an error if the delimiters are invalid + pub fn try_find_sections(src: &str) -> Result, PosixParseError> { + // Find the offset and delimiter of each optional section + let optional_sections = src + .chars() + .enumerate() + .flat_map(|(index, character)| match character { + '_' => Some((index, Self::Territory)), + '.' 
=> Some((index, Self::Codeset)), + '@' => Some((index, Self::Modifier)), + _ => None, + }) + .collect::>(); + + // Find any errors in the arrangement of delimiters + for (index, (first_offset, first_delimiter)) in optional_sections.iter().enumerate() { + // Find any repeated delimiters + if let Some((second_offset, _second_delimiter)) = optional_sections + .iter() + // Check all delimiters past this index + .skip(index + 1) + .find(|(_second_offset, second_delimiter)| first_delimiter == second_delimiter) + { + return Err(PosixParseError::RepeatedDelimiter { + first_offset: *first_offset, + second_offset: *second_offset, + }); + } + + // Find any delimiters that have been invalidated by a delimiter that should appear after it + // For example "en.utf8_US" is invalid because codeset appears before territory + if let Some((second_offset, second_delimiter)) = optional_sections.get(index + 1) { + if first_delimiter > second_delimiter { + return Err(PosixParseError::UnorderedDelimiter { + first_offset: *first_offset, + second_offset: *second_offset, + }); + } + } + } + + Ok(optional_sections) + } +} + +#[derive(Debug)] +/// A parsed and validated POSIX locale identifier. +/// +/// Locales are expected to be in the format `language[_territory][.codeset][@modifier]`; +/// only the language section is mandatory, all other sections are optional. +/// For example: +/// - All sections: `en_US.utf8@euro` +/// - Only required sections: `en` +/// +/// See section 8.2 of the POSIX spec for more details: +/// +pub struct PosixLocale<'src> { + language: &'src str, + territory: Option<&'src str>, + codeset: Option<&'src str>, + // TODO: is it possible to have multiple modifiers? + modifier: Option<&'src str>, +} + +impl<'src> PosixLocale<'src> { + /// Attempt to parse a POSIX locale. 
+ pub fn try_from_str(src: &'src str) -> Result { + // These cases are implementation-defined and can be ignored: + // - Empty locales + if src.is_empty() { + return Err(PosixParseError::EmptyLocale); + } + // - Any locale containing '/' + if let Some(offset) = src.find('/') { + return Err(PosixParseError::InvalidCharacter { offset }); + } + // - Locales consisting of "." or ".." + if src == "." || src == ".." { + return Err(PosixParseError::InvalidLocale); + } + + // Find any optional sections, and return any delimiter-related errors + let optional_sections = Delimiter::try_find_sections(src)?; + + // The language field continues until the start of the first optional section, if one exists + let language = match optional_sections.first() { + Some((offset, _delimiter)) => &src[..*offset], + None => src, + }; + + // Make sure the language itself is non-empty + if language.is_empty() { + return Err(PosixParseError::EmptySection { offset: 0 }); + } + + let mut locale = Self { + language, + territory: None, + codeset: None, + modifier: None, + }; + + for (index, (start_offset, delimiter)) in optional_sections.iter().enumerate() { + // Find the offset of the next section, or end of the string if none exist + let end_offset = optional_sections + .get(index + 1) + .map(|(next_offset, _next_delimiter)| *next_offset) + .unwrap_or(src.len()); + + // Make sure this section is non-empty (more characters than just the delimiter) + if start_offset + 1 >= end_offset { + return Err(PosixParseError::EmptySection { + offset: *start_offset, + }); + } + + // Write the section to the appropriate field + let section_value = Some(&src[start_offset + 1..end_offset]); + match delimiter { + Delimiter::Territory => locale.territory = section_value, + Delimiter::Codeset => locale.codeset = section_value, + Delimiter::Modifier => locale.modifier = section_value, + } + } + + Ok(locale) + } +} + +impl<'s> TryFrom> for Locale { + type Error = ParseError; + + fn try_from(input: PosixLocale<'s>) -> 
Result { + // The default "C"/"POSIX" locale should map to "en-US-posix", + // which is the default behaviour in ICU4C: + // https://github.com/unicode-org/icu/blob/795d7ac82c4b29cf721d0ad62c0b178347d453bf/icu4c/source/common/putil.cpp#L1738 + if input.language == "C" || input.language == "POSIX" { + return Ok(locale!("en-US-posix")); + } + + let mut extensions = Extensions::new(); + let mut script = None; + let mut variant = None; + + // Parse the language/region + let mut language = Language::try_from_str(input.language)?; + let region = input.territory.map(Region::try_from_str).transpose()?; + + if let Some(modifier) = input.modifier { + match modifier.to_ascii_lowercase().as_str() { + "euro" => { + extensions.unicode.keywords.set(key!("cu"), value!("eur")); + } + // Known script modifiers + "cyrillic" => script = Some(script!("Cyrl")), + "devanagari" => script = Some(script!("Deva")), + "latin" => script = Some(script!("Latn")), + // Saaho seems to be the only "legacy variant" that appears as a modifier: + // https://www.unicode.org/reports/tr35/#table-legacy-variant-mappings + "saaho" => language = language!("ssy"), + "valencia" => variant = Some(variant!("valencia")), + // Some modifiers are known but can't be expressed as a BCP-47 identifier + // e.g. 
"@abegede", "@iqtelif" + _ => (), + } + } + + Ok(Locale { + id: LanguageIdentifier { + language, + region, + script, + variants: variant.map_or_else(Variants::new, Variants::from_variant), + }, + extensions, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn expect_success(src: &str, expected: &str) { + let posix_locale = PosixLocale::try_from_str(src).expect(src); + let converted_locale: Locale = posix_locale.try_into().expect(src); + + let expected_locale = Locale::try_from_str(expected).expect(src); + assert_eq!(converted_locale, expected_locale, "POSIX locale: `{src}`"); + } + + #[test] + fn default_locale() { + expect_success("C", "en-US-posix"); + expect_success("POSIX", "en-US-posix"); + } + + #[test] + fn region() { + expect_success("en_US", "en-US"); + expect_success("ne_NP", "ne-NP"); + expect_success("zh_TW", "zh-TW"); + } + + #[test] + fn codeset_ignored() { + expect_success("lv_LV.iso885913", "lv-LV"); + expect_success("hy_AM.armscii8", "hy-AM"); + } + + #[test] + fn modifier() { + // Currency + expect_success("it_IT@euro", "it-IT-u-cu-eur"); + + // Script + expect_success("uz_UZ@cyrillic", "uz-Cyrl-UZ"); + expect_success("sd_IN@devanagari", "sd-Deva-IN"); + expect_success("sr_RS@latin", "sr-Latn-RS"); + + // Language + expect_success("aa_ER@saaho", "ssy-ER"); + + // Variant + expect_success("ca_ES@valencia", "ca-ES-valencia"); + } + + mod error { + mod parse { + use crate::locale::{posix::PosixParseError, PosixLocale}; + + fn expect_error(src: &str, posix_error: PosixParseError) { + let result = PosixLocale::try_from_str(src); + + match result { + Ok(invalid_locale) => { + panic!("Expected the error `{posix_error:?}`, got the locale `{invalid_locale:?}` from input of `{src}`") + } + Err(error) => { + assert_eq!(error, posix_error, "Comparing expected output of `{src}`") + } + } + } + + #[test] + fn empty_locale() { + expect_error("", PosixParseError::EmptyLocale); + } + + #[test] + fn empty_section() { + // Single, empty optional section 
+ expect_error("en_", PosixParseError::EmptySection { offset: 2 }); + expect_error("en.", PosixParseError::EmptySection { offset: 2 }); + expect_error("en@", PosixParseError::EmptySection { offset: 2 }); + + // Multiple optional sections, one empty + expect_error("en_.utf8@euro", PosixParseError::EmptySection { offset: 2 }); + expect_error("en_US.@euro", PosixParseError::EmptySection { offset: 5 }); + expect_error("en_US.utf8@", PosixParseError::EmptySection { offset: 10 }); + + // Single delimiter (excluding "." as that should return `PosixParseError::InvalidLocale` instead) + expect_error("_", PosixParseError::EmptySection { offset: 0 }); + expect_error("@", PosixParseError::EmptySection { offset: 0 }); + + // All delimiters + expect_error("_.@", PosixParseError::EmptySection { offset: 0 }); + } + + #[test] + fn invalid_character() { + const SAMPLE_LOCALES: [&str; 2] = [ + "en", // No optional fields + "en_US.utf8@euro", // All optional fields + ]; + + for locale in SAMPLE_LOCALES { + // Insert an invalid character ('/') at every position along the sample locale + for offset in 0..=locale.len() { + let (left, right) = locale.split_at(offset); + let invalid_locale = format!("{left}/{right}"); + expect_error( + &invalid_locale, + PosixParseError::InvalidCharacter { offset }, + ); + } + } + + // Test a single '/' character + expect_error("/", PosixParseError::InvalidCharacter { offset: 0 }); + } + + #[test] + fn invalid_locale() { + expect_error(".", PosixParseError::InvalidLocale); + expect_error("..", PosixParseError::InvalidLocale); + } + + #[test] + fn repeated_delimiter() { + // Repeated delimiter at the end of locale + expect_error( + "en_US.utf8@euro_US", + PosixParseError::RepeatedDelimiter { + first_offset: 2, + second_offset: 15, + }, + ); + expect_error( + "en_US.utf8@euro.utf8", + PosixParseError::RepeatedDelimiter { + first_offset: 5, + second_offset: 15, + }, + ); + expect_error( + "en_US.utf8@euro@euro", + PosixParseError::RepeatedDelimiter { + 
first_offset: 10, + second_offset: 15, + }, + ); + + // Multiple repeated delimiters + expect_error( + "en.utf8.utf8.utf8", + PosixParseError::RepeatedDelimiter { + first_offset: 2, + second_offset: 7, + }, + ); + + // Consecutive repeated delimiters + expect_error( + "en__US.utf8@euro", + PosixParseError::RepeatedDelimiter { + first_offset: 2, + second_offset: 3, + }, + ); + expect_error( + "en_US..utf8@euro", + PosixParseError::RepeatedDelimiter { + first_offset: 5, + second_offset: 6, + }, + ); + expect_error( + "en_US.utf8@@euro", + PosixParseError::RepeatedDelimiter { + first_offset: 10, + second_offset: 11, + }, + ); + } + + #[test] + fn unordered_delimiter() { + expect_error( + "en_US@euro.utf8", + PosixParseError::UnorderedDelimiter { + first_offset: 5, + second_offset: 10, + }, + ); + expect_error( + "en.utf8_US@euro", + PosixParseError::UnorderedDelimiter { + first_offset: 2, + second_offset: 7, + }, + ); + expect_error( + "en.utf8@euro_US", + PosixParseError::UnorderedDelimiter { + first_offset: 7, + second_offset: 12, + }, + ); + expect_error( + "en@euro_US.utf8", + PosixParseError::UnorderedDelimiter { + first_offset: 2, + second_offset: 7, + }, + ); + expect_error( + "en@euro.utf8_US", + PosixParseError::UnorderedDelimiter { + first_offset: 2, + second_offset: 7, + }, + ); + } + + #[test] + fn offset() { + // Empty section + let src = "en_.utf8@euro"; + match PosixLocale::try_from_str(src) { + Err(PosixParseError::EmptySection { offset }) => { + assert_eq!(&src[offset..offset + 1], "_"); + } + _ => unreachable!(), + } + + // Invalid character + let src = "en_U/S"; + match PosixLocale::try_from_str(src) { + Err(PosixParseError::InvalidCharacter { offset }) => { + assert_eq!(&src[offset..offset + 1], "/"); + } + _ => unreachable!(), + } + + // Repeated delimiter + let src = "en_US.utf8@euro_US"; + match PosixLocale::try_from_str(src) { + Err(PosixParseError::RepeatedDelimiter { + first_offset, + second_offset, + }) => { + 
assert_eq!(&src[first_offset..first_offset + 1], "_"); + assert_eq!(&src[second_offset..second_offset + 1], "_"); + } + _ => unreachable!(), + } + + // Unordered delimiter + let src = "en_US@euro.utf8"; + match PosixLocale::try_from_str(src) { + Err(PosixParseError::UnorderedDelimiter { + first_offset, + second_offset, + }) => { + assert_eq!(&src[first_offset..first_offset + 1], "@"); + assert_eq!(&src[second_offset..second_offset + 1], "."); + } + _ => unreachable!(), + } + } + } + + mod conversion { + use crate::locale::PosixLocale; + use icu_locale_core::Locale; + + fn expect_error(src: &str, icu_error: icu_locale_core::ParseError) { + let result: Result = + PosixLocale::try_from_str(src).expect(src).try_into(); + match result { + Ok(invalid_locale) => { + panic!("Expected the error `{icu_error:?}`, got the locale `{invalid_locale:?}` from input of `{src}`") + } + Err(error) => { + assert_eq!(error, icu_error, "Comparing expected output of `{src}`") + } + } + } + + #[test] + fn invalid_language() { + expect_error("invalid", icu_locale_core::ParseError::InvalidLanguage); + } + + #[test] + fn invalid_region() { + expect_error("en_invalid", icu_locale_core::ParseError::InvalidSubtag); + } + } + } +} diff --git a/utils/host_info/src/locale/windows.rs b/utils/host_info/src/locale/windows.rs new file mode 100644 index 00000000000..df7681ca6c3 --- /dev/null +++ b/utils/host_info/src/locale/windows.rs @@ -0,0 +1,161 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Parsing functionality for Windows LCIDs. +//! For more information, see [`WindowsLocale`]. +//! +//! # Usage example +//! ``` +//! use icu_locale_core::{Locale, locale}; +//! use icu_host_info::locale::{WindowsLocale, windows::WindowsLocaleParseError}; +//! +//! # fn main() -> Result<(), WindowsLocaleParseError> { +//! 
let windows_locale = WindowsLocale::try_from_str("zh-CN_radstr")?; +//! +//! assert_eq!(Locale::try_from(windows_locale), Ok(locale!("zh-CN-u-co-unihan"))); +//! # Ok(()) +//! # } +//! ``` + +use displaydoc::Display; +use icu_locale_core::extensions::unicode::{key, value, Keywords, Unicode, Value}; +use icu_locale_core::extensions::Extensions; +use icu_locale_core::{langid, LanguageIdentifier, Locale, ParseError}; + +#[derive(Display, Debug, PartialEq)] +/// An error while parsing a Windows locale identifier +pub enum WindowsLocaleParseError {} + +/// A parsed and validated Windows locale identifier. +pub struct WindowsLocale<'src> { + src: &'src str, +} + +impl<'src> WindowsLocale<'src> { + pub fn try_from_str(src: &'src str) -> Result { + Ok(Self { src }) + } +} + +impl<'src> TryFrom> for Locale { + type Error = ParseError; + + fn try_from(input: WindowsLocale<'src>) -> Result { + let (lcid, collation_value) = strip_windows_collation_suffix_lossy(input.src); + let keywords = match collation_value { + // Add the -u-co-VALUE extension to the locale + Some(collation_value) => Keywords::new_single(key!("co"), collation_value), + // No collation value found, use default keywords + None => Keywords::new(), + }; + + // Use a matching alias if found + let language = match find_windows_language_alias_lossy(lcid) { + Some(locale) => locale, + None => LanguageIdentifier::try_from_str(lcid)?, + }; + + Ok(Locale { + id: language, + extensions: Extensions::from_unicode(Unicode { + keywords, + ..Unicode::new() + }), + }) + } +} + +fn strip_windows_collation_suffix_lossy(lcid: &str) -> (&str, Option) { + // All known LCIDs containing an underscore are used for a collation suffix + if let Some((prefix, suffix)) = lcid.split_once('_') { + let collation_value = match suffix { + "phoneb" => value!("phonebk"), + "pronun" => value!("zhuyin"), + "radstr" => value!("unihan"), + "stroke" => value!("stroke"), + "tradnl" => value!("trad"), + // Strip the suffix on LCIDs with an underscore 
but no (known) matching CLDR data + _ => return (prefix, None), + }; + + // Return the LCID with the stripped prefix, and the matching CLDR collation key + (prefix, Some(collation_value)) + } else { + // No underscore found, return the LCID as-is + (lcid, None) + } +} + +/// Find a BCP-47 identifier from a list of known Windows aliases. +fn find_windows_language_alias_lossy(lcid: &str) -> Option { + match lcid { + "zh-yue-HK" => Some(langid!("yue-HK")), + // LCID with no (known) matching CLDR data: "math alphanumeric sorting" + // This would be `x-IV_mathan`, but the collation suffix may already be stripped by + // `strip_windows_collation_suffix_lossy`. For some reason, `LocaleEnumProcEx` also uses + // `x-IV-mathan`, so that is included here too. + // https://learn.microsoft.com/en-us/windows/win32/api/winnls/nc-winnls-locale_enumprocex + "x-IV" | "x-IV_mathan" | "x-IV-mathan" => Some(langid!("und")), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn expect_success(src: &str, expected: &str) { + let windows_locale = WindowsLocale::try_from_str(src).expect(src); + let locale = Locale::try_from(windows_locale).expect(src); + + assert_eq!( + locale, + Locale::try_from_str(expected).unwrap(), + "Case: {src}" + ); + } + + #[test] + fn collation() { + /// All MS-LCID collation entries with a known matching CLDR collation value + const CASES: [(&str, &str); 12] = [ + ("de-DE_phoneb", "de-DE-u-co-phonebk"), + ("es-ES_tradnl", "es-ES-u-co-trad"), + ("ja-JP_radstr", "ja-JP-u-co-unihan"), + ("zh-CN_phoneb", "zh-CN-u-co-phonebk"), + ("zh-CN_stroke", "zh-CN-u-co-stroke"), + ("zh-HK_radstr", "zh-HK-u-co-unihan"), + ("zh-MO_radstr", "zh-MO-u-co-unihan"), + ("zh-MO_stroke", "zh-MO-u-co-stroke"), + ("zh-SG_phoneb", "zh-SG-u-co-phonebk"), + ("zh-SG_stroke", "zh-SG-u-co-stroke"), + ("zh-TW_pronun", "zh-TW-u-co-zhuyin"), + ("zh-TW_radstr", "zh-TW-u-co-unihan"), + ]; + + for (src, expected) in CASES { + expect_success(src, expected); + } + } + + #[test] + fn 
collation_strip_known_invalid() { + // All MS-LCID collation entries with NO known matching CLDR collation value + expect_success("hu-HU_tchncl", "hu-HU"); + expect_success("ka-GE_modern", "ka-GE"); + } + + #[test] + fn collation_strip_unknown() { + expect_success("en-US_unknown", "en-US"); + expect_success("en-US_unknown_multiple_underscores", "en-US"); + expect_success("en-US_unknown-with-hyphens", "en-US"); + } + + #[test] + fn alias() { + expect_success("zh-yue-HK", "yue-HK"); + expect_success("x-IV-mathan", "und"); + } +} diff --git a/utils/host_info/src/posix.rs b/utils/host_info/src/posix.rs new file mode 100644 index 00000000000..e5373a00c2f --- /dev/null +++ b/utils/host_info/src/posix.rs @@ -0,0 +1,235 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(dead_code)] + +use libc::{setlocale, LC_ALL}; +use std::{collections::HashMap, ffi::CStr, ptr, str::FromStr}; + +use crate::error::HostInfoError; + +#[derive(Hash, Eq, PartialEq, Debug, Clone, Copy)] +pub enum LocaleCategory { + Character, // LC_CTYPE + Number, // LC_NUMERIC + Time, // LC_TIME + Collate, // LC_COLLATE + Monetary, // LC_MONETARY + Messages, // LC_MESSAGES + // GNU extensions (may not exist on non-gnu targets) + Paper, // LC_PAPER + Name, // LC_NAME + Address, // LC_ADDRESS + Telephone, // LC_TELEPHONE + Measurement, // LC_MEASUREMENT + Identification, // LC_IDENTIFICATION + All, // LC_ALL +} + +impl LocaleCategory { + #[inline] + fn to_env_var_name(self) -> &'static str { + match self { + LocaleCategory::Character => "LC_CTYPE", + LocaleCategory::Number => "LC_NUMERIC", + LocaleCategory::Time => "LC_TIME", + LocaleCategory::Collate => "LC_COLLATE", + LocaleCategory::Monetary => "LC_MONETARY", + LocaleCategory::Messages => "LC_MESSAGES", + LocaleCategory::Paper => "LC_PAPER", + LocaleCategory::Name => "LC_NAME", + 
LocaleCategory::Address => "LC_ADDRESS", + LocaleCategory::Telephone => "LC_TELEPHONE", + LocaleCategory::Measurement => "LC_MEASUREMENT", + LocaleCategory::Identification => "LC_IDENTIFICATION", + LocaleCategory::All => "LC_ALL", + } + } +} + +impl FromStr for LocaleCategory { + type Err = HostInfoError; + + fn from_str(s: &str) -> Result { + match s { + "LC_CTYPE" => Ok(Self::Character), + "LC_NUMERIC" => Ok(Self::Number), + "LC_TIME" => Ok(Self::Time), + "LC_COLLATE" => Ok(Self::Collate), + "LC_MONETARY" => Ok(Self::Monetary), + "LC_MESSAGES" => Ok(Self::Messages), + "LC_PAPER" => Ok(Self::Paper), + "LC_NAME" => Ok(Self::Name), + "LC_ADDRESS" => Ok(Self::Address), + "LC_TELEPHONE" => Ok(Self::Telephone), + "LC_MEASUREMENT" => Ok(Self::Measurement), + "LC_IDENTIFICATION" => Ok(Self::Identification), + "LC_ALL" => Ok(Self::All), + _ => Err(HostInfoError::UnknownCategory), + } + } +} + +// --- helpers --- + +#[inline] +fn is_c_like(raw: &str) -> bool { + let s = raw.trim(); + if s.is_empty() { + return true; + } + let up = s.to_ascii_uppercase(); + // Strip charset and modifier suffixes like ".UTF-8" or "@euro" + let base = up.split('.').next().unwrap_or(&up); + let base = base.split('@').next().unwrap_or(base); + base == "C" || base == "POSIX" +} + +#[inline] +fn non_c_like_env(name: &str) -> Option { + std::env::var_os(name).and_then(|v| { + let s = v.to_string_lossy(); + if s.is_empty() || is_c_like(&s) { + None + } else { + Some(s.into_owned()) + } + }) +} + +/// POSIX precedence: LC_ALL > LC_ > LANG. +/// Returns Some(non-C/POSIX) or None if unset/C-like. +fn resolve_env_for_category(cat: LocaleCategory) -> Option { + if let Some(v) = non_c_like_env("LC_ALL") { + return Some(v); + } + if cat != LocaleCategory::All { + if let Some(v) = non_c_like_env(cat.to_env_var_name()) { + return Some(v); + } + } + non_c_like_env("LANG") +} + +/// Attempt to parse `setlocale(LC_ALL, NULL)` into a map. 
+/// Returns None if NULL or C/POSIX-like (uninformative), to trigger env fallback. +/// Note: We only check LC_ALL because if libc is uninitialized, all categories return "C". +/// If initialized, LC_ALL contains the composite snapshot of all category values. +fn parse_setlocale_snapshot() -> Option> { + // SAFETY: read-only query of current thread's locale snapshot + let ptr = unsafe { setlocale(LC_ALL, ptr::null()) }; + if ptr.is_null() { + return None; + } + let s = unsafe { CStr::from_ptr(ptr) }.to_str().ok()?; + if s.is_empty() || is_c_like(s) { + return None; + } + + let mut map = HashMap::new(); + if !s.contains('=') { + // Single composite locale -> LC_ALL + if !is_c_like(s) { + map.insert(LocaleCategory::All, s.to_string()); + } + return if map.is_empty() { None } else { Some(map) }; + } + + for pair in s.split(';') { + let mut it = pair.splitn(2, '='); + let k = it.next().unwrap_or_default().trim(); + let v = it.next().unwrap_or_default().trim(); + if v.is_empty() || is_c_like(v) { + continue; + } + if let Ok(cat) = LocaleCategory::from_str(k) { + map.insert(cat, v.to_string()); + } + } + + if map.is_empty() { + None + } else { + Some(map) + } +} + +// --- public --- + +/// Retrieves locales for LC_ALL and any explicitly-set categories in this thread. +/// If libc is uninitialized (NULL/C/POSIX), falls back to env precedence. +/// If nothing resolves, returns `{ LC_ALL: "en-US-posix" }`. +pub(crate) fn raw_locale_categories() -> Result, HostInfoError> { + if let Some(map) = parse_setlocale_snapshot() { + return Ok(map); + } + + // Env fallback: collect only categories that resolve to non-C/POSIX. 
+ const CATS: &[LocaleCategory] = &[ + LocaleCategory::Character, + LocaleCategory::Number, + LocaleCategory::Time, + LocaleCategory::Collate, + LocaleCategory::Monetary, + LocaleCategory::Messages, + LocaleCategory::Paper, + LocaleCategory::Name, + LocaleCategory::Address, + LocaleCategory::Telephone, + LocaleCategory::Measurement, + LocaleCategory::Identification, + LocaleCategory::All, + ]; + + let mut out = HashMap::new(); + + for &cat in CATS { + if let Some(v) = resolve_env_for_category(cat) { + out.insert(cat, v); + } + } + + if out.is_empty() { + out.insert(LocaleCategory::All, "en-US-posix".to_string()); + } + + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use icu_locale_core::Locale; + + // Testing fetching of locale, as `get_locales` fetches the locales for category + // `LC_ALL`. For this category this should return non empty + #[test] + fn test_get_raw_locale_categories() { + let locale_res = raw_locale_categories().unwrap(); + assert!( + !locale_res.is_empty(), + "Empty hashmap for locales retrieved" + ); + for locale in locale_res.into_values() { + assert!(locale.is_ascii(), "Invalid form of locale retrieved") + } + } + + #[test] + fn test_converting_locales() { + let locale_res: std::collections::HashMap = + raw_locale_categories().unwrap(); + for locale in locale_res.into_values() { + let parts: Vec<&str> = locale.split('.').collect(); + + // Skipping "C" and those ending with "UTF-8", as they cannot be converted + // into the locale + if !parts.contains(&"C") && (parts.len() > 1 && parts[parts.len() - 1] != "UTF-8") { + let mut locale_converted: Locale = locale.parse().unwrap(); + locale_converted.extensions.unicode.clear(); + assert_eq!(locale_converted, locale.parse().unwrap()); + } + } + } +} diff --git a/utils/env_preferences/tests/datasets/mod.rs b/utils/host_info/tests/datasets/mod.rs similarity index 82% rename from utils/env_preferences/tests/datasets/mod.rs rename to utils/host_info/tests/datasets/mod.rs index 
242575b2eea..9980a3b8fc1 100644 --- a/utils/env_preferences/tests/datasets/mod.rs +++ b/utils/host_info/tests/datasets/mod.rs @@ -16,22 +16,24 @@ const WINDOWS_DATASET: &str = include_str!("windows.txt"); #[test] fn posix() { - use env_preferences::parse::posix::PosixLocale; + use icu_host_info::locale::PosixLocale; + use icu_locale_core::Locale; for locale in POSIX_DATASET.lines() { let posix_locale = PosixLocale::try_from_str(locale).expect(locale); - posix_locale.try_convert_lossy().expect(locale); + Locale::try_from(posix_locale).expect(locale); } } #[test] fn windows() { - use env_preferences::parse::windows::WindowsLocale; + use icu_host_info::locale::WindowsLocale; + use icu_locale_core::Locale; for locale in WINDOWS_DATASET.lines() { let windows_locale = WindowsLocale::try_from_str(locale).expect(locale); - windows_locale.try_convert_lossy().expect(locale); + Locale::try_from(windows_locale).expect(locale); } } diff --git a/utils/env_preferences/tests/datasets/posix.txt b/utils/host_info/tests/datasets/posix.txt similarity index 100% rename from utils/env_preferences/tests/datasets/posix.txt rename to utils/host_info/tests/datasets/posix.txt diff --git a/utils/env_preferences/tests/datasets/windows.txt b/utils/host_info/tests/datasets/windows.txt similarity index 100% rename from utils/env_preferences/tests/datasets/windows.txt rename to utils/host_info/tests/datasets/windows.txt From 77d05e45f8d3b0206bd25ce748c0ecd83aca2b56 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Wed, 22 Oct 2025 06:14:58 +0200 Subject: [PATCH 2/2] Rebase and apply feedback --- Cargo.lock | 401 +------ Cargo.toml | 3 +- utils/host_info/Cargo.lock | 1001 +++++++++++++++++ utils/host_info/Cargo.toml | 40 +- utils/host_info/README.md | 101 +- utils/host_info/examples/dt_format.rs | 3 +- utils/host_info/examples/dt_format_locale.rs | 5 +- utils/host_info/examples/get_data.rs | 30 +- utils/host_info/src/backends/android.rs | 6 +- utils/host_info/src/backends/linux.rs | 3 +- 
utils/host_info/src/backends/mod.rs | 6 +- utils/host_info/src/backends/shared/mod.rs | 5 + .../src/{ => backends/shared}/posix.rs | 2 +- utils/host_info/src/error.rs | 2 +- utils/host_info/src/host_info.rs | 298 ++--- utils/host_info/src/lib.rs | 103 +- 16 files changed, 1306 insertions(+), 703 deletions(-) create mode 100644 utils/host_info/Cargo.lock create mode 100644 utils/host_info/src/backends/shared/mod.rs rename utils/host_info/src/{ => backends/shared}/posix.rs (99%) diff --git a/Cargo.lock b/Cargo.lock index 8263648e4a5..a743c820a40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -384,16 +384,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cfg-expr" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a2c5f3bf25ec225351aa1c8e230d04d880d3bd89dea133537dafad4ae291e5c" -dependencies = [ - "smallvec", - "target-lexicon", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -410,7 +400,7 @@ dependencies = [ "js-sys", "num-traits", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -532,16 +522,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -974,69 +954,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" -[[package]] -name = "futures-channel" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "futures-executor" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-macro" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - "futures-core", - "futures-macro", - "futures-task", - "pin-project-lite", - "pin-utils", - "slab", -] - [[package]] name = "getopts" version = "0.2.24" @@ -1077,91 +994,6 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" -[[package]] -name = "gio" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed68efc12b748a771be2dccc49480d8584004382967c98323245fc3c38b74a42" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-util", - "gio-sys", - "glib", - "libc", - "pin-project-lite", - "smallvec", -] - -[[package]] -name = "gio-sys" -version = 
"0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "171ed2f6dd927abbe108cfd9eebff2052c335013f5879d55bab0dc1dee19b706" -dependencies = [ - "glib-sys", - "gobject-sys", - "libc", - "system-deps", - "windows-sys 0.59.0", -] - -[[package]] -name = "glib" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f2cbc4577536c849335878552f42086bfd25a8dcd6f54a18655cf818b20c8f" -dependencies = [ - "bitflags", - "futures-channel", - "futures-core", - "futures-executor", - "futures-task", - "futures-util", - "gio-sys", - "glib-macros", - "glib-sys", - "gobject-sys", - "libc", - "memchr", - "smallvec", -] - -[[package]] -name = "glib-macros" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55eda916eecdae426d78d274a17b48137acdca6fba89621bd3705f2835bc719f" -dependencies = [ - "heck 0.5.0", - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "glib-sys" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09d3d0fddf7239521674e57b0465dfbd844632fec54f059f7f56112e3f927e1" -dependencies = [ - "libc", - "system-deps", -] - -[[package]] -name = "gobject-sys" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "538e41d8776173ec107e7b0f2aceced60abc368d7e1d81c1f0e2ecd35f59080d" -dependencies = [ - "glib-sys", - "libc", - "system-deps", -] - [[package]] name = "half" version = "2.4.1" @@ -1257,7 +1089,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.58.0", + "windows-core", ] [[package]] @@ -1619,23 +1451,6 @@ dependencies = [ "icu_provider", ] -[[package]] -name = "icu_host_info" -version = "0.1.0" -dependencies = [ - "core-foundation", - "core-foundation-sys", - "displaydoc", - "gio", - "icu", - "icu_calendar", - "icu_datetime", - "icu_locale_core", - "icu_time", - "libc", - "windows", -] - [[package]] 
name = "icu_list" version = "2.0.0" @@ -2433,7 +2248,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2478,24 +2293,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - [[package]] name = "plotters" version = "0.3.7" @@ -2578,15 +2375,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "proc-macro-crate" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" -dependencies = [ - "toml_edit 0.23.5", -] - [[package]] name = "proc-macro2" version = "1.0.103" @@ -3120,12 +2908,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" -[[package]] -name = "slab" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" - [[package]] name = "smallvec" version = "1.15.1" @@ -3190,7 +2972,6 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "rustversion", "syn 2.0.108", ] @@ -3243,19 +3024,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "system-deps" -version = "7.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"550b2c61a9c30b85ca1f6ef0afcd2befcb12e73b1d31ef0526423bc7b6a99d7f" -dependencies = [ - "cfg-expr", - "heck 0.5.0", - "pkg-config", - "toml", - "version-compare", -] - [[package]] name = "tap" version = "1.0.1" @@ -3272,12 +3040,6 @@ dependencies = [ "libc", ] -[[package]] -name = "target-lexicon" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" - [[package]] name = "thiserror" version = "2.0.17" @@ -3385,8 +3147,8 @@ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", - "toml_datetime 0.6.9", - "toml_edit 0.22.26", + "toml_datetime", + "toml_edit", ] [[package]] @@ -3398,15 +3160,6 @@ dependencies = [ "serde", ] -[[package]] -name = "toml_datetime" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a197c0ec7d131bfc6f7e82c8442ba1595aeab35da7adbf05b6b73cd06a16b6be" -dependencies = [ - "serde_core", -] - [[package]] name = "toml_edit" version = "0.22.27" @@ -3416,32 +3169,11 @@ dependencies = [ "indexmap", "serde", "serde_spanned", - "toml_datetime 0.6.9", + "toml_datetime", "toml_write", "winnow", ] -[[package]] -name = "toml_edit" -version = "0.23.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ad0b7ae9cfeef5605163839cb9221f453399f15cfb5c10be9885fcf56611f9" -dependencies = [ - "indexmap", - "toml_datetime 0.7.1", - "toml_parser", - "winnow", -] - -[[package]] -name = "toml_parser" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b551886f449aa90d4fe2bdaa9f4a2577ad2dde302c61ecf262d80b116db95c10" -dependencies = [ - "winnow", -] - [[package]] name = "toml_write" version = "0.1.2" @@ -3586,12 +3318,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" -[[package]] -name = "version-compare" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" - [[package]] name = "version_check" version = "0.9.5" @@ -3810,64 +3536,19 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "windows" -version = "0.60.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf874e74c7a99773e62b1c671427abf01a425e77c3d3fb9fb1e4883ea934529" -dependencies = [ - "windows-collections", - "windows-core 0.60.1", - "windows-future", - "windows-link 0.1.3", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5467f79cc1ba3f52ebb2ed41dbb459b8e7db636cc3429458d9a852e15bc24dec" -dependencies = [ - "windows-core 0.60.1", -] - [[package]] name = "windows-core" version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" dependencies = [ - "windows-implement 0.58.0", - "windows-interface 0.58.0", - "windows-result 0.2.0", - "windows-strings 0.1.0", + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", "windows-targets 0.52.6", ] -[[package]] -name = "windows-core" -version = "0.60.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247" -dependencies = [ - "windows-implement 0.59.0", - "windows-interface 0.59.3", - "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.3.1", -] - -[[package]] -name = "windows-future" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a787db4595e7eb80239b74ce8babfb1363d8e343ab072f2ffe901400c03349f0" -dependencies = 
[ - "windows-core 0.60.1", - "windows-link 0.1.3", -] - [[package]] name = "windows-implement" version = "0.58.0" @@ -3879,17 +3560,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "windows-implement" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.108", -] - [[package]] name = "windows-interface" version = "0.58.0" @@ -3901,39 +3571,12 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.108", -] - -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-numerics" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "005dea54e2f6499f2cee279b8f703b3cf3b5734a2d8d21867c8f44003182eeed" -dependencies = [ - "windows-core 0.60.1", - "windows-link 0.1.3", -] - [[package]] name = "windows-result" version = "0.2.0" @@ -3943,34 +3586,16 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-result" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" -dependencies = [ - "windows-link 0.1.3", -] - [[package]] name = "windows-strings" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ - "windows-result 0.2.0", + "windows-result", "windows-targets 0.52.6", ] -[[package]] -name = "windows-strings" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" -dependencies = [ - "windows-link 0.1.3", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -4004,7 +3629,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -4029,7 +3654,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.2.1", + "windows-link", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", diff --git a/Cargo.toml b/Cargo.toml index d218658e4ee..ae2185d2b66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,6 @@ members = [ "utils/ixdtf", "utils/litemap", "utils/resb", - "utils/host_info", "utils/tinystr", "utils/tzif", "utils/potential_utf", @@ -100,6 +99,8 @@ members = [ # Note: Workspaces in subdirectories, such as components/collator/fuzz, are # implicitly excluded from the main workspace. exclude = [ + # host_info requires a custom ci job to avoid having to pull its optional dependencies + "utils/host_info", # Examples are tested outside the workspace to simulate external users "examples", # Don't publish the graveyard diff --git a/utils/host_info/Cargo.lock b/utils/host_info/Cargo.lock new file mode 100644 index 00000000000..974b0d41da8 --- /dev/null +++ b/utils/host_info/Cargo.lock @@ -0,0 +1,1001 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "calendrical_calculations" +version = "0.2.2" +dependencies = [ + "core_maths", + "displaydoc", +] + +[[package]] +name = "cfg-expr" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a2b34126159980f92da2a08bdec0694fd80fb5eb9e48aff25d20a0d8dfa710d" +dependencies = [ + "smallvec", + "target-lexicon", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fixed_decimal" +version = 
"0.7.0" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gio" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed68efc12b748a771be2dccc49480d8584004382967c98323245fc3c38b74a42" 
+dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys", + "glib", + "libc", + "pin-project-lite", + "smallvec", +] + +[[package]] +name = "gio-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171ed2f6dd927abbe108cfd9eebff2052c335013f5879d55bab0dc1dee19b706" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", + "windows-sys", +] + +[[package]] +name = "glib" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f2cbc4577536c849335878552f42086bfd25a8dcd6f54a18655cf818b20c8f" +dependencies = [ + "bitflags", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys", + "glib-macros", + "glib-sys", + "gobject-sys", + "libc", + "memchr", + "smallvec", +] + +[[package]] +name = "glib-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55eda916eecdae426d78d274a17b48137acdca6fba89621bd3705f2835bc719f" +dependencies = [ + "heck", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "glib-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09d3d0fddf7239521674e57b0465dfbd844632fec54f059f7f56112e3f927e1" +dependencies = [ + "libc", + "system-deps", +] + +[[package]] +name = "gobject-sys" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "538e41d8776173ec107e7b0f2aceced60abc368d7e1d81c1f0e2ecd35f59080d" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "icu" +version = "2.0.0" +dependencies = [ + "icu_calendar", + "icu_casemap", + "icu_collator", + "icu_collections", + "icu_datetime", + "icu_decimal", + "icu_list", + "icu_locale", + "icu_normalizer", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_segmenter", + "icu_time", +] + +[[package]] +name = "icu_calendar" +version = "2.0.4" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + "serde", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.0.0" + +[[package]] +name = "icu_casemap" +version = "2.0.0" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties", + "icu_provider", + "potential_utf", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_collator" +version = "2.1.0-dev" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collections" +version = "2.0.0" +dependencies = [ + "displaydoc", + "potential_utf", + "serde", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_datetime" +version = "2.0.0" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.0.2" + +[[package]] +name = "icu_decimal" +version = "2.0.0" +dependencies = [ + "fixed_decimal", + "icu_decimal_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal_data" +version = "2.0.0" + +[[package]] 
+name = "icu_host_info" +version = "0.1.0" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "displaydoc", + "gio", + "icu", + "icu_calendar", + "icu_datetime", + "icu_locale_core", + "icu_time", + "libc", + "windows", +] + +[[package]] +name = "icu_list" +version = "2.0.0" +dependencies = [ + "icu_provider", + "regex-automata", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locale" +version = "2.0.0" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +dependencies = [ + "displaydoc", + "litemap", + "serde", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locale_data" +version = "2.0.0" + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +dependencies = [ + "icu_collections", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_pattern" +version = "0.4.0" +dependencies = [ + "displaydoc", + "either", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.0.0" +dependencies = [ + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.0.0" + +[[package]] +name = "icu_properties" +version = "2.0.1" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_provider", + "serde", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_provider" +version = "2.0.0" +dependencies = [ + "displaydoc", + "icu_locale_core", + "serde", + "stable_deref_trait", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_segmenter" +version = "2.0.0" +dependencies = [ + "icu_collections", + "icu_provider", + "potential_utf", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_time" +version = "2.0.0" +dependencies 
= [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.0.2" + +[[package]] +name = "indexmap" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "ixdtf" +version = "0.6.3" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "litemap" +version = "0.8.0" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.3" +dependencies = [ + "serde_core", + "writeable", + "zerovec", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392" +dependencies = [ + "serde_core", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "system-deps" +version = "7.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c236d79f20808ca0084bfcd1a2fd6c686216b7f7a0c4fc39deb0cbf5eaab3713" +dependencies = [ + "cfg-expr", + "heck", + "pkg-config", + "toml", + "version-compare", +] + +[[package]] +name = "target-lexicon" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" + +[[package]] +name = "tinystr" +version = "0.8.1" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + +[[package]] +name = "toml" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8" +dependencies = [ + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = 
"0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2" + +[[package]] +name = "unicode-ident" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + +[[package]] +name = "windows" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf874e74c7a99773e62b1c671427abf01a425e77c3d3fb9fb1e4883ea934529" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + 
"windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5467f79cc1ba3f52ebb2ed41dbb459b8e7db636cc3429458d9a852e15bc24dec" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a787db4595e7eb80239b74ce8babfb1363d8e343ab072f2ffe901400c03349f0" +dependencies = [ + "windows-core", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-implement" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"005dea54e2f6499f2cee279b8f703b3cf3b5734a2d8d21867c8f44003182eeed" +dependencies = [ + "windows-core", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.6.1" +dependencies = [ + "either", +] + +[[package]] +name = "yoke" +version = "0.8.0" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + 
+[[package]] +name = "zerovec" +version = "0.11.4" +dependencies = [ + "serde", + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/utils/host_info/Cargo.toml b/utils/host_info/Cargo.toml index 0ba88c85a89..0c19e564956 100644 --- a/utils/host_info/Cargo.toml +++ b/utils/host_info/Cargo.toml @@ -7,19 +7,29 @@ name = "icu_host_info" version = "0.1.0" publish = false -rust-version = "1.82" -authors.workspace = true -edition.workspace = true -repository.workspace = true -homepage.workspace = true -license.workspace = true -categories.workspace = true -include.workspace = true +rust-version = "1.83" +authors = ["The ICU4X Project Developers"] +edition = "2021" +repository = "https://github.com/unicode-org/icu4x" +homepage = "https://icu4x.unicode.org" +license = "Unicode-3.0" +categories = ["internationalization"] +include = [ + "data/**/*", + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", + "build.rs", +] [dependencies] -displaydoc = { workspace = true } -icu_locale_core = { workspace = true, features = ["alloc"] } -icu_datetime = { workspace = true, optional = true } +displaydoc = { version = "0.2.3", default-features = false } +icu_locale_core = { version = "~2.0.0", path = "../../components/locale_core", features = ["alloc"] } +icu_datetime = { version = "~2.0.0", path = "../../components/datetime", optional = true } libc = "0.2.175" [target.'cfg(target_os = "linux")'.dependencies] @@ -30,7 +40,7 @@ core-foundation-sys = { version = "0.8.6" } core-foundation = { version = "0.10.1" } [target.'cfg(target_os = "windows")'.dependencies.windows] -version = "0.60.0" +version = "0.60" features = [ "System", "Foundation", @@ -44,9 +54,9 @@ features = [ [dev-dependencies] icu = { path = "../../components/icu", default-features = false } -icu_datetime = { workspace = true, features = 
["compiled_data"] } -icu_calendar = { workspace = true } -icu_time = { workspace = true, features = ["compiled_data"] } +icu_datetime = { version = "~2.0.0", path = "../../components/datetime", features = ["compiled_data"] } +icu_calendar = { version = "~2.0.0", path = "../../components/calendar" } +icu_time = { version = "~2.0.0", path = "../../components/time", features = ["compiled_data"] } [features] default = [] diff --git a/utils/host_info/README.md b/utils/host_info/README.md index 02921aba250..122487f7f66 100644 --- a/utils/host_info/README.md +++ b/utils/host_info/README.md @@ -7,13 +7,13 @@ `host_info` is a library providing functionality to retrieve regional preferences from host environments - primarily the operating system the program is running in. -The library is designed to bind the different host environment preferences architectures -to ICU4X model. +The library is designed to bind the different host environment preference architectures +to the [`icu`] model. ## Example ```rust -use icu_host_info::HostInfo; +use icu_host_info::icu_host_info; use icu::calendar::Date; use icu::datetime::{fieldsets, DateTimeFormatter}; @@ -21,7 +21,7 @@ let date = Date::try_new_gregorian(2025, 10, 10) .expect("Failed to create date"); // requires feature `datetime` -let prefs = HostInfo::datetime_preferences() +let prefs = icu_host_info::datetime_preferences() .expect("Failed to retrieve host info"); let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) @@ -35,49 +35,49 @@ assert_eq!(formatted_dt.to_string(), "October 10, 2025"); ## Feature Matrix The library intends to provide means to retrieve regional preferences -to ICU4X preferences with a focus on Unicode Extensions, but allow for +to [`icu`] preferences with a focus on Unicode Extensions, but allow for propagation of preferences offered by the host environments which may not have a representation in Unicode Extensions (for example: date format pattern). 
Legend: - ✅ = OS + `host_info` support -- ⚠️ = OS supports, `host_info` doesn't -- ❌ = OS doesn't supported +- 🚧 = OS supports, `host_info` doesn't +- ❌ = OS doesn't support | Feature | Android | iOS | Linux (1) | macOS | Windows | |---------------------| :-----: | :-: | :------------------: | :---: | :-----: | | Requested Locales | ✅ | ✅ | ✅ | ✅ | ✅ | -| Calendar | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -| Region | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -| Hour cycle | ⚠️ | ⚠️ | ✅ | ✅ | ⚠️ | -| Measurement System | ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | -| Measurement Override| ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | -| First Day of week | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -| Collation | ⚠️ | ⚠️ | ⚠️ | ✅ | ❌ | -| Date format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | -| Number format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +| Calendar | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +| Region | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +| Hour cycle | 🚧 | 🚧 | ✅ | ✅ | 🚧 | +| Measurement System | 🚧 | 🚧 | 🚧 | ✅ | 🚧 | +| Measurement Override| 🚧 | 🚧 | 🚧 | ✅ | 🚧 | +| First Day of week | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +| Collation | 🚧 | 🚧 | 🚧 | ✅ | ❌ | +| Date format | 🚧 | 🚧 | 🚧 | 🚧 | 🚧 | +| Number format | 🚧 | 🚧 | 🚧 | 🚧 | 🚧 | -(1) In case of Linux different DE's such as Gnoem and KDE are supported together. +(1) In the case of Linux, different desktop environments such as Gnome and KDE are supported together. -## Integrating preferences into ICU4X formatters +## Integrating preferences into ICU formatters The library provides three ways of injecting retrieved values into formatters: ### 1. Preference Bag -For most common components, such as `DateTimeFormatter`, the library exposes -a direct getter that retrieves a `Preferences` struct for that component. +For most common components, such as [`DateTimeFormatter`], the library exposes +a direct getter that retrieves a [`Preferences`] struct for that component. This getter is located behind a flag to allow for control over which dependencies are being pulled. 
#### Example ```rust -use icu_host_info::HostInfo; +use icu_host_info::icu_host_info; use icu::datetime::{fieldsets, DateTimeFormatter}; // requires feature `datetime` -let prefs = HostInfo::datetime_preferences() +let prefs = icu_host_info::datetime_preferences() .expect("Failed to retrieve host info"); let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) @@ -86,7 +86,7 @@ let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) ### 2. Locale -For all components that `HostInfo` does not have special preference getter for, +For all components that `icu_host_info` does not have a special preference getter for, and for cases where the user prefers to avoid pulling extra dependencies at the cost of narrowing down the retrieved values to just ones encoded in Unicode Extensions, the library provides an ergonomic getter: @@ -94,19 +94,18 @@ the library provides an ergonomic getter: #### Example ```rust -use icu_host_info::HostInfo; use icu::{ datetime::{fieldsets, DateTimeFormatter}, locale::Locale, }; -let mut locale = HostInfo::requested_locales() +let mut locale = icu_host_info::requested_locales() .expect("Failed to retrieve locales") .first() .cloned() .unwrap_or(Locale::UNKNOWN); -locale.extensions.unicode = HostInfo::unicode_extensions() +locale.extensions.unicode = icu_host_info::unicode_extensions() .expect("Failed to retrieve host info"); let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) @@ -127,28 +126,27 @@ allowing the user to retrieve just that preference and use it as they see fit.
#### Example ```rust -use icu_host_info::HostInfo; use icu::locale::preferences::extensions::unicode::keywords::HourCycle; -let mut calendar: Option<HourCycle> = HostInfo::hour_cycle() +let hour_cycle: Option<HourCycle> = icu_host_info::hour_cycle() .expect("Failed to retrieve hour_cycle preference"); ``` ## Locale Negotiation -Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU4X to +Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU to perform a negotiation between requested locales, and locales for which the data is available in the system. -The output of `HostInfo` will be utilized in that negotiation allowing the deployment to 1) select +The output of `icu_host_info` will be utilized in that negotiation allowing the deployment to 1) select the most appropriate locales for the given user and target modality, 2) apply regional preferences onto that locale. -The need to allow `HostInfo` to be pluggable info locale negotiation and multi source merging (see next section) +The need to allow `icu_host_info` to be pluggable into locale negotiation and multi source merging (see next section) guided many design choices in this library. This section will be extended once locale negotiation is implemented. ## Multi Source Merging -In simple systems the user will most often use ICU4X to format -some information in a selected locale, and use this library to augument +In simple systems the user will most often use ICU to format +some information in a selected locale, and use this library to augment the formatting with regional preferences set by the user in the host environment.
In more complex systems, the user may also want to introduce a second source of regional preferences @@ -157,22 +155,22 @@ and mix the values set in the host environment with those set in the program its For example, a web browser may offer some regional preferences set in the browser itself, or even set separate for some contexts of the browser. -In those cases, the depoyment requires merging of the preferences. -ICU4X exposes an `extend` method on both `Preferences` and `Unicode` extensions struct. +In those cases, the deployment requires merging of the preferences. +ICU exposes an `extend` method on both [`Preferences`] and [`Unicode`] extensions struct. -This allows the system to retrieve [`HostInfo`] Preferences or `Unicode`, and applications' -equivalent, and merge of them. +This allows the system to retrieve Preferences or [`Unicode`], and the application's +equivalent, and merge them. ### `Preferences` Example ```rust -use icu_host_info::HostInfo; +use icu_host_info::icu_host_info; use icu::datetime::{fieldsets, DateTimeFormatter}; let app_prefs = app.datetime_preferences(); // requires feature `datetime` -let mut combined_prefs = HostInfo::datetime_preferences() +let mut combined_prefs = icu_host_info::datetime_preferences() .expect("Failed to retrieve host info"); combined_prefs.extend(app_prefs); @@ -184,7 +182,7 @@ let dtf = DateTimeFormatter::try_new(combined_prefs, fieldsets::YMD::long()) ### `Unicode` Extensions Example ```rust -use icu_host_info::HostInfo; +use icu_host_info::icu_host_info; use icu::{ datetime::{fieldsets, DateTimeFormatter}, locale::locale, @@ -194,7 +192,7 @@ let mut locale = locale!("fr-CA"); let app_ue = app.unicode_extensions(); -let mut combined_ue = HostInfo::unicode_extensions() +let mut combined_ue = icu_host_info::unicode_extensions() .expect("Failed to retrieve host info"); combined_ue.extend(app_ue); @@ -208,7 +206,7 @@ let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) ## Design Decisions The 
library operates on a boundary of diverse set of host -environments and uniformal ICU4X design derived from Unicode LDML. +environments and uniform ICU design derived from Unicode LDML. It requires a number of design tradeoffs that had to be made in order to achieve the uniformity and scale over time as the host platforms design evolves. @@ -224,8 +222,8 @@ setting up such environment to ensure propagation of customer preferences. ### Lossy Results The library makes best-effort to retrieve the values -that can be directly used in ICU4X. As the operating systems, -runtimes and ICU4X evolve, there's always a risk of a mismatch. +that can be directly used in ICU. As the operating systems, +runtimes and ICU evolve, there's always a risk of a mismatch. This library makes a design decision to be lossy-by-default. Any value that cannot be directly mapped onto a valid value is ignored @@ -240,15 +238,15 @@ cases like memory corruption or OS API errors propagation. ### Normalized vs Raw values -The main API of this library - [`HostInfo`] - provides methods that return normalized +The library provides methods that return normalized values, often directly taken from `icu::locale_core::preferences`. Per-host backends provide additional trait implementation that returns raw values, allowing the user to handle or introspect those values manually. -When using `HostInfo`, the library performs best-effort to normalize and parse -those raw values into canonical Unicode ICU4X representation, often discarding +When using `icu_host_info`, the library performs best-effort to normalize and parse +those raw values into canonical Unicode ICU representation, often discarding unknown values and values that fail to parse. -Those raw backends are not exposed in the documentation as the documentation. +Those raw backends are not exposed in the main documentation. #### Example @@ -278,9 +276,14 @@ releases as better bindings become available. 
A note for host API designers - it is useful for foundational libraries such as this to expose APIs that enable us to distinguish between regional preferences values derived by the host from defaults of a locale, from cases when the value is explicitly set by the user. -This dinstinction allows ICU4X to better serve in locale negotiations scenario where other-than-first locale may be used +This distinction allows ICU to better serve locale negotiation scenarios where an other-than-first locale may be used and the deployment should respect whether the user set a given preference explicitly or left it to the per-locale default. +[`icu`]: https://docs.rs/icu/latest/icu/ +[`Unicode`]: https://docs.rs/icu_locale_core/latest/icu_locale_core/extensions/unicode/struct.Unicode.html +[`Preferences`]: https://docs.rs/icu_locale_core/latest/icu_locale_core/preferences/index.html +[`DateTimeFormatter`]: https://docs.rs/icu_datetime/latest/icu_datetime/struct.DateTimeFormatter.html + ## More Information diff --git a/utils/host_info/examples/dt_format.rs b/utils/host_info/examples/dt_format.rs index 80e664a6c47..8fd9be5ee40 100644 --- a/utils/host_info/examples/dt_format.rs +++ b/utils/host_info/examples/dt_format.rs @@ -4,11 +4,10 @@ use icu_calendar::Date; use icu_datetime::{fieldsets, input::Time, DateTimeFormatter}; -use icu_host_info::HostInfo; use icu_time::DateTime; fn main() { - let prefs = HostInfo::datetime_preferences().expect("Failed to retrieve host info"); + let prefs = icu_host_info::datetime_preferences().expect("Failed to retrieve host info"); let dtf = DateTimeFormatter::try_new( prefs, fieldsets::YMDT::long().with_alignment(icu_datetime::options::Alignment::Column), diff --git a/utils/host_info/examples/dt_format_locale.rs b/utils/host_info/examples/dt_format_locale.rs index 9eeea15b1f4..e0f4aa44da5 100644 --- a/utils/host_info/examples/dt_format_locale.rs +++ b/utils/host_info/examples/dt_format_locale.rs @@ -4,19 +4,18 @@ use icu_calendar::Date; use
icu_datetime::{fieldsets, input::Time, DateTimeFormatter}; -use icu_host_info::HostInfo; use icu_locale_core::Locale; use icu_time::DateTime; fn main() { - let mut locale = HostInfo::requested_locales() + let mut locale = icu_host_info::requested_locales() .unwrap() .first() .cloned() .unwrap_or(Locale::UNKNOWN); locale.extensions.unicode = - HostInfo::unicode_extensions().expect("Failed to retrieve host info"); + icu_host_info::unicode_extensions().expect("Failed to retrieve host info"); let dtf = DateTimeFormatter::try_new( locale.into(), diff --git a/utils/host_info/examples/get_data.rs b/utils/host_info/examples/get_data.rs index 6a47f3e3885..02cbc590270 100644 --- a/utils/host_info/examples/get_data.rs +++ b/utils/host_info/examples/get_data.rs @@ -2,30 +2,34 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use icu_host_info::HostInfo; - fn main() { - println!("resolved backend: {:?}", HostInfo::resolved_backend()); + println!("resolved backend: {:?}", icu_host_info::resolved_backend()); println!("-----"); - println!("requested locales: {:?}", HostInfo::requested_locales()); - println!("calendar: {:?}", HostInfo::calendar()); - println!("region: {:?}", HostInfo::region()); - println!("hour_cycle: {:?}", HostInfo::hour_cycle()); - println!("measurement_system: {:?}", HostInfo::measurement_system()); + println!( + "requested locales: {:?}", + icu_host_info::requested_locales() + ); + println!("calendar: {:?}", icu_host_info::calendar()); + println!("region: {:?}", icu_host_info::region()); + println!("hour_cycle: {:?}", icu_host_info::hour_cycle()); + println!( + "measurement_system: {:?}", + icu_host_info::measurement_system() + ); println!( "measurement_unit_override: {:?}", - HostInfo::measurement_unit_override() + icu_host_info::measurement_unit_override() ); - println!("first_day: {:?}", HostInfo::first_day_of_week()); - println!("collation: {:?}", 
HostInfo::collation()); + println!("first_day: {:?}", icu_host_info::first_day_of_week()); + println!("collation: {:?}", icu_host_info::collation()); println!("-----"); println!( "unicode_extensions: {:?}", - HostInfo::unicode_extensions().unwrap().to_string() + icu_host_info::unicode_extensions().unwrap().to_string() ); #[cfg(feature = "datetime")] println!( "datetimeformatter_preferences: {:#?}", - HostInfo::datetime_preferences() + icu_host_info::datetime_preferences() ); } diff --git a/utils/host_info/src/backends/android.rs b/utils/host_info/src/backends/android.rs index 2b21df3a3ed..08d80d6e42d 100644 --- a/utils/host_info/src/backends/android.rs +++ b/utils/host_info/src/backends/android.rs @@ -3,9 +3,11 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{ - backends::{HostInfoBackend, RawHostInfoBackend}, + backends::{ + shared::posix::{raw_locale_categories, LocaleCategory}, + HostInfoBackend, RawHostInfoBackend, + }, error::HostInfoError, - posix::{raw_locale_categories, LocaleCategory}, }; pub struct AndroidHostInfoBackend; diff --git a/utils/host_info/src/backends/linux.rs b/utils/host_info/src/backends/linux.rs index c6be3ffafc5..e9cc0705876 100644 --- a/utils/host_info/src/backends/linux.rs +++ b/utils/host_info/src/backends/linux.rs @@ -3,9 +3,8 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use crate::{ - backends::{HostInfoBackend, RawHostInfoBackend}, + backends::{shared::posix::PosixLocale, HostInfoBackend, RawHostInfoBackend}, error::HostInfoError, - locale::PosixLocale, }; use icu_locale_core::{preferences::extensions::unicode::keywords::HourCycle, Locale}; diff --git a/utils/host_info/src/backends/mod.rs b/utils/host_info/src/backends/mod.rs index a11d5fc4b79..671ef0f56b4 100644 --- a/utils/host_info/src/backends/mod.rs +++ b/utils/host_info/src/backends/mod.rs @@ -32,6 +32,8 @@ use icu_locale_core::{ use crate::error::HostInfoError; +mod shared; + #[cfg(target_os = "android")] #[doc(hidden)] pub mod android; @@ -179,7 +181,7 @@ pub trait HostInfoBackend: RawHostInfoBackend { /// did not explicitly set a value for any of the preferences. /// For example, if the user set `en-US` as their preferred locale, and did not manually set `HourCycle` /// to any value, the host API may return hour cycle default value for en-US. -/// If possible, the implementation should attempt to distinguish between explicity set value that matches +/// If possible, the implementation should attempt to distinguish between explicitly set value that matches /// default for a given locale, from lack of explicit value set. /// /// If that is not possible, the API should return the value retrieved from the system for each field getter. @@ -221,7 +223,7 @@ pub trait RawHostInfoBackend { Ok(None) } - /// Attempt to retrieve measurement unut override set in the host regional preferences by the user. + /// Attempt to retrieve measurement unit override set in the host regional preferences by the user. /// /// This should retrieve `temperature` unit. fn raw_measurement_unit_override() -> Result, HostInfoError> { diff --git a/utils/host_info/src/backends/shared/mod.rs b/utils/host_info/src/backends/shared/mod.rs new file mode 100644 index 00000000000..52f2164a3f7 --- /dev/null +++ b/utils/host_info/src/backends/shared/mod.rs @@ -0,0 +1,5 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +pub mod posix; diff --git a/utils/host_info/src/posix.rs b/utils/host_info/src/backends/shared/posix.rs similarity index 99% rename from utils/host_info/src/posix.rs rename to utils/host_info/src/backends/shared/posix.rs index e5373a00c2f..eb4a137233a 100644 --- a/utils/host_info/src/posix.rs +++ b/utils/host_info/src/backends/shared/posix.rs @@ -10,7 +10,7 @@ use std::{collections::HashMap, ffi::CStr, ptr, str::FromStr}; use crate::error::HostInfoError; #[derive(Hash, Eq, PartialEq, Debug, Clone, Copy)] -pub enum LocaleCategory { +pub(crate) enum LocaleCategory { Character, // LC_CTYPE Number, // LC_NUMERIC Time, // LC_TIME diff --git a/utils/host_info/src/error.rs b/utils/host_info/src/error.rs index ce11a04d6b8..242ced1150f 100644 --- a/utils/host_info/src/error.rs +++ b/utils/host_info/src/error.rs @@ -16,7 +16,7 @@ pub enum HostInfoError { #[displaydoc("Error creating a `CString` from a buffer with a null terminator")] FromVecWithNul(FromVecWithNulError), - #[displaydoc("No backend matching backend have been identified")] + #[displaydoc("No matching backend has been identified")] UnavailableBackend, #[displaydoc("Unknown category when retrieving locale category for linux")] diff --git a/utils/host_info/src/host_info.rs b/utils/host_info/src/host_info.rs index 815957f8d2e..b29582150b5 100644 --- a/utils/host_info/src/host_info.rs +++ b/utils/host_info/src/host_info.rs @@ -19,7 +19,7 @@ use crate::{ use super::HostKind; -pub const RESOLVED_BACKEND: Option = { +pub(crate) const RESOLVED_BACKEND: Option = { #[cfg(target_os = "android")] { Some(HostKind::Android) @@ -52,192 +52,144 @@ pub const RESOLVED_BACKEND: Option = { } }; -/// Provides getters for common regional preferences from the host environment. +/// Retrieves `Unicode` extensions struct populated from host regional preferences. 
/// /// # Example /// -/// ```ignore -/// use icu_host_info::HostInfo; -/// use icu::calendar::Date; -/// use icu::datetime::{fieldsets, DateTimeFormatter}; -/// -/// let date = Date::try_new_gregorian(2025, 10, 10) -/// .expect("Failed to create date"); -/// -/// // requires feature `datetime` -/// let prefs = HostInfo::datetime_preferences() +/// ``` +/// let ue = icu_host_info::unicode_extensions() /// .expect("Failed to retrieve host info"); +/// ``` +pub fn unicode_extensions() -> Result { + backends::Impl::unicode_extensions() +} + +/// Retrieves `Preferences` object for `DateTimeFormatter`. /// -/// let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) -/// .expect("Failed to create datetime formatter."); -/// -/// let formatted_dt = dtf.format(&date); +/// # Example /// -/// assert_eq!(formatted_dt.to_string(), "October 10, 2025"); /// ``` -pub struct HostInfo; - -impl HostInfo { - /// Retrieves `Unicode` extensions struct populated from host regional preferences. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let ue = HostInfo::unicode_extensions() - /// .expect("Failed to retrieve host info"); - /// ``` - pub fn unicode_extensions() -> Result { - backends::Impl::unicode_extensions() - } - - /// Retrieves `Preferences` object for `DateTimeFormatter`. 
- /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let ue = HostInfo::datetime_preferences() - /// .expect("Failed to retrieve datetime preferences"); - /// ``` - #[cfg(feature = "datetime")] - pub fn datetime_preferences( - ) -> Result { - backends::Impl::datetime_preferences() - } +/// let ue = icu_host_info::datetime_preferences() +/// .expect("Failed to retrieve datetime preferences"); +/// ``` +#[cfg(feature = "datetime")] +pub fn datetime_preferences() -> Result { + backends::Impl::datetime_preferences() +} - /// Retrieves an ordered list of locales set as requested by the user in the host - /// environment regional preferences. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let locales = HostInfo::requested_locales() - /// .expect("Failed to retrieve requested locales"); - /// ``` - pub fn requested_locales() -> Result, HostInfoError> { - backends::Impl::requested_locales() - } +/// Retrieves an ordered list of locales set as requested by the user in the host +/// environment regional preferences. +/// +/// # Example +/// +/// ``` +/// let locales = icu_host_info::requested_locales() +/// .expect("Failed to retrieve requested locales"); +/// ``` +pub fn requested_locales() -> Result, HostInfoError> { + backends::Impl::requested_locales() +} - /// Retrieves a calendar preference. - /// - /// In `::unicode_extensions()` this field is being encoded as `ca`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::calendar() - /// .expect("Failed to retrieve calendar"); - /// ``` - pub fn calendar() -> Result, HostInfoError> { - backends::Impl::calendar() - } +/// Retrieves a calendar preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `ca`. 
+/// +/// # Example +/// +/// ``` +/// let calendar = icu_host_info::calendar() +/// .expect("Failed to retrieve calendar"); +/// ``` +pub fn calendar() -> Result, HostInfoError> { + backends::Impl::calendar() +} - /// Retrieves a region set in the host environment regional preferences. - /// - /// That region may be already populated into `requested_locales` or not, depending - /// on the host. - /// In `::unicode_extensions()` this field is being encoded as `rg`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::region() - /// .expect("Failed to retrieve region"); - /// ``` - pub fn region() -> Result, HostInfoError> { - backends::Impl::region() - } +/// Retrieves a region set in the host environment regional preferences. +/// +/// That region may be already populated into `requested_locales` or not, depending +/// on the host. +/// In `::unicode_extensions()` this field is being encoded as `rg`. +/// +/// # Example +/// +/// ``` +/// let region = icu_host_info::region() +/// .expect("Failed to retrieve region"); +/// ``` +pub fn region() -> Result, HostInfoError> { + backends::Impl::region() +} - /// Retrieves an hour_cycle preference. - /// - /// In `::unicode_extensions()` this field is being encoded as `hc`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::hour_cycle() - /// .expect("Failed to retrieve hour cycle"); - /// ``` - pub fn hour_cycle() -> Result, HostInfoError> { - backends::Impl::hour_cycle() - } +/// Retrieves an hour_cycle preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `hc`. +/// +/// # Example +/// +/// ``` +/// let hour_cycle = icu_host_info::hour_cycle() +/// .expect("Failed to retrieve hour cycle"); +/// ``` +pub fn hour_cycle() -> Result, HostInfoError> { + backends::Impl::hour_cycle() +} - /// Retrieves a measurement system preference. 
- /// - /// In `::unicode_extensions()` this field is being encoded as `ms`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::calendar() - /// .expect("Failed to retrieve calendar"); - /// ``` - pub fn measurement_system() -> Result, HostInfoError> { - backends::Impl::measurement_system() - } +/// Retrieves a measurement system preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `ms`. +/// +/// # Example +/// +/// ``` +/// let ms = icu_host_info::measurement_system() +/// .expect("Failed to retrieve measurement system"); +/// ``` +pub fn measurement_system() -> Result, HostInfoError> { + backends::Impl::measurement_system() +} - /// Retrieves a first day of week preference. - /// - /// In `::unicode_extensions()` this field is being encoded as `fd`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::first_day_of_week() - /// .expect("Failed to retrieve first day of week"); - /// ``` - pub fn first_day_of_week() -> Result, HostInfoError> { - backends::Impl::first_day_of_week() - } +/// Retrieves a first day of week preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `fd`. +/// +/// # Example +/// +/// ``` +/// let fd = icu_host_info::first_day_of_week() +/// .expect("Failed to retrieve first day of week"); +/// ``` +pub fn first_day_of_week() -> Result, HostInfoError> { + backends::Impl::first_day_of_week() +} - /// Retrieves a collation preference. - /// - /// In `::unicode_extensions()` this field is being encoded as `co`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::collation() - /// .expect("Failed to retrieve collation"); - /// ``` - pub fn collation() -> Result, HostInfoError> { - backends::Impl::collation() - } +/// Retrieves a collation preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `co`. 
+/// +/// # Example +/// +/// ``` +/// let collation = icu_host_info::collation() +/// .expect("Failed to retrieve collation"); +/// ``` +pub fn collation() -> Result, HostInfoError> { + backends::Impl::collation() +} - /// Retrieves measurement unit override preference. - /// - /// In `::unicode_extensions()` this field is being encoded as `mu`. - /// - /// # Example - /// - /// ``` - /// use icu_host_info::HostInfo; - /// - /// let region = HostInfo::measurement_unit_override() - /// .expect("Failed to retrieve measurement unit override"); - /// ``` - pub fn measurement_unit_override() -> Result, HostInfoError> { - backends::Impl::measurement_unit_override() - } +/// Retrieves measurement unit override preference. +/// +/// In `::unicode_extensions()` this field is being encoded as `mu`. +/// +/// # Example +/// +/// ``` +/// let mu = icu_host_info::measurement_unit_override() +/// .expect("Failed to retrieve measurement unit override"); +/// ``` +pub fn measurement_unit_override() -> Result, HostInfoError> { + backends::Impl::measurement_unit_override() +} - pub fn resolved_backend() -> Option { - RESOLVED_BACKEND - } +pub fn resolved_backend() -> Option { + RESOLVED_BACKEND } diff --git a/utils/host_info/src/lib.rs b/utils/host_info/src/lib.rs index 9aae370f107..cfbef1bbb6d 100644 --- a/utils/host_info/src/lib.rs +++ b/utils/host_info/src/lib.rs @@ -7,13 +7,13 @@ //! `host_info` is a library providing functionality to retrieve regional preferences //! from host environments - primarily the operating system the program is running in. //! -//! The library is designed to bind the different host environment preferences architectures -//! to ICU4X model. +//! The library is designed to bind the different host environment preference architectures +//! to the [`icu`] model. //! //! # Example //! //! ```ignore -//! use icu_host_info::HostInfo; +//! use icu_host_info; //! use icu::calendar::Date; //! use icu::datetime::{fieldsets, DateTimeFormatter}; //! 
@@ -21,7 +21,7 @@ //! .expect("Failed to create date"); //! //! // requires feature `datetime` -//! let prefs = HostInfo::datetime_preferences() +//! let prefs = icu_host_info::datetime_preferences() //! .expect("Failed to retrieve host info"); //! //! let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) @@ -35,49 +35,49 @@ //! # Feature Matrix //! //! The library intends to provide means to retrieve regional preferences -//! to ICU4X preferences with a focus on Unicode Extensions, but allow for +//! to [`icu`] preferences with a focus on Unicode Extensions, but allow for //! propagation of preferences offered by the host environments which may //! not have a representation in Unicode Extensions (for example: date format pattern). //! //! Legend: //! - ✅ = OS + `host_info` support -//! - ⚠️ = OS supports, `host_info` doesn't -//! - ❌ = OS doesn't supported +//! - 🚧 = OS supports, `host_info` doesn't +//! - ❌ = OS doesn't support //! //! | Feature | Android | iOS | Linux (1) | macOS | Windows | //! |---------------------| :-----: | :-: | :------------------: | :---: | :-----: | //! | Requested Locales | ✅ | ✅ | ✅ | ✅ | ✅ | -//! | Calendar | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -//! | Region | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -//! | Hour cycle | ⚠️ | ⚠️ | ✅ | ✅ | ⚠️ | -//! | Measurement System | ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | -//! | Measurement Override| ⚠️ | ⚠️ | ⚠️ | ✅ | ⚠️ | -//! | First Day of week | ⚠️ | ⚠️ | ⚠️ | ✅ | ✅ | -//! | Collation | ⚠️ | ⚠️ | ⚠️ | ✅ | ❌ | -//! | Date format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | -//! | Number format | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +//! | Calendar | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +//! | Region | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +//! | Hour cycle | 🚧 | 🚧 | ✅ | ✅ | 🚧 | +//! | Measurement System | 🚧 | 🚧 | 🚧 | ✅ | 🚧 | +//! | Measurement Override| 🚧 | 🚧 | 🚧 | ✅ | 🚧 | +//! | First Day of week | 🚧 | 🚧 | 🚧 | ✅ | ✅ | +//! | Collation | 🚧 | 🚧 | 🚧 | ✅ | ❌ | +//! | Date format | 🚧 | 🚧 | 🚧 | 🚧 | 🚧 | +//! | Number format | 🚧 | 🚧 | 🚧 | 🚧 | 🚧 | //! -//! 
(1) In case of Linux different DE's such as Gnoem and KDE are supported together. +//! (1) In the case of Linux, different desktop environments such as Gnome and KDE are supported together. //! -//! # Integrating preferences into ICU4X formatters +//! # Integrating preferences into ICU formatters //! //! The library provides three ways of injecting retrieved values into formatters: //! //! ## 1. Preference Bag //! -//! For most common components, such as `DateTimeFormatter`, the library exposes -//! a direct getter that retrieves a `Preferences` struct for that component. +//! For most common components, such as [`DateTimeFormatter`], the library exposes +//! a direct getter that retrieves a [`Preferences`] struct for that component. //! This getter is located behind a flag to allow for control over which dependencies are being //! pulled. //! //! ### Example //! //! ```ignore -//! use icu_host_info::HostInfo; +//! use icu_host_info; //! use icu::datetime::{fieldsets, DateTimeFormatter}; //! //! // requires feature `datetime` -//! let prefs = HostInfo::datetime_preferences() +//! let prefs = icu_host_info::datetime_preferences() //! .expect("Failed to retrieve host info"); //! //! let dtf = DateTimeFormatter::try_new(prefs, fieldsets::YMD::long()) @@ -86,7 +86,7 @@ //! //! ## 2. Locale //! -//! For all components that `HostInfo` does not have special preference getter for, +//! For all components that `icu_host_info` does not have special preference getter for, //! and for cases where the user prefers to avoid pulling extra dependencies at the cost //! of narrowing down the retrieved values to just ones encoded in Unicode Extensions, //! the library provides an ergonomic getter: @@ -94,19 +94,18 @@ //! ### Example //! //! ``` -//! use icu_host_info::HostInfo; //! use icu::{ //! datetime::{fieldsets, DateTimeFormatter}, //! locale::Locale, //! }; //! -//! let mut locale = HostInfo::requested_locales() +//! 
let mut locale = icu_host_info::requested_locales() //! .expect("Failed to retrieve locales") //! .first() //! .cloned() //! .unwrap_or(Locale::UNKNOWN); //! -//! locale.extensions.unicode = HostInfo::unicode_extensions() +//! locale.extensions.unicode = icu_host_info::unicode_extensions() //! .expect("Failed to retrieve host info"); //! //! let dtf = DateTimeFormatter::try_new(locale.into(), fieldsets::YMD::long()) @@ -127,28 +126,27 @@ //! ### Example //! //! ``` -//! use icu_host_info::HostInfo; //! use icu::locale::preferences::extensions::unicode::keywords::HourCycle; //! -//! let mut calendar: Option = HostInfo::hour_cycle() +//! let hour_cycle: Option = icu_host_info::hour_cycle() //! .expect("Failed to retrieve hour_cycle preference"); //! ``` //! //! # Locale Negotiation //! -//! Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU4X to +//! Locale Negotiation is an upcoming feature in ICU4X which will enable the system integrating ICU to //! perform a negotiation between requested locales, and locales for which the data is available in the system. -//! The output of `HostInfo` will be utilized in that negotiation allowing the deployment to 1) select +//! The output of `icu_host_info` will be utilized in that negotiation allowing the deployment to 1) select //! the most appropriate locales for the given user and target modality, 2) apply regional preferences onto that //! locale. //! -//! The need to allow `HostInfo` to be pluggable info locale negotiation and multi source merging (see next section) +//! The need to allow `icu_host_info` to be pluggable into locale negotiation and multi source merging (see next section) //! guided many design choices in this library. This section will be extended once locale negotiation is implemented. //! //! # Multi Source Merging //! -//! In simple systems the user will most often use ICU4X to format -//! 
some information in a selected locale, and use this library to augument +//! In simple systems the user will most often use ICU to format +//! some information in a selected locale, and use this library to augment //! the formatting with regional preferences set by the user in the host environment. //! //! In more complex systems, the user may also want to introduce a second source of regional preferences @@ -157,22 +155,22 @@ //! For example, a web browser may offer some regional preferences set in the browser //! itself, or even set separate for some contexts of the browser. //! -//! In those cases, the depoyment requires merging of the preferences. -//! ICU4X exposes an `extend` method on both `Preferences` and `Unicode` extensions struct. +//! In those cases, the deployment requires merging of the preferences. +//! ICU exposes an `extend` method on both [`Preferences`] and [`Unicode`] extensions struct. //! -//! This allows the system to retrieve [`HostInfo`] Preferences or `Unicode`, and applications' -//! equivalent, and merge of them. +//! This allows the system to retrieve Preferences or [`Unicode`], and the application's +//! equivalent, and merge them. //! //! ## `Preferences` Example //! //! ```ignore -//! use icu_host_info::HostInfo; +//! use icu_host_info; //! use icu::datetime::{fieldsets, DateTimeFormatter}; //! //! let app_prefs = app.datetime_preferences(); //! //! // requires feature `datetime` -//! let mut combined_prefs = HostInfo::datetime_preferences() +//! let mut combined_prefs = icu_host_info::datetime_preferences() //! .expect("Failed to retrieve host info"); //! //! combined_prefs.extend(app_prefs); @@ -184,7 +182,7 @@ //! ## `Unicode` Extensions Example //! //! ```ignore -//! use icu_host_info::HostInfo; +//! use icu_host_info; //! use icu::{ //! datetime::{fieldsets, DateTimeFormatter}, //! locale::locale, @@ -194,7 +192,7 @@ //! //! let app_ue = app.unicode_extensions(); //! -//! 
let mut combined_ue = HostInfo::unicode_extensions() +//! let mut combined_ue = icu_host_info::unicode_extensions() //! .expect("Failed to retrieve host info"); //! //! combined_ue.extend(app_ue); @@ -208,7 +206,7 @@ //! # Design Decisions //! //! The library operates on a boundary of diverse set of host -//! environments and uniformal ICU4X design derived from Unicode LDML. +//! environments and uniform ICU design derived from Unicode LDML. //! It requires a number of design tradeoffs that had to be made in //! order to achieve the uniformity and scale over time as the host //! platforms design evolves. @@ -224,8 +222,8 @@ //! ## Lossy Results //! //! The library makes best-effort to retrieve the values -//! that can be directly used in ICU4X. As the operating systems, -//! runtimes and ICU4X evolve, there's always a risk of a mismatch. +//! that can be directly used in ICU. As the operating systems, +//! runtimes and ICU evolve, there's always a risk of a mismatch. //! This library makes a design decision to be lossy-by-default. //! //! Any value that cannot be directly mapped onto a valid value is ignored @@ -240,15 +238,15 @@ //! //! ## Normalized vs Raw values //! -//! The main API of this library - [`HostInfo`] - provides methods that return normalized +//! The library provides methods that return normalized //! values, often directly taken from `icu::locale_core::preferences`. //! Per-host backends provide additional trait implementation that returns //! raw values, allowing the user to handle or introspect those values manually. -//! When using `HostInfo`, the library performs best-effort to normalize and parse -//! those raw values into canonical Unicode ICU4X representation, often discarding +//! When using `icu_host_info`, the library performs best-effort to normalize and parse +//! those raw values into canonical Unicode ICU representation, often discarding //! unknown values and values that fail to parse. //! -//! 
Those raw backends are not exposed in the documentation as the documentation. +//! Those raw backends are not exposed in the main documentation. //! //! ### Example //! @@ -278,16 +276,19 @@ //! A note for host API designers - it is useful for foundational libraries such as this to expose APIs that enable us //! to distinguish between regional preferences values derived by the host from defaults of a locale, from cases //! when the value is explicitly set by the user. -//! This dinstinction allows ICU4X to better serve in locale negotiations scenario where other-than-first locale may be used +//! This distinction allows ICU to better serve in locale negotiations scenario where other-than-first locale may be used //! and the deployment should respect whether the user set a given preference explicitly or left it to the per-locale default. //! +//! [`icu`]: https://docs.rs/icu/latest/icu/ +//! [`Unicode`]: https://docs.rs/icu_locale_core/latest/icu_locale_core/extensions/unicode/struct.Unicode.html +//! [`Preferences`]: https://docs.rs/icu_locale_core/latest/icu_locale_core/preferences/index.html +//! [`DateTimeFormatter`]: https://docs.rs/icu_datetime/latest/icu_datetime/struct.DateTimeFormatter.html pub mod backends; mod error; mod host_info; pub mod locale; -mod posix; -pub use host_info::HostInfo; +pub use host_info::*; /// Enumeration of known hosts. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]