-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft: Gettext Localization System #351
base: main
Are you sure you want to change the base?
Draft: Gettext Localization System #351
Conversation
.next() | ||
.expect("expected at least one locale to be present (at least the default)"); | ||
|
||
println!("Using locale {locale}"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove debug println
} | ||
|
||
Ok(if locale != &default_locale { | ||
// TODO: do we need to load if en-US, as it's the fallback? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove TODO, we already know the answer, it's NO
let lc_all = match std::env::var("LC_ALL") { | ||
Ok(var) => Some(var), | ||
Err(std::env::VarError::NotPresent) => None, | ||
// TODO: a proper error message |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will require a custom error type here I think.
|
||
Ok(if locale != &default_locale { | ||
// TODO: do we need to load if en-US, as it's the fallback? | ||
let f = std::fs::File::open(format!("messages.{locale}.mo"))?; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle where to search for messages. Probably need to handle NLSPATH
here.
From 01070bc4cb47f0ea7629f469d16f889ca6caa5b6 Mon Sep 17 00:00:00 2001
From: fox0 <[email protected]>
Date: Mon, 21 Oct 2024 19:35:30 +0700
Subject: [PATCH] 111
---
plib/src/i18n.rs | 259 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 187 insertions(+), 72 deletions(-)
diff --git a/plib/src/i18n.rs b/plib/src/i18n.rs
index 27036e3..d160372 100644
--- a/plib/src/i18n.rs
+++ b/plib/src/i18n.rs
@@ -1,87 +1,69 @@
+// SPDX-License-Identifier: MIT
+
+use std::env;
use std::ffi::CString;
+use std::fs::File;
+
+const LC_ALL: &str = "LC_ALL";
+const LC_CTYPE: &str = "LC_CTYPE";
+const LANG: &str = "LANG";
/// Initialize localization catalog.
pub fn initialize_catalog() -> Result<Option<gettext::Catalog>, Box<dyn std::error::Error>> {
- // TODO: load from available mo files
- let available_locales = &["en-US", "en-AU"];
-
- let mut requested_locales: Vec<String> = sys_locale::get_locales().into_iter().collect();
-
- let lc_all = match std::env::var("LC_ALL") {
- Ok(var) => Some(var),
- Err(std::env::VarError::NotPresent) => None,
- // TODO: a proper error message
- Err(error) => return Err(error.into()),
- };
-
- let lc_messages = match std::env::var("LC_MESSAGES") {
- Ok(var) => Some(var),
- Err(std::env::VarError::NotPresent) => None,
- // TODO: a proper error message
- Err(error) => return Err(error.into()),
- };
-
- let mut insert_highest_priority = |var: &Option<String>| {
- if let Some(var) = &var {
- let mut move_locale = None;
- for (i, locale) in requested_locales.iter().enumerate() {
- if locale == var {
- if i != 0 {
- move_locale = Some(i);
- }
- }
- }
- if let Some(i) = move_locale {
- let locale = requested_locales.remove(i);
- requested_locales.insert(0, locale);
- } else {
- requested_locales.insert(0, var.clone());
+ let mut locale = String::new();
+ for key in [LC_ALL, LC_CTYPE, LANG] {
+ match env::var(key) {
+ Ok(v) => {
+ locale = v;
+ break;
}
+ Err(_) => {}
}
- };
-
- insert_highest_priority(&lc_messages);
- insert_highest_priority(&lc_all);
-
- let default_locale: fluent_langneg::LanguageIdentifier = "en-US".parse().unwrap();
- let available_locales = fluent_langneg::convert_vec_str_to_langids_lossy(available_locales);
- let locale = fluent_langneg::negotiate_languages(
- &fluent_langneg::convert_vec_str_to_langids_lossy(requested_locales),
- &available_locales,
- Some(&default_locale),
- // Using Lookup strategy because gettext only allows one fallback :'(
- fluent_langneg::NegotiationStrategy::Lookup,
- )
- .into_iter()
- .next()
- .expect("expected at least one locale to be present (at least the default)");
+ }
+ if locale.is_empty() {
+ return Ok(None);
+ }
println!("Using locale {locale}");
-
- if let Some(lc_all) = lc_all {
- let lc_all_c = CString::new(lc_all)?.as_ptr();
- unsafe {
- libc::setlocale(libc::LC_ALL, lc_all_c);
- }
- } else {
- let lc_messages_c = CString::new(locale.to_string())?.as_ptr();
- unsafe {
- libc::setlocale(libc::LC_MESSAGES, lc_messages_c);
+ // if let Some(lc_all) = lc_all {
+ // let lc_all_c = CString::new(lc_all)?.as_ptr();
+ // unsafe {
+ // libc::setlocale(libc::LC_ALL, lc_all_c);
+ // }
+ // } else {
+ // let lc_messages_c = CString::new(locale.to_string())?.as_ptr();
+ //
+ // }
+
+ for i in parse_locales(locale) {
+ #[cfg(debug_assertions)]
+ let path = format!(
+ concat!("locale/{}/LC_MESSAGES/", env!("PROJECT_NAME"), ".mo"),
+ i
+ );
+ #[cfg(not(debug_assertions))]
+ let path = format!(
+ concat!(
+ "/usr/share/locale/{}/LC_MESSAGES/",
+ env!("PROJECT_NAME"),
+ ".mo"
+ ),
+ i
+ );
+ match File::open(path) {
+ Ok(f) => {
+ let catalog = gettext::Catalog::parse(f)?;
+ return Ok(Some(catalog));
+ }
+ Err(_) => {}
}
}
- Ok(if locale != &default_locale {
- // TODO: do we need to load if en-US, as it's the fallback?
- let f = std::fs::File::open(format!("messages.{locale}.mo"))?;
- let catalog = gettext::Catalog::parse(f)?;
- Some(catalog)
- } else {
- None
- })
+ Ok(None)
}
-/// Macro to initialize
+/// Macro to initialize
#[macro_export]
macro_rules! initialize_i18n {
() => {
@@ -89,9 +71,142 @@ macro_rules! initialize_i18n {
Ok(Some(catalog)) => {
tr::set_translator!(catalog);
Ok(())
- },
+ }
Ok(None) => Ok(()),
Err(error) => Err(error),
}
};
-}
\ No newline at end of file
+}
+
+/// The POSIX or "XPG" format is `language[_territory][.codeset]`
+///
+/// # Examples
+/// ```
+/// let (language, territory, codeset) = parse_lang("ru_RU.UTF-8".into());
+/// assert_eq!(language, "ru");
+/// assert_eq!(territory, Some("RU".into()));
+/// assert_eq!(codeset, Some("UTF-8".into()));
+/// ```
+fn parse_locale(lang: String) -> (String, Option<String>, Option<String>) {
+ if let Some((lang, codeset)) = lang.split_once('.') {
+ if let Some((lang, territory)) = lang.split_once('_') {
+ (
+ lang.to_string(),
+ Some(territory.to_string()),
+ Some(codeset.to_string()),
+ )
+ } else {
+ (lang.to_string(), None, Some(codeset.to_string()))
+ }
+ } else {
+ if let Some((lang, territory)) = lang.split_once('_') {
+ (lang.to_string(), Some(territory.to_string()), None)
+ } else {
+ (lang, None, None)
+ }
+ }
+}
+
+/// # Examples
+/// ```
+/// if let Some(lang) = std::env::var("LANG").ok() {
+/// let locales = parse_locales(lang);
+/// }
+/// ```
+// $ LANG="ru_RU.UTF-8" strace target/debug/time 2>&1 | grep openat
+// ...
+// openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
+// openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3
+// openat(AT_FDCWD, "/usr/share/locale/ru_RU.UTF-8/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+// openat(AT_FDCWD, "/usr/share/locale/ru_RU.utf8/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+// openat(AT_FDCWD, "/usr/share/locale/ru_RU/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+// openat(AT_FDCWD, "/usr/share/locale/ru.UTF-8/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+// openat(AT_FDCWD, "/usr/share/locale/ru.utf8/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+// openat(AT_FDCWD, "/usr/share/locale/ru/LC_MESSAGES/posixutils-rs.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
+fn parse_locales(lang: String) -> Vec<String> {
+ let mut locales = vec![];
+
+ let (language, territory, codeset) = parse_locale(lang);
+
+ // TODO /usr/lib/locale/locale-archive
+ // TODO /usr/share/locale/locale.alias
+
+ if let Some(ref territory) = territory {
+ if let Some(ref codeset) = codeset {
+ locales.push(format!("{}_{}.{}", language, territory, codeset));
+ locales.push(format!(
+ "{}_{}.{}",
+ language,
+ territory,
+ codeset.to_lowercase().replace("-", "")
+ ));
+ }
+ }
+ if let Some(territory) = territory {
+ locales.push(format!("{}_{}", language, territory));
+ }
+ if let Some(codeset) = codeset {
+ locales.push(format!("{}.{}", language, codeset));
+ locales.push(format!(
+ "{}.{}",
+ language,
+ codeset.to_lowercase().replace("-", "")
+ ));
+ }
+ locales.push(language);
+
+ locales
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_parse_lang1() {
+ let (language, territory, codeset) = parse_locale("ru_RU.UTF-8".into());
+ assert_eq!(language, "ru");
+ assert_eq!(territory, Some("RU".into()));
+ assert_eq!(codeset, Some("UTF-8".into()));
+ }
+
+ #[test]
+ fn test_parse_lang2() {
+ let (language, territory, codeset) = parse_locale("ru.UTF-8".into());
+ assert_eq!(language, "ru");
+ assert_eq!(territory, None);
+ assert_eq!(codeset, Some("UTF-8".into()));
+ }
+
+ #[test]
+ fn test_parse_lang3() {
+ let (language, territory, codeset) = parse_locale("en_AU".into());
+ assert_eq!(language, "en");
+ assert_eq!(territory, Some("AU".into()));
+ assert_eq!(codeset, None);
+ }
+
+ #[test]
+ fn test_parse_lang4() {
+ let (language, territory, codeset) = parse_locale("en".into());
+ assert_eq!(language, "en");
+ assert_eq!(territory, None);
+ assert_eq!(codeset, None);
+ }
+
+ #[test]
+ fn test_parse_locales() {
+ let result = parse_locales("ru_RU.UTF-8".into());
+ assert_eq!(
+ result,
+ vec![
+ "ru_RU.UTF-8",
+ "ru_RU.utf8",
+ "ru_RU",
+ "ru.UTF-8",
+ "ru.utf8",
+ "ru",
+ ]
+ );
+ }
+}
--
2.39.2 |
Initial reaction - it seems needlessly disruptive to search-and-replace gettext() calls with tr!(). The current code appears to be tested with un-mocked gettext by @fox0, making a cosmetic search and replace with tr!() disruptive. A better path forward would be source code compatible to the extent that |
@jgarzik Our current use of gettext has no standard support for string formatting for messages. We could depend on some runtime string formatting/templating library or implement our own. Or we could use a macro based string formatting library, which supports (or is theoretically able to support) compile time checks for arguments, and ideally one that also supports plurals in a convenient way (i.e. We try to work around this limitation when using Rust's standard library compile time string formatting macros, take this code for example: https://github.com/rustcoreutils/posixutils-rs/blob/main/tree/ls.rs#L739 println!("{} {}", gettext("total"), total_block_size); It assumes that for all languages, the gramatically correct way to print this message is to print the value after the word. However I'm not sure that's true. Using something like println!("{}", tr!("{} total", total_block_size));
Alternatively we could do it like this: println!("{}", gettext(format!("{} total", total_block_size))) However that's much more complicated parsing to attempt to extract the message. Or alternatively again, if we use some runtime string formatting solution: println!("{}", runtime_format(gettext("{} total"), total_block_size)) Probably the cleanest solution while keeping vanilla If you'd still prefer to keep things how they are instead, let me know and I'll take this PR in the direction of retaining the plain |
@jgarzik Or sorry perhaps I misunderstood you! You're still suggesting that we use tr! but use some kind of compile time transformation to transform vanilla |
I'd be happy to do the find and replace on any branch that @fox0 has going, it's completely automatable using grep and regex from what I can understand. |
My opinion is this: in the current code |
@fox0 Can you elaborate on why you consider it to be harmful? At the very least, as I understand it, our current use of static linking with |
Oh actually I see on second reading perhaps environment variables can be used to link against GNU gettext on Mac too https://github.com/gettext-rs/gettext-rs/tree/master/gettext-sys#environment-variables Anyway, this adds pain to what is otherwise a pleasant rust build experience. |
https://github.com/woboq/tr/blob/master/tr/Cargo.toml#L19 - inside is the same dependency that is currently used by the project. There is no point in using a wrapper dependency. |
Without more context, we could probably make the same statement about many other dependencies that we have in this project. Are you trying to say that the Do you have an alternative preferred proposal which covers the concerns outlined in #351 (comment) regarding string formatting, and #351 (comment) regarding licensing? Or do you think it's just not a problem? |
I'm happy to change my mind, willing to be convinced, but I don't understand what your reasoning is yet. |
#[macro_export]
macro_rules! gettext {
($literal:literal) => {
gettext($literal)
};
($literal:literal, $($arg:tt)*) => {
{
use dynfmt::Format;
let format = gettext($literal);
// or any runtime formatter
match dynfmt::SimpleCurlyFormat.format(&format, &[$($arg)*]) {
Ok(v) => v.into_owned(),
Err(e) => {
eprintln!("error gettext!: {e}");
format!($literal, $($arg)*) // fallback
}
}
}
};
} |
I think that's not a bad idea. Just as a point of comparison though, this introduces a new dependency |
Hmm it seems like |
#72
Draft!
TODO
xtr
.tr!()
macro (onlycal
has been done so far).