From 14f747019b0b6e4cca0e057a024f2618da714c38 Mon Sep 17 00:00:00 2001 From: Daniel Rainer Date: Fri, 19 Dec 2025 02:17:14 +0100 Subject: [PATCH] l10n: create localization/settings Extract the language selection code from the gettext crate, and to a lesser extent from `src/localization/mod.rs` and put it into `src/localization/settings.rs`. No functional changes are intended. Aside from better separation of concerns, this refactoring makes it feasible to reuse the language selection logic for Fluent later on. Part of #12190 --- crates/gettext/src/lib.rs | 295 +++--------------- src/localization/mod.rs | 185 +---------- src/localization/settings.rs | 414 +++++++++++++++++++++++++ tests/checks/message-localization.fish | 3 +- 4 files changed, 470 insertions(+), 427 deletions(-) create mode 100644 src/localization/settings.rs diff --git a/crates/gettext/src/lib.rs b/crates/gettext/src/lib.rs index 65c7e35da..05fb279e7 100644 --- a/crates/gettext/src/lib.rs +++ b/crates/gettext/src/lib.rs @@ -1,259 +1,20 @@ use fish_gettext_maps::CATALOGS; use once_cell::sync::Lazy; -use std::{collections::HashSet, sync::Mutex}; +use std::{ + collections::HashMap, + sync::{LazyLock, Mutex}, +}; type Catalog = &'static phf::Map<&'static str, &'static str>; -pub struct SetLanguageLints<'a> { - pub duplicates: Vec<&'a str>, - pub non_existing: Vec<&'a str>, -} - -#[derive(PartialEq, Eq, Clone, Copy)] -pub enum LanguagePrecedenceOrigin { - Default, - LocaleVariable(LocaleVariable), - LanguageEnvVar, - StatusLanguage, -} - -#[derive(PartialEq, Eq, Clone, Copy)] -pub enum LocaleVariable { - #[allow(clippy::upper_case_acronyms)] - LANG, - #[allow(non_camel_case_types)] - LC_MESSAGES, - #[allow(non_camel_case_types)] - LC_ALL, -} - -impl LocaleVariable { - fn as_language_precedence_origin(&self) -> LanguagePrecedenceOrigin { - LanguagePrecedenceOrigin::LocaleVariable(*self) - } - - pub fn as_str(&self) -> &'static str { - match self { - Self::LANG => "LANG", - Self::LC_MESSAGES => 
"LC_MESSAGES", - Self::LC_ALL => "LC_ALL", - } - } -} - -impl std::fmt::Display for LocaleVariable { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -struct InternalLocalizationState { - precedence_origin: LanguagePrecedenceOrigin, - language_precedence: Vec<(String, Catalog)>, -} - -pub struct PublicLocalizationState { - pub precedence_origin: LanguagePrecedenceOrigin, - pub language_precedence: Vec, -} - -/// Stores the current localization status. -/// `is_active` indicates whether localization is currently active, and the reason if it is -/// not. -/// The `origin` indicates where the values in `language_precedence` were taken from. -/// `language_precedence` stores the catalogs in the order they should be used. -/// -/// This struct should be updated when the relevant variables change or `status language` is used -/// to modify the localization state. -static LOCALIZATION_STATE: Lazy> = - Lazy::new(|| Mutex::new(InternalLocalizationState::new())); - -impl InternalLocalizationState { - fn new() -> Self { - Self { - precedence_origin: LanguagePrecedenceOrigin::Default, - language_precedence: vec![], - } - } - - fn to_public(&self) -> PublicLocalizationState { - PublicLocalizationState { - precedence_origin: self.precedence_origin, - language_precedence: self - .language_precedence - .iter() - .map(|(lang, _)| lang.to_owned()) - .collect(), - } - } - - fn update_from_env( - &mut self, - message_locale: Option<(LocaleVariable, String)>, - language_var: Option>, - ) { - // Do not override values set via `status language`. - if self.precedence_origin == LanguagePrecedenceOrigin::StatusLanguage { - return; - } - - if let Some((precedence_origin, locale)) = &message_locale { - // Regular locale names start with lowercase letters (`ll_CC`, followed by some suffix). - // The C or POSIX locale is special, and often used to disable localization. 
- // Their names are upper-case, but variants with suffixes (`C.UTF-8`) exist. - // To ensure that such variants are accounted for, we match on prefixes of the - // locale name. - // https://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02 - fn is_c_locale(locale: &str) -> bool { - locale.starts_with('C') || locale.starts_with("POSIX") - } - if is_c_locale(locale) { - self.precedence_origin = - LanguagePrecedenceOrigin::LocaleVariable(*precedence_origin); - self.language_precedence.clear(); - return; - } - } - - let (precedence_origin, language_list) = if let Some(list) = language_var { - (LanguagePrecedenceOrigin::LanguageEnvVar, list) - } else if let Some((precedence_origin, locale)) = message_locale { - let mut normalized_name = String::new(); - // Strip off encoding and modifier. (We always expect UTF-8 and don't support modifiers.) - for c in locale.chars() { - if c.is_alphabetic() || c == '_' { - normalized_name.push(c); - } else { - break; - } - } - // At this point, the normalized_name should have the shape `ll` or `ll_CC`. 
- ( - precedence_origin.as_language_precedence_origin(), - vec![normalized_name], - ) - } else { - (LanguagePrecedenceOrigin::Default, vec![]) - }; - - let mut seen_languages = HashSet::new(); - self.language_precedence = language_list - .into_iter() - .flat_map(|lang| find_existing_catalogs(&lang)) - .filter(|(lang, _)| seen_languages.insert(lang.to_owned())) - .collect(); - self.precedence_origin = precedence_origin; - } - - fn update_from_status_language_builtin<'a, 'b: 'a, S: AsRef + 'a>( - &mut self, - langs: &'b [S], - ) -> SetLanguageLints<'a> { - let mut seen = HashSet::new(); - let mut duplicates = vec![]; - for lang in langs { - let lang = lang.as_ref(); - if !seen.insert(lang) { - duplicates.push(lang) - } - } - let mut existing_langs = vec![]; - let mut non_existing = vec![]; - for lang in langs { - let lang = lang.as_ref(); - if let Some(catalog) = CATALOGS.get(lang) { - existing_langs.push((lang.to_owned(), *catalog)); - } else { - non_existing.push(lang); - } - } - - let mut seen = HashSet::new(); - let unique_langs = existing_langs - .into_iter() - .filter(|(lang, _)| seen.insert(lang.to_owned())) - .collect(); - self.language_precedence = unique_langs; - self.precedence_origin = LanguagePrecedenceOrigin::StatusLanguage; - - SetLanguageLints { - duplicates, - non_existing, - } - } -} - -/// Tries to find catalogs for `language`. -/// `language` must be an ISO 639 language code, optionally followed by an underscore and an ISO -/// 3166 country/territory code. -/// Uses the catalog with the exact same name as `language` if it exists. -/// If a country code is present (`ll_CC`), only the catalog named `ll` will be considered as a fallback. -/// If no country code is present (`ll`), all catalogs whose names start with `ll_` will be used in -/// arbitrary order. -fn find_existing_catalogs(language: &str) -> Vec<(String, Catalog)> { - // Try the exact name first. - // If there already is a corresponding catalog return the language. 
- if let Some(catalog) = CATALOGS.get(language) { - return vec![(language.to_owned(), catalog)]; - } - let language_without_country_code = language.split_once('_').map_or(language, |(ll, _cc)| ll); - if language == language_without_country_code { - // We have `ll` format. In this case, try to find any catalog whose name starts with `ll_`. - // Note that it is important to include the underscore in the pattern, otherwise `ll` might - // fall back to `llx_CC`, where `llx` is a 3-letter language identifier. - let ll_prefix = format!("{language}_"); - let mut lang_catalogs = vec![]; - for (&lang_name, &catalog) in CATALOGS.entries() { - if lang_name.starts_with(&ll_prefix) { - lang_catalogs.push((lang_name.to_owned(), catalog)); - } - } - lang_catalogs - } else { - // If `language` contained a country code, we only try to fall back to a catalog - // without a country code. - if let Some(catalog) = CATALOGS.get(language_without_country_code) { - vec![(language_without_country_code.to_owned(), catalog)] - } else { - vec![] - } - } -} - -pub fn update_from_env( - locale: Option<(LocaleVariable, String)>, - language_var: Option>, -) { - let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); - localization_state.update_from_env(locale, language_var); -} - -pub fn update_from_status_language_builtin<'a, 'b: 'a, S: AsRef + 'a>( - langs: &'b [S], -) -> SetLanguageLints<'a> { - let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); - localization_state.update_from_status_language_builtin(langs) -} - -pub fn unset_from_status_language_builtin( - locale: Option<(LocaleVariable, String)>, - language_var: Option>, -) { - let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); - localization_state.precedence_origin = LanguagePrecedenceOrigin::Default; - localization_state.update_from_env(locale, language_var); -} - -pub fn status_language() -> PublicLocalizationState { - let localization_state = LOCALIZATION_STATE.lock().unwrap(); - 
localization_state.to_public() -} +static LANGUAGE_PRECEDENCE: Lazy>> = + Lazy::new(|| Mutex::new(vec![])); pub fn gettext(message_str: &'static str) -> Option<&'static str> { - let localization_state = LOCALIZATION_STATE.lock().unwrap(); + let language_precedence = LANGUAGE_PRECEDENCE.lock().unwrap(); // Use the localization from the highest-precedence language that has one available. - for (_, catalog) in localization_state.language_precedence.iter() { + for (_, catalog) in language_precedence.iter() { if let Some(localized_str) = catalog.get(message_str) { return Some(localized_str); } @@ -261,8 +22,40 @@ pub fn gettext(message_str: &'static str) -> Option<&'static str> { None } -pub fn list_available_languages() -> Vec<&'static str> { - let mut langs: Vec<_> = CATALOGS.entries().map(|(&lang, _)| lang).collect(); - langs.sort(); - langs +#[derive(Clone, Copy)] +pub struct GettextLocalizationLanguage { + language: &'static str, +} + +static AVAILABLE_LANGUAGES: LazyLock> = + LazyLock::new(|| { + HashMap::from_iter( + CATALOGS + .entries() + .map(|(&language, _)| (language, GettextLocalizationLanguage { language })), + ) + }); + +pub fn get_available_languages() -> &'static HashMap<&'static str, GettextLocalizationLanguage> { + &AVAILABLE_LANGUAGES +} + +pub fn set_language_precedence(new_precedence: &[GettextLocalizationLanguage]) { + let catalogs = new_precedence + .iter() + .map(|lang| { + ( + lang.language, + *CATALOGS + .get(lang.language) + .expect("Only languages for which catalogs exist may be passed to gettext."), + ) + }) + .collect(); + *LANGUAGE_PRECEDENCE.lock().unwrap() = catalogs; +} + +pub fn get_language_precedence() -> Vec<&'static str> { + let language_precedence = LANGUAGE_PRECEDENCE.lock().unwrap(); + language_precedence.iter().map(|&(lang, _)| lang).collect() } diff --git a/src/localization/mod.rs b/src/localization/mod.rs index eac6ee206..e78eb1e68 100644 --- a/src/localization/mod.rs +++ b/src/localization/mod.rs @@ -1,180 +1,16 @@ 
+#[cfg(feature = "localize-messages")] +use crate::env::EnvStack; +use fish_wchar::{L, WString, wstr}; +use once_cell::sync::Lazy; use std::sync::Mutex; #[cfg(feature = "localize-messages")] -use crate::env::{EnvStack, Environment}; -use crate::prelude::*; -use once_cell::sync::Lazy; - +mod settings; #[cfg(feature = "localize-messages")] -fn get_message_locale(vars: &EnvStack) -> Option<(fish_gettext::LocaleVariable, String)> { - use fish_gettext::LocaleVariable; - let get = |var_str: &wstr, var: LocaleVariable| { - vars.get_unless_empty(var_str) - .map(|val| (var, val.as_string().to_string())) - }; - get(L!("LC_ALL"), LocaleVariable::LC_ALL) - .or_else(|| get(L!("LC_MESSAGES"), LocaleVariable::LC_MESSAGES)) - .or_else(|| get(L!("LANG"), LocaleVariable::LANG)) -} - -#[cfg(feature = "localize-messages")] -fn get_language_var(vars: &EnvStack) -> Option> { - let langs = vars.get_unless_empty(L!("LANGUAGE"))?; - let langs = langs.as_list(); - let filtered_langs: Vec = langs - .iter() - .filter(|lang| !lang.is_empty()) - .map(|lang| lang.to_string()) - .collect(); - if filtered_langs.is_empty() { - return None; - } - Some(filtered_langs) -} - -/// Call this when one of `LANGUAGE`, `LC_ALL`, `LC_MESSAGES`, `LANG` changes. -/// Updates internal state such that the correct localizations will be used in subsequent -/// localization requests. -/// -/// For deciding how to localize, the following is done: -/// -/// 1. If the language precedence was set via `status language`, env vars are ignored. -/// 2. Check the first non-empty value of the env vars `LC_ALL`, `LC_MESSAGES`, `LANG`. If it -/// starts with `C` we consider this a C locale and disable localization. -/// 3. Otherwise, the value of the `LANGUAGE` env var is used, if non-empty. This allows specifying -/// multiple languages, with languages specified first taking precedence, e.g. -/// `LANGUAGE=zh_TW:zh_CN:pt_BR` -/// 4. 
Otherwise, the first non-empty value of the env vars `LC_ALL`, `LC_MESSAGES`, `LANG` is -/// used. This can only specify a single language, e.g. `LANG=de_AT.UTF-8`. -/// There, we normalize locale names by stripping off the suffix, leaving only the `ll_CC` part. -/// 5. Otherwise, localization will not happen. -/// -/// If users specify `ll_CC` as a language and we don't have a catalog for this language, but we -/// have one for `ll`, that will be used instead. If users specify `ll` (without specifying a -/// language variant), which we discourage, and we don't have a catalog for `ll`, but we do have -/// one for `ll_CC`, that will be used as a fallback. If we have multiple `ll_*` catalogs, all of -/// them will be used, in arbitrary order. -#[cfg(feature = "localize-messages")] -pub fn update_from_env(vars: &EnvStack) { - fish_gettext::update_from_env(get_message_locale(vars), get_language_var(vars)); -} - -#[cfg(feature = "localize-messages")] -fn append_space_separated_list>( - string: &mut WString, - list: impl IntoIterator, -) { - for lang in list.into_iter() { - string.push(' '); - string.push_utfstr(&crate::common::escape( - WString::from_str(lang.as_ref()).as_utfstr(), - )); - } -} - -#[cfg(feature = "localize-messages")] -pub struct SetLanguageLints<'a> { - duplicates: Vec<&'a str>, - non_existing: Vec<&'a str>, -} - -#[cfg(feature = "localize-messages")] -impl<'a> From> for SetLanguageLints<'a> { - fn from(lints: fish_gettext::SetLanguageLints<'a>) -> Self { - Self { - duplicates: lints.duplicates, - non_existing: lints.non_existing, - } - } -} - -#[cfg(feature = "localize-messages")] -impl<'a> SetLanguageLints<'a> { - pub fn display_duplicates(&self) -> WString { - let mut result = WString::new(); - if self.duplicates.is_empty() { - return result; - } - result.push_utfstr(wgettext!("Language specifiers appear repeatedly:")); - append_space_separated_list(&mut result, &self.duplicates); - result.push('\n'); - result - } - - pub fn 
display_non_existing(&self) -> WString { - let mut result = WString::new(); - if self.non_existing.is_empty() { - return result; - } - result.push_utfstr(wgettext!("No catalogs available for language specifiers:")); - append_space_separated_list(&mut result, &self.non_existing); - result.push('\n'); - result - } - - pub fn display_all(&self) -> WString { - let mut result = WString::new(); - result.push_utfstr(&self.display_duplicates()); - result.push_utfstr(&self.display_non_existing()); - result - } -} -/// Call this when the `status language` builtin should update the language precedence. -/// `langs` should be the list of languages the precedence should be set to. -#[cfg(feature = "localize-messages")] -pub fn update_from_status_language_builtin<'a, 'b: 'a, S: AsRef + 'a>( - langs: &'b [S], -) -> SetLanguageLints<'a> { - fish_gettext::update_from_status_language_builtin(langs).into() -} - -#[cfg(feature = "localize-messages")] -pub fn unset_from_status_language_builtin(vars: &EnvStack) { - fish_gettext::unset_from_status_language_builtin( - get_message_locale(vars), - get_language_var(vars), - ); -} - -#[cfg(feature = "localize-messages")] -pub fn status_language() -> WString { - use fish_gettext::LanguagePrecedenceOrigin; - let localization_state = fish_gettext::status_language(); - let mut result = WString::new(); - localizable_consts!( - LANGUAGE_LIST_VARIABLE_ORIGIN "%s variable" - ); - let origin_string = match localization_state.precedence_origin { - LanguagePrecedenceOrigin::Default => wgettext!("default").to_owned(), - LanguagePrecedenceOrigin::LocaleVariable(var) => { - wgettext_fmt!(LANGUAGE_LIST_VARIABLE_ORIGIN, var.as_str()) - } - LanguagePrecedenceOrigin::LanguageEnvVar => { - wgettext_fmt!(LANGUAGE_LIST_VARIABLE_ORIGIN, "LANGUAGE") - } - LanguagePrecedenceOrigin::StatusLanguage => { - wgettext_fmt!("%s command", "`status language set`") - } - }; - result.push_utfstr(&wgettext_fmt!( - "Active languages (source: %s):", - origin_string - )); - 
append_space_separated_list(&mut result, &localization_state.language_precedence); - result.push('\n'); - - result -} - -#[cfg(feature = "localize-messages")] -pub fn list_available_languages() -> WString { - let mut languages = WString::new(); - for lang in fish_gettext::list_available_languages() { - languages.push_str(lang); - languages.push('\n'); - } - languages -} +pub use settings::{ + list_available_languages, status_language, unset_from_status_language_builtin, update_from_env, + update_from_status_language_builtin, +}; #[cfg(not(feature = "localize-messages"))] pub fn initialize_gettext() {} @@ -188,8 +24,7 @@ pub fn initialize_gettext() { env_stack_set_from_env!(vars, "LC_ALL"); env_stack_set_from_env!(vars, "LC_MESSAGES"); env_stack_set_from_env!(vars, "LANG"); - - fish_gettext::update_from_env(get_message_locale(&vars), get_language_var(&vars)); + update_from_env(&vars); } /// Use this function to localize a message. diff --git a/src/localization/settings.rs b/src/localization/settings.rs new file mode 100644 index 000000000..92856e9ee --- /dev/null +++ b/src/localization/settings.rs @@ -0,0 +1,414 @@ +use super::{localizable_consts, localizable_string, wgettext, wgettext_fmt}; +use crate::env::{EnvStack, Environment}; +use fish_wchar::{L, WString, wstr}; +use once_cell::sync::Lazy; +use std::collections::{HashMap, HashSet}; +use std::sync::Mutex; + +#[derive(PartialEq, Eq, Clone, Copy)] +enum LanguagePrecedenceOrigin { + Default, + LocaleVariable(LocaleVariable), + LanguageEnvVar, + StatusLanguage, +} + +#[derive(PartialEq, Eq, Clone, Copy)] +enum LocaleVariable { + #[allow(clippy::upper_case_acronyms)] + LANG, + #[allow(non_camel_case_types)] + LC_MESSAGES, + #[allow(non_camel_case_types)] + LC_ALL, +} + +impl LocaleVariable { + fn as_language_precedence_origin(&self) -> LanguagePrecedenceOrigin { + LanguagePrecedenceOrigin::LocaleVariable(*self) + } + + fn as_str(&self) -> &'static str { + match self { + Self::LANG => "LANG", + Self::LC_MESSAGES => 
"LC_MESSAGES", + Self::LC_ALL => "LC_ALL", + } + } +} + +impl std::fmt::Display for LocaleVariable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +struct LocalizationVariables { + message_locale: Option<(LocaleVariable, String)>, + language: Option>, +} + +impl LocalizationVariables { + fn get_message_locale(env: &EnvStack) -> Option<(LocaleVariable, String)> { + let get = |var_str: &wstr, var: LocaleVariable| { + env.get_unless_empty(var_str) + .map(|val| (var, val.as_string().to_string())) + }; + get(L!("LC_ALL"), LocaleVariable::LC_ALL) + .or_else(|| get(L!("LC_MESSAGES"), LocaleVariable::LC_MESSAGES)) + .or_else(|| get(L!("LANG"), LocaleVariable::LANG)) + } + + fn get_language_var(env: &EnvStack) -> Option> { + let langs = env.get_unless_empty(L!("LANGUAGE"))?; + let langs = langs.as_list(); + let filtered_langs: Vec = langs + .iter() + .filter(|lang| !lang.is_empty()) + .map(|lang| lang.to_string()) + .collect(); + if filtered_langs.is_empty() { + return None; + } + Some(filtered_langs) + } + + fn from_env(env: &EnvStack) -> Self { + Self { + message_locale: Self::get_message_locale(env), + language: Self::get_language_var(env), + } + } +} + +fn append_space_separated_list>( + string: &mut WString, + list: impl IntoIterator, +) { + for lang in list.into_iter() { + string.push(' '); + string.push_utfstr(&crate::common::escape( + WString::from_str(lang.as_ref()).as_utfstr(), + )); + } +} + +pub struct SetLanguageLints<'a> { + duplicates: Vec<&'a str>, + non_existing: Vec<&'a str>, +} + +impl<'a> SetLanguageLints<'a> { + fn display_duplicates(&self) -> WString { + let mut result = WString::new(); + if self.duplicates.is_empty() { + return result; + } + result.push_utfstr(wgettext!("Language specifiers appear repeatedly:")); + append_space_separated_list(&mut result, &self.duplicates); + result.push('\n'); + result + } + + fn display_non_existing(&self) -> WString { + let mut result = 
WString::new(); + if self.non_existing.is_empty() { + return result; + } + result.push_utfstr(wgettext!("No catalogs available for language specifiers:")); + append_space_separated_list(&mut result, &self.non_existing); + result.push('\n'); + result + } + + pub fn display_all(&self) -> WString { + let mut result = WString::new(); + result.push_utfstr(&self.display_duplicates()); + result.push_utfstr(&self.display_non_existing()); + result + } +} + +struct LocalizationState { + precedence_origin: LanguagePrecedenceOrigin, +} + +impl LocalizationState { + fn new() -> Self { + Self { + precedence_origin: LanguagePrecedenceOrigin::Default, + } + } + + /// Tries to find catalogs for `language`. + /// `language` must be an ISO 639 language code, optionally followed by an underscore and an ISO + /// 3166 country/territory code. + /// Uses the catalog with the exact same name as `language` if it exists. + /// If a country code is present (`ll_CC`), only the catalog named `ll` will be considered as a fallback. + /// If no country code is present (`ll`), all catalogs whose names start with `ll_` will be used in + /// arbitrary order. + fn find_best_matches<'a, 'b: 'a, L: Copy>( + language: &str, + available_languages: &'a HashMap<&'b str, L>, + ) -> Vec<(&'b str, L)> { + // Try the exact name first. + // If there already is a corresponding catalog return the language. + if let Some((&lang_str, &lang_value)) = available_languages.get_key_value(language) { + return vec![(lang_str, lang_value)]; + } + let language_without_country_code = + language.split_once('_').map_or(language, |(ll, _cc)| ll); + if language == language_without_country_code { + // We have `ll` format. In this case, try to find any catalog whose name starts with `ll_`. + // Note that it is important to include the underscore in the pattern, otherwise `ll` might + // fall back to `llx_CC`, where `llx` is a 3-letter language identifier. 
+ let ll_prefix = format!("{language}_"); + let mut lang_catalogs = vec![]; + for (&lang_str, &localization_lang) in available_languages.iter() { + if lang_str.starts_with(&ll_prefix) { + lang_catalogs.push((lang_str, localization_lang)); + } + } + lang_catalogs + } else { + // If `language` contained a country code, we only try to fall back to a catalog + // without a country code. + if let Some((&lang_str, &lang_value)) = + available_languages.get_key_value(language_without_country_code) + { + vec![(lang_str, lang_value)] + } else { + vec![] + } + } + } + + fn update_from_env(&mut self, localization_vars: LocalizationVariables) { + // Do not override values set via `status language`. + if self.precedence_origin == LanguagePrecedenceOrigin::StatusLanguage { + return; + } + + if let Some((precedence_origin, locale)) = &localization_vars.message_locale { + // Regular locale names start with lowercase letters (`ll_CC`, followed by some suffix). + // The C or POSIX locale is special, and often used to disable localization. + // Their names are upper-case, but variants with suffixes (`C.UTF-8`) exist. + // To ensure that such variants are accounted for, we match on prefixes of the + // locale name. + // https://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02 + fn is_c_locale(locale: &str) -> bool { + locale.starts_with('C') || locale.starts_with("POSIX") + } + if is_c_locale(locale) { + self.precedence_origin = + LanguagePrecedenceOrigin::LocaleVariable(*precedence_origin); + fish_gettext::set_language_precedence(&[]); + return; + } + } + + let (precedence_origin, language_list) = if let Some(list) = localization_vars.language { + (LanguagePrecedenceOrigin::LanguageEnvVar, list) + } else if let Some((precedence_origin, locale)) = &localization_vars.message_locale { + // Strip off encoding and modifier. (We always expect UTF-8 and don't support modifiers.) 
+ let normalized_name = locale + .split_once(|c: char| !(c.is_ascii_alphabetic() || c == '_')) + .map_or(locale.as_str(), |(lang_name, _)| lang_name) + .to_owned(); + // At this point, the normalized_name should have the shape `ll` or `ll_CC`. + ( + precedence_origin.as_language_precedence_origin(), + vec![normalized_name], + ) + } else { + (LanguagePrecedenceOrigin::Default, vec![]) + }; + fn update_precedence<'a, 'b: 'a, LocalizationLanguage: Copy + 'a>( + language_list: &[String], + get_available_languages: fn() -> &'a HashMap<&'b str, LocalizationLanguage>, + set_language_precedence: fn(&[LocalizationLanguage]), + ) { + let available_langs = get_available_languages(); + let mut seen_languages = HashSet::new(); + let language_precedence: Vec<_> = language_list + .iter() + .flat_map(|lang| LocalizationState::find_best_matches(lang, available_langs)) + .filter(|&(lang_str, _)| seen_languages.insert(lang_str)) + .map(|(_, localization_lang)| localization_lang) + .collect(); + set_language_precedence(&language_precedence); + } + update_precedence( + &language_list, + fish_gettext::get_available_languages, + fish_gettext::set_language_precedence, + ); + self.precedence_origin = precedence_origin; + } + + fn update_from_status_language_builtin<'a, 'b: 'a, S: AsRef + 'a>( + &mut self, + langs: &'b [S], + ) -> SetLanguageLints<'a> { + let mut seen_in_input = HashSet::new(); + let mut unique_lang_strs = vec![]; + let mut duplicates = vec![]; + for lang in langs { + let lang = lang.as_ref(); + if seen_in_input.insert(lang) { + unique_lang_strs.push(lang); + } else { + duplicates.push(lang) + } + } + let mut all_available_langs = HashSet::new(); + fn update_precedence<'a, 'b, 'c: 'a + 'b, LocalizationLanguage: Copy + 'a>( + unique_lang_strs: &[&str], + get_available_languages: fn() -> &'a HashMap<&'c str, LocalizationLanguage>, + set_language_precedence: fn(&[LocalizationLanguage]), + all_available_langs: &'b mut HashSet<&'c str>, + ) { + let available_langs = 
get_available_languages(); + for &lang in available_langs.keys() { + all_available_langs.insert(lang); + } + let mut existing_langs = vec![]; + for lang in unique_lang_strs { + if let Some((&lang_str, &lang_value)) = available_langs.get_key_value(lang) { + existing_langs.push((lang_str, lang_value)); + } + } + + let mut seen = HashSet::new(); + let unique_langs: Vec<_> = existing_langs + .into_iter() + .filter(|&(lang, _)| seen.insert(lang)) + .map(|(_, localization_lang)| localization_lang) + .collect(); + set_language_precedence(&unique_langs); + } + update_precedence( + &unique_lang_strs, + fish_gettext::get_available_languages, + fish_gettext::set_language_precedence, + &mut all_available_langs, + ); + + self.precedence_origin = LanguagePrecedenceOrigin::StatusLanguage; + + let mut seen_non_existing = HashSet::new(); + let non_existing: Vec<&str> = langs + .iter() + .map(|lang| lang.as_ref()) + .filter(|&lang| !all_available_langs.contains(lang) && seen_non_existing.insert(lang)) + .collect(); + + SetLanguageLints { + duplicates, + non_existing, + } + } +} + +/// Stores the current localization status. +/// `precedence_origin` indicates where the active language precedence was taken from +/// (default, a locale variable, the `LANGUAGE` variable, or `status language`). +/// The language precedence itself is not stored here; it is handed to `fish_gettext` via +/// `fish_gettext::set_language_precedence`. +/// +/// This struct should be updated when the relevant variables change or `status language` is used +/// to modify the localization state. +static LOCALIZATION_STATE: Lazy> = + Lazy::new(|| Mutex::new(LocalizationState::new())); + +/// Call this when one of `LANGUAGE`, `LC_ALL`, `LC_MESSAGES`, `LANG` changes. +/// Updates internal state such that the correct localizations will be used in subsequent +/// localization requests. +/// +/// For deciding how to localize, the following is done: +/// +/// 1. If the language precedence was set via `status language`, env vars are ignored. +/// 2. 
Check the first non-empty value of the env vars `LC_ALL`, `LC_MESSAGES`, `LANG`. If it +/// starts with `C` or `POSIX` we consider this a C locale and disable localization. +/// 3. Otherwise, the value of the `LANGUAGE` env var is used, if non-empty. This allows specifying +/// multiple languages, with languages specified first taking precedence, e.g. +/// `LANGUAGE=zh_TW:zh_CN:pt_BR` +/// 4. Otherwise, the first non-empty value of the env vars `LC_ALL`, `LC_MESSAGES`, `LANG` is +/// used. This can only specify a single language, e.g. `LANG=de_AT.UTF-8`. +/// There, we normalize locale names by stripping off the suffix, leaving only the `ll_CC` part. +/// 5. Otherwise, localization will not happen. +/// +/// If users specify `ll_CC` as a language and we don't have a catalog for this language, but we +/// have one for `ll`, that will be used instead. If users specify `ll` (without specifying a +/// language variant), which we discourage, and we don't have a catalog for `ll`, but we do have +/// one for `ll_CC`, that will be used as a fallback. If we have multiple `ll_*` catalogs, all of +/// them will be used, in arbitrary order. +pub fn update_from_env(env: &EnvStack) { + let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); + localization_state.update_from_env(LocalizationVariables::from_env(env)); +} + +/// Call this when the `status language` builtin should update the language precedence. +/// `langs` should be the list of languages the precedence should be set to. 
+pub fn update_from_status_language_builtin<'a, 'b: 'a, S: AsRef + 'a>( + langs: &'b [S], +) -> SetLanguageLints<'a> { + let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); + localization_state.update_from_status_language_builtin(langs) +} + +pub fn unset_from_status_language_builtin(env: &EnvStack) { + let mut localization_state = LOCALIZATION_STATE.lock().unwrap(); + localization_state.precedence_origin = LanguagePrecedenceOrigin::Default; + localization_state.update_from_env(LocalizationVariables::from_env(env)); +} + +pub fn status_language() -> WString { + let localization_state = LOCALIZATION_STATE.lock().unwrap(); + let mut result = WString::new(); + localizable_consts!( + LANGUAGE_LIST_VARIABLE_ORIGIN "%s variable" + ); + let origin_string = match localization_state.precedence_origin { + LanguagePrecedenceOrigin::Default => wgettext!("default").to_owned(), + LanguagePrecedenceOrigin::LocaleVariable(var) => { + wgettext_fmt!(LANGUAGE_LIST_VARIABLE_ORIGIN, var.as_str()) + } + LanguagePrecedenceOrigin::LanguageEnvVar => { + wgettext_fmt!(LANGUAGE_LIST_VARIABLE_ORIGIN, "LANGUAGE") + } + LanguagePrecedenceOrigin::StatusLanguage => { + wgettext_fmt!("%s command", "`status language set`") + } + }; + result.push_utfstr(&wgettext_fmt!( + "Active languages (source: %s):", + origin_string + )); + let gettext_language_precedence = fish_gettext::get_language_precedence(); + append_space_separated_list(&mut result, &gettext_language_precedence); + result.push('\n'); + + result +} + +pub fn list_available_languages() -> WString { + let mut language_set = HashSet::new(); + fn add_languages<'a, 'b: 'a, LocalizationLanguage: 'a>( + language_set: &mut HashSet<&'b str>, + get_available_languages: fn() -> &'a HashMap<&'b str, LocalizationLanguage>, + ) { + for &lang in get_available_languages().keys() { + language_set.insert(lang); + } + } + add_languages(&mut language_set, fish_gettext::get_available_languages); + let mut language_list = 
Vec::from_iter(language_set); + language_list.sort(); + let mut languages = WString::new(); + for lang in language_list { + languages.push_str(lang); + languages.push('\n'); + } + languages +} diff --git a/tests/checks/message-localization.fish b/tests/checks/message-localization.fish index 04f524704..0794c555c 100644 --- a/tests/checks/message-localization.fish +++ b/tests/checks/message-localization.fish @@ -177,7 +177,8 @@ status language set asdf # CHECKERR: No catalogs available for language specifiers: asdf # This will have to be changed if we add catalogs for languages used here. -status language set zh_HK it_IT +status language set zh_HK it_IT zh_HK +# CHECKERR: Language specifiers appear repeatedly: zh_HK # CHECKERR: No catalogs available for language specifiers: zh_HK it_IT status language set de de