diff --git a/doc_src/language.rst b/doc_src/language.rst index 022161085..50675747f 100644 --- a/doc_src/language.rst +++ b/doc_src/language.rst @@ -1836,7 +1836,16 @@ In UNIX, these are made up of several categories. The categories used by fish ar This is treated like :envvar:`LC_MESSAGES` except that it can hold multiple values, which allows to specify a priority list of languages for translation. - It's a :ref:`PATH variable `, like in `GNU gettext `__. + It's a :ref:`PATH variable <variables-path>`, like in `GNU gettext <https://www.gnu.org/software/gettext/manual/html_node/The-LANGUAGE-variable.html>`__. + + Language identifiers without a region specified (e.g. ``zh``) result in all available variants of this language being tried in arbitrary order. + In this example, we might first look for messages in the ``zh_CN`` catalog, followed by ``zh_TW``, or the other way around. + This is different from GNU gettext, which uses a "default" variant of the language instead. + If you prefer a certain variant, specify it earlier in the list, + e.g. ``zh_TW:zh`` if your preferred language is ``zh_TW``, and you prefer any other variants of ``zh`` over the English default. + If ``zh_TW`` is the only variant of ``zh`` you want, + specifying only ``zh_TW`` in the ``LANGUAGE`` variable will result in messages which are not available in ``zh_TW`` being displayed in English. + See also :doc:`builtin _ (underscore) <cmds/_>`. .. envvar:: LC_ALL diff --git a/src/wutil/gettext.rs b/src/wutil/gettext.rs index e4d7ea381..870f60a5d 100644 --- a/src/wutil/gettext.rs +++ b/src/wutil/gettext.rs @@ -7,7 +7,7 @@ #[cfg(feature = "localize-messages")] mod gettext_impl { - use std::sync::Mutex; + use std::{collections::HashSet, sync::Mutex}; use once_cell::sync::Lazy; @@ -16,20 +16,18 @@ mod gettext_impl { use crate::env::{EnvStack, Environment}; - /// Tries to find a catalog for `language`. + /// Tries to find catalogs for `language`. /// `language` must be an ISO 639 language code, optionally followed by an underscore and an ISO /// 3166 country/territory code. 
- /// Always prefers the catalog with the exact same name as `language` if it exists. + /// Uses the catalog with the exact same name as `language` if it exists. /// If a country code is present (`ll_CC`), only the catalog named `ll` will be considered as a fallback. - /// If no country code is present (`ll`), an arbitrary catalog whose name starts with `ll_` - /// will be used as a fallback, if one exists. - /// If there is a catalog for the language, then `Some(catalog)` will be returned. - /// `None` will be returned if no variant of the language has localizations. - fn find_existing_catalog(language: &str) -> Option { + /// If no country code is present (`ll`), all catalogs whose names start with `ll_` will be used in + /// arbitrary order. + fn find_existing_catalogs(language: &str) -> Vec<(String, Catalog)> { // Try the exact name first. // If there already is a corresponding catalog return the language. if let Some(catalog) = CATALOGS.get(language) { - return Some(catalog); + return vec![(language.to_owned(), catalog)]; } let language_without_country_code = language.split_once('_').map_or(language, |(ll, _cc)| ll); @@ -38,17 +36,21 @@ fn find_existing_catalog(language: &str) -> Option { // Note that it is important to include the underscore in the pattern, otherwise `ll` might // fall back to `llx_CC`, where `llx` is a 3-letter language identifier. let ll_prefix = format!("{language}_"); + let mut lang_catalogs = vec![]; for (&lang_name, &catalog) in CATALOGS.entries() { if lang_name.starts_with(&ll_prefix) { - return Some(catalog); + lang_catalogs.push((lang_name.to_owned(), catalog)); } } - // No localizations for the language (and any regional variations) exist. - None + lang_catalogs } else { // If `language` contained a country code, we only try to fall back to a catalog // without a country code. 
- CATALOGS.get(language_without_country_code).copied() + if let Some(catalog) = CATALOGS.get(language_without_country_code) { + vec![(language_without_country_code.to_owned(), catalog)] + } else { + vec![] + } } } @@ -143,10 +145,13 @@ fn check_language_var(vars: &EnvStack) -> Option> { /// Implementation of the function with the same name in super. pub(super) fn update_locale_from_env(vars: &EnvStack) { + let mut seen_languages = HashSet::new(); let mut language_precedence = LANGUAGE_PRECEDENCE.lock().unwrap(); *language_precedence = get_language_preferences_from_env(vars) - .iter() - .filter_map(|lang| find_existing_catalog(lang)) + .into_iter() + .flat_map(|lang| find_existing_catalogs(&lang)) + .filter(|(lang, _)| seen_languages.insert(lang.to_owned())) + .map(|(_, catalog)| catalog) .collect(); } }