diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 75ae68e6d..8709b5e4a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,9 +37,9 @@ jobs: # Generate PO files. This should not result it a change in the repo if all translations are # up to date. # Ensure that fish is available as an executable. - PATH="$PWD/build:$PATH" build_tools/update_translations.fish --no-mo + PATH="$PWD/build:$PATH" build_tools/update_translations.fish # Show diff output. Fail if there is any. - git --no-pager diff --exit-code || { echo 'There are uncommitted changes after regenerating the gettext PO files. Make sure to update them via `build_tools/update_translations.fish --no-mo` after changing source files.'; exit 1; } + git --no-pager diff --exit-code || { echo 'There are uncommitted changes after regenerating the gettext PO files. Make sure to update them via `build_tools/update_translations.fish` after changing source files.'; exit 1; } ubuntu-32bit-static-pcre2: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a67b4d2b..81d93dcca 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -110,7 +110,6 @@ jobs: run: | set -x CFLAGS="-D_FORTIFY_SOURCE=2" \ - CMAKE_WITH_GETTEXT=0 \ CC=aarch64-linux-gnu-gcc \ RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc -C link-arg=-lgcc -C link-arg=-D_FORTIFY_SOURCE=0" \ cargo build --release --target aarch64-unknown-linux-musl --bin fish diff --git a/.github/workflows/rust_checks.yml b/.github/workflows/rust_checks.yml index 6f78a1d14..ed0827fb2 100644 --- a/.github/workflows/rust_checks.yml +++ b/.github/workflows/rust_checks.yml @@ -50,6 +50,9 @@ jobs: steps: - uses: actions/checkout@v3 - uses: ./.github/actions/rust-toolchain@stable + - name: Install deps + run: | + sudo apt install gettext - name: cargo doc run: | RUSTDOCFLAGS='-D warnings' cargo doc --workspace diff --git a/.gitignore b/.gitignore index f7af3adc6..18267df7b 100644 --- 
a/.gitignore +++ b/.gitignore @@ -38,7 +38,6 @@ Desktop.ini Thumbs.db ehthumbs.db -*.mo .directory .fuse_hidden* diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1e83260e0..7081b2659 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,21 @@ Notable improvements and fixes use `status get-file` or find alternatives (like loading completions for "foo" via `complete -C"foo "`). We're considering making data embedding mandatory in future releases because it has a few advantages even for installation from a package (like making file conflicts with other packages impossible). (:issue:`11143`) +- Reworked gettext localization (:issue:`11726`). + We replaced several parts of the gettext functionality with custom implementations. + Most notably, message extraction, which should now work reliably, and the runtime implementation, where we no longer dynamically link to gettext, but instead use our own implementation, whose behavior is similar to GNU gettext, with some minor deviations. + Our implementation now fully respects fish variables, so locale variables do not have to be exported for fish localizations to work. + They still have to be exported to inform other programs about language preferences. + The :envvar:`LANGUAGE` environment variable is now treated as a path variable, meaning it is an implicitly colon-separated list. + While we no longer have any runtime dependency on gettext, we still need gettext tools for building, most notably ``msgfmt``. + When building without ``msgfmt`` available, localization will not work with the resulting executable. + Localization data is no longer sourced at runtime from MO files on the file system, but instead built into the executable. + This is always done, independently of the other data embedding, so all fish executables will have access to all message catalogs, regardless of the state of the file system. + We have a new cargo feature called ``localize-messages``, which is enabled by default. 
+ Disabling it will cause fish to be built without localization support. + CMake builds can continue to use the ``WITH_GETTEXT`` option, with the same semantics as the ``localize-messages`` feature. + The current implementation does not provide any configuration options for controlling which language catalogs are built into the executable (other than disabling them all). + As a workaround, you can delete files in the ``po`` directory before building to exclude unwanted languages. Deprecations and removed features --------------------------------- @@ -105,6 +120,9 @@ For distributors - The CMake system was simplified and no longer second-guesses rustup. It will run rustc and cargo via $PATH or in ~/.cargo/bin/. If that doesn't match your setup, set the Rust_COMPILER and Rust_CARGO cmake variables (:issue:`11328`). - Cygwin support has been reintroduced, since rust gained a Cygwin target (https://github.com/rust-lang/rust/pull/134999, :issue:`11238`). +- Fish no longer uses gettext MO files (:issue:`11726`). + See the description about changes to the gettext behavior for details. + If you have use cases which are incompatible with our new approach, please let us know. -------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 53a47fe87..15f32f846 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,6 @@ endif() # Set up standard directories. 
include(GNUInstallDirs) -include(cmake/gettext.cmake) - # Set up PCRE2 # This sets an environment variable that needs to be available before the Rust stanzas include(cmake/PCRE2.cmake) @@ -54,8 +52,8 @@ function(CREATE_TARGET target) $<$:--profile=release-with-debug> --target ${Rust_CARGO_TARGET} --no-default-features + --features=${FISH_CARGO_FEATURES} ${CARGO_FLAGS} - ${FEATURES_ARG} && "${CMAKE_COMMAND}" -E copy "${rust_target_dir}/${rust_profile}/${target}" "${CMAKE_CURRENT_BINARY_DIR}" diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 487a0d5c6..bd474f258 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -36,7 +36,7 @@ For that, you'll require: - Rust - when in doubt, try rustup - CMake - PCRE2 (headers and libraries) - optional, this will be downloaded if missing -- gettext (headers and libraries) - optional, for translation support +- gettext (only the msgfmt tool) - optional, for translation support - Sphinx - optional, to build the documentation Of course not everything is required always - if you just want to contribute something to the documentation you'll just need Sphinx, @@ -272,13 +272,20 @@ To install the hook, place the code in a new file Contributing Translations ========================= -Fish uses the GNU gettext library to translate messages from English to -other languages. +Fish uses GNU gettext to translate messages from English to other languages. +We use custom tools for extracting messages from source files and to localize at runtime. +This means that we do not have a runtime dependency on the gettext library. +It also means that some features are not supported, such as message context and plurals. +We also expect all files to be UTF-8-encoded. +In practice, this should not matter much for contributing translations. Translation sources are -stored in the ``po`` directory, named ``LANG.po``, where ``LANG`` is the -two letter ISO 639-1 language code of the target language (e.g. ``de`` for -German). 
A region specifier can also be used (e.g. ``pt_BR`` for Brazilian Portuguese). +stored in the ``po`` directory, named ``ll_CC.po``, where ``ll`` is the +two (or possibly three) letter ISO 639 language code of the target language +(e.g. ``pt`` for Portuguese). ``CC`` is an ISO 3166 country/territory code +(e.g. ``BR`` for Brazil). +An example of a valid name is ``pt_BR.po``, indicating Brazilian Portuguese. +These are the files you will interact with when adding translations. Adding translations for a new language -------------------------------------- @@ -288,21 +295,36 @@ More specifically, you will need ``msguniq`` and ``msgmerge`` for creating trans language. To create a new translation, run:: - build_tools/update_translations.fish po/LANG.po + build_tools/update_translations.fish po/ll_CC.po -By default, this also creates ``mo`` files, which contain the information from the ``po`` files in a -binary format. -Fish uses these files for translating at runtime. -They are not tracked in version control, but they can help translators check if their translations -show up correctly. -If you build fish locally (``cargo build``), and then run the resulting binary, -it will make use of the ``mo`` files generated by the script. -Use the ``LANG`` environment variable to tell fish which language to use, e.g.:: +This will create a new PO file containing all messages available for translation. +If the file already exists, it will be updated. - LANG=pt_BR.utf8 target/debug/fish +After modifying a PO file, you can recompile fish, and it will integrate the modifications you made. +This requires that the ``msgfmt`` utility is installed (comes as part of ``gettext``). +It is important that the ``localize-messages`` cargo feature is enabled, which it is by default. +You can explicitly enable it using:: -If you do not care about the ``mo`` files you can pass the ``--no-mo`` flag to the -``update_translations.fish`` script. 
+ cargo build --features=localize-messages + +Use environment variables to tell fish which language to use, e.g.:: + + LANG=pt_BR.utf8 fish + +or within the running fish shell:: + + set LANG pt_BR.utf8 + +For more options regarding how to choose languages, see +`the corresponding gettext documentation +<https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html>`__. +One neat thing you can do is set a list of languages to check for translations in the order defined +using the ``LANGUAGE`` variable, e.g.:: + + set LANGUAGE pt_BR de_DE + +to try to translate messages to Portuguese, if that fails try German, and if that fails too you will +see the English version defined in the source code. Modifying existing translations ------------------------------- @@ -310,13 +332,8 @@ Modifying existing translations If you want to work on translations for a language which already has a corresponding ``po`` file, it is sufficient to edit this file. No other changes are necessary. -To see your translations in action you can run:: - - build_tools/update_translations.fish --only-mo po/LANG.po - -to update the binary ``mo`` used by fish. Check the information for adding new languages for a -description on how you can get fish to use these files. -Running this script requires a fish executable and the gettext ``msgfmt`` tool. +After recompiling fish, you should be able to see your translations in action. See the previous +section for details. Editing PO files ---------------- @@ -324,18 +341,18 @@ Editing PO files Many tools are available for editing translation files, including command-line and graphical user interface programs. For simple use, you can use your text editor. -Open up the po file, for example ``po/sv.po``, and you'll see something like:: +Open up the PO file, for example ``po/sv.po``, and you'll see something like:: - msgid "%ls: No suitable job\n" - msgstr "" + msgid "%ls: No suitable job\n" + msgstr "" The ``msgid`` here is the "name" of the string to translate, typically the English string to translate. 
The second line (``msgstr``) is where your translation goes. For example:: - msgid "%ls: No suitable job\n" - msgstr "%ls: Inget passande jobb\n" + msgid "%ls: No suitable job\n" + msgstr "%ls: Inget passande jobb\n" Any ``%s`` / ``%ls`` or ``%d`` are placeholders that fish will use for formatting at runtime. It is important that they match - the translated string should have the same placeholders in the same order. @@ -350,7 +367,7 @@ Modifications to strings in source files ---------------------------------------- If a string changes in the sources, the old translations will no longer work. -They will be preserved in the ``po`` files, but commented-out (starting with ``#~``). +They will be preserved in the PO files, but commented-out (starting with ``#~``). If you add/remove/change a translatable strings in a source file, run ``build_tools/update_translations.fish`` to propagate this to all translation files (``po/*.po``). This is only relevant for developers modifying the source files of fish or fish scripts. @@ -364,7 +381,7 @@ macros: :: - streams.out.append(wgettext_fmt!("%ls: There are no jobs\n", argv[0])); + streams.out.append(wgettext_fmt!("%ls: There are no jobs\n", argv[0])); All messages in fish script must be enclosed in single or double quote characters for our message extraction script to find them. @@ -373,15 +390,15 @@ that the following are **not** valid: :: - echo (_ hello) - _ "goodbye" + echo (_ hello) + _ "goodbye" Above should be written like this instead: :: - echo (_ "hello") - echo (_ "goodbye") + echo (_ "hello") + echo (_ "goodbye") You can use either single or double quotes to enclose the message to be translated. 
You can also optionally include spaces after diff --git a/Cargo.lock b/Cargo.lock index d8c7ab19b..df6685435 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fish" version = "4.1.0-alpha0" @@ -107,6 +113,8 @@ dependencies = [ "fish-build-helper", "fish-build-man-pages", "fish-gettext-extraction", + "fish-gettext-maps", + "fish-gettext-mo-file-parser", "fish-printf", "libc", "lru", @@ -114,6 +122,8 @@ dependencies = [ "num-traits", "once_cell", "pcre2", + "phf 0.12.1", + "phf_codegen 0.12.1", "portable-atomic", "rand", "rsconf", @@ -146,6 +156,21 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "fish-gettext-maps" +version = "0.0.0" +dependencies = [ + "fish-build-helper", + "fish-gettext-mo-file-parser", + "phf 0.12.1", + "phf_codegen 0.12.1", + "rsconf", +] + +[[package]] +name = "fish-gettext-mo-file-parser" +version = "0.0.0" + [[package]] name = "fish-printf" version = "0.2.1" @@ -327,7 +352,16 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_shared", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared 0.12.1", ] [[package]] @@ -336,8 +370,18 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = 
"0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbdcb6f01d193b17f0b9c3360fa7e0e620991b193ff08702f78b3ce365d7e61" +dependencies = [ + "phf_generator 0.12.1", + "phf_shared 0.12.1", ] [[package]] @@ -346,10 +390,20 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_shared", + "phf_shared 0.11.3", "rand", ] +[[package]] +name = "phf_generator" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b" +dependencies = [ + "fastrand", + "phf_shared 0.12.1", +] + [[package]] name = "phf_shared" version = "0.11.3" @@ -359,6 +413,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + [[package]] name = "pkg-config" version = "0.3.31" @@ -557,8 +620,8 @@ checksum = "d4ea810f0692f9f51b382fff5893887bb4580f5fa246fde546e0b13e7fcee662" dependencies = [ "fnv", "nom", - "phf", - "phf_codegen", + "phf 0.11.3", + "phf_codegen 0.11.3", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 6e021d75f..89194ef86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ errno = "0.3.0" fish-build-helper = { path = "crates/build-helper" } fish-build-man-pages = { path = "crates/build-man-pages" } fish-gettext-extraction = { path = "crates/gettext-extraction" } +fish-gettext-maps = { path = "crates/gettext-maps" } +fish-gettext-mo-file-parser = { path = "crates/gettext-mo-file-parser" } fish-printf = { path = "crates/printf", features = ["widestring"] } libc = "0.2.155" # lru pulls in hashbrown by default, which uses a faster (though less DoS resistant) hashing algo. 
@@ -32,6 +34,8 @@ once_cell = "1.19.0" pcre2 = { git = "https://github.com/fish-shell/rust-pcre2", tag = "0.2.9-utf32", default-features = false, features = [ "utf32", ] } +phf = { version = "0.12", default-features = false } +phf_codegen = { version = "0.12" } portable-atomic = { version = "1", default-features = false, features = [ "fallback", ] } @@ -76,6 +80,7 @@ errno.workspace = true fish-build-helper.workspace = true fish-build-man-pages = { workspace = true, optional = true } fish-gettext-extraction = { workspace = true, optional = true } +fish-gettext-maps = { workspace = true, optional = true } fish-printf.workspace = true libc.workspace = true lru.workspace = true @@ -83,6 +88,7 @@ nix.workspace = true num-traits.workspace = true once_cell.workspace = true pcre2.workspace = true +phf = { workspace = true, optional = true } rand.workspace = true rust-embed = { workspace = true, optional = true } terminfo.workspace = true @@ -97,6 +103,8 @@ serial_test.workspace = true [build-dependencies] cc.workspace = true fish-build-helper.workspace = true +fish-gettext-mo-file-parser.workspace = true +phf_codegen = { workspace = true, optional = true } rsconf.workspace = true [target.'cfg(windows)'.build-dependencies] @@ -119,9 +127,12 @@ name = "fish_key_reader" path = "src/bin/fish_key_reader.rs" [features] -default = ["embed-data"] +default = ["embed-data", "localize-messages"] benchmark = [] embed-data = ["dep:rust-embed", "dep:fish-build-man-pages"] +# Enable gettext localization at runtime. Requires the `msgfmt` tool to generate catalog data at +# build time. +localize-messages = ["dep:phf", "dep:fish-gettext-maps"] # This feature is used to enable extracting messages from the source code for localization. # It only needs to be enabled if updating these messages (and the corresponding PO files) is # desired. 
This happens when running tests via `build_tools/check.sh` and when calling diff --git a/README.rst b/README.rst index 8aa11b64d..3a3a0fe8b 100644 --- a/README.rst +++ b/README.rst @@ -93,8 +93,6 @@ Running fish requires: ``file``, ``ls``, ``mkdir``, ``mkfifo``, ``rm``, ``sh``, ``sort``, ``tee``, ``tr``, ``uname`` and ``sed`` at least, but the full coreutils plus ``find`` and ``awk`` is preferred) -- The gettext library, if compiled with - translation support The following optional features also have specific requirements: @@ -125,7 +123,7 @@ Compiling fish requires: - CMake (version 3.15 or later) - a C compiler (for system feature detection and the test helper binary) - PCRE2 (headers and libraries) - optional, this will be downloaded if missing -- gettext (headers and libraries) - optional, for translation support +- gettext (only the msgfmt tool) - optional, for translation support - an Internet connection, as other dependencies will be downloaded automatically Sphinx is also optionally required to build the documentation from a @@ -165,7 +163,7 @@ In addition to the normal CMake build options (like ``CMAKE_INSTALL_PREFIX``), f - INSTALL_DOCS=ON|OFF - whether to install the docs. This is automatically set to on when BUILD_DOCS is or prebuilt documentation is available (like when building in-tree from a tarball). - FISH_USE_SYSTEM_PCRE2=ON|OFF - whether to use an installed pcre2. This is normally autodetected. - MAC_CODESIGN_ID=String|OFF - the codesign ID to use on Mac, or "OFF" to disable codesigning. -- WITH_GETTEXT=ON|OFF - whether to build with gettext support for translations. +- WITH_GETTEXT=ON|OFF - whether to include translations. 
- extra_functionsdir, extra_completionsdir and extra_confdir - to compile in an additional directory to be searched for functions, completions and configuration snippets Building fish with embedded data (experimental) @@ -185,14 +183,15 @@ To install fish with embedded files, just use ``cargo``, like:: This will place the binaries in ``~/.cargo/bin/``, but you can place them wherever you want. -This build won't have the HTML docs (``help`` will open the online version) or translations. - +This build won't have the HTML docs (``help`` will open the online version). It will try to build the man pages with sphinx-build. If that is not available and you would like to include man pages, you need to install it and retrigger the build script, e.g. by setting FISH_BUILD_DOCS=1:: FISH_BUILD_DOCS=1 cargo install --path . Setting it to "0" disables the inclusion of man pages. +To disable translations, disable the ``localize-messages`` feature by passing ``--no-default-features --features=embed-data`` to cargo. + You can also link this build statically (but not against glibc) and move it to other computers. Contributing Changes to the Code diff --git a/build.rs b/build.rs index 9100ba2b8..2d6b955b4 100644 --- a/build.rs +++ b/build.rs @@ -1,7 +1,7 @@ #![allow(clippy::uninlined_format_args)] -use fish_build_helper::{cargo_target_dir, workspace_root}; -use rsconf::{LinkType, Target}; +use fish_build_helper::{fish_build_dir, workspace_root}; +use rsconf::Target; use std::env; use std::error::Error; use std::path::{Path, PathBuf}; @@ -18,12 +18,9 @@ fn main() { rsconf::set_env_value( "FISH_BUILD_DIR", - // This is set by CMake and might include symlinks. Since we want to compare this to - // the dir fish is executed in we need to canonicalize it. - option_env!("FISH_BUILD_DIR") - .map_or(canonicalize(cargo_target_dir()), canonicalize) - .to_str() - .unwrap(), + // If set by CMake, this might include symlinks. 
Since we want to compare this to the + // dir fish is executed in we need to canonicalize it. + canonicalize(fish_build_dir()).to_str().unwrap(), ); // We need to canonicalize (i.e. realpath) the manifest dir because we want to be able to @@ -57,10 +54,7 @@ fn main() { rsconf::rebuild_if_path_changed("src/libc.c"); cc::Build::new().file("src/libc.c").compile("flibc.a"); - let mut build = cc::Build::new(); - // Add to the default library search path - build.flag_if_supported("-L/usr/local/lib/"); - rsconf::add_library_search_path("/usr/local/lib"); + let build = cc::Build::new(); let mut target = Target::new_from(build).unwrap(); // Keep verbose mode on until we've ironed out rust build script stuff target.set_verbose(true); @@ -94,7 +88,6 @@ fn detect_cfgs(target: &mut Target) { ("apple", &detect_apple), ("bsd", &detect_bsd), ("cygwin", &detect_cygwin), - ("gettext", &have_gettext), ("small_main_stack", &has_small_stack), // See if libc supports the thread-safe localeconv_l(3) alternative to localeconv(3). ("localeconv_l", &|target| { @@ -161,51 +154,6 @@ fn detect_bsd(_: &Target) -> Result> { Ok(is_bsd) } -/// Detect libintl/gettext and its needed symbols to enable internationalization/localization -/// support. -fn have_gettext(target: &Target) -> Result> { - // The following script correctly detects and links against gettext, but so long as we are using - // C++ and generate a static library linked into the C++ binary via CMake, we need to account - // for the CMake option WITH_GETTEXT being explicitly disabled. - rsconf::rebuild_if_env_changed("CMAKE_WITH_GETTEXT"); - if let Some(with_gettext) = std::env::var_os("CMAKE_WITH_GETTEXT") { - if with_gettext.eq_ignore_ascii_case("0") { - return Ok(false); - } - } - - // In order for fish to correctly operate, we need some way of notifying libintl to invalidate - // its localizations when the locale environment variables are modified. 
Without the libintl - // symbol _nl_msg_cat_cntr, we cannot use gettext even if we find it. - let mut libraries = Vec::new(); - let mut found = 0; - let symbols = ["gettext", "_nl_msg_cat_cntr"]; - for symbol in &symbols { - // Historically, libintl was required in order to use gettext() and co, but that - // functionality was subsumed by some versions of libc. - if target.has_symbol(symbol) { - // No need to link anything special for this symbol - found += 1; - continue; - } - for library in ["intl", "gettextlib"] { - if target.has_symbol_in(symbol, &[library]) { - libraries.push(library); - found += 1; - continue; - } - } - } - match found { - 0 => Ok(false), - 1 => Err(format!("gettext found but cannot be used without {}", symbols[1]).into()), - _ => { - rsconf::link_libraries(&libraries, LinkType::Default); - Ok(true) - } - } -} - /// Rust sets the stack size of newly created threads to a sane value, but is at at the mercy of the /// OS when it comes to the size of the main stack. Some platforms we support default to a tiny /// 0.5 MiB main stack, which is insufficient for fish's MAX_EVAL_DEPTH/MAX_STACK_DEPTH values. diff --git a/build_tools/update_translations.fish b/build_tools/update_translations.fish index af2ca880c..4fd5ce187 100755 --- a/build_tools/update_translations.fish +++ b/build_tools/update_translations.fish @@ -1,19 +1,14 @@ #!/usr/bin/env fish # Updates the files used for gettext translations. -# By default, the whole xgettext, msgmerge, msgfmt pipeline runs, +# By default, the whole xgettext + msgmerge pipeline runs, # which extracts the messages from the source files into $template_file, -# updates the PO files for each language from that -# (changed line numbers, added messages, removed messages), -# and finally generates a machine-readable MO file for each language, -# which is stored in share/locale/$LANG/LC_MESSAGES/fish.mo (relative to the workspace root). +# and updates the PO files for each language from that. 
# # Use cases: # For developers: -# - Run with args `--no-mo` to update all PO files after making changes to Rust/fish -# sources. +# - Run with no args to update all PO files after making changes to Rust/fish sources. # For translators: -# - Run with `--no-mo` first, to ensure that the strings you are translating are up to date. # - Specify the language you want to work on as an argument, which must be a file in the po/ # directory. You can specify a language which does not have translations yet by specifying the # name of a file which does not yet exist. Make sure to follow the naming convention. @@ -41,9 +36,8 @@ set -l po_dir $build_tools/../po set -l extract set -l po -set -l mo -argparse --exclusive 'no-mo,only-mo,dry-run' no-mo only-mo dry-run use-existing-template= -- $argv +argparse dry-run use-existing-template= -- $argv or exit $status if test -z $argv[1] @@ -71,14 +65,6 @@ else set -g po_files $argv end -if set -l --query _flag_no_mo - set -l --erase mo -end -if set -l --query _flag_only_mo - set -l --erase extract - set -l --erase po -end - set -g template_file (mktemp) # Protect from externally set $tmpdir leaking into this script. set -g tmpdir @@ -109,9 +95,6 @@ if set -l --query _flag_dry_run # there is a difference between po/ and the tmpdir after re-generating the PO files. set -g tmpdir (mktemp -d) - # On a dry-run, we do not update the MO files. - set -l --erase mo - # Ensure tmpdir has the same initial state as the po dir. 
cp -r $po_dir/* $tmpdir end @@ -129,12 +112,6 @@ for po_file in $po_files cp $template_file $po_file end end - if set -l --query mo - set -l locale_dir $build_tools/../share/locale - set -l out_dir $locale_dir/(basename $po_file .po)/LC_MESSAGES - mkdir -p $out_dir - msgfmt --check-format --output-file=$out_dir/fish.mo $po_file - end end if set -g --query tmpdir[1] diff --git a/cmake/Install.cmake b/cmake/Install.cmake index b2fbfe1e5..f7d894e95 100644 --- a/cmake/Install.cmake +++ b/cmake/Install.cmake @@ -156,16 +156,6 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/user_doc/html/ # Trailing slash is DESTINATION ${docdir} OPTIONAL) install(FILES CHANGELOG.rst DESTINATION ${docdir}) -# These files are built by cmake/gettext.cmake, but using GETTEXT_PROCESS_PO_FILES's -# INSTALL_DESTINATION leads to them being installed as ${lang}.gmo, not fish.mo -# The ${languages} array comes from cmake/gettext.cmake -if(GETTEXT_FOUND) - foreach(lang ${languages}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${lang}.gmo DESTINATION - ${CMAKE_INSTALL_LOCALEDIR}/${lang}/LC_MESSAGES/ RENAME fish.mo) - endforeach() -endif() - # Group install targets into a InstallTargets folder set_property(TARGET build_fish_pc CHECK-FISH-BUILD-VERSION-FILE PROPERTY FOLDER cmake/InstallTargets) diff --git a/cmake/Rust.cmake b/cmake/Rust.cmake index 9cdd01c30..00ddf9055 100644 --- a/cmake/Rust.cmake +++ b/cmake/Rust.cmake @@ -5,11 +5,11 @@ set(FISH_RUST_BUILD_DIR "${CMAKE_BINARY_DIR}/cargo/build") if(DEFINED ASAN) list(APPEND CARGO_FLAGS "-Z" "build-std") - list(APPEND FISH_CRATE_FEATURES "asan") + list(APPEND FISH_CARGO_FEATURES_LIST "asan") endif() if(DEFINED TSAN) list(APPEND CARGO_FLAGS "-Z" "build-std") - list(APPEND FISH_CRATE_FEATURES "tsan") + list(APPEND FISH_CARGO_FEATURES_LIST "tsan") endif() if (Rust_CARGO_TARGET) @@ -21,32 +21,24 @@ endif() set(rust_profile $,debug,$,release-with-debug,release>>) set(rust_debugflags "$<$:-g>$<$:-g>") - -# Temporary hack to propagate CMake flags/options 
to build.rs. We need to get CMake to evaluate the -# truthiness of the strings if they are set. -set(CMAKE_WITH_GETTEXT "1") -if(DEFINED WITH_GETTEXT AND NOT "${WITH_GETTEXT}") - set(CMAKE_WITH_GETTEXT "0") +option(WITH_GETTEXT "Build with gettext localization support. Requires `msgfmt` to work." ON) +# Enable gettext feature unless explicitly disabled. +if(NOT DEFINED WITH_GETTEXT OR "${WITH_GETTEXT}") + list(APPEND FISH_CARGO_FEATURES_LIST "localize-messages") endif() -if(FISH_CRATE_FEATURES) - set(FEATURES_ARG ${FISH_CRATE_FEATURES}) - list(PREPEND FEATURES_ARG "--features") -endif() +list(JOIN FISH_CARGO_FEATURES_LIST , FISH_CARGO_FEATURES) # Tell Cargo where our build directory is so it can find Cargo.toml. set(VARS_FOR_CARGO "FISH_BUILD_DIR=${CMAKE_BINARY_DIR}" "PREFIX=${CMAKE_INSTALL_PREFIX}" - # Temporary hack to propagate CMake flags/options to build.rs. - "CMAKE_WITH_GETTEXT=${CMAKE_WITH_GETTEXT}" # Cheesy so we can tell cmake was used to build "CMAKE=1" "DOCDIR=${CMAKE_INSTALL_FULL_DOCDIR}" "DATADIR=${CMAKE_INSTALL_FULL_DATADIR}" "SYSCONFDIR=${CMAKE_INSTALL_FULL_SYSCONFDIR}" "BINDIR=${CMAKE_INSTALL_FULL_BINDIR}" - "LOCALEDIR=${CMAKE_INSTALL_FULL_LOCALEDIR}" "CARGO_TARGET_DIR=${FISH_RUST_BUILD_DIR}" "CARGO_BUILD_RUSTC=${Rust_COMPILER}" "${FISH_PCRE2_BUILDFLAG}" diff --git a/cmake/gettext.cmake b/cmake/gettext.cmake deleted file mode 100644 index 9e6986af6..000000000 --- a/cmake/gettext.cmake +++ /dev/null @@ -1,22 +0,0 @@ -set(languages de en fr pl pt_BR sv zh_CN) - -include(FeatureSummary) - -option(WITH_GETTEXT "translate messages if gettext is available" ON) -if(WITH_GETTEXT) - find_package(Gettext) -endif() -add_feature_info(gettext GETTEXT_FOUND "translate messages with gettext") - -# Define translations -if(GETTEXT_FOUND) - # Group pofile targets into their own folder, as there's a lot of them. 
- set(CMAKE_FOLDER pofiles) - foreach(lang ${languages}) - # Our translations aren't set up entirely as CMake expects, so installation is done in - # cmake/Install.cmake instead of using INSTALL_DESTINATION - gettext_process_po_files(${lang} ALL - PO_FILES po/${lang}.po) - endforeach() - set(CMAKE_FOLDER) -endif() diff --git a/crates/build-helper/src/lib.rs b/crates/build-helper/src/lib.rs index 372470948..529ce8f38 100644 --- a/crates/build-helper/src/lib.rs +++ b/crates/build-helper/src/lib.rs @@ -5,10 +5,22 @@ pub fn workspace_root() -> &'static Path { manifest_dir.ancestors().nth(2).unwrap() } -pub fn cargo_target_dir() -> Cow<'static, Path> { +fn cargo_target_dir() -> Cow<'static, Path> { option_env!("CARGO_TARGET_DIR") .map(|d| Cow::Borrowed(Path::new(d))) - .unwrap_or(std::borrow::Cow::Owned(workspace_root().join("target"))) + .unwrap_or(Cow::Owned(workspace_root().join("target"))) +} + +pub fn fish_build_dir() -> Cow<'static, Path> { + // FISH_BUILD_DIR is set by CMake, if we are using it. + option_env!("FISH_BUILD_DIR") + .map(|d| Cow::Borrowed(Path::new(d))) + .unwrap_or(cargo_target_dir()) +} + +// TODO Move this to rsconf +pub fn rebuild_if_path_changed>(path: P) { + rsconf::rebuild_if_path_changed(path.as_ref().to_str().unwrap()); } // TODO Move this to rsconf diff --git a/crates/build-man-pages/build.rs b/crates/build-man-pages/build.rs index d534706e0..6b6013931 100644 --- a/crates/build-man-pages/build.rs +++ b/crates/build-man-pages/build.rs @@ -1,11 +1,8 @@ #[cfg(not(clippy))] use std::path::Path; -use fish_build_helper::cargo_target_dir; - fn main() { - let cargo_target_dir = cargo_target_dir(); - let mandir = cargo_target_dir.join("fish-man"); + let mandir = fish_build_helper::fish_build_dir().join("fish-man"); let sec1dir = mandir.join("man1"); // Running `cargo clippy` on a clean build directory panics, because when rust-embed tries to // embed a directory which does not exist it will panic. 
diff --git a/crates/gettext-maps/Cargo.toml b/crates/gettext-maps/Cargo.toml new file mode 100644 index 000000000..bc474e9b2 --- /dev/null +++ b/crates/gettext-maps/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "fish-gettext-maps" +edition.workspace = true +rust-version.workspace = true +version = "0.0.0" +repository.workspace = true + +[dependencies] +phf.workspace = true + +[build-dependencies] +fish-build-helper.workspace = true +fish-gettext-mo-file-parser.workspace = true +phf_codegen.workspace = true +rsconf.workspace = true + +[lints] +workspace = true diff --git a/crates/gettext-maps/build.rs b/crates/gettext-maps/build.rs new file mode 100644 index 000000000..35b62c935 --- /dev/null +++ b/crates/gettext-maps/build.rs @@ -0,0 +1,142 @@ +use std::{ + env, + ffi::OsStr, + path::{Path, PathBuf}, + process::Command, +}; + +fn main() { + let cache_dir = + PathBuf::from(fish_build_helper::fish_build_dir()).join("fish-localization-map-cache"); + embed_localizations(&cache_dir); + + fish_build_helper::rebuild_if_path_changed(fish_build_helper::workspace_root().join("po")); +} + +fn embed_localizations(cache_dir: &Path) { + use fish_gettext_mo_file_parser::parse_mo_file; + use std::{ + fs::File, + io::{BufWriter, Write}, + }; + + let po_dir = fish_build_helper::workspace_root().join("po"); + + // Ensure that the directory is created, because clippy cannot compile the code if the + // directory does not exist. + std::fs::create_dir_all(cache_dir).unwrap(); + + let localization_map_path = + Path::new(&env::var("OUT_DIR").unwrap()).join("localization_maps.rs"); + let mut localization_map_file = BufWriter::new(File::create(&localization_map_path).unwrap()); + + // This will become a map which maps from language identifiers to maps containing localizations + // for the respective language. 
+ let mut catalogs = phf_codegen::Map::new(); + + match Command::new("msgfmt").arg("-h").status() { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + rsconf::warn!( + "Cannot find msgfmt to build gettext message catalogs. Localization will not work." + ); + rsconf::warn!( + "If you install it now you need to trigger a rebuild to get localization support." + ); + rsconf::warn!( + "One way to achieve that is running `touch po` followed by the build command." + ); + } + Err(e) => { + panic!("Error when trying to run `msgfmt -h`: {e:?}"); + } + Ok(_) => { + for dir_entry_result in po_dir.read_dir().unwrap() { + let dir_entry = dir_entry_result.unwrap(); + let po_file_path = dir_entry.path(); + if po_file_path.extension() != Some(OsStr::new("po")) { + continue; + } + let lang = po_file_path + .file_stem() + .expect("All entries in the po directory must be regular files."); + let language = lang.to_str().unwrap().to_owned(); + + // Each language gets its own static map for the mapping from message in the source code to + // the localized version. + let map_name = format!("LANG_MAP_{language}"); + + let cached_map_path = cache_dir.join(lang); + + // Include the file containing the map for this language in the main generated file. + writeln!( + &mut localization_map_file, + "include!(\"{}\");", + cached_map_path.display() + ) + .unwrap(); + // Map from the language identifier to the map containing the localizations for this + // language. + catalogs.entry(language, format!("&{map_name}")); + + if let Ok(metadata) = std::fs::metadata(&cached_map_path) { + // Cached map file exists, but might be outdated. + let cached_map_mtime = metadata.modified().unwrap(); + let po_mtime = dir_entry.metadata().unwrap().modified().unwrap(); + if cached_map_mtime > po_mtime { + // Cached map file is considered up-to-date. + continue; + }; + } + + // Generate the map file. + + // Try to create new MO data and load it into `mo_data`. 
+ let output = Command::new("msgfmt") + .arg("--check-format") + .arg("--output-file=-") + .arg(&po_file_path) + .output() + .unwrap(); + let mo_data = output.stdout; + + // Extract map from MO data. + let language_localizations = parse_mo_file(&mo_data).unwrap(); + + // This file will contain the localization map for the current language. + let mut cached_map_file = File::create(&cached_map_path).unwrap(); + let mut single_language_localization_map = phf_codegen::Map::new(); + + // The values will be written into the source code as is, meaning escape sequences and + // double quotes in the data will be interpreted by the Rust compiler, which is undesirable. + // Converting them to raw strings prevents this. (As long as no input data contains `"###`.) + fn to_raw_str(s: &str) -> String { + assert!(!s.contains("\"###")); + format!("r###\"{s}\"###") + } + for (msgid, msgstr) in language_localizations { + single_language_localization_map.entry( + String::from_utf8(msgid.into()).unwrap(), + to_raw_str(&String::from_utf8(msgstr.into()).unwrap()), + ); + } + writeln!(&mut cached_map_file, "#[allow(non_upper_case_globals)]").unwrap(); + write!( + &mut cached_map_file, + "static {}: phf::Map<&'static str, &'static str> = {}", + &map_name, + single_language_localization_map.build() + ) + .unwrap(); + writeln!(&mut cached_map_file, ";").unwrap(); + } + } + } + + write!( + &mut localization_map_file, + "pub static CATALOGS: phf::Map<&str, &phf::Map<&str, &str>> = {}", + catalogs.build() + ) + .unwrap(); + writeln!(&mut localization_map_file, ";").unwrap(); +} diff --git a/crates/gettext-maps/src/lib.rs b/crates/gettext-maps/src/lib.rs new file mode 100644 index 000000000..65e1fc5dc --- /dev/null +++ b/crates/gettext-maps/src/lib.rs @@ -0,0 +1 @@ +include!(concat!(env!("OUT_DIR"), "/localization_maps.rs")); diff --git a/crates/gettext-mo-file-parser/Cargo.toml b/crates/gettext-mo-file-parser/Cargo.toml new file mode 100644 index 000000000..bcfe07cfc --- /dev/null +++ 
b/crates/gettext-mo-file-parser/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "fish-gettext-mo-file-parser" +edition.workspace = true +rust-version.workspace = true +version = "0.0.0" +repository.workspace = true + +[lints] +workspace = true diff --git a/crates/gettext-mo-file-parser/src/lib.rs b/crates/gettext-mo-file-parser/src/lib.rs new file mode 100644 index 000000000..5c0e4a133 --- /dev/null +++ b/crates/gettext-mo-file-parser/src/lib.rs @@ -0,0 +1,131 @@ +use std::collections::HashMap; + +const U32_SIZE: usize = std::mem::size_of::(); + +fn read_le_u32(bytes: &[u8]) -> u32 { + u32::from_le_bytes(bytes[..U32_SIZE].try_into().unwrap()) +} + +fn read_be_u32(bytes: &[u8]) -> u32 { + u32::from_be_bytes(bytes[..U32_SIZE].try_into().unwrap()) +} + +fn get_u32_reader_from_magic_number(magic_number: &[u8]) -> std::io::Result u32> { + match magic_number { + [0x95, 0x04, 0x12, 0xde] => Ok(read_be_u32), + [0xde, 0x12, 0x04, 0x95] => Ok(read_le_u32), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "First 4 bytes of MO file must correspond to magic number 0x950412de, either big or little endian.", + )), + } +} + +/// Returns an error if an unknown major revision is detected. +/// There are no relevant differences between supported revisions. +fn check_if_revision_is_supported(revision: u32) -> std::io::Result<()> { + // From the reference: + // A program seeing an unexpected major revision number should stop reading the MO file entirely; + // whereas an unexpected minor revision number means that the file can be read + // but will not reveal its full contents, + // when parsed by a program that supports only smaller minor revision numbers. + let major_revision = revision >> 16; + match major_revision { + 0 | 1 => { + // At time of writing, these are the only major revisions which exist. + // There is no documented difference and the GNU gettext code does not seem to + // differentiate between the two either. 
+ // All features we care about are supported in minor revision 0, + // so we do not need to care about the minor revision. + Ok(()) + } + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Major revision must be 0 or 1", + )), + } +} + +fn as_usize(value: u32) -> usize { + use std::mem::size_of; + const _: () = assert!(size_of::() <= size_of::()); + usize::try_from(value).unwrap() +} + +fn parse_strings( + file_content: &[u8], + num_strings: usize, + table_offset: usize, + read_u32: fn(&[u8]) -> u32, +) -> std::io::Result> { + let file_too_short_error = || { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "MO file is too short.", + )) + }; + if table_offset + num_strings * 2 * U32_SIZE > file_content.len() { + return file_too_short_error(); + } + let mut strings = Vec::with_capacity(num_strings); + let mut offset = table_offset; + let mut get_next_u32 = || { + let val = read_u32(&file_content[offset..]); + offset += U32_SIZE; + val + }; + for _ in 0..num_strings { + // not including NUL terminator + let string_length = as_usize(get_next_u32()); + let string_offset = as_usize(get_next_u32()); + let string_end = string_offset.checked_add(string_length).unwrap(); + if string_end > file_content.len() { + return file_too_short_error(); + } + // Contexts are stored by storing the concatenation of the context, a EOT byte, and the original string, instead of the original string. + // Contexts are not supported by this implementation. + // The format allows plural forms to appear behind singular forms, separated by a NUL byte, + // where `string_length` includes the length of both. + // This is not supported here. + // Do not include the NUL terminator in the slice. + strings.push(&file_content[string_offset..string_end]); + } + Ok(strings) +} + +/// Parse a MO file. 
+/// Format reference used: <https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html> +pub fn parse_mo_file(file_content: &[u8]) -> std::io::Result> { + if file_content.len() < 7 * U32_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "File too short to contain header.", + )); + } + // The first 4 bytes are a magic number, from which the endianness can be determined. + let read_u32 = get_u32_reader_from_magic_number(&file_content[0..U32_SIZE])?; + let mut offset = U32_SIZE; + let mut get_next_u32 = || { + let val = read_u32(&file_content[offset..]); + offset += U32_SIZE; + val + }; + let file_format_revision = get_next_u32(); + check_if_revision_is_supported(file_format_revision)?; + let num_strings = as_usize(get_next_u32()); + let original_strings_offset = as_usize(get_next_u32()); + let translation_strings_offset = as_usize(get_next_u32()); + let original_strings = + parse_strings(file_content, num_strings, original_strings_offset, read_u32)?; + let translated_strings = parse_strings( + file_content, + num_strings, + translation_strings_offset, + read_u32, + )?; + let mut translation_map = HashMap::with_capacity(num_strings); + for i in 0..num_strings { + translation_map.insert(original_strings[i], translated_strings[i]); + } + Ok(translation_map) +} diff --git a/debian/control b/debian/control index c9bc83738..60235292d 100644 --- a/debian/control +++ b/debian/control @@ -23,7 +23,7 @@ Architecture: any # for col and lock - bsdmainutils is required in Ubuntu focal Depends: bsdextrautils | bsdmainutils, file, -# for the gettext command +# for the msgfmt command gettext-base, # for nroff and preconv groff-base, diff --git a/doc_src/cmds/_.rst b/doc_src/cmds/_.rst index b5b1519eb..ad0809b21 100644 --- a/doc_src/cmds/_.rst +++ b/doc_src/cmds/_.rst @@ -15,12 +15,15 @@ Description ``_`` translates its arguments into the current language, if possible. -It is equivalent to ``gettext fish STRING``, meaning it can only be used to look up fish's own translations.
+This only works with messages which are translated as part of fish's own sources, so using it as part of your own fish scripts which are not upstreamed into the fish repo will not work unless the exact same message also exists upstream. -It requires fish to be built with gettext support. If that support is disabled, or there is no translation it will echo the argument back. +It requires fish to be built with gettext support. If that support is disabled or there is no translation it will echo the argument back. -The language depends on the current locale, set with :envvar:`LANG` and :envvar:`LC_MESSAGES`. +The language depends on the current locale, set with :envvar:`LANG`, :envvar:`LC_MESSAGES`, :envvar:`LC_ALL`, and :envvar:`LANGUAGE`. +These variables do not have to be exported for fish to use them, and fish's variable scopes are supported. +If other programs launched via fish should respect these locale variables they have to be exported to make them available outside of fish. +For :envvar:`LANGUAGE` you can use a list, or use colons to separate multiple languages. Options ------- @@ -30,7 +33,20 @@ Options Examples -------- -:: +Use German translations:: - > _ File + > set LANG de_DE.UTF-8 + > _ file Datei + +Specify a precedence of languages (only works with :envvar:`LANGUAGE`):: + + > set LANGUAGE pt de + > _ file # This message has a Portuguese translation. + arquivo + > _ "Invalid arguments" # This message does not have a Portuguese translation, but a German one. + Ungültige Argumente + > _ untranslatable # No translation in Portuguese, nor in German. + untranslatable + +Note that the specific examples may change if translations are added/modified. 
diff --git a/doc_src/language.rst b/doc_src/language.rst index 1acff6e06..73236d9b7 100644 --- a/doc_src/language.rst +++ b/doc_src/language.rst @@ -1519,7 +1519,7 @@ For more information on argparse, like how to handle option arguments, see :doc: PATH variables ^^^^^^^^^^^^^^ -Path variables are a special kind of variable used to support colon-delimited path lists including :envvar:`PATH`, :envvar:`CDPATH`, :envvar:`MANPATH`, :envvar:`PYTHONPATH`, etc. All variables that end in "PATH" (case-sensitive) become PATH variables by default. +Path variables are a special kind of variable used to support colon-delimited path lists including :envvar:`PATH`, :envvar:`CDPATH`, :envvar:`MANPATH`, :envvar:`PYTHONPATH`, :envvar:`LANGUAGE` (for :doc:`localization <cmds/_>`) etc. All variables that end in "PATH" (case-sensitive) become PATH variables by default. PATH variables act as normal lists, except they are implicitly joined and split on colons. diff --git a/src/bin/fish.rs b/src/bin/fish.rs index 0f11550aa..c324ae714 100644 --- a/src/bin/fish.rs +++ b/src/bin/fish.rs @@ -418,6 +418,10 @@ fn throwing_main() -> i32 { .collect(); let config_path_detection = init_locale_dir(&args[0]); + // Initialize gettext translation. + #[cfg(feature = "localize-messages")] + fish::wutil::gettext::initialize_gettext(); + // Enable debug categories set in FISH_DEBUG. // This is in *addition* to the ones given via --debug.
if let Some(debug_categories) = env::var_os("FISH_DEBUG") { diff --git a/src/builtins/status.rs b/src/builtins/status.rs index 047fe4d46..3abc0876c 100644 --- a/src/builtins/status.rs +++ b/src/builtins/status.rs @@ -572,10 +572,10 @@ pub fn status(parser: &Parser, streams: &mut IoStreams, args: &mut [&wstr]) -> B streams.out.appendln(profile); streams.out.append(L!("Features: ")); let features: &[&str] = &[ - #[cfg(gettext)] - "gettext", #[cfg(feature = "embed-data")] "embed-data", + #[cfg(feature = "localize-messages")] + "localize-messages", #[cfg(target_feature = "crt-static")] "crt-static", ]; diff --git a/src/env/environment_impl.rs b/src/env/environment_impl.rs index 747ba718f..05f769257 100644 --- a/src/env/environment_impl.rs +++ b/src/env/environment_impl.rs @@ -55,7 +55,7 @@ pub fn colon_split>(val: &[T]) -> Vec { /// Return true if a variable should become a path variable by default. See #436. fn variable_should_auto_pathvar(name: &wstr) -> bool { - name.ends_with("PATH") + name.ends_with("PATH") || name == "LANGUAGE" } /// We cache our null-terminated export list. However an exported variable may change for lots of diff --git a/src/env_dispatch.rs b/src/env_dispatch.rs index e5227f59e..cbfbc6a85 100644 --- a/src/env_dispatch.rs +++ b/src/env_dispatch.rs @@ -603,18 +603,8 @@ fn init_locale(vars: &EnvStack) { new_msg_locale.to_string_lossy() ); - #[cfg(gettext)] - { - if old_msg_locale.as_c_str() != new_msg_locale { - // Make change known to GNU gettext. 
- extern "C" { - static mut _nl_msg_cat_cntr: libc::c_int; - } - unsafe { - _nl_msg_cat_cntr += 1; - } - } - } + #[cfg(feature = "localize-messages")] + crate::wutil::gettext::update_locale_from_env(vars); } pub fn use_posix_spawn() -> bool { diff --git a/src/wutil/gettext.rs b/src/wutil/gettext.rs index 10ace96cc..1f6ca64d1 100644 --- a/src/wutil/gettext.rs +++ b/src/wutil/gettext.rs @@ -1,76 +1,245 @@ -use std::collections::HashMap; use std::sync::Mutex; -use crate::common::{charptr2wcstring, wcs2zstring}; +#[cfg(feature = "localize-messages")] +use crate::env::EnvStack; #[cfg(test)] use crate::tests::prelude::*; use crate::wchar::prelude::*; -use errno::{errno, set_errno}; -use once_cell::sync::{Lazy, OnceCell}; +use once_cell::sync::Lazy; -#[cfg(gettext)] -mod internal { - use libc::c_char; - use std::ffi::CStr; - extern "C" { - fn gettext(msgid: *const c_char) -> *mut c_char; - #[cfg(not(feature = "embed-data"))] - fn bindtextdomain(domainname: *const c_char, dirname: *const c_char) -> *mut c_char; - #[cfg(not(feature = "embed-data"))] - fn textdomain(domainname: *const c_char) -> *mut c_char; +#[cfg(feature = "localize-messages")] +mod gettext_impl { + use std::sync::Mutex; + + use once_cell::sync::Lazy; + + pub(super) use fish_gettext_maps::CATALOGS; + type Catalog = &'static phf::Map<&'static str, &'static str>; + + use crate::env::{EnvStack, Environment}; + + /// Tries to find a catalog for `language`. + /// `language` must be an ISO 639 language code, optionally followed by an underscore and an ISO + /// 3166 country/territory code. + /// Always prefers the catalog with the exact same name as `language` if it exists. + /// If a country code is present (`ll_CC`), only the catalog named `ll` will be considered as a fallback. + /// If no country code is present (`ll`), an arbitrary catalog whose name starts with `ll_` + /// will be used as a fallback, if one exists. + /// If there is a catalog for the language, then `Some(catalog)` will be returned. 
+ /// `None` will be returned if no variant of the language has localizations. + fn find_existing_catalog(language: &str) -> Option { + // Try the exact name first. + // If there already is a corresponding catalog, return it. + if let Some(catalog) = CATALOGS.get(language) { + return Some(catalog); + } + let language_without_country_code = + language.split_once('_').map_or(language, |(ll, _cc)| ll); + if language == language_without_country_code { + // We have `ll` format. In this case, try to find any catalog whose name starts with `ll_`. + // Note that it is important to include the underscore in the pattern, otherwise `ll` might + // fall back to `llx_CC`, where `llx` is a 3-letter language identifier. + let ll_prefix = format!("{language}_"); + for (&lang_name, &catalog) in CATALOGS.entries() { + if lang_name.starts_with(&ll_prefix) { + return Some(catalog); + } + } + // No localizations for the language (and any regional variations) exist. + None + } else { + // If `language` contained a country code, we only try to fall back to a catalog + // without a country code. + CATALOGS.get(language_without_country_code).copied() + } } - pub fn fish_gettext(msgid: &CStr) -> *const c_char { - unsafe { gettext(msgid.as_ptr()) } + + /// The precedence list of user-preferred languages, obtained from the relevant environment + /// variables. + /// This should be updated when the relevant variables change. + pub(super) static LANGUAGE_PRECEDENCE: Lazy>> = + Lazy::new(|| Mutex::new(Vec::new())); + + /// Four environment variables can be used to select languages. + /// A detailed description is available at + /// <https://www.gnu.org/software/gettext/manual/html_node/Setting-the-POSIX-Locale.html> + /// Our implementation does not replicate the behavior exactly. + /// See the following description. + /// + /// There are three variables which can be used for setting the locale for messages: + /// 1. `LC_ALL` + /// 2. `LC_MESSAGES` + /// 3. `LANG` + /// The value of the first one set to a non-zero value will be considered.
+ /// If it is set to the `C` locale (we consider any value starting with `C` as the `C` locale), + /// localization will be disabled. + /// Otherwise, the variable `LANGUAGE` is checked. If it is non-empty, it is considered a + /// colon-separated list of languages. Languages are listed with descending priority, meaning + /// we will localize each message into the first language with a localization available. + /// Each language is specified by a 2 or 3 letter ISO 639 language code, optionally followed by + /// an underscore and an ISO 3166 country/territory code. If the second part is omitted, some + /// variant of the language will be used if localizations exist for one. We make no guarantees + /// about which variant that will be. + /// In addition to the colon-separated format, using a list with one language per element is + /// also supported. + /// + /// Returns the (possibly empty) preference list of languages. + fn get_language_preferences_from_env(vars: &EnvStack) -> Vec { + use crate::wchar::L; + + fn normalize_locale_name(locale: &str) -> String { + // Strips off the encoding and modifier parts. + let mut normalized_name = String::new(); + // Strip off encoding and modifier. (We always expect UTF-8 and don't support modifiers.) + for c in locale.chars() { + if c.is_alphabetic() || c == '_' { + normalized_name.push(c); + } else { + break; + } + } + // At this point, the normalized_name should have the shape `ll` or `ll_CC`. + normalized_name + } + + fn check_language_var(vars: &EnvStack) -> Option> { + let langs = vars.get(L!("LANGUAGE"))?; + let langs = langs.as_list(); + let filtered_langs: Vec = langs + .iter() + .filter(|lang| !lang.is_empty()) + .map(|lang| normalize_locale_name(&lang.to_string())) + .collect(); + if filtered_langs.is_empty() { + return None; + } + Some(filtered_langs) + } + + // Locale value is determined by the first of these three variables set to a non-zero + // value. 
+ if let Some(locale) = vars + .get(L!("LC_ALL")) + .or_else(|| vars.get(L!("LC_MESSAGES")).or_else(|| vars.get(L!("LANG")))) + { + let locale = locale.as_string().to_string(); + if locale.starts_with('C') { + // Do not localize in C locale. + return vec![]; + } + // `LANGUAGE` has higher precedence than the locale value. + if let Some(precedence_list) = check_language_var(vars) { + return precedence_list; + } + // Use the locale value if `LANGUAGE` is not set. + vec![normalize_locale_name(&locale)] + } else if let Some(precedence_list) = check_language_var(vars) { + // Use the `LANGUAGE` value if locale is not set. + return precedence_list; + } else { + // None of the relevant variables are set, so we will not localize. + vec![] + } } - #[cfg(not(feature = "embed-data"))] - pub fn fish_bindtextdomain(domainname: &CStr, dirname: &CStr) -> *mut c_char { - unsafe { bindtextdomain(domainname.as_ptr(), dirname.as_ptr()) } - } - #[cfg(not(feature = "embed-data"))] - pub fn fish_textdomain(domainname: &CStr) -> *mut c_char { - unsafe { textdomain(domainname.as_ptr()) } - } -} -#[cfg(not(gettext))] -mod internal { - use libc::c_char; - use std::ffi::CStr; - pub fn fish_gettext(msgid: &CStr) -> *const c_char { - msgid.as_ptr() - } - #[cfg(not(feature = "embed-data"))] - pub fn fish_bindtextdomain(_domainname: &CStr, _dirname: &CStr) -> *mut c_char { - std::ptr::null_mut() - } - #[cfg(not(feature = "embed-data"))] - pub fn fish_textdomain(_domainname: &CStr) -> *mut c_char { - std::ptr::null_mut() + + /// Implementation of the function with the same name in super. + pub(super) fn update_locale_from_env(vars: &EnvStack) { + let mut language_precedence = LANGUAGE_PRECEDENCE.lock().unwrap(); + *language_precedence = get_language_preferences_from_env(vars) + .iter() + .filter_map(|lang| find_existing_catalog(lang)) + .collect(); } } -use internal::*; +/// Call this when one of `LANGUAGE`, `LC_ALL`, `LC_MESSAGES`, `LANG` changes. 
+/// Updates internal state such that the correct localizations will be used in subsequent +/// localization requests. +#[cfg(feature = "localize-messages")] +pub fn update_locale_from_env(vars: &EnvStack) { + gettext_impl::update_locale_from_env(vars); +} -// Really init wgettext. -fn wgettext_really_init() { - #[cfg(not(feature = "embed-data"))] +/// This function only exists to provide a way for initializing gettext before an [`EnvStack`] is +/// available. Without this, early error messages cannot be localized. +#[cfg(feature = "localize-messages")] +pub fn initialize_gettext() { + use crate::common::str2wcstring; + use crate::env::EnvMode; + use std::os::unix::ffi::OsStrExt; + + let locale_vars = EnvStack::new(); + macro_rules! from_env { + ($var_name:literal) => { + if let Some(var) = std::env::var_os($var_name) { + locale_vars.set_one(L!($var_name), EnvMode::GLOBAL, str2wcstring(var.as_bytes())); + } + }; + } + from_env!("LANGUAGE"); + from_env!("LC_ALL"); + from_env!("LC_MESSAGES"); + from_env!("LANG"); + + gettext_impl::update_locale_from_env(&locale_vars); +} + +/// Use this function to localize a message. +/// The [`MaybeStatic`] wrapper type allows avoiding allocating and leaking a new [`wstr`] when no +/// localization is found and the input is returned, but as a static reference. 
+fn gettext(message: MaybeStatic) -> &'static wstr { + use std::collections::HashMap; + + #[cfg(not(feature = "localize-messages"))] + type NarrowMessage = (); + #[cfg(feature = "localize-messages")] + type NarrowMessage = &'static str; + + let message_wstr = match message { + MaybeStatic::Static(s) => s, + MaybeStatic::Local(s) => s, + }; + static MESSAGE_TO_NARROW: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::default())); + let mut message_to_narrow = MESSAGE_TO_NARROW.lock().unwrap(); + if !message_to_narrow.contains_key(message_wstr) { + let message_wstr: &'static wstr = match message { + MaybeStatic::Static(s) => s, + MaybeStatic::Local(l) => wstr::from_char_slice(Box::leak(l.as_char_slice().into())), + }; + #[cfg(not(feature = "localize-messages"))] + let message_str = (); + #[cfg(feature = "localize-messages")] + let message_str = Box::leak(message_wstr.to_string().into_boxed_str()); + message_to_narrow.insert(message_wstr, message_str); + } + let (message_static_wstr, message_str) = message_to_narrow.get_key_value(message_wstr).unwrap(); + + #[cfg(not(feature = "localize-messages"))] + let () = message_str; + #[cfg(feature = "localize-messages")] { - use crate::common::PACKAGE_NAME; - use crate::env::config_paths::LOCALE_DIR; - use std::ffi::CString; + let language_precedence = gettext_impl::LANGUAGE_PRECEDENCE.lock().unwrap(); - let package_name = CString::new(PACKAGE_NAME).unwrap(); - let localedir = LOCALE_DIR.load(); - #[cfg(not(test))] - assert!(!localedir.is_empty()); - let localedir = CString::new(localedir).unwrap(); - fish_bindtextdomain(&package_name, &localedir); - fish_textdomain(&package_name); + // Use the localization from the highest-precedence language that has one available. 
+ + for catalog in language_precedence.iter() { + if let Some(localization_str) = catalog.get(message_str) { + static LOCALIZATION_TO_WIDE: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::default())); + let mut locatizations_to_wide = LOCALIZATION_TO_WIDE.lock().unwrap(); + if !locatizations_to_wide.contains_key(localization_str) { + let localization_wstr = + Box::leak(WString::from_str(localization_str).into_boxed_utfstr()); + locatizations_to_wide.insert(localization_str, localization_wstr); + } + return locatizations_to_wide.get(localization_str).unwrap(); + } + } } -} -fn wgettext_init_if_necessary() { - static INIT: OnceCell<()> = OnceCell::new(); - INIT.get_or_init(wgettext_really_init); + // No localization found. + message_static_wstr } /// A type that can be either a static or local string. @@ -79,53 +248,6 @@ enum MaybeStatic<'a> { Local(&'a wstr), } -/// Implementation detail for wgettext!. -/// Wide character wrapper around the gettext function. For historic reasons, unlike the real -/// gettext function, wgettext takes care of setting the correct domain, etc. using the textdomain -/// and bindtextdomain functions. This should probably be moved out of wgettext, so that wgettext -/// will be nothing more than a wrapper around gettext, like all other functions in this file. -fn wgettext_impl(text: MaybeStatic) -> &'static wstr { - // Preserve errno across this since this is often used in printing error messages. - let err = errno(); - - wgettext_init_if_necessary(); - - let key = match text { - MaybeStatic::Static(s) => s, - MaybeStatic::Local(s) => s, - }; - - debug_assert!(!key.contains('\0'), "key should not contain NUL"); - - // Note that because entries are immortal, we simply leak non-static keys, and all values. 
- static WGETTEXT_MAP: Lazy>> = - Lazy::new(|| Mutex::new(HashMap::new())); - let mut wmap = WGETTEXT_MAP.lock().unwrap(); - let res = match wmap.get(key) { - Some(v) => *v, - None => { - let mbs_in = wcs2zstring(key); - let out = fish_gettext(&mbs_in); - let out = charptr2wcstring(out); - // Leak the value into the heap. - let value: &'static wstr = Box::leak(out.into_boxed_utfstr()); - - // Get a static key, perhaps leaking it into the heap as well. - let key: &'static wstr = match text { - MaybeStatic::Static(s) => s, - MaybeStatic::Local(s) => wstr::from_char_slice(Box::leak(s.as_char_slice().into())), - }; - - wmap.insert(key, value); - value - } - }; - - set_errno(err); - - res -} - /// A string which can be localized. /// The wrapped string itself is the original, unlocalized version. /// Use [`LocalizableString::localize`] to obtain the localized version. @@ -159,14 +281,14 @@ pub fn localize(&self) -> &'static wstr { if s.is_empty() { L!("") } else { - wgettext_impl(MaybeStatic::Static(s)) + gettext(MaybeStatic::Static(s)) } } Self::Owned(s) => { if s.is_empty() { L!("") } else { - wgettext_impl(MaybeStatic::Local(s)) + gettext(MaybeStatic::Local(s)) } } }