From d5e80d43d91225a28519af824415b8daf70e6a61 Mon Sep 17 00:00:00 2001 From: Daniel Rainer Date: Sat, 3 May 2025 15:31:23 +0200 Subject: [PATCH 1/4] Extract function for gettext extraction Extracting explicit and implicit messages works essentially the same way, which is also reflected in the code being identical, except for the regex. Extract the duplicated code into a function. --- build_tools/fish_xgettext.fish | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/build_tools/fish_xgettext.fish b/build_tools/fish_xgettext.fish index 771f25647..a3e728226 100755 --- a/build_tools/fish_xgettext.fish +++ b/build_tools/fish_xgettext.fish @@ -6,7 +6,7 @@ # works around that - based on share/functions/funced.fish. set -q TMPDIR or set -l TMPDIR /tmp -set -l tmpdir (mktemp -d $TMPDIR/fish.XXXXXX) +set tmpdir (mktemp -d $TMPDIR/fish.XXXXXX) or exit 1 # This is a gigantic crime. @@ -36,29 +36,23 @@ for str in $strs echo 'msgstr ""' end >messages.pot +function extract_fish_script_messages --argument-names name regex; + mkdir -p $tmpdir/$name/share/completions $tmpdir/$name/share/functions + for f in share/config.fish share/completions/*.fish share/functions/*.fish + string replace --filter --regex $regex '$1' <$f | string unescape \ + | string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/$name/$f + end +end + # This regex handles descriptions for `complete` and `function` statements. These messages are not # particularly important to translate. Hence the "implicit" label. set -l implicit_regex '(?:^| +)(?:complete|function).*? (?:-d|--description) (([\'"]).+?(?$tmpdir/explicit/$f - - # Handle `complete` / `function` description messages. The `| fish` is subtle. It basically - # avoids the need to use `source` with a command substitution that could affect the current - # shell. 
- string replace --filter --regex $implicit_regex '$1' <$f | string unescape \ - | string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/implicit/$f -end +extract_fish_script_messages explicit $explicit_regex xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/{ex,im}plicit/share/*/*.fish From d31dc9ffd8f33da23f6fd367497c23594300b10f Mon Sep 17 00:00:00 2001 From: Daniel Rainer Date: Fri, 18 Apr 2025 02:42:06 +0200 Subject: [PATCH 2/4] Fix fish script translation file generation The previous version generates files which do not preserve the line number from the original fish script file, resulting in translation not working. The new approach is quite ugly, and might have some issues, but at least it seems to work in some cases. --- build_tools/fish_xgettext.fish | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/build_tools/fish_xgettext.fish b/build_tools/fish_xgettext.fish index a3e728226..bcaf355d5 100755 --- a/build_tools/fish_xgettext.fish +++ b/build_tools/fish_xgettext.fish @@ -39,8 +39,28 @@ end >messages.pot function extract_fish_script_messages --argument-names name regex; mkdir -p $tmpdir/$name/share/completions $tmpdir/$name/share/functions for f in share/config.fish share/completions/*.fish share/functions/*.fish - string replace --filter --regex $regex '$1' <$f | string unescape \ - | string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/$name/$f + # Extract messages from fish script. + # This is done by creating a file which has the message strings in the same lines as the + # original, in the form: + # N_ "message" + # All other lines will be empty. + # Multiple messages on a single line are not supported. + + # Start by transforming the matching lines according to $regex and adding 'N_ ' as + # a prefix. + # Then, replace all lines without this prefix by empty lines. 
+ # These lines are kept to ensure that the correct line number makes it into the pot file. + # Replacing irrelevant lines by empty lines must happen before unescaping, because otherwise + # multi-line commands (lines ending on \) would get merged, changing the line count. + # Double quotes are escaped, and finally unescaped double quotes are added around the string. + # The result will be a file consisting of empty lines and potentially some lines prefixed with + # 'N_ ', followed by a double-quoted string. + string replace --regex $regex 'N_ $1' <$f | + sed '/^N_ / !{s/^.*$//}' | + string unescape | + string replace --all '"' '\\"' | + string replace --regex '^N_ (.*)$' 'N_ "$1"' \ + >$tmpdir/$name/$f end end From dd5864ce136b07593b3d5898e14308f0f711c9c3 Mon Sep 17 00:00:00 2001 From: Daniel Rainer Date: Fri, 18 Apr 2025 16:59:08 +0200 Subject: [PATCH 3/4] Add quotes around gettext string This should prevent occurrences of the search string from being found in other locations (e.g. in a comment). The whole approach of string extraction from Rust sources is sketchy, but this at least prevents producing garbage when the content of a string appears somewhere else unquoted. --- build_tools/fish_xgettext.fish | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/fish_xgettext.fish b/build_tools/fish_xgettext.fish index bcaf355d5..b559b4a63 100755 --- a/build_tools/fish_xgettext.fish +++ b/build_tools/fish_xgettext.fish @@ -30,7 +30,7 @@ for str in $strs # grep -P needed for string escape to be compatible (PCRE-style), # -H gives the filename, -n the line number. # If you want to run this on non-GNU grep: Don't. 
- echo "#:" (grep -PHn -r -- (string escape --style=regex -- $str) src/ | + echo "#:" (grep -PHn -r -- \"(string escape --style=regex -- $str)\" src/ | head -n1 | string replace -r ':\s.*' '') echo "msgid \"$str\"" echo 'msgstr ""' From af6c3eb69ffe66f62d9fac73bd12e374172e2c6b Mon Sep 17 00:00:00 2001 From: Daniel Rainer Date: Fri, 18 Apr 2025 17:08:27 +0200 Subject: [PATCH 4/4] Ensure that strings do not get wrapped in po files --- CONTRIBUTING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a479770f9..64570c411 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -304,7 +304,7 @@ To update a translation: ``build_tools/fish_xgettext.fish`` from the source tree * update the existing translation by running - ``msgmerge --update --no-fuzzy-matching po/LANG.po messages.pot`` + ``msgmerge --update --no-fuzzy-matching --no-wrap po/LANG.po messages.pot`` The ``--no-fuzzy-matching`` is important as we have had terrible experiences with gettext's "fuzzy" translations in the past.