mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-07 07:11:14 -03:00
This greatly reduces the number of changes necessary to the PO files when the Rust/fish source files are updated. (Changes to the line numbers can be applied automatically, but this adds a lot of noise to the git history.) Due to the way we have been extracting Rust strings, differentiation between the same source string in different contexts has not been possible regardless of the change. It seems that duplicate msgid entries are not permitted in PO files, so since we do not use context to distinguish the strings we extract, there is no way to have context-/location-dependent translations, so we might as well reduce the git noise by eliminating line numbers. Including source locations helps translators with understanding context. Because we do not distinguish between contexts for a given source string, this is of limited utility, but keeping file names at least makes it possible to open the relevant files and search them for the string. This might also be helpful to identify translations which do not make sense in all contexts in which they are used. (Although without adding context support, the only remedy would be to remove the translation altogether, as far as I can tell.) For extraction from Rust, additional issues are fixed: - File name extraction from the grep results now works properly. Previously, lines not starting with whitespace resulted in missing or corrupted matches. (Missing if the source line contains no colon followed by whitespace; corrupted if it does, because the match then included the part of the line in front of the colon instead of just the location.) - Previously, only a single source location per string was supported (`head -n1`). The new approach using sed does not have this limitation.
86 lines
4.1 KiB
Fish
Executable File
86 lines
4.1 KiB
Fish
Executable File
#!/usr/bin/env fish
|
|
#
|
|
# Tool to generate messages.pot from the expanded Rust sources and the fish scripts under share/.
|
|
|
|
# Create a temporary directory for these operations. OS X `mktemp` is somewhat restricted, so this
|
|
# block works around that by passing an explicit template - based on share/functions/funced.fish.
|
|
# Fall back to /tmp when TMPDIR is not set.
set -q TMPDIR
|
|
or set -l TMPDIR /tmp
|
|
# Note: $tmpdir is not declared with -l; extract_fish_script_messages reads it further down.
set tmpdir (mktemp -d $TMPDIR/fish.XXXXXX)
|
|
# Abort if the temporary directory could not be created.
or exit 1
|
|
|
|
# This is a gigantic crime.
|
|
# xgettext still does not support rust *at all*, so we use cargo-expand to get all our wgettext invocations.
|
|
# $expanded holds the macro-expanded output of the library and of each of the three binaries.
set -l expanded (cargo expand --lib; for f in fish fish_indent fish_key_reader; cargo expand --bin $f; end)
|
|
|
|
# Extract the string literal of every gettext call: keep each wgettext_static_str line plus the line
# after it, pick the line carrying the `str =` assignment, capture the text between the double
# quotes, and drop empty strings as well as strings consisting solely of "%ls".
|
|
set -l strs (printf '%s\n' $expanded | grep -A1 wgettext_static_str |
|
|
grep 'widestring::internals::core::primitive::str =' |
|
|
string match -rg '"(.*)"' | string match -rv '^%ls$|^$' |
|
|
# escaping difference between gettext and cargo-expand: single-quotes (\' becomes ')
|
|
string replace -a "\'" "'" | sort -u)
|
|
|
|
# Extract any string constants of the form `const NAME: &str = "..."` and append them to $strs,
# skipping lines that mention BUILD_VERSION: or PACKAGE_NAME.
|
|
set -a strs (string match -rv 'BUILD_VERSION:|PACKAGE_NAME' -- $expanded |
|
|
string match -rg 'const [A-Z_]*: &str = "(.*)"' | string replace -a "\'" "'")
|
|
|
|
# We construct messages.pot ourselves instead of forcing this into msgmerge or whatever.
|
|
# The escaping so far works out okay.
|
|
# The output of the whole loop is redirected into messages.pot (overwriting it).
for str in $strs
|
|
# grep -P needed for string escape to be compatible (PCRE-style),
|
|
# -H gives the filename.
|
|
# If you want to run this on non-GNU grep: Don't.
|
|
# The sed command extracts just the filename from the matches grep finds,
|
|
# and prepends the '#: ' prefix, marking the line as a source reference.
|
|
# sort -u just gets rid of duplicates.
|
|
grep -PH -r -- \"(string escape --style=regex -- $str)\" src/ |
|
|
sed -E 's/^([^:]*):.*$/#: \1/' | sort -u
|
|
# Emit the entry itself: the source string as msgid, with an empty msgstr.
echo "msgid \"$str\""
|
|
echo 'msgstr ""'
|
|
end >messages.pot
|
|
|
|
# Write a stripped-down copy of every fish script under share/ into $tmpdir/$name/, keeping only
# the messages matched by $regex (as `N_ "message"` lines) so that xgettext can pick them up.
# Arguments:
#   name  - subdirectory of $tmpdir that receives the generated files
#   regex - pattern applied to each line; its first capture group must be the quoted message
# Reads $tmpdir, which is set outside of this function.
function extract_fish_script_messages --argument-names name regex;
|
|
mkdir -p $tmpdir/$name/share/completions $tmpdir/$name/share/functions
|
|
for f in share/config.fish share/completions/*.fish share/functions/*.fish
|
|
# Extract messages from fish script.
|
|
# This is done by creating a file which has the message strings in the same lines as the
|
|
# original, in the form:
|
|
# N_ "message"
|
|
# All other lines will be empty.
|
|
# Multiple messages on a single line are not supported.
|
|
|
|
# Start by transforming the matching lines according to $regex and adding 'N_ ' as
|
|
# a prefix.
|
|
# Then, replace all lines without this prefix by empty lines.
|
|
# These lines are kept to ensure that the correct line number makes it into the pot file.
# NOTE(review): xgettext is invoked with --add-location=file further down, so line numbers may
# no longer reach the pot file - confirm whether this rationale still applies.
|
|
# Replacing irrelevant lines by empty lines must happen before unescaping, because otherwise
|
|
# multi-line commands (lines ending on \) would get merged, changing the line count.
|
|
# Double quotes are escaped, and finally unescaped double quotes are added around the string.
|
|
# The result will be a file consisting of empty lines and potentially some lines prefixed with
|
|
# 'N_ ', followed by a double-quoted string.
|
|
string replace --regex $regex 'N_ $1' <$f |
|
|
sed '/^N_ / !{s/^.*$//}' |
|
|
string unescape |
|
|
string replace --all '"' '\\"' |
|
|
string replace --regex '^N_ (.*)$' 'N_ "$1"' \
|
|
|
>$tmpdir/$name/$f
|
|
end
|
|
end
|
|
|
|
# This regex handles descriptions for `complete` and `function` statements. These messages are not
|
|
# particularly important to translate. Hence the "implicit" label.
# The first capture group is the quoted string following -d/--description, including its quotes.
|
|
set -l implicit_regex '(?:^| +)(?:complete|function).*? (?:-d|--description) (([\'"]).+?(?<!\\\\)\\2).*'
|
|
extract_fish_script_messages implicit $implicit_regex
|
|
|
|
# This regex handles explicit requests to translate a message, i.e. a quoted string passed to `_`
# inside parentheses. These are more important to translate
|
|
# than messages which should be implicitly translated.
|
|
set -l explicit_regex '.*\( *_ (([\'"]).+?(?<!\\\\)\\2) *\).*'
|
|
extract_fish_script_messages explicit $explicit_regex
|
|
|
|
# Run xgettext over the generated files of both the explicit and the implicit set, writing to the
# messages.pot produced above. Per the xgettext documentation, -j joins with the existing file,
# `-k -kN_` restricts the keywords to N_, and --add-location=file records file names only.
xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap --add-location=file -o messages.pot $tmpdir/{ex,im}plicit/share/*/*.fish
|
|
|
|
# Remove the tmpdir from the location to avoid churn
# NOTE(review): `sed -i` without a suffix argument is GNU sed syntax; BSD sed differs.
|
|
sed -i 's_^#: /.*/share/_#: share/_' messages.pot
|
|
|
|
# Clean up the temporary directory created at the top of the script.
rm -r $tmpdir
|