Files
fish-shell/build_tools/fish_xgettext.fish
Daniel Rainer 3e2336043a gettext-extract: fix race condition
Multiple gettext-extraction proc macro instances can run at the same
time due to Rust's compilation model. In the previous implementation,
where every instance appended to the same file, this has resulted in
corruption of the file. This was reported and discussed in
https://github.com/fish-shell/fish-shell/pull/11928#discussion_r2488047964
for the equivalent macro for Fluent message ID extraction. The
underlying problem is the same.

The best way we have found to avoid such race condition is to write each
entry to a new file, and concatenate them together before using them.
It's not a beautiful approach, but it should be fairly robust and
portable.

Closes #12125
2025-12-10 16:15:07 +01:00

148 lines
5.7 KiB
Fish
Executable File

#!/usr/bin/env fish
#
# Tool to generate gettext messages template file.
# Writes to stdout.
# Intended to be called from `update_translations.fish`.
argparse use-existing-template= -- $argv
or exit $status
begin
# Write header. This is required by msguniq.
# Note that this results in the file being overwritten.
# This is desired behavior, to get rid of the results of prior invocations
# of this script.
begin
echo 'msgid ""'
echo 'msgstr ""'
echo '"Content-Type: text/plain; charset=UTF-8\n"'
echo ""
end
set -g workspace_root (path resolve (status dirname)/..)
set -l rust_extraction_dir
if set -l --query _flag_use_existing_template
set rust_extraction_dir $_flag_use_existing_template
else
set rust_extraction_dir (mktemp -d)
# We need to build to ensure that the proc macro for extracting strings runs.
FISH_GETTEXT_EXTRACTION_DIR=$rust_extraction_dir cargo check --features=gettext-extract
or exit 1
end
function mark_section
set -l section_name $argv[1]
echo 'msgid "fish-section-'$section_name'"'
echo 'msgstr ""'
echo ''
end
mark_section tier1-from-rust
# Get rid of duplicates and sort.
find $rust_extraction_dir -type f -exec cat {} + | msguniq --no-wrap --sort-output
or exit 1
if not set -l --query _flag_use_existing_template
rm -r $rust_extraction_dir
end
function extract_fish_script_messages_impl
set -l regex $argv[1]
set -e argv[1]
# Using xgettext causes more trouble than it helps.
# This is due to handling of escaping in fish differing from formats xgettext understands
# (e.g. POSIX shell strings).
# We work around this issue by manually writing the file content.
# Steps:
# 1. We extract strings to be translated from the relevant files and drop the rest. This step
# depends on the regex matching the entire line, and the first capture group matching the
# string.
# 2. We unescape. This gets rid of some escaping necessary in fish strings.
# 3. The resulting strings are sorted alphabetically. This step is optional. Not sorting would
# result in strings from the same file appearing together. Removing duplicates is also
# optional, since msguniq takes care of that later on as well.
# 4. Single backslashes are replaced by double backslashes. This results in the backslashes
# being interpreted as literal backslashes by gettext tooling.
# 5. Double quotes are escaped, such that they are not interpreted as the start or end of
# a msgid.
# 6. We transform the string into the format expected in a PO file.
cat $argv |
string replace --filter --regex $regex '$1' |
string unescape |
sort -u |
sed -E -e 's_\\\\_\\\\\\\\_g' -e 's_"_\\\\"_g' -e 's_^(.*)$_msgid "\1"\nmsgstr ""\n_'
end
function extract_fish_script_messages
set -l tier $argv[1]
set -e argv[1]
if not set -q argv[1]
return
end
# This regex handles explicit requests to translate a message. These are more important to translate
# than messages which should be implicitly translated.
set -l explicit_regex '.*\( *_ (([\'"]).+?(?<!\\\\)\\2) *\).*'
mark_section "$tier-from-script-explicitly-added"
extract_fish_script_messages_impl $explicit_regex $argv
# This regex handles descriptions for `complete` and `function` statements. These messages are not
# particularly important to translate. Hence the "implicit" label.
set -l implicit_regex '^(?:\s|and |or )*(?:complete|function).*? (?:-d|--description) (([\'"]).+?(?<!\\\\)\\2).*'
mark_section "$tier-from-script-implicitly-added"
extract_fish_script_messages_impl $implicit_regex $argv
end
set -g share_dir $workspace_root/share
set -l tier1 $share_dir/config.fish
set -l tier2
set -l tier3
for file in $share_dir/completions/*.fish $share_dir/functions/*.fish
# set -l tier (string match -r '^# localization: .*' <$file)
set -l tier (string replace -rf -m1 \
'^# localization: (.*)$' '$1' <$file)
if set -q tier[1]
switch "$tier"
case tier1 tier2 tier3
set -a $tier $file
case 'skip*'
case '*'
echo >&2 "$file:1 unexpected localization tier: $tier"
exit 1
end
continue
end
set -l dirname (path basename (path dirname $file))
set -l command_name (path basename --no-extension $file)
if test $dirname = functions &&
string match -q -- 'fish_*' $command_name
set -a tier1 $file
continue
end
if test $dirname != completions
echo >&2 "$file:1 missing localization tier for function file"
exit 1
end
if test -e $workspace_root/doc_src/cmds/$command_name.rst
set -a tier1 $file
else
set -a tier3 $file
end
end
extract_fish_script_messages tier1 $tier1
extract_fish_script_messages tier2 $tier2
extract_fish_script_messages tier3 $tier3
end |
# At this point, all extracted strings have been written to stdout,
# starting with the ones taken from the Rust sources,
# followed by strings explicitly marked for translation in fish scripts,
# and finally the strings from fish scripts which get translated implicitly.
# Because we do not eliminate duplicates across these categories,
# we do it here, since other gettext tools expect no duplicates.
msguniq --no-wrap