gettext: support non-ASCII msgids for Rust

The `msguniq` call for deduplicating the msgids originating from Rust
previously did not get a header entry (empty msgid with msgstr
containing metadata). This works fine as long as all msgids are
ASCII-only. But when a non-ASCII character appears in a msgid, `msguniq`
errors out without a header specifying the encoding. To resolve this,
add the header to the input of this `msguniq` invocation and then remove
the header again using sed to prevent duplicating it for the outer
msguniq call at the end of the file.

Closes #12491
This commit is contained in:
Daniel Rainer
2026-02-28 23:14:21 +01:00
committed by Johannes Altmanninger
parent fcdcae72c5
commit 29160a1592

View File

@@ -12,12 +12,8 @@ begin
# Note that this results in the file being overwritten.
# This is desired behavior, to get rid of the results of prior invocations
# of this script.
begin
echo 'msgid ""'
echo 'msgstr ""'
echo '"Content-Type: text/plain; charset=UTF-8\n"'
echo ""
end
set -l header 'msgid ""\nmsgstr "Content-Type: text/plain; charset=UTF-8\\\\n"\n\n'
printf $header
set -g workspace_root (path resolve (status dirname)/..)
@@ -41,8 +37,15 @@ begin
mark_section tier1-from-rust
# Get rid of duplicates and sort.
find $rust_extraction_dir -type f -exec cat {} + | msguniq --no-wrap --sort-output
or exit 1
begin
# Without providing this header, msguniq complains when a msgid is non-ASCII.
printf $header
find $rust_extraction_dir -type f -exec cat {} +
end |
msguniq --no-wrap --sort-output |
# Remove the header again. Otherwise it would appear twice, breaking the msguniq at the end
# of this file.
sed '/^msgid ""$/ {N; /\nmsgstr "Content-Type: text\/plain; charset=UTF-8\\\\n"$/ {N; d}}'
if not set -l --query _flag_use_existing_template
rm -r $rust_extraction_dir