#!/usr/bin/env fish
#
# Tool to generate gettext messages template file.
# Writes to stdout.
# Intended to be called from `update_translations.fish`.
#
# Options:
#   --use-existing-template=DIR  Read the Rust-extracted messages from DIR instead of running
#                                `cargo check` into a fresh temporary directory.
#
# The output is split into sections. Each section starts with a marker entry whose msgid is
# `fish-section-<name>` (see `mark_section`).

argparse use-existing-template= -- $argv
or exit $status

# Everything written to stdout inside this block is piped through a final `msguniq`
# after the closing `end`.
begin
    # Write header. This is required by msguniq.
    # Note that this results in the file being overwritten.
    # This is desired behavior, to get rid of the results of prior invocations
    # of this script.
    set -l header 'msgid ""\nmsgstr "Content-Type: text/plain; charset=UTF-8\\\\n"\n\n'
    # $header is passed as the printf format string, so printf expands its escape sequences.
    printf $header

    # Parent of the directory containing this script. Global because it is also read by the
    # tier classification code further down.
    set -g workspace_root (path resolve (status dirname)/..)

    # Directory holding the messages extracted from the Rust sources: either the one supplied
    # via --use-existing-template, or a temporary directory populated by `cargo check`.
    set -l rust_extraction_dir
    if set -l --query _flag_use_existing_template
        set rust_extraction_dir $_flag_use_existing_template
    else
        set rust_extraction_dir (mktemp -d)
        # We need to build to ensure that the proc macro for extracting strings runs.
        FISH_GETTEXT_EXTRACTION_DIR=$rust_extraction_dir cargo check --features=gettext-extract
        or exit 1
    end

    # Print a marker PO entry that starts a new section of the output.
    #   $argv[1]: section name; the emitted msgid is `fish-section-<name>`.
    function mark_section
        set -l section_name $argv[1]
        echo 'msgid "fish-section-'$section_name'"'
        echo 'msgstr ""'
        echo ''
    end

    mark_section tier1-from-rust

    # Get rid of duplicates and sort.
    begin
        # Without providing this header, msguniq complains when a msgid is non-ASCII.
        printf $header
        # Concatenate every regular file found below the extraction directory.
        find $rust_extraction_dir -type f -exec cat {} +
    end |
        msguniq --no-wrap --sort-output |
        # Remove the header again. Otherwise it would appear twice, breaking the msguniq at the end
        # of this file.
        sed '/^msgid ""$/ {N; /\nmsgstr "Content-Type: text\/plain; charset=UTF-8\\\\n"$/ {N; d}}'

    # Only remove the extraction directory if this script created it.
    if not set -l --query _flag_use_existing_template
        rm -r $rust_extraction_dir
    end

    # Extract messages from fish script files and print them as PO entries.
    #   $argv[1]:  regex which must match an entire line, with the first capture group matching
    #              the (still quoted) message string.
    #   $argv[2..]: the files to scan.
    function extract_fish_script_messages_impl
        set -l regex $argv[1]
        set -e argv[1]
        # Using xgettext causes more trouble than it helps.
        # This is due to handling of escaping in fish differing from formats xgettext understands
        # (e.g. POSIX shell strings).
        # We work around this issue by manually writing the file content.

        # Steps:
        # 1. We extract strings to be translated from the relevant files and drop the rest. This step
        #    depends on the regex matching the entire line, and the first capture group matching the
        #    string.
        # 2. We unescape. This gets rid of some escaping necessary in fish strings.
        # 3. The resulting strings are sorted alphabetically. This step is optional. Not sorting would
        #    result in strings from the same file appearing together. Removing duplicates is also
        #    optional, since msguniq takes care of that later on as well.
        # 4. Single backslashes are replaced by double backslashes. This results in the backslashes
        #    being interpreted as literal backslashes by gettext tooling.
        # 5. Double quotes are escaped, such that they are not interpreted as the start or end of
        #    a msgid.
        # 6. We transform the string into the format expected in a PO file.
        cat $argv |
            string replace --filter --regex $regex '$1' |
            string unescape |
            sort -u |
            sed -E -e 's_\\\\_\\\\\\\\_g' -e 's_"_\\\\"_g' -e 's_^(.*)$_msgid "\1"\nmsgstr ""\n_'
    end

    # Extract the messages of one localization tier.
    #   $argv[1]:  tier name.
    #   $argv[2..]: files belonging to that tier; if there are none, nothing is printed.
    function extract_fish_script_messages
        set -l tier $argv[1]
        set -e argv[1]
        if not set -q argv[1]
            return
        end
        # This regex handles explicit requests to translate a message. These are more important to translate
        # than messages which should be implicitly translated.
        # NOTE(review): the text from here up to the `echo >&2` diagnostic appears to have been
        # lost from this copy (it looks as if everything between a `<` and a `>` was stripped).
        # The remainder of this function, the initial tier variables and the head of the per-file
        # loop are not visible; the fragment below is kept verbatim and is not valid fish as it
        # stands. Restore it from the canonical source before relying on this file.
        set -l explicit_regex '.*\( *_ (([\'"]).+?(?&2 "$file:1 unexpected localization tier: $tier"
                    exit 1
            end
            continue
        end
        # Default classification, presumably for files which do not declare a tier themselves
        # (the code handling declared tiers is in the missing part above -- TODO confirm).
        set -l dirname (path basename (path dirname $file))
        set -l command_name (path basename --no-extension $file)
        # Files in a `functions` directory whose name starts with `fish_` go to tier1.
        if test $dirname = functions && string match -q -- 'fish_*' $command_name
            set -a tier1 $file
            continue
        end
        # Anything else which is not in a `completions` directory is rejected.
        if test $dirname != completions
            echo >&2 "$file:1 missing localization tier for function file"
            exit 1
        end
        # Completions go to tier1 if doc_src/cmds/<command>.rst exists, and to tier3 otherwise.
        if test -e $workspace_root/doc_src/cmds/$command_name.rst
            set -a tier1 $file
        else
            set -a tier3 $file
        end
    end

    extract_fish_script_messages tier1 $tier1
    extract_fish_script_messages tier2 $tier2
    extract_fish_script_messages tier3 $tier3
end |
    # At this point, all extracted strings have been written to stdout,
    # starting with the ones taken from the Rust sources,
    # followed by strings explicitly marked for translation in fish scripts,
    # and finally the strings from fish scripts which get translated implicitly.
    # Because we do not eliminate duplicates across these categories,
    # we do it here, since other gettext tools expect no duplicates.
    msguniq --no-wrap