gettext-extract: fix race condition

Multiple gettext-extraction proc macro instances can run at the same
time due to Rust's compilation model. In the previous implementation,
where every instance appended to the same file, this has resulted in
corruption of the file. This was reported and discussed in
https://github.com/fish-shell/fish-shell/pull/11928#discussion_r2488047964
for the equivalent macro for Fluent message ID extraction. The
underlying problem is the same.

The best way we have found to avoid such race condition is to write each
entry to a new file, and concatenate them together before using them.
It's not a beautiful approach, but it should be fairly robust and
portable.

Closes #12125
This commit is contained in:
Daniel Rainer
2025-12-01 19:48:02 +01:00
committed by Johannes Altmanninger
parent c1db2744cf
commit 3e2336043a
8 changed files with 36 additions and 29 deletions

1
Cargo.lock generated
View File

@@ -203,6 +203,7 @@ dependencies = [
name = "fish-gettext-extraction"
version = "0.0.0"
dependencies = [
"fish-tempfile",
"proc-macro2",
"rsconf",
]

View File

@@ -38,8 +38,8 @@ cargo() {
}
cleanup () {
if [ -n "$template_file" ] && [ -e "$template_file" ]; then
rm "$template_file"
if [ -n "$template_dir" ] && [ -e "$template_dir" ]; then
rm -r "$template_dir"
fi
}
@@ -64,9 +64,9 @@ if [ -n "$FISH_TEST_MAX_CONCURRENCY" ]; then
export CARGO_BUILD_JOBS="$FISH_TEST_MAX_CONCURRENCY"
fi
template_file=$(mktemp)
template_dir=$(mktemp -d)
(
export FISH_GETTEXT_EXTRACTION_FILE="$template_file"
export FISH_GETTEXT_EXTRACTION_DIR="$template_dir"
cargo build --workspace --all-targets --features=gettext-extract
)
if $lint; then
@@ -83,7 +83,7 @@ cargo test --doc --workspace
if $lint; then
cargo doc --workspace
fi
FISH_GETTEXT_EXTRACTION_FILE=$template_file "$workspace_root/tests/test_driver.py" "$build_dir"
FISH_GETTEXT_EXTRACTION_DIR=$template_dir "$workspace_root/tests/test_driver.py" "$build_dir"
exit
}

View File

@@ -21,13 +21,13 @@ begin
set -g workspace_root (path resolve (status dirname)/..)
set -l rust_extraction_file
set -l rust_extraction_dir
if set -l --query _flag_use_existing_template
set rust_extraction_file $_flag_use_existing_template
set rust_extraction_dir $_flag_use_existing_template
else
set rust_extraction_file (mktemp)
set rust_extraction_dir (mktemp -d)
# We need to build to ensure that the proc macro for extracting strings runs.
FISH_GETTEXT_EXTRACTION_FILE=$rust_extraction_file cargo check --features=gettext-extract
FISH_GETTEXT_EXTRACTION_DIR=$rust_extraction_dir cargo check --features=gettext-extract
or exit 1
end
@@ -41,11 +41,11 @@ begin
mark_section tier1-from-rust
# Get rid of duplicates and sort.
msguniq --no-wrap --sort-output $rust_extraction_file
find $rust_extraction_dir -type f -exec cat {} + | msguniq --no-wrap --sort-output
or exit 1
if not set -l --query _flag_use_existing_template
rm $rust_extraction_file
rm -r $rust_extraction_dir
end
function extract_fish_script_messages_impl

View File

@@ -17,14 +17,14 @@
# If this flag is specified, the script will exit with an error if there are outstanding
# changes, and will display the diff. Do not specify other flags if `--dry-run` is specified.
#
# Specify `--use-existing-template=FILE` to prevent running cargo for extracting an up-to-date
# Specify `--use-existing-template=DIR` to prevent running cargo for extracting an up-to-date
# version of the localized strings. This flag is intended for testing setups which make it
# inconvenient to run cargo here, but run it in an earlier step to ensure up-to-date values.
# This argument is passed on to the `fish_xgettext.fish` script and has no other uses.
# `FILE` must be the path to a gettext template file generated from our compilation process.
# `DIR` must be the path to a gettext template file generated from our compilation process.
# It can be obtained by running:
# set -l FILE (mktemp)
# FISH_GETTEXT_EXTRACTION_FILE=$FILE cargo check --features=gettext-extract
# set -l DIR (mktemp -d)
# FISH_GETTEXT_EXTRACTION_DIR=$DIR cargo check --features=gettext-extract
# The sort utility is locale-sensitive.
# Ensure that sorting output is consistent by setting LC_ALL here.

View File

@@ -11,6 +11,7 @@ description = "proc-macro for extracting strings for gettext translation"
proc-macro = true
[dependencies]
fish-tempfile.workspace = true
proc-macro2.workspace = true
[build-dependencies]

View File

@@ -1,3 +1,3 @@
fn main() {
rsconf::rebuild_if_env_changed("FISH_GETTEXT_EXTRACTION_FILE");
rsconf::rebuild_if_env_changed("FISH_GETTEXT_EXTRACTION_DIR");
}

View File

@@ -1,6 +1,7 @@
extern crate proc_macro;
use fish_tempfile::random_filename;
use proc_macro::TokenStream;
use std::{ffi::OsString, fs::OpenOptions, io::Write};
use std::{ffi::OsString, io::Write, path::PathBuf};
fn unescape_multiline_rust_string(s: String) -> String {
if !s.contains('\n') {
@@ -42,12 +43,9 @@ enum State {
unescaped
}
fn append_po_entry_to_file(message: &TokenStream, file_name: &OsString) {
let mut file = OpenOptions::new()
.create(true)
.append(true)
.open(file_name)
.unwrap_or_else(|e| panic!("Could not open file {file_name:?}: {e}"));
// Each entry is written to a fresh file to avoid race conditions arising when there are multiple
// unsynchronized writers to the same file.
fn write_po_entry_to_file(message: &TokenStream, dir: &OsString) {
let message_string = unescape_multiline_rust_string(message.to_string());
if message_string.contains('\n') {
panic!(
@@ -61,25 +59,32 @@ fn append_po_entry_to_file(message: &TokenStream, file_name: &OsString) {
""
};
let po_entry = format!("{format_string_annotation}msgid {message_string}\nmsgstr \"\"\n\n");
let dir = PathBuf::from(dir);
let (path, result) =
fish_tempfile::create_file_with_retry(|| dir.join(random_filename(OsString::new())));
let mut file = result.unwrap_or_else(|e| {
panic!("Failed to create temporary file {path:?}:\n{e}");
});
file.write_all(po_entry.as_bytes()).unwrap();
}
/// The `message` is passed through unmodified.
/// If `FISH_GETTEXT_EXTRACTION_FILE` is defined in the environment,
/// If `FISH_GETTEXT_EXTRACTION_DIR` is defined in the environment,
/// this file is used to write the message,
/// so that it can then be used for generating gettext PO files.
/// The `message` must be a string literal.
///
/// # Panics
///
/// This macro panics if the `FISH_GETTEXT_EXTRACTION_FILE` variable is set and `message` has an
/// This macro panics if the `FISH_GETTEXT_EXTRACTION_DIR` variable is set and `message` has an
/// unexpected format.
/// Note that for example `concat!(...)` cannot be passed to this macro, because expansion works
/// outside in, meaning this macro would still see the `concat!` macro invocation, instead of a
/// string literal.
#[proc_macro]
pub fn gettext_extract(message: TokenStream) -> TokenStream {
if let Some(file_path) = std::env::var_os("FISH_GETTEXT_EXTRACTION_FILE") {
if let Some(dir_path) = std::env::var_os("FISH_GETTEXT_EXTRACTION_DIR") {
let pm2_message = proc_macro2::TokenStream::from(message.clone());
let mut token_trees = pm2_message.into_iter();
let first_token = token_trees
@@ -103,7 +108,7 @@ pub fn gettext_extract(message: TokenStream) -> TokenStream {
)
}
if let proc_macro2::TokenTree::Literal(_) = first_group_token {
append_po_entry_to_file(&message, &file_path);
write_po_entry_to_file(&message, &dir_path);
} else {
panic!("Expected literal in gettext_extract, but got: {first_group_token:?}");
}

View File

@@ -3,7 +3,7 @@
# Compiling in this test is too expensive.
# We need the gettext template extracted from the Rust sources passed in via env var,
# in order to pass it on.
#REQUIRES: test -e "$FISH_GETTEXT_EXTRACTION_FILE"
#REQUIRES: test -e "$FISH_GETTEXT_EXTRACTION_DIR"
set -l dir (status dirname)
@@ -16,5 +16,5 @@ set -lxp PATH (path dirname $fish)
# The `--use-existing-template` argument allows using the pre-built version of the gettext template
# file.
$dir/../../build_tools/update_translations.fish --dry-run \
--use-existing-template=$FISH_GETTEXT_EXTRACTION_FILE
--use-existing-template=$FISH_GETTEXT_EXTRACTION_DIR
or exit 1