Be explicit about setlocale() scope

We use the following locale-variables via locale-aware C functions
(to look them up, grep for "libc::$function_name"):

- LC_CTYPE:    wcwidth
- LC_MESSAGES: strerror, strerror 
- LC_NUMERIC:  localeconv/localeconv_l, snprintf (only in tests)
- LC_TIME:     strftime

Additionally, we interpret LC_MESSAGES in our own code.

As of today, the PCRE2 library does not seem to use LC_MESSAGES
(their error messages are English-only); and I don't think it uses
LC_CTYPE unless we use "pcre2_maketables()".

Let's make it more obvious which locale categories are actually used
by setting those instead of LC_ALL.

This means that instead of logging the return value of «
setlocale(LC_ALL, "") » (which is an opaque binary string on Linux),
we can log the actual per-category locales that are in effect.
This means that there's no longer really a reason to hardcode a log
for the LC_MESSAGES locale. We're not logging at early startup but
we will log if any locale var had been set at startup.
This commit is contained in:
Johannes Altmanninger
2025-10-24 22:41:39 +02:00
parent 732942ec62
commit 71a962653d
5 changed files with 44 additions and 51 deletions

View File

@@ -408,7 +408,9 @@ fn throwing_main() -> i32 {
threads::init();
// Safety: single-threaded.
unsafe { set_libc_locales() };
unsafe {
set_libc_locales(/*log_ok=*/ false)
};
fish::wutil::gettext::initialize_gettext();

View File

@@ -890,7 +890,9 @@ fn throwing_main() -> i32 {
let mut streams = IoStreams::new(&mut out, &mut err, &io_chain);
streams.stdin_fd = STDIN_FILENO;
// Safety: single-threaded.
unsafe { set_libc_locales() };
unsafe {
set_libc_locales(/*log_ok=*/ false)
};
crate::wutil::gettext::initialize_gettext();
env_init(None, true, false);

View File

@@ -17,10 +17,7 @@
use crate::wchar::prelude::*;
use crate::wutil::fish_wcstoi;
use crate::{function, terminal};
use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::ptr;
use std::sync::atomic::{AtomicBool, Ordering};
/// List of all locale environment variable names that might trigger (re)initializing of the locale
@@ -502,16 +499,6 @@ pub fn read_terminfo_database(vars: &EnvStack) {
fn init_locale(vars: &EnvStack) {
let _guard = crate::locale::LOCALE_LOCK.lock().unwrap();
let old_msg_locale: CString = {
let old = unsafe { libc::setlocale(libc::LC_MESSAGES, ptr::null()) };
assert_ne!(old, ptr::null_mut());
// We have to make a copy because the subsequent setlocale() call to change the locale will
// invalidate the pointer from this setlocale() call.
// safety: `old` is not a null-pointer, and should be a reference to the currently set locale
unsafe { CStr::from_ptr(old.cast()) }.to_owned()
};
for var_name in LOCALE_VARIABLES {
let var = vars
.getf_unless_empty(var_name, EnvMode::EXPORT)
@@ -525,40 +512,15 @@ fn init_locale(vars: &EnvStack) {
}
}
let user_locale = {
// Safety: we hold the locale lock.
let loc = unsafe { set_libc_locales() };
if loc.is_none() {
FLOG!(env_locale, "user has an invalid locale configured");
}
loc
};
// Safety: we hold the locale lock.
if !unsafe {
set_libc_locales(/*log_ok=*/ true)
} {
FLOG!(env_locale, "user has an invalid locale configured");
}
// Update cached locale information.
crate::common::fish_setlocale();
FLOG!(
env_locale,
"init_locale() setlocale():",
user_locale
.map(CStr::to_string_lossy)
.unwrap_or(Cow::Borrowed("(null)"))
);
let new_msg_loc_ptr = unsafe { libc::setlocale(libc::LC_MESSAGES, std::ptr::null()) };
// should never fail
assert_ne!(new_msg_loc_ptr, ptr::null_mut());
// safety: we just asserted it is not a null-pointer.
let new_msg_locale = unsafe { CStr::from_ptr(new_msg_loc_ptr) };
FLOG!(
env_locale,
"Old LC_MESSAGES locale:",
old_msg_locale.to_string_lossy()
);
FLOG!(
env_locale,
"New LC_MESSAGES locale:",
new_msg_locale.to_string_lossy()
);
#[cfg(feature = "localize-messages")]
crate::wutil::gettext::update_locale_from_env(vars);

View File

@@ -8,10 +8,35 @@
/// # Safety
/// Call this either before starting any locale-using thread, or while holding a lock on the
/// above mutex.
pub unsafe fn set_libc_locales() -> Option<&'static CStr> {
let opaque_locale_str = setlocale(libc::LC_ALL, Some(c""));
setlocale(libc::LC_CTYPE, Some(c"C.UTF-8"));
opaque_locale_str
pub unsafe fn set_libc_locales(log_ok: bool) -> bool {
let mut ok = true;
let mut set = |category_name, category, value| {
let locale_string = setlocale(category, Some(value));
if log_ok {
crate::flog::FLOG!(
env_locale,
match locale_string {
Some(locale_string) => {
format!("Set {category_name} to {}", locale_string.to_string_lossy())
}
None => {
format!("Failed to set {category_name}",)
}
},
);
}
ok &= locale_string.is_some();
};
let from_environment = c"";
// For wcwidth(3p)
set("LC_CTYPE", libc::LC_CTYPE, c"C.UTF-8");
// For strerror(3p) and strsignal(3p)
set("LC_MESSAGES", libc::LC_MESSAGES, from_environment);
// For builtin printf
set("LC_NUMERIC", libc::LC_NUMERIC, from_environment);
// For "history --show-time"
set("LC_TIME", libc::LC_TIME, from_environment);
ok
}
fn setlocale(category: libc::c_int, locale: Option<&CStr>) -> Option<&'static CStr> {

View File

@@ -27,7 +27,9 @@ pub fn test_init() -> impl ScopeGuarding<Target = ()> {
std::fs::create_dir_all(&test_dir).unwrap();
set_current_dir(&test_dir).unwrap();
// Safety: all tests that access locale should go through the enclosing function.
unsafe { set_libc_locales() };
unsafe {
set_libc_locales(/*log_ok=*/ false)
};
topic_monitor_init();
crate::threads::init();
proc_init();