encoding: remove or annotate WString::from_str

We want to discourage direct conversion from regular Rust strings to
`WString`, since our `WString`s are assumed to use the PUA encoding
scheme. If the input string contains certain PUA codepoints and the
resulting `WString` is decoded, it would not result in the same bytes as
the UTF-8 encoding of the input string. To avoid this, use
`str2wcstring`.

There are two remaining usages of `WString::from_str` which have been
annotated to indicate why they are there.
This commit is contained in:
Daniel Rainer
2026-01-27 00:01:07 +01:00
committed by Johannes Altmanninger
parent a93c24b084
commit cce788388f
11 changed files with 24 additions and 15 deletions

View File

@@ -1,5 +1,5 @@
use super::prelude::*;
use crate::common::{Named, bytes2wcstring, escape, get_by_sorted_name};
use crate::common::{Named, bytes2wcstring, escape, get_by_sorted_name, str2wcstring};
use crate::fds::BorrowedFdFile;
use crate::io::OutputStream;
use crate::parse_constants::UNKNOWN_BUILTIN_ERR_MSG;
@@ -716,7 +716,7 @@ pub fn builtin_wperror(program_name: &wstr, streams: &mut IoStreams) {
streams.err.append(program_name);
streams.err.append(L!(": "));
if err.0 != 0 {
let werr = WString::from_str(&err.to_string());
let werr = str2wcstring(err.to_string());
streams.err.append(&werr);
streams.err.append_char('\n');
}

View File

@@ -3,6 +3,7 @@
use std::num::NonZeroUsize;
use super::*;
use crate::common::str2wcstring;
use crate::env::{EnvVar, EnvVarFlags};
use crate::flog::flog;
use crate::parse_util::unescape_wildcards;
@@ -307,7 +308,7 @@ fn validate_capture_group_names(
capture_group_names: &[Option<String>],
) -> Result<(), RegexError> {
for name in capture_group_names.iter().filter_map(|n| n.as_ref()) {
let wname = WString::from_str(name);
let wname = str2wcstring(name);
if EnvVar::flags_for(&wname).contains(EnvVarFlags::READ_ONLY) {
return Err(RegexError::InvalidCaptureGroupName(wname));
}

View File

@@ -1088,6 +1088,7 @@ pub fn test(parser: &Parser, streams: &mut IoStreams, argv: &mut [&wstr]) -> Bui
mod tests {
use super::test as builtin_test;
use crate::builtins::prelude::*;
use crate::common::str2wcstring;
use crate::io::{IoChain, OutputStream};
use crate::tests::prelude::*;
@@ -1100,7 +1101,7 @@ fn run_one_test_test_mbracket(expected: i32, lst: &[&str], bracket: bool) -> boo
argv.push(L!("test").to_owned());
}
for s in lst {
argv.push(WString::from_str(s));
argv.push(str2wcstring(s));
}
if bracket {
argv.push(L!("]").to_owned())

View File

@@ -2633,6 +2633,7 @@ mod tests {
sort_and_prioritize,
};
use crate::abbrs::{self, Abbreviation, with_abbrs_mut};
use crate::common::str2wcstring;
use crate::env::{EnvMode, EnvSetMode, Environment};
use crate::io::IoChain;
use crate::operation_context::{
@@ -3281,7 +3282,7 @@ macro_rules! perform_one_completion_cd_test {
vars.parent
.vars
.insert(L!("AUTOSUGGEST_TEST_LOC").to_owned(), WString::from_str(wd));
.insert(L!("AUTOSUGGEST_TEST_LOC").to_owned(), str2wcstring(wd));
perform_one_autosuggestion_cd_test!("cd $AUTOSUGGEST_TEST_LOC/0", "foobar/", &vars);
perform_one_autosuggestion_cd_test!("cd ~/test_autosuggest_suggest_specia", "l/", &vars);

View File

@@ -248,6 +248,7 @@ fn populate_variables(s: &[u8], out_vars: &mut VarTable) -> UvarFormat {
let Ok(line) = std::str::from_utf8(line) else {
continue;
};
// TODO: investigate whether this should use str2wcstring
wide_line = WString::from_str(line);
match format {

View File

@@ -7,7 +7,7 @@
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
use crate::common::{ScopeGuard, escape};
use crate::common::{ScopeGuard, escape, str2wcstring};
use crate::flog::flog;
use crate::io::{IoChain, IoStreams};
use crate::job_group::MaybeJobId;
@@ -402,7 +402,7 @@ pub fn get_desc(parser: &Parser, evt: &Event) -> WString {
EventDescription::Any => unreachable!(),
};
WString::from_str(&s)
str2wcstring(&s)
}
/// Add an event handler.

View File

@@ -1591,6 +1591,7 @@ mod tests {
use crate::abbrs::Abbreviation;
use crate::abbrs::{self};
use crate::abbrs::{with_abbrs, with_abbrs_mut};
use crate::common::str2wcstring;
use crate::complete::{CompletionList, CompletionReceiver};
use crate::env::{EnvMode, EnvStackSetResult};
use crate::expand::{ExpandResultCode, expand_to_receiver};
@@ -1953,7 +1954,7 @@ fn test_expand_overflow() {
// Make a list of 64 elements, then expand it cartesian-style 64 times.
// This is far too large to expand.
let vals: Vec<WString> = (1..=64).map(|i| i.to_wstring()).collect();
let expansion = WString::from_str(&str::repeat("$bigvar", 64));
let expansion = str2wcstring(str::repeat("$bigvar", 64));
let parser = TestParser::new();
parser.vars().push(true);

View File

@@ -426,6 +426,7 @@ pub fn fs_is_case_insensitive(
#[cfg(test)]
mod tests {
use super::{FileTester, IsErr, IsFile, PathFlags, is_potential_path};
use crate::common::osstr2wcstring;
use crate::env::EnvStack;
use crate::operation_context::{EXPANSION_LIMIT_DEFAULT, OperationContext};
use crate::prelude::*;
@@ -454,10 +455,7 @@ fn filepath(&self, name: &str) -> PathBuf {
}
fn file_tester(&self) -> FileTester<'_> {
FileTester::new(
WString::from_str(self.tempdir.path().to_str().unwrap()),
&self.ctx,
)
FileTester::new(osstr2wcstring(self.tempdir.path()), &self.ctx)
}
}

View File

@@ -41,8 +41,9 @@ fn gettext(message: MaybeStatic) -> &'static wstr {
LazyLock::new(|| Mutex::new(HashMap::default()));
let mut localizations_to_wide = LOCALIZATION_TO_WIDE.lock().unwrap();
if !localizations_to_wide.contains_key(localized_str) {
let localization_wstr =
Box::leak(WString::from_str(localized_str).into_boxed_utfstr());
use crate::common::str2wcstring;
let localization_wstr = Box::leak(str2wcstring(localized_str).into_boxed_utfstr());
localizations_to_wide.insert(localized_str, localization_wstr);
}
return localizations_to_wide.get(localized_str).unwrap();

View File

@@ -88,6 +88,10 @@ fn append_space_separated_list<S: AsRef<str>>(
for lang in list {
string.push(' ');
string.push_utfstr(&crate::common::escape(
// lang is already PUA-encoded at this point. The reason we convert the PUA-encoded
// WString into a String is to enable comparison with the language names we have
// available. We could use WString for lang, but that would require converting our
// stored language names to WString as well.
WString::from_str(lang.as_ref()).as_utfstr(),
));
}

View File

@@ -1487,6 +1487,7 @@ mod tests {
use crate::ast::{
self, Ast, Castable, JobList, JobPipeline, Kind, Node, Traversal, is_same_node,
};
use crate::common::str2wcstring;
use crate::env::EnvStack;
use crate::expand::ExpandFlags;
use crate::io::{IoBufferfill, IoChain};
@@ -1513,7 +1514,7 @@ macro_rules! detect_errors {
}
fn detect_argument_errors(src: &str) -> Result<(), ParserTestErrorBits> {
let src = WString::from_str(src);
let src = str2wcstring(src);
let ast = ast::parse_argument_list(&src, ParseTreeFlags::default(), None);
if ast.errored() {
return Err(ParserTestErrorBits::ERROR);