mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-06-08 02:31:18 -03:00
Optimize wcstod_underscores
Prior to this change, wcstod_underscores would create a "pruned" string without underscores, and then parse that. Switch instead to a filtered Iterator, which can ignore underscores as it traverses the string. Add a test that completes in a reasonable time only if wcstod_underscores is not quadratic. (cherry picked from commit b4577c10e5cf9239bc93c605f431a56d98266a1e)
This commit is contained in:
committed by
Fabian Boehm
parent
82c4896809
commit
49c848ed42
@@ -224,63 +224,57 @@ pub fn wcstod_underscores<Chars>(s: Chars, consumed: &mut usize) -> Result<f64,
|
||||
}
|
||||
}
|
||||
|
||||
let is_sign = |c: char| "+-".contains(c);
|
||||
let is_inf_or_nan_char = |c: char| "iInN".contains(c);
|
||||
// Detect "infinity" and "nan" case-insensitively.
|
||||
{
|
||||
// Skip leading underscores.
|
||||
let mut inf_nan_chars = chars.clone();
|
||||
while inf_nan_chars.peek() == Some(&'_') {
|
||||
inf_nan_chars.next();
|
||||
}
|
||||
// Skip at most one leading sign.
|
||||
if matches!(inf_nan_chars.peek(), Some('+') | Some('-')) {
|
||||
inf_nan_chars.next();
|
||||
}
|
||||
|
||||
// We don't do any underscore-stripping for infinity/NaN.
|
||||
let mut is_inf_nan = false;
|
||||
if let Some(&c1) = chars.peek() {
|
||||
if is_inf_or_nan_char(c1) {
|
||||
is_inf_nan = true;
|
||||
} else if is_sign(c1) {
|
||||
// FIXME make this more efficient
|
||||
let mut copy = chars.clone();
|
||||
copy.next();
|
||||
if let Some(&c2) = copy.peek() {
|
||||
if is_inf_or_nan_char(c2) {
|
||||
is_inf_nan = true;
|
||||
}
|
||||
// Look for characters leading "infinity" or "nan", case-insensitive.
|
||||
if inf_nan_chars.next().map_or(false, |c| "iInN".contains(c)) {
|
||||
// We think it's inf or nan, so use the parser without underscores.
|
||||
// Note that leading underscores will now trigger an error.
|
||||
let f = wcstod_inner(chars, '.', consumed)?;
|
||||
*consumed += leading_whitespace;
|
||||
return Ok(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse with an iterator which filters out underscores.
|
||||
let chars_no_uscore = chars.clone().filter(|&c| c != '_');
|
||||
|
||||
// No spaces allowed after underscores.
|
||||
if let Some(c) = chars_no_uscore.clone().next() {
|
||||
if c.is_whitespace() {
|
||||
return Err(Error::Empty);
|
||||
}
|
||||
}
|
||||
|
||||
let mut non_uscores_consumed = 0;
|
||||
let f = wcstod_inner(chars_no_uscore, '.', &mut non_uscores_consumed)?;
|
||||
|
||||
// We consumed non_underscores_consumed characters, skipping underscores.
|
||||
// Count how many we skipped. Also skip trailing underscores.
|
||||
let mut remaining_chars = non_uscores_consumed;
|
||||
let consumed_chars_with_uscores = chars
|
||||
.take_while(|&c| {
|
||||
if c == '_' {
|
||||
true
|
||||
} else if remaining_chars > 0 {
|
||||
remaining_chars -= 1;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
if is_inf_nan {
|
||||
let f = wcstod_inner(chars, '.', consumed)?;
|
||||
*consumed += leading_whitespace;
|
||||
return Ok(f);
|
||||
}
|
||||
// We build a string to pass to the system wcstod, pruned of underscores. We will take all
|
||||
// leading alphanumeric characters that can appear in a strtod numeric literal, dots (.), and
|
||||
// signs (+/-). In order to be more clever, for example to stop earlier in the case of strings
|
||||
// like "123xxxxx", we would need to do a full parse, because sometimes 'a' is a hex digit and
|
||||
// sometimes it is the end of the parse, sometimes a dot '.' is a decimal delimiter and
|
||||
// sometimes it is the end of the valid parse, as in "1_2.3_4.5_6", etc.
|
||||
let mut pruned = vec![];
|
||||
// We keep track of the positions *in the pruned string* where there used to be underscores. We
|
||||
// will pass the pruned version of the input string to the system wcstod, which in turn will
|
||||
// tell us how many characters it consumed. Then we will set our own endptr based on (1) the
|
||||
// number of characters consumed from the pruned string, and (2) how many underscores came
|
||||
// before the last consumed character. The alternative to doing it this way (for example, "only
|
||||
// deleting the correct underscores") would require actually parsing the input string, so that
|
||||
// we can know when to stop grabbing characters and dropping underscores, as in "1_2.3_4.5_6".
|
||||
let mut underscores = vec![];
|
||||
// If we wanted to future-proof against a strtod from the future that, say, allows octal
|
||||
// literals using 0o, etc., we could just use iswalnum, instead of iswxdigit and P/p/X/x checks.
|
||||
for c in chars.take_while(|&c| c.is_ascii_hexdigit() || "PpXx._".contains(c) || is_sign(c)) {
|
||||
if c == '_' {
|
||||
underscores.push(pruned.len());
|
||||
} else {
|
||||
pruned.push(c)
|
||||
}
|
||||
}
|
||||
|
||||
let mut pruned_consumed = 0;
|
||||
let f = wcstod_inner(pruned.into_iter(), '.', &mut pruned_consumed)?;
|
||||
let underscores_consumed = underscores
|
||||
.into_iter()
|
||||
.take_while(|&n| n <= pruned_consumed)
|
||||
})
|
||||
.count();
|
||||
|
||||
*consumed = leading_whitespace + pruned_consumed + underscores_consumed;
|
||||
*consumed = leading_whitespace + consumed_chars_with_uscores;
|
||||
Ok(f)
|
||||
}
|
||||
|
||||
@@ -711,7 +705,7 @@ fn wcstod_underscores() {
|
||||
assert_eq!(test("1 "), Ok((1.0, 1)));
|
||||
assert_eq!(test("infinity_"), Ok((f64::INFINITY, 8)));
|
||||
assert_eq!(test(" -INFINITY"), Ok((f64::NEG_INFINITY, 10)));
|
||||
assert_eq!(test("_infinity"), Err(Error::Empty));
|
||||
assert_eq!(test("_infinity"), Err(Error::InvalidChar));
|
||||
{
|
||||
let (f, n) = test("nan(0)").unwrap();
|
||||
assert!(f.is_nan());
|
||||
@@ -722,7 +716,7 @@ fn wcstod_underscores() {
|
||||
assert!(f.is_nan());
|
||||
assert_eq!(n, 3);
|
||||
}
|
||||
assert_eq!(test("_nan(0)"), Err(Error::Empty));
|
||||
assert_eq!(test("_nan(0)"), Err(Error::InvalidChar));
|
||||
// We don't strip the underscores in this commented-out test case, and the behavior is
|
||||
// implementation-defined, so we don't actually know how many characters will get consumed. On
|
||||
// macOS the strtod man page only says what happens with an alphanumeric string passed to nan(),
|
||||
@@ -735,4 +729,25 @@ fn wcstod_underscores() {
|
||||
assert_eq!(test("Also none"), Err(Error::InvalidChar));
|
||||
assert_eq!(test(" Also none"), Err(Error::InvalidChar));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wcstod_underscores_repeated() {
|
||||
// A test inspired by #9984.
|
||||
// Emulate builtin_math by parsing all the numbers out of a huge string with underscores.
|
||||
// This should complete quickly!
|
||||
let count = 25_000;
|
||||
let mut repeated_string = "12_34.56+".repeat(count);
|
||||
repeated_string.pop(); // trailing +
|
||||
let expected = 12_34.56 * (count as f64);
|
||||
|
||||
let mut sum = 0.0;
|
||||
let mut offset = 0;
|
||||
while offset < repeated_string.len() {
|
||||
let mut consumed = 0;
|
||||
sum += super::wcstod_underscores(repeated_string[offset..].chars(), &mut consumed)
|
||||
.unwrap();
|
||||
offset += consumed;
|
||||
}
|
||||
assert!((sum - expected).abs() <= 1e-4);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user