diff --git a/src/builtins/read.rs b/src/builtins/read.rs index 5a53f3dc3..b386cc8f4 100644 --- a/src/builtins/read.rs +++ b/src/builtins/read.rs @@ -14,6 +14,7 @@ use crate::input_common::decode_input_byte; use crate::input_common::terminal_protocols_disable_ifn; use crate::input_common::DecodeState; +use crate::input_common::InvalidPolicy; use crate::nix::isatty; use crate::reader::commandline_set_buffer; use crate::reader::ReaderConfig; @@ -358,16 +359,19 @@ fn read_one_char_at_a_time( unconsumed.push(b); nbytes += 1; let mut consumed = 0; - match decode_input_byte(buff, &mut state, &unconsumed, &mut consumed) { + match decode_input_byte( + buff, + InvalidPolicy::Passthrough, + &mut state, + &unconsumed, + &mut consumed, + ) { DecodeState::Incomplete => continue, DecodeState::Complete => { unconsumed.clear(); break Some(buff.as_char_slice().last().unwrap()); } - DecodeState::Error => { - state = zero_mbstate(); - unconsumed.clear(); - } + DecodeState::Error => unreachable!(), } }; diff --git a/src/input_common.rs b/src/input_common.rs index 0a24110c3..b614250f4 100644 --- a/src/input_common.rs +++ b/src/input_common.rs @@ -873,6 +873,7 @@ fn try_readch(&mut self, blocking: bool) -> Option { } match decode_input_byte( &mut seq, + InvalidPolicy::Error, &mut state, &buffer[..i + 1], &mut consumed, @@ -1693,8 +1694,15 @@ pub(crate) enum DecodeState { Error, } +#[derive(Eq, PartialEq)] +pub(crate) enum InvalidPolicy { + Error, + Passthrough, +} + pub(crate) fn decode_input_byte( out_seq: &mut WString, + invalid_policy: InvalidPolicy, state: &mut mbstate_t, buffer: &[u8], consumed: &mut usize, @@ -1710,6 +1718,19 @@ pub(crate) fn decode_input_byte( out_seq.push(res); return Complete; } + let mut invalid = |out_seq: &mut WString, log_error: fn()| match invalid_policy { + InvalidPolicy::Error => { + (log_error)(); + Error + } + InvalidPolicy::Passthrough => { + for &b in &buffer[*consumed..] { + out_seq.push(encode_byte_to_char(b)); + } + *consumed = buffer.len(); + Complete + } + }; let mut codepoint = u32::from(res); match unsafe { mbrtowc( @@ -1721,9 +1742,7 @@ pub(crate) fn decode_input_byte( } as isize { -1 => { - FLOG!(reader, "Illegal input"); - *consumed += 1; - return Error; + return invalid(out_seq, || FLOG!(reader, "Illegal input encoding")); } -2 => { // Sequence not yet complete. @@ -1739,11 +1758,7 @@ pub(crate) fn decode_input_byte( return Complete; } } - for &b in &buffer[*consumed..] { - out_seq.push(encode_byte_to_char(b)); - *consumed += 1; - } - Complete + invalid(out_seq, || FLOG!(reader, "Illegal codepoint")) } pub(crate) fn unblock_input(mut wait_guard: MutexGuard>) -> bool { diff --git a/tests/checks/read.fish b/tests/checks/read.fish index bc96e3074..c3e087d94 100644 --- a/tests/checks/read.fish +++ b/tests/checks/read.fish @@ -431,3 +431,7 @@ printf \xf9\x98\xb1\x83\x8b | read -z out_of_range_codepoint set -S out_of_range_codepoint # CHECK: $out_of_range_codepoint: set in global scope, unexported, with 1 elements # CHECK: $out_of_range_codepoint[1]: |\Xf9\X98\Xb1\X83\X8b| + +printf \xff | { read invalid_utf8; set -S invalid_utf8 } +# CHECK: $invalid_utf8: set in global scope, unexported, with 1 elements +# CHECK: $invalid_utf8[1]: |\Xff|