mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-06-07 01:51:14 -03:00
builtin read: always handle out-of-range codepoints (Rust port regression)
As mentioned in https://github.com/fish-shell/fish-shell/pull/9688#discussion_r1155089596, commitb77d1d0e2b(Stop crashing on invalid Unicode input, 2024-02-27), Rust's char type doesn't support arbitrary 32-bit values. Out-of-range Unicode codepoints would cause crashes. That commit addressed this by converting the encoded bytes (e.g. UTF-8) to special private-use-area characters that fish knows about. It didn't bother to update the code path in builtin read that relies on mbrtowc as well. Fix that. Move and rename parse_codepoint() and rename/reorder its input/output parameters. Fixes #11383 (cherry picked from commitd9ba27f58f)
This commit is contained in:
@@ -12,8 +12,9 @@
|
||||
use crate::env::Environment;
|
||||
use crate::env::READ_BYTE_LIMIT;
|
||||
use crate::env::{EnvVar, EnvVarFlags};
|
||||
use crate::input_common::decode_input_byte;
|
||||
use crate::input_common::terminal_protocols_disable_ifn;
|
||||
use crate::libc::MB_CUR_MAX;
|
||||
use crate::input_common::DecodeState;
|
||||
use crate::nix::isatty;
|
||||
use crate::reader::commandline_set_buffer;
|
||||
use crate::reader::ReaderConfig;
|
||||
@@ -23,7 +24,6 @@
|
||||
use crate::wcstringutil::split_about;
|
||||
use crate::wcstringutil::split_string_tok;
|
||||
use crate::wutil;
|
||||
use crate::wutil::encoding::mbrtowc;
|
||||
use crate::wutil::encoding::zero_mbstate;
|
||||
use crate::wutil::perror;
|
||||
use libc::SEEK_CUR;
|
||||
@@ -341,70 +341,61 @@ fn read_one_char_at_a_time(
|
||||
split_null: bool,
|
||||
) -> Option<c_int> {
|
||||
let mut exit_res = STATUS_CMD_OK;
|
||||
let mut eof = false;
|
||||
let mut nbytes = 0;
|
||||
|
||||
let mut unconsumed = vec![];
|
||||
|
||||
loop {
|
||||
let mut finished = false;
|
||||
let mut res = '\x00';
|
||||
let mut state = zero_mbstate();
|
||||
|
||||
while !finished {
|
||||
let chars_read = buff.len();
|
||||
let res = loop {
|
||||
let mut b = [0_u8; 1];
|
||||
match read_blocked(fd, &mut b) {
|
||||
Ok(0) | Err(_) => {
|
||||
eof = true;
|
||||
break;
|
||||
break None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let b = b[0];
|
||||
|
||||
unconsumed.push(b);
|
||||
nbytes += 1;
|
||||
if MB_CUR_MAX() == 1 {
|
||||
res = char::from(b);
|
||||
finished = true;
|
||||
} else {
|
||||
let sz = unsafe {
|
||||
mbrtowc(
|
||||
std::ptr::addr_of_mut!(res).cast(),
|
||||
std::ptr::addr_of!(b).cast(),
|
||||
1,
|
||||
&mut state,
|
||||
)
|
||||
} as isize;
|
||||
if sz == -1 {
|
||||
let mut consumed = 0;
|
||||
match decode_input_byte(buff, &mut state, &unconsumed, &mut consumed) {
|
||||
DecodeState::Incomplete => continue,
|
||||
DecodeState::Complete => {
|
||||
unconsumed.clear();
|
||||
break Some(buff.as_char_slice().last().unwrap());
|
||||
}
|
||||
DecodeState::Error => {
|
||||
state = zero_mbstate();
|
||||
} else if sz != -2 {
|
||||
finished = true;
|
||||
unconsumed.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if nbytes > READ_BYTE_LIMIT.load(Ordering::Relaxed) {
|
||||
// Historical behavior: do not include the codepoint that made us overflow.
|
||||
buff.truncate(chars_read);
|
||||
exit_res = STATUS_READ_TOO_MUCH;
|
||||
break;
|
||||
}
|
||||
if eof {
|
||||
let Some(&res) = res else {
|
||||
// EOF
|
||||
if buff.is_empty() {
|
||||
exit_res = STATUS_CMD_ERROR;
|
||||
}
|
||||
break;
|
||||
};
|
||||
if res == if split_null { '\0' } else { '\n' } {
|
||||
buff.pop();
|
||||
break;
|
||||
}
|
||||
if !split_null && res == '\n' {
|
||||
break;
|
||||
}
|
||||
if split_null && res == '\0' {
|
||||
break;
|
||||
}
|
||||
|
||||
buff.push(res);
|
||||
if nchars > 0 && nchars <= buff.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if buff.is_empty() && eof {
|
||||
exit_res = STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
exit_res
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
use crate::wutil::encoding::{mbrtowc, mbstate_t, zero_mbstate};
|
||||
use crate::wutil::fish_wcstol;
|
||||
use std::collections::VecDeque;
|
||||
use std::ops::ControlFlow;
|
||||
use std::os::fd::RawFd;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::ptr;
|
||||
@@ -702,14 +701,14 @@ fn try_readch(&mut self, blocking: bool) -> Option<CharEvent> {
|
||||
_ => 0,
|
||||
});
|
||||
}
|
||||
match self.parse_codepoint(
|
||||
&mut state,
|
||||
match decode_input_byte(
|
||||
&mut seq,
|
||||
&mut state,
|
||||
&buffer[..i + 1],
|
||||
&mut consumed,
|
||||
) {
|
||||
ControlFlow::Continue(/*codepoint_complete=*/ false) => (),
|
||||
ControlFlow::Continue(/*codepoint_complete=*/ true) => {
|
||||
DecodeState::Incomplete => (),
|
||||
DecodeState::Complete => {
|
||||
if have_escape_prefix && i != 0 {
|
||||
have_escape_prefix = false;
|
||||
let c = seq.as_char_slice().last().unwrap();
|
||||
@@ -719,7 +718,7 @@ fn try_readch(&mut self, blocking: bool) -> Option<CharEvent> {
|
||||
break true;
|
||||
}
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
DecodeState::Error => {
|
||||
self.push_front(CharEvent::from_check_exit());
|
||||
break false;
|
||||
}
|
||||
@@ -794,65 +793,6 @@ fn parse_escape_sequence(
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_codepoint(
|
||||
&mut self,
|
||||
state: &mut mbstate_t,
|
||||
out_seq: &mut WString,
|
||||
buffer: &[u8],
|
||||
consumed: &mut usize,
|
||||
) -> ControlFlow<(), bool> {
|
||||
let mut res: char = '\0';
|
||||
let read_byte = *buffer.last().unwrap();
|
||||
if crate::libc::MB_CUR_MAX() == 1 {
|
||||
// single-byte locale, all values are legal
|
||||
// FIXME: this looks wrong, this falsely assumes that
|
||||
// the single-byte locale is compatible with Unicode upper-ASCII.
|
||||
res = read_byte.into();
|
||||
out_seq.push(res);
|
||||
return ControlFlow::Continue(true);
|
||||
}
|
||||
let mut codepoint = u32::from(res);
|
||||
let sz = unsafe {
|
||||
mbrtowc(
|
||||
std::ptr::addr_of_mut!(codepoint).cast(),
|
||||
std::ptr::addr_of!(read_byte).cast(),
|
||||
1,
|
||||
state,
|
||||
)
|
||||
} as isize;
|
||||
match sz {
|
||||
-1 => {
|
||||
FLOG!(reader, "Illegal input");
|
||||
*consumed += 1;
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
-2 => {
|
||||
// Sequence not yet complete.
|
||||
return ControlFlow::Continue(false);
|
||||
}
|
||||
0 => {
|
||||
// Actual nul char.
|
||||
*consumed += 1;
|
||||
out_seq.push('\0');
|
||||
return ControlFlow::Continue(true);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
if let Some(res) = char::from_u32(codepoint) {
|
||||
// Sequence complete.
|
||||
if !fish_reserved_codepoint(res) {
|
||||
*consumed += 1;
|
||||
out_seq.push(res);
|
||||
return ControlFlow::Continue(true);
|
||||
}
|
||||
}
|
||||
for &b in &buffer[*consumed..] {
|
||||
out_seq.push(encode_byte_to_char(b));
|
||||
*consumed += 1;
|
||||
}
|
||||
ControlFlow::Continue(true)
|
||||
}
|
||||
|
||||
fn parse_csi(&mut self, buffer: &mut Vec<u8>) -> Option<Key> {
|
||||
let mut next_char = |zelf: &mut Self| zelf.try_readb(buffer).unwrap_or(0xff);
|
||||
// The maximum number of CSI parameters is defined by NPAR, nominally 16.
|
||||
@@ -1322,6 +1262,65 @@ fn has_lookahead(&self) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum DecodeState {
|
||||
Incomplete,
|
||||
Complete,
|
||||
Error,
|
||||
}
|
||||
|
||||
pub(crate) fn decode_input_byte(
|
||||
out_seq: &mut WString,
|
||||
state: &mut mbstate_t,
|
||||
buffer: &[u8],
|
||||
consumed: &mut usize,
|
||||
) -> DecodeState {
|
||||
use DecodeState::*;
|
||||
let mut res: char = '\0';
|
||||
let read_byte = *buffer.last().unwrap();
|
||||
if crate::libc::MB_CUR_MAX() == 1 {
|
||||
// single-byte locale, all values are legal
|
||||
// FIXME: this looks wrong, this falsely assumes that
|
||||
// the single-byte locale is compatible with Unicode upper-ASCII.
|
||||
res = read_byte.into();
|
||||
out_seq.push(res);
|
||||
return Complete;
|
||||
}
|
||||
let mut codepoint = u32::from(res);
|
||||
match unsafe {
|
||||
mbrtowc(
|
||||
std::ptr::addr_of_mut!(codepoint).cast(),
|
||||
std::ptr::addr_of!(read_byte).cast(),
|
||||
1,
|
||||
state,
|
||||
)
|
||||
} as isize
|
||||
{
|
||||
-1 => {
|
||||
FLOG!(reader, "Illegal input");
|
||||
*consumed += 1;
|
||||
return Error;
|
||||
}
|
||||
-2 => {
|
||||
// Sequence not yet complete.
|
||||
return Incomplete;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
if let Some(res) = char::from_u32(codepoint) {
|
||||
// Sequence complete.
|
||||
if !fish_reserved_codepoint(res) {
|
||||
*consumed += 1;
|
||||
out_seq.push(res);
|
||||
return Complete;
|
||||
}
|
||||
}
|
||||
for &b in &buffer[*consumed..] {
|
||||
out_seq.push(encode_byte_to_char(b));
|
||||
*consumed += 1;
|
||||
}
|
||||
Complete
|
||||
}
|
||||
|
||||
/// A simple, concrete implementation of InputEventQueuer.
|
||||
pub struct InputEventQueue {
|
||||
data: InputData,
|
||||
|
||||
Reference in New Issue
Block a user