From 7f8d247211fdfc8c19a4ada250fba231c6971fec Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sun, 5 Feb 2023 09:35:06 +0100 Subject: [PATCH] Port parse_constants.h to Rust --- fish-rust/build.rs | 2 + fish-rust/src/ffi.rs | 7 + fish-rust/src/lib.rs | 2 + fish-rust/src/parse_constants.rs | 724 +++++++++++++++++++++++++++++++ fish-rust/src/tokenizer.rs | 49 +++ src/ast.cpp | 30 +- src/builtins/complete.cpp | 10 +- src/complete.cpp | 12 +- src/expand.cpp | 14 +- src/fish.cpp | 8 +- src/fish_tests.cpp | 79 ++-- src/highlight.cpp | 8 +- src/history.cpp | 10 +- src/parse_constants.h | 157 ++----- src/parse_execution.cpp | 50 ++- src/parse_tree.cpp | 179 +------- src/parse_tree.h | 9 +- src/parse_util.cpp | 51 +-- src/parser.cpp | 23 +- src/reader.cpp | 14 +- src/tokenizer.cpp | 22 +- src/tokenizer.h | 6 +- 22 files changed, 982 insertions(+), 484 deletions(-) create mode 100644 fish-rust/src/parse_constants.rs create mode 100644 fish-rust/src/tokenizer.rs diff --git a/fish-rust/build.rs b/fish-rust/build.rs index 7338b357b..c485a3374 100644 --- a/fish-rust/build.rs +++ b/fish-rust/build.rs @@ -23,8 +23,10 @@ fn main() -> miette::Result<()> { "src/ffi_init.rs", "src/ffi_tests.rs", "src/future_feature_flags.rs", + "src/parse_constants.rs", "src/redirection.rs", "src/smoke.rs", + "src/tokenizer.rs", "src/topic_monitor.rs", "src/util.rs", "src/builtins/shared.rs", diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 7ae4de648..dfb334684 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -16,10 +16,12 @@ #include "io.h" #include "parse_util.h" #include "wildcard.h" + #include "tokenizer.h" #include "parser.h" #include "proc.h" #include "common.h" #include "builtin.h" + #include "fallback.h" safety!(unsafe_ffi) @@ -30,10 +32,15 @@ generate_pod!("pipes_ffi_t") generate!("make_pipes_ffi") + generate!("valid_var_name_char") + generate!("get_flog_file_fd") generate!("parse_util_unescape_wildcards") + generate!("fish_wcwidth") + 
generate!("fish_wcswidth") + generate!("wildcard_match") generate!("wgettext_ptr") diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs index 61457184c..0e94619de 100644 --- a/fish-rust/src/lib.rs +++ b/fish-rust/src/lib.rs @@ -15,9 +15,11 @@ mod ffi_tests; mod flog; mod future_feature_flags; +mod parse_constants; mod redirection; mod signal; mod smoke; +mod tokenizer; mod topic_monitor; mod util; mod wchar; diff --git a/fish-rust/src/parse_constants.rs b/fish-rust/src/parse_constants.rs new file mode 100644 index 000000000..0118c8f03 --- /dev/null +++ b/fish-rust/src/parse_constants.rs @@ -0,0 +1,724 @@ +//! Constants used in the programmatic representation of fish code. + +use crate::ffi::{fish_wcswidth, fish_wcwidth, wcharz_t}; +use crate::tokenizer::variable_assignment_equals_pos; +use crate::wchar::{wstr, WString, L}; +use crate::wchar_ffi::{wcharz, WCharFromFFI, WCharToFFI}; +use crate::wutil::{sprintf, wgettext_fmt}; +use cxx::{CxxWString, UniquePtr}; +use std::ops::{BitAnd, BitOrAssign}; +use widestring_suffix::widestrs; + +type SourceOffset = u32; + +pub const SOURCE_OFFSET_INVALID: SourceOffset = SourceOffset::MAX; +pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX; + +pub struct ParseTreeFlags(u8); + +pub const PARSE_FLAG_NONE: ParseTreeFlags = ParseTreeFlags(0); +/// attempt to build a "parse tree" no matter what. this may result in a 'forest' of +/// disconnected trees. this is intended to be used by syntax highlighting. +pub const PARSE_FLAG_CONTINUE_AFTER_ERROR: ParseTreeFlags = ParseTreeFlags(1 << 0); +/// include comment tokens. +pub const PARSE_FLAG_INCLUDE_COMMENTS: ParseTreeFlags = ParseTreeFlags(1 << 1); +/// indicate that the tokenizer should accept incomplete tokens +pub const PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS: ParseTreeFlags = ParseTreeFlags(1 << 2); +/// indicate that the parser should not generate the terminate token, allowing an 'unfinished' +/// tree where some nodes may have no productions. 
+pub const PARSE_FLAG_LEAVE_UNTERMINATED: ParseTreeFlags = ParseTreeFlags(1 << 3); +/// indicate that the parser should generate job_list entries for blank lines. +pub const PARSE_FLAG_SHOW_BLANK_LINES: ParseTreeFlags = ParseTreeFlags(1 << 4); +/// indicate that extra semis should be generated. +pub const PARSE_FLAG_SHOW_EXTRA_SEMIS: ParseTreeFlags = ParseTreeFlags(1 << 5); + +impl BitAnd for ParseTreeFlags { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitOrAssign for ParseTreeFlags { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0 + } +} + +#[derive(PartialEq, Eq)] +pub struct ParserTestErrorBits(u8); + +pub const PARSER_TEST_ERROR: ParserTestErrorBits = ParserTestErrorBits(1); +pub const PARSER_TEST_INCOMPLETE: ParserTestErrorBits = ParserTestErrorBits(2); + +impl BitAnd for ParserTestErrorBits { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitOrAssign for ParserTestErrorBits { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0 + } +} + +#[cxx::bridge] +mod parse_constants_ffi { + extern "C++" { + include!("wutil.h"); + type wcharz_t = super::wcharz_t; + } + + /// A range of source code. + #[derive(PartialEq, Eq)] + struct SourceRange { + start: u32, + length: u32, + } + + extern "Rust" { + fn end(self: &SourceRange) -> u32; + fn contains_inclusive(self: &SourceRange, loc: u32) -> bool; + } + + /// IMPORTANT: If the following enum table is modified you must also update token_type_description below. + /// TODO above comment can be removed when we drop the FFI and get real enums. + enum ParseTokenType { + invalid = 1, + + // Terminal types. + string, + pipe, + redirection, + background, + andand, + oror, + end, + // Special terminal type that means no more tokens forthcoming. + terminate, + // Very special terminal types that don't appear in the production list. 
+ error, + tokenizer_error, + comment, + } + + #[repr(u8)] + enum ParseKeyword { + // 'none' is not a keyword, it is a sentinel indicating nothing. + none, + + kw_and, + kw_begin, + kw_builtin, + kw_case, + kw_command, + kw_else, + kw_end, + kw_exclam, + kw_exec, + kw_for, + kw_function, + kw_if, + kw_in, + kw_not, + kw_or, + kw_switch, + kw_time, + kw_while, + } + + extern "Rust" { + fn token_type_description(token_type: ParseTokenType) -> wcharz_t; + fn keyword_description(keyword: ParseKeyword) -> wcharz_t; + fn keyword_from_string(s: wcharz_t) -> ParseKeyword; + } + + // Statement decorations like 'command' or 'exec'. + enum StatementDecoration { + none, + command, + builtin, + exec, + } + + // Parse error code list. + enum ParseErrorCode { + none, + + // Matching values from enum parser_error. + syntax, + cmdsubst, + + generic, // unclassified error types + + // Tokenizer errors. + tokenizer_unterminated_quote, + tokenizer_unterminated_subshell, + tokenizer_unterminated_slice, + tokenizer_unterminated_escape, + tokenizer_other, + + unbalancing_end, // end outside of block + unbalancing_else, // else outside of if + unbalancing_case, // case outside of switch + bare_variable_assignment, // a=b without command + andor_in_pipeline, // "and" or "or" after a pipe + } + + struct parse_error_t { + text: UniquePtr, + code: ParseErrorCode, + source_start: usize, + source_length: usize, + } + + extern "Rust" { + type ParseError; + fn code(self: &ParseError) -> ParseErrorCode; + fn source_start(self: &ParseError) -> usize; + fn text(self: &ParseError) -> UniquePtr; + + #[cxx_name = "describe"] + fn describe_ffi( + self: &ParseError, + src: &CxxWString, + is_interactive: bool, + ) -> UniquePtr; + #[cxx_name = "describe_with_prefix"] + fn describe_with_prefix_ffi( + self: &ParseError, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr; + + fn describe_with_prefix( + self: &parse_error_t, + src: &CxxWString, + prefix: 
&CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr; + + type ParseErrorList; + fn new_parse_error_list() -> Box; + #[cxx_name = "offset_source_start"] + fn offset_source_start_ffi(self: &mut ParseErrorList, amt: usize); + fn size(self: &ParseErrorList) -> usize; + fn at(self: &ParseErrorList, offset: usize) -> *const ParseError; + fn empty(self: &ParseErrorList) -> bool; + fn push_back(self: &mut ParseErrorList, error: &parse_error_t); + fn append(self: &mut ParseErrorList, other: *mut ParseErrorList); + fn erase(self: &mut ParseErrorList, index: usize); + fn clear(self: &mut ParseErrorList); + } + + extern "Rust" { + #[cxx_name = "token_type_user_presentable_description"] + fn token_type_user_presentable_description_ffi( + type_: ParseTokenType, + keyword: ParseKeyword, + ) -> UniquePtr; + } + + // The location of a pipeline. + enum PipelinePosition { + none, // not part of a pipeline + first, // first command in a pipeline + subsequent, // second or further command in a pipeline + } +} + +pub use parse_constants_ffi::{ + parse_error_t, ParseErrorCode, ParseKeyword, ParseTokenType, SourceRange, +}; + +impl SourceRange { + fn end(&self) -> SourceOffset { + self.start.checked_add(self.length).expect("Overflow") + } + + // \return true if a location is in this range, including one-past-the-end. 
+ fn contains_inclusive(&self, loc: SourceOffset) -> bool { + self.start <= loc && loc - self.start <= self.length + } +} + +impl From for &'static wstr { + #[widestrs] + fn from(token_type: ParseTokenType) -> Self { + match token_type { + ParseTokenType::comment => "ParseTokenType::comment"L, + ParseTokenType::error => "ParseTokenType::error"L, + ParseTokenType::tokenizer_error => "ParseTokenType::tokenizer_error"L, + ParseTokenType::background => "ParseTokenType::background"L, + ParseTokenType::end => "ParseTokenType::end"L, + ParseTokenType::pipe => "ParseTokenType::pipe"L, + ParseTokenType::redirection => "ParseTokenType::redirection"L, + ParseTokenType::string => "ParseTokenType::string"L, + ParseTokenType::andand => "ParseTokenType::andand"L, + ParseTokenType::oror => "ParseTokenType::oror"L, + ParseTokenType::terminate => "ParseTokenType::terminate"L, + ParseTokenType::invalid => "ParseTokenType::invalid"L, + _ => "unknown token type"L, + } + } +} + +fn token_type_description(token_type: ParseTokenType) -> wcharz_t { + let s: &'static wstr = token_type.into(); + wcharz!(s) +} + +impl From for &'static wstr { + #[widestrs] + fn from(keyword: ParseKeyword) -> Self { + match keyword { + ParseKeyword::kw_exclam => "!"L, + ParseKeyword::kw_and => "and"L, + ParseKeyword::kw_begin => "begin"L, + ParseKeyword::kw_builtin => "builtin"L, + ParseKeyword::kw_case => "case"L, + ParseKeyword::kw_command => "command"L, + ParseKeyword::kw_else => "else"L, + ParseKeyword::kw_end => "end"L, + ParseKeyword::kw_exec => "exec"L, + ParseKeyword::kw_for => "for"L, + ParseKeyword::kw_function => "function"L, + ParseKeyword::kw_if => "if"L, + ParseKeyword::kw_in => "in"L, + ParseKeyword::kw_not => "not"L, + ParseKeyword::kw_or => "or"L, + ParseKeyword::kw_switch => "switch"L, + ParseKeyword::kw_time => "time"L, + ParseKeyword::kw_while => "while"L, + _ => "unknown_keyword"L, + } + } +} + +fn keyword_description(keyword: ParseKeyword) -> wcharz_t { + let s: &'static wstr = 
keyword.into(); + wcharz!(s) +} + +impl From<&wstr> for ParseKeyword { + fn from(s: &wstr) -> Self { + let s: Vec = s.encode_utf8().collect(); + match unsafe { std::str::from_utf8_unchecked(&s) } { + "!" => ParseKeyword::kw_exclam, + "and" => ParseKeyword::kw_and, + "begin" => ParseKeyword::kw_begin, + "builtin" => ParseKeyword::kw_builtin, + "case" => ParseKeyword::kw_case, + "command" => ParseKeyword::kw_command, + "else" => ParseKeyword::kw_else, + "end" => ParseKeyword::kw_end, + "exec" => ParseKeyword::kw_exec, + "for" => ParseKeyword::kw_for, + "function" => ParseKeyword::kw_function, + "if" => ParseKeyword::kw_if, + "in" => ParseKeyword::kw_in, + "not" => ParseKeyword::kw_not, + "or" => ParseKeyword::kw_or, + "switch" => ParseKeyword::kw_switch, + "time" => ParseKeyword::kw_time, + "while" => ParseKeyword::kw_while, + _ => ParseKeyword::none, + } + } +} + +fn keyword_from_string<'a>(s: impl Into<&'a wstr>) -> ParseKeyword { + let s: &wstr = s.into(); + ParseKeyword::from(s) +} + +#[derive(Clone)] +struct ParseError { + /// Text of the error. + text: WString, + /// Code for the error. + code: ParseErrorCode, + /// Offset and length of the token in the source code that triggered this error. + source_start: usize, + source_length: usize, +} + +impl Default for ParseError { + fn default() -> ParseError { + ParseError { + text: L!("").to_owned(), + code: ParseErrorCode::none, + source_start: 0, + source_length: 0, + } + } +} + +impl ParseError { + /// Return a string describing the error, suitable for presentation to the user. If + /// is_interactive is true, the offending line with a caret is printed as well. + pub fn describe(self: &ParseError, src: &wstr, is_interactive: bool) -> WString { + self.describe_with_prefix(src, L!(""), is_interactive, false) + } + + /// Return a string describing the error, suitable for presentation to the user, with the given + /// prefix. If skip_caret is false, the offending line with a caret is printed as well. 
+ pub fn describe_with_prefix( + self: &ParseError, + src: &wstr, + prefix: &wstr, + is_interactive: bool, + skip_caret: bool, + ) -> WString { + let mut result = prefix.to_owned(); + let context = wstr::from_char_slice( + &src.as_char_slice()[self.source_start..self.source_start + self.source_length], + ); + // Some errors don't have their message passed in, so we construct them here. + // This affects e.g. `eval "a=(foo)"` + match self.code { + ParseErrorCode::andor_in_pipeline => { + result += wstr::from_char_slice( + wgettext_fmt!(INVALID_PIPELINE_CMD_ERR_MSG, context).as_char_slice(), + ); + } + ParseErrorCode::bare_variable_assignment => { + let assignment_src = context; + #[allow(clippy::explicit_auto_deref)] + let equals_pos = variable_assignment_equals_pos(assignment_src).unwrap(); + let variable = &assignment_src[..equals_pos]; + let value = &assignment_src[equals_pos + 1..]; + result += wstr::from_char_slice( + wgettext_fmt!(ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable, value) + .as_char_slice(), + ); + } + _ => { + if skip_caret && self.text.is_empty() { + return L!("").to_owned(); + } + result += wstr::from_char_slice(self.text.as_char_slice()); + } + } + + let mut start = self.source_start; + let mut len = self.source_length; + if start >= src.len() { + // If we are past the source, we clamp it to the end. + start = src.len() - 1; + len = 0; + } + + if start + len > src.len() { + len = src.len() - self.source_start; + } + + if skip_caret { + return result; + } + + // Locate the beginning of this line of source. + let mut line_start = 0; + + // Look for a newline prior to source_start. If we don't find one, start at the beginning of + // the string; otherwise start one past the newline. Note that source_start may itself point + // at a newline; we want to find the newline before it. 
+ if start > 0 { + let prefix = &src.as_char_slice()[..start]; + let newline_left_of_start = prefix.iter().rev().position(|c| *c == '\n'); + if let Some(left_of_start) = newline_left_of_start { + line_start = start - left_of_start; + } + } + // Look for the newline after the source range. If the source range itself includes a + // newline, that's the one we want, so start just before the end of the range. + let last_char_in_range = if len == 0 { start } else { start + len - 1 }; + let line_end = src.as_char_slice()[last_char_in_range..] + .iter() + .position(|c| *c == '\n') + .map(|pos| pos + last_char_in_range) + .unwrap_or(src.len()); + + assert!(line_end >= line_start); + assert!(start >= line_start); + + // Don't include the caret and line if we're interactive and this is the first line, because + // then it's obvious. + let interactive_skip_caret = is_interactive && start == 0; + if interactive_skip_caret { + return result; + } + + // Append the line of text. + if !result.is_empty() { + result += "\n"; + } + result += wstr::from_char_slice(&src.as_char_slice()[line_start..line_end]); + + // Append the caret line. The input source may include tabs; for that reason we + // construct a "caret line" that has tabs in corresponding positions. + let mut caret_space_line = WString::new(); + caret_space_line.reserve(start - line_start); + for i in line_start..start { + let wc = src.as_char_slice()[i]; + if wc == '\t' { + caret_space_line += "\t"; + } else if wc == '\n' { + // It's possible that the start points at a newline itself. In that case, + // pretend it's a space. We only expect this to be at the end of the string. + caret_space_line += " "; + } else { + let width = fish_wcwidth(wc.into()).0; + if width > 0 { + caret_space_line += " ".repeat(width as usize).as_str(); + } + } + } + result += "\n"; + result += wstr::from_char_slice(caret_space_line.as_char_slice()); + result += "^"; + if len > 1 { + // Add a squiggle under the error location. 
+ // We do it like this + // ^~~^ + // With a "^" under the start and end, and squiggles in-between. + let width = fish_wcswidth(unsafe { src.as_ptr().add(start) }, len).0; + if width >= 2 { + // Subtract one for each of the carets - this is important in case + // the starting char has a width of > 1. + result += "~".repeat(width as usize - 2).as_str(); + result += "^"; + } + } + result + } +} + +impl From<&parse_error_t> for ParseError { + fn from(error: &parse_error_t) -> Self { + ParseError { + text: error.text.from_ffi(), + code: error.code, + source_start: error.source_start, + source_length: error.source_length, + } + } +} + +impl parse_error_t { + fn describe_with_prefix( + self: &parse_error_t, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr { + ParseError::from(self).describe_with_prefix_ffi(src, prefix, is_interactive, skip_caret) + } +} + +impl ParseError { + fn code(&self) -> ParseErrorCode { + self.code + } + fn source_start(&self) -> usize { + self.source_start + } + fn text(&self) -> UniquePtr { + self.text.to_ffi() + } + + fn describe_ffi( + self: &ParseError, + src: &CxxWString, + is_interactive: bool, + ) -> UniquePtr { + self.describe(&src.from_ffi(), is_interactive).to_ffi() + } + + fn describe_with_prefix_ffi( + self: &ParseError, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr { + self.describe_with_prefix( + &src.from_ffi(), + &prefix.from_ffi(), + is_interactive, + skip_caret, + ) + .to_ffi() + } +} + +#[widestrs] +pub fn token_type_user_presentable_description( + type_: ParseTokenType, + keyword: ParseKeyword, +) -> WString { + if keyword != ParseKeyword::none { + return sprintf!("keyword: '%ls'"L, Into::<&'static wstr>::into(keyword)); + } + match type_ { + ParseTokenType::string => "a string"L.to_owned(), + ParseTokenType::pipe => "a pipe"L.to_owned(), + ParseTokenType::redirection => "a redirection"L.to_owned(), + 
ParseTokenType::background => "a '&'"L.to_owned(), + ParseTokenType::andand => "'&&'"L.to_owned(), + ParseTokenType::oror => "'||'"L.to_owned(), + ParseTokenType::end => "end of the statement"L.to_owned(), + ParseTokenType::terminate => "end of the input"L.to_owned(), + ParseTokenType::error => "a parse error"L.to_owned(), + ParseTokenType::tokenizer_error => "an incomplete token"L.to_owned(), + ParseTokenType::comment => "a comment"L.to_owned(), + _ => sprintf!("a %ls"L, Into::<&'static wstr>::into(type_)), + } +} + +fn token_type_user_presentable_description_ffi( + type_: ParseTokenType, + keyword: ParseKeyword, +) -> UniquePtr { + token_type_user_presentable_description(type_, keyword).to_ffi() +} + +/// TODO This should be type alias once we drop the FFI. +pub struct ParseErrorList(Vec); + +/// Helper function to offset error positions by the given amount. This is used when determining +/// errors in a substring of a larger source buffer. +pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) { + if amt > 0 { + for ref mut error in errors.0.iter_mut() { + // Preserve the special meaning of -1 as 'unknown'. + if error.source_start != SOURCE_LOCATION_UNKNOWN { + error.source_start += amt; + } + } + } +} + +fn new_parse_error_list() -> Box { + Box::new(ParseErrorList(Vec::new())) +} + +impl ParseErrorList { + fn offset_source_start_ffi(&mut self, amt: usize) { + parse_error_offset_source_start(self, amt) + } + + fn size(&self) -> usize { + self.0.len() + } + + fn at(&self, offset: usize) -> *const ParseError { + &self.0[offset] + } + + fn empty(&self) -> bool { + self.0.is_empty() + } + + fn push_back(&mut self, error: &parse_error_t) { + self.0.push(error.into()) + } + + fn append(&mut self, other: *mut ParseErrorList) { + self.0.append(&mut (unsafe { &*other }.0.clone())); + } + + fn erase(&mut self, index: usize) { + self.0.remove(index); + } + + fn clear(&mut self) { + self.0.clear() + } +} + +/// Maximum number of function calls. 
+pub const FISH_MAX_STACK_DEPTH: usize = 128; + +/// Maximum number of nested string substitutions (in lieu of evals) +/// Reduced under TSAN: our CI test creates 500 jobs and this is very slow with TSAN. +#[cfg(feature = "FISH_TSAN_WORKAROUNDS")] +pub const FISH_MAX_EVAL_DEPTH: usize = 250; +#[cfg(not(feature = "FISH_TSAN_WORKAROUNDS"))] +pub const FISH_MAX_EVAL_DEPTH: usize = 500; + +/// Error message on a function that calls itself immediately. +pub const INFINITE_FUNC_RECURSION_ERR_MSG: &str = + "The function '%ls' calls itself immediately, which would result in an infinite loop."; + +/// Error message on reaching maximum call stack depth. +pub const CALL_STACK_LIMIT_EXCEEDED_ERR_MSG: &str = + "The call stack limit has been exceeded. Do you have an accidental infinite loop?"; + +/// Error message when encountering an unknown builtin name. +pub const UNKNOWN_BUILTIN_ERR_MSG: &str = "Unknown builtin '%ls'"; + +/// Error message when encountering a failed expansion, e.g. for the variable name in for loops. +pub const FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG: &str = "Unable to expand variable name '%ls'"; + +/// Error message when encountering an illegal file descriptor. +pub const ILLEGAL_FD_ERR_MSG: &str = "Illegal file descriptor in redirection '%ls'"; + +/// Error message for wildcards with no matches. +pub const WILDCARD_ERR_MSG: &str = "No matches for wildcard '%ls'. See `help wildcards-globbing`."; + +/// Error when using break outside of loop. +pub const INVALID_BREAK_ERR_MSG: &str = "'break' while not inside of loop"; + +/// Error when using continue outside of loop. +pub const INVALID_CONTINUE_ERR_MSG: &str = "'continue' while not inside of loop"; + +/// Error message when a command may not be in a pipeline. +pub const INVALID_PIPELINE_CMD_ERR_MSG: &str = "The '%ls' command can not be used in a pipeline"; + +// Error messages. The number is a reminder of how many format specifiers are contained. + +/// Error for $^. 
+pub const ERROR_BAD_VAR_CHAR1: &str = "$%lc is not a valid variable in fish."; + +/// Error for ${a}. +pub const ERROR_BRACKETED_VARIABLE1: &str = + "Variables cannot be bracketed. In fish, please use {$%ls}."; + +/// Error for "${a}". +pub const ERROR_BRACKETED_VARIABLE_QUOTED1: &str = + "Variables cannot be bracketed. In fish, please use \"$%ls\"."; + +/// Error issued on $?. +pub const ERROR_NOT_STATUS: &str = "$? is not the exit status. In fish, please use $status."; + +/// Error issued on $$. +pub const ERROR_NOT_PID: &str = "$$ is not the pid. In fish, please use $fish_pid."; + +/// Error issued on $#. +pub const ERROR_NOT_ARGV_COUNT: &str = "$# is not supported. In fish, please use 'count $argv'."; + +/// Error issued on $@. +pub const ERROR_NOT_ARGV_AT: &str = "$@ is not supported. In fish, please use $argv."; + +/// Error issued on $*. +pub const ERROR_NOT_ARGV_STAR: &str = "$* is not supported. In fish, please use $argv."; + +/// Error issued on $. +pub const ERROR_NO_VAR_NAME: &str = "Expected a variable name after this $."; + +/// Error message for Posix-style assignment: foo=bar. +pub const ERROR_BAD_COMMAND_ASSIGN_ERR_MSG: &str = + "Unsupported use of '='. In fish, please use 'set %ls %ls'."; + +/// Error message for a command like `time foo &`. +pub const ERROR_TIME_BACKGROUND: &str = + "'time' is not supported for background jobs. Consider using 'command time'."; + +/// Error issued on { echo; echo }. +pub const ERROR_NO_BRACE_GROUPING: &str = + "'{ ... }' is not supported for grouping commands. Please use 'begin; ...; end'"; diff --git a/fish-rust/src/tokenizer.rs b/fish-rust/src/tokenizer.rs new file mode 100644 index 000000000..39114a3ef --- /dev/null +++ b/fish-rust/src/tokenizer.rs @@ -0,0 +1,49 @@ +//! A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be +//! extended to support marks, tokenizing multiple strings and disposing of unused string segments. 
+use crate::ffi::{valid_var_name_char, wchar_t}; +use crate::wchar::wstr; +use crate::wchar_ffi::WCharFromFFI; +use cxx::{CxxWString, SharedPtr}; + +#[cxx::bridge] +mod tokenizer_ffi { + extern "Rust" { + #[cxx_name = "variable_assignment_equals_pos"] + fn variable_assignment_equals_pos_ffi(txt: &CxxWString) -> SharedPtr; + } +} + +/// The position of the equal sign in a variable assignment like foo=bar. +/// +/// Return the location of the equals sign, or none if the string does +/// not look like a variable assignment like FOO=bar. The detection +/// works similarly to some POSIX shells: only letters and numbers are +/// allowed on the left hand side, no quotes or escaping. +pub fn variable_assignment_equals_pos(txt: &wstr) -> Option { + let mut found_potential_variable = false; + + // TODO bracket indexing + for (i, c) in txt.chars().enumerate() { + if !found_potential_variable { + if !valid_var_name_char(c as wchar_t) { + return None; + } + found_potential_variable = true; + } else { + if c == '=' { + return Some(i); + } + if !valid_var_name_char(c as wchar_t) { + return None; + } + } + } + None +} + +fn variable_assignment_equals_pos_ffi(txt: &CxxWString) -> SharedPtr { + match variable_assignment_equals_pos(&txt.from_ffi()) { + Some(p) => SharedPtr::new(p), + None => SharedPtr::null(), + } +} diff --git a/src/ast.cpp b/src/ast.cpp index b461f528d..f14bf3e7b 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -31,7 +31,7 @@ static tok_flags_t tokenizer_flags_from_parse_flags(parse_tree_flags_t flags) { // Given an expanded string, returns any keyword it matches. 
static parse_keyword_t keyword_with_name(const wcstring &name) { - return str_to_enum(name.c_str(), keyword_enum_map, keyword_enum_map_len); + return keyword_from_string(name.c_str()); } static bool is_keyword_char(wchar_t c) { @@ -177,7 +177,7 @@ class token_stream_t { result.has_dash_prefix = !text.empty() && text.at(0) == L'-'; result.is_help_argument = (text == L"-h" || text == L"--help"); result.is_newline = (result.type == parse_token_type_t::end && text == L"\n"); - result.may_be_variable_assignment = variable_assignment_equals_pos(text).has_value(); + result.may_be_variable_assignment = variable_assignment_equals_pos(text) != nullptr; result.tok_error = token.error; // These assertions are totally bogus. Basically our tokenizer works in size_t but we work @@ -396,13 +396,15 @@ static wcstring token_types_user_presentable_description( std::initializer_list types) { assert(types.size() > 0 && "Should not be empty list"); if (types.size() == 1) { - return token_type_user_presentable_description(*types.begin()); + return *token_type_user_presentable_description(*types.begin(), parse_keyword_t::none); } size_t idx = 0; wcstring res; for (parse_token_type_t type : types) { const wchar_t *optor = (idx++ ? 
L" or " : L""); - append_format(res, L"%ls%ls", optor, token_type_user_presentable_description(type).c_str()); + append_format( + res, L"%ls%ls", optor, + token_type_user_presentable_description(type, parse_keyword_t::none)->c_str()); } return res; } @@ -635,7 +637,7 @@ struct populator_t { if (out_errors_) { parse_error_t err; - err.text = vformat_string(fmt, va); + err.text = std::make_unique(vformat_string(fmt, va)); err.code = code; err.source_start = range.start; err.source_length = range.length; @@ -682,9 +684,10 @@ struct populator_t { "Should not attempt to consume terminate token"); auto tok = consume_any_token(); if (tok.type != type) { - parse_error(tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(type).c_str(), - tok.user_presentable_description().c_str()); + parse_error( + tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(type, parse_keyword_t::none)->c_str(), + tok.user_presentable_description().c_str()); return source_range_t{0, 0}; } return tok.range(); @@ -702,10 +705,11 @@ struct populator_t { // TODO: this is a crummy message if we get a tokenizer error, for example: // complete -c foo -a "'abc" if (this->top_type_ == type_t::freestanding_argument_list) { - this->parse_error( - tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(parse_token_type_t::string).c_str(), - tok.user_presentable_description().c_str()); + this->parse_error(tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(parse_token_type_t::string, + parse_keyword_t::none) + ->c_str(), + tok.user_presentable_description().c_str()); return; } @@ -1376,7 +1380,7 @@ wcstring ast_t::dump(const wcstring &orig) const { desc = L""; break; default: - desc = token_type_user_presentable_description(n->type); + desc = 
*token_type_user_presentable_description(n->type, parse_keyword_t::none); break; } append_format(result, L"%ls", desc.c_str()); diff --git a/src/builtins/complete.cpp b/src/builtins/complete.cpp index d01e747c4..8b781a16d 100644 --- a/src/builtins/complete.cpp +++ b/src/builtins/complete.cpp @@ -337,12 +337,12 @@ maybe_t builtin_complete(parser_t &parser, io_streams_t &streams, const wch } for (const auto &condition_string : condition) { - parse_error_list_t errors; - if (parse_util_detect_errors(condition_string, &errors)) { - for (const auto &error : errors) { + auto errors = new_parse_error_list(); + if (parse_util_detect_errors(condition_string, &*errors)) { + for (size_t i = 0; i < errors->size(); i++) { wcstring prefix(wcstring(cmd) + L": -n '" + condition_string + L"': "); - streams.err.append(error.describe_with_prefix(condition_string, prefix, - parser.is_interactive(), false)); + streams.err.append(*errors->at(i)->describe_with_prefix( + condition_string, prefix, parser.is_interactive(), false)); streams.err.push_back(L'\n'); } return STATUS_CMD_ERROR; diff --git a/src/complete.cpp b/src/complete.cpp index c90de110c..96e7445a0 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -1318,8 +1318,8 @@ cleanup_t completer_t::apply_var_assignments(const wcstring_list_t &var_assignme const expand_flags_t expand_flags = expand_flag::skip_cmdsubst; const block_t *block = ctx.parser->push_block(block_t::variable_assignment_block()); for (const wcstring &var_assign : var_assignments) { - maybe_t equals_pos = variable_assignment_equals_pos(var_assign); - assert(equals_pos.has_value() && "All variable assignments should have equals position"); + auto equals_pos = variable_assignment_equals_pos(var_assign); + assert(equals_pos && "All variable assignments should have equals position"); const wcstring variable_name = var_assign.substr(0, *equals_pos); const wcstring expression = var_assign.substr(*equals_pos + 1); @@ -1406,7 +1406,7 @@ void 
completer_t::walk_wrap_chain(const wcstring &cmd, const wcstring &cmdline, size_t wrapped_command_offset_in_wt = wcstring::npos; while (auto tok = tokenizer.next()) { wcstring tok_src = tok->get_source(wt); - if (variable_assignment_equals_pos(tok_src).has_value()) { + if (variable_assignment_equals_pos(tok_src)) { ad->var_assignments->push_back(std::move(tok_src)); } else { wrapped_command_offset_in_wt = tok->offset; @@ -1553,7 +1553,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) { for (const tok_t &tok : tokens) { if (tok.location_in_or_at_end_of_source_range(cursor_pos)) break; wcstring tok_src = tok.get_source(cmdline); - if (!variable_assignment_equals_pos(tok_src).has_value()) break; + if (!variable_assignment_equals_pos(tok_src)) break; var_assignments.push_back(std::move(tok_src)); } tokens.erase(tokens.begin(), tokens.begin() + var_assignments.size()); @@ -1603,8 +1603,8 @@ void completer_t::perform_for_commandline(wcstring cmdline) { } if (cmd_tok.location_in_or_at_end_of_source_range(cursor_pos)) { - maybe_t equal_sign_pos = variable_assignment_equals_pos(current_token); - if (equal_sign_pos.has_value()) { + auto equal_sign_pos = variable_assignment_equals_pos(current_token); + if (equal_sign_pos) { complete_param_expand(current_token, true /* do_file */); return; } diff --git a/src/expand.cpp b/src/expand.cpp index 97ef1fc4e..6dc045c60 100644 --- a/src/expand.cpp +++ b/src/expand.cpp @@ -75,10 +75,10 @@ static void append_syntax_error(parse_error_list_t *errors, size_t source_start, va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - errors->push_back(error); + errors->push_back(std::move(error)); } /// Append a cmdsub error to the given error list. 
But only do so if the error hasn't already been @@ -95,14 +95,14 @@ static void append_cmdsub_error(parse_error_list_t *errors, size_t source_start, va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - for (const auto &it : *errors) { - if (error.text == it.text) return; + for (size_t i = 0; i < errors->size(); i++) { + if (*error.text == *errors->at(i)->text()) return; } - errors->push_back(error); + errors->push_back(std::move(error)); } /// Append an overflow error, when expansion produces too much data. @@ -113,7 +113,7 @@ static expand_result_t append_overflow_error(parse_error_list_t *errors, error.source_start = source_start; error.source_length = 0; error.code = parse_error_code_t::generic; - error.text = _(L"Expansion produced too many results"); + error.text = std::make_unique(_(L"Expansion produced too many results")); errors->push_back(std::move(error)); } return expand_result_t::make_error(STATUS_EXPAND_ERROR); diff --git a/src/fish.cpp b/src/fish.cpp index 12375b0b8..b602bd8e1 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -263,11 +263,11 @@ static int run_command_list(parser_t &parser, const std::vector &cm for (const auto &cmd : cmds) { wcstring cmd_wcs = str2wcstring(cmd); // Parse into an ast and detect errors. - parse_error_list_t errors; - auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &*errors); bool errored = ast.errored(); if (!errored) { - errored = parse_util_detect_errors(ast, cmd_wcs, &errors); + errored = parse_util_detect_errors(ast, cmd_wcs, &*errors); } if (!errored) { // Construct a parsed source ref. 
@@ -277,7 +277,7 @@ static int run_command_list(parser_t &parser, const std::vector &cm parser.eval(ps, io); } else { wcstring sb; - parser.get_backtrace(cmd_wcs, errors, sb); + parser.get_backtrace(cmd_wcs, *errors, sb); std::fwprintf(stderr, L"%ls", sb.c_str()); } } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index c69b095c9..3257ffced 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -2105,15 +2105,15 @@ static bool expand_test(const wchar_t *in, expand_flags_t flags, ...) { va_list va; bool res = true; wchar_t *arg; - parse_error_list_t errors; + auto errors = new_parse_error_list(); pwd_environment_t pwd{}; operation_context_t ctx{parser_t::principal_parser().shared(), pwd, no_cancel}; - if (expand_string(in, &output, flags, ctx, &errors) == expand_result_t::error) { - if (errors.empty()) { + if (expand_string(in, &output, flags, ctx, &*errors) == expand_result_t::error) { + if (errors->empty()) { err(L"Bug: Parse error reported but no error text found."); } else { - err(L"%ls", errors.at(0).describe(in, ctx.parser->is_interactive()).c_str()); + err(L"%ls", errors->at(0)->describe(in, ctx.parser->is_interactive())->c_str()); } return false; } @@ -2324,14 +2324,14 @@ static void test_expand_overflow() { int set = parser->vars().set(L"bigvar", ENV_LOCAL, std::move(vals)); do_test(set == ENV_OK); - parse_error_list_t errors; + auto errors = new_parse_error_list(); operation_context_t ctx{parser, parser->vars(), no_cancel}; // We accept only 1024 completions. 
completion_receiver_t output{1024}; - auto res = expand_string(expansion, &output, expand_flags_t{}, ctx, &errors); - do_test(!errors.empty()); + auto res = expand_string(expansion, &output, expand_flags_t{}, ctx, &*errors); + do_test(!errors->empty()); do_test(res == expand_result_t::error); parser->vars().pop(); @@ -4965,7 +4965,7 @@ static void test_new_parser_fuzzing() { wcstring src; src.reserve(128); - parse_error_list_t errors; + auto errors = new_parse_error_list(); double start = timef(); bool log_it = true; @@ -4989,7 +4989,7 @@ static void test_new_parser_fuzzing() { // Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns // true if successful. static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, - enum statement_decoration_t *out_deco) { + statement_decoration_t *out_deco) { using namespace ast; out_cmd->clear(); out_joined_args->clear(); @@ -5062,7 +5062,7 @@ static void test_new_parser_ll2() { wcstring src; wcstring cmd; wcstring args; - enum statement_decoration_t deco; + statement_decoration_t deco; } tests[] = {{L"echo hello", L"echo", L"hello", statement_decoration_t::none}, {L"command echo hello", L"echo", L"hello", statement_decoration_t::command}, {L"exec echo hello", L"echo", L"hello", statement_decoration_t::exec}, @@ -5079,7 +5079,7 @@ static void test_new_parser_ll2() { for (const auto &test : tests) { wcstring cmd, args; - enum statement_decoration_t deco = statement_decoration_t::none; + statement_decoration_t deco = statement_decoration_t::none; bool success = test_1_parse_ll2(test.src, &cmd, &args, &deco); if (!success) err(L"Parse of '%ls' failed on line %ld", test.cmd.c_str(), (long)__LINE__); if (cmd != test.cmd) @@ -5135,20 +5135,20 @@ static void test_new_parser_ad_hoc() { ast = ast_t::parse(L"a=", parse_flag_leave_unterminated); do_test(!ast.errored()); - parse_error_list_t errors; - ast = ast_t::parse(L"begin; echo (", 
parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && - errors.at(0).code == parse_error_code_t::tokenizer_unterminated_subshell); + auto errors = new_parse_error_list(); + ast = ast_t::parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); - errors.clear(); - ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && - errors.at(0).code == parse_error_code_t::tokenizer_unterminated_subshell); + errors->clear(); + ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); - errors.clear(); - ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && - errors.at(0).code == parse_error_code_t::tokenizer_unterminated_quote); + errors->clear(); + ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_quote); } static void test_new_parser_errors() { @@ -5179,24 +5179,24 @@ static void test_new_parser_errors() { const wcstring src = test.src; parse_error_code_t expected_code = test.code; - parse_error_list_t errors; - auto ast = ast::ast_t::parse(src, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(src, parse_flag_none, &*errors); if (!ast.errored()) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); } - if (errors.size() != 1) { + if (errors->size() != 1) { err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", - src.c_str(), errors.size()); - for (const auto &err : errors) { - fprintf(stderr, "%ls\n", err.describe(src, false).c_str()); + src.c_str(), errors->size()); 
+ for (size_t i = 0; i < errors->size(); i++) { + fprintf(stderr, "%ls\n", errors->at(i)->describe(src, false)->c_str()); } - } else if (errors.at(0).code != expected_code) { + } else if (errors->at(0)->code() != expected_code) { err(L"Source '%ls' was expected to produce error code %lu, but instead produced error " L"code %lu", - src.c_str(), expected_code, (unsigned long)errors.at(0).code); - for (const auto &error : errors) { - err(L"\t\t%ls", error.describe(src, true).c_str()); + src.c_str(), expected_code, (unsigned long)errors->at(0)->code()); + for (size_t i = 0; i < errors->size(); i++) { + err(L"\t\t%ls", errors->at(i)->describe(src, true)->c_str()); } } } @@ -5289,13 +5289,14 @@ static void test_error_messages() { {L"echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME}, {L"echo foo $ bar", ERROR_NO_VAR_NAME}}; - parse_error_list_t errors; + auto errors = new_parse_error_list(); for (const auto &test : error_tests) { - errors.clear(); - parse_util_detect_errors(test.src, &errors); - do_test(!errors.empty()); - if (!errors.empty()) { - do_test1(string_matches_format(errors.at(0).text, test.error_text_format), test.src); + errors->clear(); + parse_util_detect_errors(test.src, &*errors); + do_test(!errors->empty()); + if (!errors->empty()) { + do_test1(string_matches_format(*errors->at(0)->text(), test.error_text_format), + test.src); } } } diff --git a/src/highlight.cpp b/src/highlight.cpp index 292570c16..f424c3057 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -394,7 +394,7 @@ rgb_color_t highlight_color_resolver_t::resolve_spec(const highlight_spec_t &hig return iter->second; } -static bool command_is_valid(const wcstring &cmd, enum statement_decoration_t decoration, +static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, const wcstring &working_directory, const environment_t &vars); static bool has_expand_reserved(const wcstring &str) { @@ -1057,7 +1057,7 @@ void highlighter_t::visit(const ast::variable_assignment_t &varas) 
{ color_as_argument(varas); // Highlight the '=' in variable assignments as an operator. auto where = variable_assignment_equals_pos(varas.source(this->buff)); - if (where.has_value()) { + if (where) { size_t equals_loc = varas.source_range().start + *where; this->color_array.at(equals_loc) = highlight_role_t::operat; auto var_name = varas.source(this->buff).substr(0, *where); @@ -1079,7 +1079,7 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) { if (!this->io_still_ok()) { // We cannot check if the command is invalid, so just assume it's valid. is_valid_cmd = true; - } else if (variable_assignment_equals_pos(*cmd).has_value()) { + } else if (variable_assignment_equals_pos(*cmd)) { is_valid_cmd = true; } else { // Check to see if the command is valid. @@ -1305,7 +1305,7 @@ highlighter_t::color_array_t highlighter_t::highlight() { } // namespace /// Determine if a command is valid. -static bool command_is_valid(const wcstring &cmd, enum statement_decoration_t decoration, +static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, const wcstring &working_directory, const environment_t &vars) { // Determine which types we check, based on the decoration. bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, diff --git a/src/history.cpp b/src/history.cpp index 6ce463b0e..567316ec7 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -586,8 +586,8 @@ void history_impl_t::populate_from_file_contents() { if (file_contents) { size_t cursor = 0; maybe_t offset; - while ((offset = - file_contents->offset_of_next_item(&cursor, boundary_timestamp)).has_value()) { + while ((offset = file_contents->offset_of_next_item(&cursor, boundary_timestamp)) + .has_value()) { // Remember this item. 
old_item_offsets.push_back(*offset); } @@ -1205,9 +1205,9 @@ static bool should_import_bash_history_line(const wcstring &line) { if (ast::ast_t::parse(line).errored()) return false; // In doing this test do not allow incomplete strings. Hence the "false" argument. - parse_error_list_t errors; - parse_util_detect_errors(line, &errors); - return errors.empty(); + auto errors = new_parse_error_list(); + parse_util_detect_errors(line, &*errors); + return errors->empty(); } /// Import a bash command history file. Bash's history format is very simple: just lines with #s for diff --git a/src/parse_constants.h b/src/parse_constants.h index 5400c4c94..a7c3e75e6 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -2,10 +2,7 @@ #ifndef FISH_PARSE_CONSTANTS_H #define FISH_PARSE_CONSTANTS_H -#include "config.h" - #include "common.h" -#include "enum_map.h" using source_offset_t = uint32_t; constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast(-1); @@ -16,33 +13,31 @@ constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast(- exit_without_destructors(-1); \ } while (0) -// A range of source code. +#if INCLUDE_RUST_HEADERS + +#include "parse_constants.rs.h" + +using source_range_t = SourceRange; +using parse_token_type_t = ParseTokenType; +using parse_keyword_t = ParseKeyword; +using statement_decoration_t = StatementDecoration; +using parse_error_code_t = ParseErrorCode; +using pipeline_position_t = PipelinePosition; +using parse_error_list_t = ParseErrorList; + +#else + +// Hacks to allow us to compile without Rust headers. 
+ +#include "config.h" + struct source_range_t { source_offset_t start; source_offset_t length; - - source_offset_t end() const { - assert(start + length >= start && "Overflow"); - return start + length; - } - - bool operator==(const source_range_t &rhs) const { - return start == rhs.start && length == rhs.length; - } - - bool operator!=(const source_range_t &rhs) const { return !(*this == rhs); } - - // \return true if a location is in this range, including one-past-the-end. - bool contains_inclusive(source_offset_t loc) const { - return start <= loc && loc - start <= length; - } }; -// IMPORTANT: If the following enum table is modified you must also update token_enum_map below. enum class parse_token_type_t : uint8_t { invalid = 1, - - // Terminal types. string, pipe, redirection, @@ -50,37 +45,14 @@ enum class parse_token_type_t : uint8_t { andand, oror, end, - // Special terminal type that means no more tokens forthcoming. terminate, - // Very special terminal types that don't appear in the production list. 
error, tokenizer_error, comment, }; -const enum_map token_enum_map[] = { - {parse_token_type_t::comment, L"parse_token_type_t::comment"}, - {parse_token_type_t::error, L"parse_token_type_t::error"}, - {parse_token_type_t::tokenizer_error, L"parse_token_type_t::tokenizer_error"}, - {parse_token_type_t::background, L"parse_token_type_t::background"}, - {parse_token_type_t::end, L"parse_token_type_t::end"}, - {parse_token_type_t::pipe, L"parse_token_type_t::pipe"}, - {parse_token_type_t::redirection, L"parse_token_type_t::redirection"}, - {parse_token_type_t::string, L"parse_token_type_t::string"}, - {parse_token_type_t::andand, L"parse_token_type_t::andand"}, - {parse_token_type_t::oror, L"parse_token_type_t::oror"}, - {parse_token_type_t::terminate, L"parse_token_type_t::terminate"}, - {parse_token_type_t::invalid, L"parse_token_type_t::invalid"}, - {parse_token_type_t::invalid, nullptr}}; - -// IMPORTANT: If the following enum is modified you must update the corresponding keyword_enum_map -// array below. -// -// IMPORTANT: These enums must start at zero. enum class parse_keyword_t : uint8_t { - // 'none' is not a keyword, it is a sentinel indicating nothing. 
none, - kw_and, kw_begin, kw_builtin, @@ -101,28 +73,6 @@ enum class parse_keyword_t : uint8_t { kw_while, }; -const enum_map keyword_enum_map[] = {{parse_keyword_t::kw_exclam, L"!"}, - {parse_keyword_t::kw_and, L"and"}, - {parse_keyword_t::kw_begin, L"begin"}, - {parse_keyword_t::kw_builtin, L"builtin"}, - {parse_keyword_t::kw_case, L"case"}, - {parse_keyword_t::kw_command, L"command"}, - {parse_keyword_t::kw_else, L"else"}, - {parse_keyword_t::kw_end, L"end"}, - {parse_keyword_t::kw_exec, L"exec"}, - {parse_keyword_t::kw_for, L"for"}, - {parse_keyword_t::kw_function, L"function"}, - {parse_keyword_t::kw_if, L"if"}, - {parse_keyword_t::kw_in, L"in"}, - {parse_keyword_t::kw_not, L"not"}, - {parse_keyword_t::kw_or, L"or"}, - {parse_keyword_t::kw_switch, L"switch"}, - {parse_keyword_t::kw_time, L"time"}, - {parse_keyword_t::kw_while, L"while"}, - {parse_keyword_t::none, nullptr}}; -#define keyword_enum_map_len (sizeof keyword_enum_map / sizeof *keyword_enum_map) - -// Statement decorations like 'command' or 'exec'. enum class statement_decoration_t : uint8_t { none, command, @@ -130,46 +80,38 @@ enum class statement_decoration_t : uint8_t { exec, }; -// Parse error code list. enum class parse_error_code_t : uint8_t { none, - - // Matching values from enum parser_error. syntax, cmdsubst, - - generic, // unclassified error types - - // Tokenizer errors. 
+ generic, tokenizer_unterminated_quote, tokenizer_unterminated_subshell, tokenizer_unterminated_slice, tokenizer_unterminated_escape, tokenizer_other, - - unbalancing_end, // end outside of block - unbalancing_else, // else outside of if - unbalancing_case, // case outside of switch - bare_variable_assignment, // a=b without command - andor_in_pipeline, // "and" or "or" after a pipe + unbalancing_end, + unbalancing_else, + unbalancing_case, + bare_variable_assignment, + andor_in_pipeline, }; +struct ParseErrorList; +using parse_error_list_t = ParseErrorList; + +#endif + +// Special source_start value that means unknown. +#define SOURCE_LOCATION_UNKNOWN (static_cast(-1)) + enum { parse_flag_none = 0, - - /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of - /// disconnected trees. This is intended to be used by syntax highlighting. parse_flag_continue_after_error = 1 << 0, - /// Include comment tokens. parse_flag_include_comments = 1 << 1, - /// Indicate that the tokenizer should accept incomplete tokens */ parse_flag_accept_incomplete_tokens = 1 << 2, - /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished' - /// tree where some nodes may have no productions. parse_flag_leave_unterminated = 1 << 3, - /// Indicate that the parser should generate job_list entries for blank lines. parse_flag_show_blank_lines = 1 << 4, - /// Indicate that extra semis should be generated. parse_flag_show_extra_semis = 1 << 5, }; using parse_tree_flags_t = uint8_t; @@ -177,41 +119,6 @@ using parse_tree_flags_t = uint8_t; enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 }; using parser_test_error_bits_t = uint8_t; -struct parse_error_t { - /// Text of the error. - wcstring text; - /// Code for the error. - enum parse_error_code_t code; - /// Offset and length of the token in the source code that triggered this error. 
- size_t source_start; - size_t source_length; - /// Return a string describing the error, suitable for presentation to the user. If - /// is_interactive is true, the offending line with a caret is printed as well. - wcstring describe(const wcstring &src, bool is_interactive) const; - /// Return a string describing the error, suitable for presentation to the user, with the given - /// prefix. If skip_caret is false, the offending line with a caret is printed as well. - wcstring describe_with_prefix(const wcstring &src, const wcstring &prefix, bool is_interactive, - bool skip_caret) const; -}; -typedef std::vector parse_error_list_t; - -wcstring token_type_user_presentable_description(parse_token_type_t type, - parse_keyword_t keyword = parse_keyword_t::none); - -// Special source_start value that means unknown. -#define SOURCE_LOCATION_UNKNOWN (static_cast(-1)) - -/// Helper function to offset error positions by the given amount. This is used when determining -/// errors in a substring of a larger source buffer. -void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt); - -// The location of a pipeline. -enum class pipeline_position_t : uint8_t { - none, // not part of a pipeline - first, // first command in a pipeline - subsequent // second or further command in a pipeline -}; - /// Maximum number of function calls. #define FISH_MAX_STACK_DEPTH 128 diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index f89058cee..6d0ee614f 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -503,14 +503,14 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // Expand it. We need to offset any errors by the position of the string. 
completion_list_t switch_values_expanded; - parse_error_list_t errors; + auto errors = new_parse_error_list(); auto expand_ret = - expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, statement.argument.range.start); + expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(statement.argument.range.start); switch (expand_ret.result) { case expand_result_t::error: - return report_errors(expand_ret.status, errors); + return report_errors(expand_ret.status, *errors); case expand_result_t::cancel: return end_execution_reason_t::cancelled; @@ -666,18 +666,20 @@ end_execution_reason_t parse_execution_context_t::report_error(int status, const auto r = node.source_range(); // Create an error. - parse_error_list_t error_list = parse_error_list_t(1); - parse_error_t *error = &error_list.at(0); - error->source_start = r.start; - error->source_length = r.length; - error->code = parse_error_code_t::syntax; // hackish + auto error_list = new_parse_error_list(); + parse_error_t error; + error.source_start = r.start; + error.source_length = r.length; + error.code = parse_error_code_t::syntax; // hackish va_list va; va_start(va, fmt); - error->text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - return this->report_errors(status, error_list); + error_list->push_back(std::move(error)); + + return this->report_errors(status, *error_list); } end_execution_reason_t parse_execution_context_t::report_errors( @@ -814,7 +816,7 @@ end_execution_reason_t parse_execution_context_t::expand_command( // Here we're expanding a command, for example $HOME/bin/stuff or $randomthing. The first // completion becomes the command itself, everything after becomes arguments. Command // substitutions are not supported. 
- parse_error_list_t errors; + auto errors = new_parse_error_list(); // Get the unexpanded command string. We expect to always get it here. wcstring unexp_cmd = get_source(statement.command); @@ -822,14 +824,14 @@ end_execution_reason_t parse_execution_context_t::expand_command( // Expand the string to produce completions, and report errors. expand_result_t expand_err = - expand_to_command_and_args(unexp_cmd, ctx, out_cmd, out_args, &errors); + expand_to_command_and_args(unexp_cmd, ctx, out_cmd, out_args, &*errors); if (expand_err == expand_result_t::error) { // Issue #5812 - the expansions were done on the command token, // excluding prefixes such as " " or "if ". // This means that the error positions are relative to the beginning // of the token; we need to make them relative to the original source. - parse_error_offset_source_start(&errors, pos_of_command_token); - return report_errors(STATUS_ILLEGAL_CMD, errors); + errors->offset_source_start(pos_of_command_token); + return report_errors(STATUS_ILLEGAL_CMD, *errors); } else if (expand_err == expand_result_t::wildcard_no_match) { return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG, get_source(statement).c_str()); @@ -949,14 +951,14 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( assert(arg_node->has_source() && "Argument should have source"); // Expand this string. 
- parse_error_list_t errors; + auto errors = new_parse_error_list(); arg_expanded.clear(); auto expand_ret = - expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, arg_node->range.start); + expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(arg_node->range.start); switch (expand_ret.result) { case expand_result_t::error: { - return this->report_errors(expand_ret.status, errors); + return this->report_errors(expand_ret.status, *errors); } case expand_result_t::cancel: { @@ -1100,18 +1102,18 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) { const wcstring &source = get_source(variable_assignment); auto equals_pos = variable_assignment_equals_pos(source); - assert(equals_pos.has_value()); + assert(equals_pos); const wcstring variable_name = source.substr(0, *equals_pos); const wcstring expression = source.substr(*equals_pos + 1); completion_list_t expression_expanded; - parse_error_list_t errors; + auto errors = new_parse_error_list(); // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function auto expand_ret = - expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, variable_assignment.range.start + *equals_pos + 1); + expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(variable_assignment.range.start + *equals_pos + 1); switch (expand_ret.result) { case expand_result_t::error: - return this->report_errors(expand_ret.status, errors); + return this->report_errors(expand_ret.status, *errors); case expand_result_t::cancel: return end_execution_reason_t::cancelled; diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 223b8e0b2..3942f6e4d 100644 --- 
a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -34,183 +34,6 @@ parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) { } } -/// Returns a string description of this parse error. -wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, - bool is_interactive, bool skip_caret) const { - wcstring result = prefix; - // Some errors don't have their message passed in, so we construct them here. - // This affects e.g. `eval "a=(foo)"` - switch (code) { - default: - if (skip_caret && this->text.empty()) return L""; - result.append(this->text); - break; - case parse_error_code_t::andor_in_pipeline: - append_format(result, INVALID_PIPELINE_CMD_ERR_MSG, - src.substr(this->source_start, this->source_length).c_str()); - break; - case parse_error_code_t::bare_variable_assignment: { - wcstring assignment_src = src.substr(this->source_start, this->source_length); - maybe_t equals_pos = variable_assignment_equals_pos(assignment_src); - assert(equals_pos.has_value()); - wcstring variable = assignment_src.substr(0, *equals_pos); - wcstring value = assignment_src.substr(*equals_pos + 1); - append_format(result, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable.c_str(), - value.c_str()); - break; - } - } - - size_t start = source_start; - size_t len = source_length; - if (start >= src.size()) { - // If we are past the source, we clamp it to the end. - start = src.size() - 1; - len = 0; - } - - if (start + len > src.size()) { - len = src.size() - source_start; - } - - if (skip_caret) { - return result; - } - - // Locate the beginning of this line of source. - size_t line_start = 0; - - // Look for a newline prior to source_start. If we don't find one, start at the beginning of - // the string; otherwise start one past the newline. Note that source_start may itself point - // at a newline; we want to find the newline before it. 
- if (start > 0) { - size_t newline = src.find_last_of(L'\n', start - 1); - if (newline != wcstring::npos) { - line_start = newline + 1; - } - } - // Look for the newline after the source range. If the source range itself includes a - // newline, that's the one we want, so start just before the end of the range. - size_t last_char_in_range = (len == 0 ? start : start + len - 1); - size_t line_end = src.find(L'\n', last_char_in_range); - if (line_end == wcstring::npos) { - line_end = src.size(); - } - - assert(line_end >= line_start); - assert(start >= line_start); - - // Don't include the caret and line if we're interactive and this is the first line, because - // then it's obvious. - bool interactive_skip_caret = is_interactive && start == 0; - if (interactive_skip_caret) { - return result; - } - - // Append the line of text. - if (!result.empty()) result.push_back(L'\n'); - result.append(src, line_start, line_end - line_start); - - // Append the caret line. The input source may include tabs; for that reason we - // construct a "caret line" that has tabs in corresponding positions. - wcstring caret_space_line; - caret_space_line.reserve(start - line_start); - for (size_t i = line_start; i < start; i++) { - wchar_t wc = src.at(i); - if (wc == L'\t') { - caret_space_line.push_back(L'\t'); - } else if (wc == L'\n') { - // It's possible that the start points at a newline itself. In that case, - // pretend it's a space. We only expect this to be at the end of the string. - caret_space_line.push_back(L' '); - } else { - int width = fish_wcwidth(wc); - if (width > 0) { - caret_space_line.append(static_cast(width), L' '); - } - } - } - result.push_back(L'\n'); - result.append(caret_space_line); - result.push_back(L'^'); - if (len > 1) { - // Add a squiggle under the error location. - // We do it like this - // ^~~^ - // With a "^" under the start and end, and squiggles in-between. 
- auto width = fish_wcswidth(src.c_str() + start, len); - if (width >= 2) { - // Subtract one for each of the carets - this is important in case - // the starting char has a width of > 1. - result.append(width - 2, L'~'); - result.push_back(L'^'); - } - } - return result; -} - -wcstring parse_error_t::describe(const wcstring &src, bool is_interactive) const { - return this->describe_with_prefix(src, wcstring(), is_interactive, false); -} - -void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) { - if (amt > 0 && errors != nullptr) { - for (parse_error_t &error : *errors) { - // Preserve the special meaning of -1 as 'unknown'. - if (error.source_start != SOURCE_LOCATION_UNKNOWN) { - error.source_start += amt; - } - } - } -} - -/// Returns a string description for the given token type. -const wchar_t *token_type_description(parse_token_type_t type) { - const wchar_t *description = enum_to_str(type, token_enum_map); - if (description) return description; - return L"unknown_token_type"; -} - -const wchar_t *keyword_description(parse_keyword_t type) { - const wchar_t *keyword = enum_to_str(type, keyword_enum_map); - if (keyword) return keyword; - return L"unknown_keyword"; -} - -wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) { - if (keyword != parse_keyword_t::none) { - return format_string(L"keyword '%ls'", keyword_description(keyword)); - } - - switch (type) { - case parse_token_type_t::string: - return L"a string"; - case parse_token_type_t::pipe: - return L"a pipe"; - case parse_token_type_t::redirection: - return L"a redirection"; - case parse_token_type_t::background: - return L"a '&'"; - case parse_token_type_t::andand: - return L"'&&'"; - case parse_token_type_t::oror: - return L"'||'"; - case parse_token_type_t::end: - return L"end of the statement"; - case parse_token_type_t::terminate: - return L"end of the input"; - case parse_token_type_t::error: - return L"a parse error"; - case 
parse_token_type_t::tokenizer_error: - return L"an incomplete token"; - case parse_token_type_t::comment: - return L"a comment"; - default: { - return format_string(L"a %ls", token_type_description(type)); - } - } -} - /// Returns a string description of the given parse token. wcstring parse_token_t::describe() const { wcstring result = token_type_description(type); @@ -222,7 +45,7 @@ wcstring parse_token_t::describe() const { /// A string description appropriate for presentation to the user. wcstring parse_token_t::user_presentable_description() const { - return token_type_user_presentable_description(type, keyword); + return *token_type_user_presentable_description(type, keyword); } parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast) diff --git a/src/parse_tree.h b/src/parse_tree.h index 95ae4b603..7814155e6 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -11,10 +11,8 @@ /// A struct representing the token type that we use internally. struct parse_token_t { - enum parse_token_type_t type; // The type of the token as represented by the parser - enum parse_keyword_t keyword { - parse_keyword_t::none - }; // Any keyword represented by this token + parse_token_type_t type; // The type of the token as represented by the parser + parse_keyword_t keyword{parse_keyword_t::none}; // Any keyword represented by this token bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help' bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline. @@ -39,9 +37,6 @@ struct parse_token_t { constexpr parse_token_t(parse_token_type_t type) : type(type) {} }; -const wchar_t *token_type_description(parse_token_type_t type); -const wchar_t *keyword_description(parse_keyword_t type); - parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err); /// A type wrapping up a parse tree and the original source behind it. 
diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 9c8d5a648..6573b0a63 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -819,7 +819,7 @@ static bool append_syntax_error(parse_error_list_t *errors, size_t source_locati va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); errors->push_back(std::move(error)); @@ -1031,17 +1031,17 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen err |= check_subtoken(checked, paren_begin - has_dollar); assert(paren_begin < paren_end && "Parens out of order?"); - parse_error_list_t subst_errors; - err |= parse_util_detect_errors(subst, &subst_errors); + auto subst_errors = new_parse_error_list(); + err |= parse_util_detect_errors(subst, &*subst_errors); // Our command substitution produced error offsets relative to its source. Tweak the // offsets of the errors in the command substitution to account for both its offset // within the string, and the offset of the node. size_t error_offset = paren_begin + 1 + source_start; - parse_error_offset_source_start(&subst_errors, error_offset); + subst_errors->offset_source_start(error_offset); if (out_errors != nullptr) { - out_errors->insert(out_errors->end(), subst_errors.begin(), subst_errors.end()); + out_errors->append(&*subst_errors); } checked = paren_end + 1; @@ -1185,9 +1185,9 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // Check that we can expand the command. // Make a new error list so we can fix the offset for just those, then append later. 
wcstring command; - parse_error_list_t new_errors; + auto new_errors = new_parse_error_list(); if (expand_to_command_and_args(unexp_command, operation_context_t::empty(), &command, - nullptr, &new_errors, + nullptr, &*new_errors, true /* skip wildcards */) == expand_result_t::error) { errored = true; } @@ -1244,8 +1244,8 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // The expansion errors here go from the *command* onwards, // so we need to offset them by the *command* offset, // excluding the decoration. - parse_error_offset_source_start(&new_errors, dst.command.source_range().start); - vec_append(*parse_errors, std::move(new_errors)); + new_errors->offset_source_start(dst.command.source_range().start); + parse_errors->append(&*new_errors); } } return errored; @@ -1352,18 +1352,19 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Parse the input string into an ast. Some errors are detected here. using namespace ast; - parse_error_list_t parse_errors; - auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors); + auto parse_errors = new_parse_error_list(); + auto ast = ast_t::parse(buff_src, parse_flags, &*parse_errors); if (allow_incomplete) { // Issue #1238: If the only error was unterminated quote, then consider this to have parsed // successfully. - size_t idx = parse_errors.size(); + size_t idx = parse_errors->size(); while (idx--) { - if (parse_errors.at(idx).code == parse_error_code_t::tokenizer_unterminated_quote || - parse_errors.at(idx).code == parse_error_code_t::tokenizer_unterminated_subshell) { + if (parse_errors->at(idx)->code() == parse_error_code_t::tokenizer_unterminated_quote || + parse_errors->at(idx)->code() == + parse_error_code_t::tokenizer_unterminated_subshell) { // Remove this error, since we don't consider it a real error. 
has_unclosed_quote_or_subshell = true; - parse_errors.erase(parse_errors.begin() + idx); + parse_errors->erase(idx); } } } @@ -1376,8 +1377,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, } // Early parse error, stop here. - if (!parse_errors.empty()) { - if (out_errors) vec_append(*out_errors, std::move(parse_errors)); + if (!parse_errors->empty()) { + if (out_errors) out_errors->append(&*parse_errors); return PARSER_TEST_ERROR; } @@ -1390,24 +1391,24 @@ maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_ // Helper to return a description of the first error. auto get_error_text = [&](const parse_error_list_t &errors) { assert(!errors.empty() && "Expected an error"); - return errors.at(0).describe_with_prefix(arg_list_src, prefix, false /* not interactive */, - false /* don't skip caret */); + return *errors.at(0)->describe_with_prefix( + arg_list_src, prefix, false /* not interactive */, false /* don't skip caret */); }; // Parse the string as a freestanding argument list. using namespace ast; - parse_error_list_t errors; - auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors); - if (!errors.empty()) { - return get_error_text(errors); + auto errors = new_parse_error_list(); + auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &*errors); + if (!errors->empty()) { + return get_error_text(*errors); } // Get the root argument list and extract arguments from it. // Test each of these. 
for (const argument_t &arg : ast.top()->as()->arguments) { const wcstring arg_src = arg.source(arg_list_src); - if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) { - return get_error_text(errors); + if (parse_util_detect_errors_in_argument(arg, arg_src, &*errors)) { + return get_error_text(*errors); } } return none(); diff --git a/src/parser.cpp b/src/parser.cpp index 89c962f27..452fe496f 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -439,10 +439,11 @@ wcstring parser_t::current_line() { // Use an error with empty text. assert(source_offset >= 0); parse_error_t empty_error = {}; + empty_error.text = std::make_unique(); empty_error.source_start = source_offset; - wcstring line_info = empty_error.describe_with_prefix(execution_context->get_source(), prefix, - is_interactive(), skip_caret); + wcstring line_info = *empty_error.describe_with_prefix(execution_context->get_source(), prefix, + is_interactive(), skip_caret); if (!line_info.empty()) { line_info.push_back(L'\n'); } @@ -499,13 +500,13 @@ profile_item_t *parser_t::create_profile_item() { eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io, const job_group_ref_t &job_group, enum block_type_t block_type) { // Parse the source into a tree, if we can. - parse_error_list_t error_list; - if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &error_list)) { + auto error_list = new_parse_error_list(); + if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list)) { return this->eval(ps, io, job_group, block_type); } else { // Get a backtrace. This includes the message. wcstring backtrace_and_desc; - this->get_backtrace(cmd, error_list, backtrace_and_desc); + this->get_backtrace(cmd, *error_list, backtrace_and_desc); // Print it. 
std::fwprintf(stderr, L"%ls\n", backtrace_and_desc.c_str()); @@ -623,20 +624,20 @@ template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast:: void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring &output) const { if (!errors.empty()) { - const parse_error_t &err = errors.at(0); + const auto *err = errors.at(0); // Determine if we want to try to print a caret to point at the source error. The - // err.source_start <= src.size() check is due to the nasty way that slices work, which is + // err.source_start() <= src.size() check is due to the nasty way that slices work, which is // by rewriting the source. size_t which_line = 0; bool skip_caret = true; - if (err.source_start != SOURCE_LOCATION_UNKNOWN && err.source_start <= src.size()) { + if (err->source_start() != SOURCE_LOCATION_UNKNOWN && err->source_start() <= src.size()) { // Determine which line we're on. - which_line = 1 + std::count(src.begin(), src.begin() + err.source_start, L'\n'); + which_line = 1 + std::count(src.begin(), src.begin() + err->source_start(), L'\n'); // Don't include the caret if we're interactive, this is the first line of text, and our // source is at its beginning, because then it's obvious. 
- skip_caret = (is_interactive() && which_line == 1 && err.source_start == 0); + skip_caret = (is_interactive() && which_line == 1 && err->source_start() == 0); } wcstring prefix; @@ -655,7 +656,7 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro } const wcstring description = - err.describe_with_prefix(src, prefix, is_interactive(), skip_caret); + *err->describe_with_prefix(src, prefix, is_interactive(), skip_caret); if (!description.empty()) { output.append(description); output.push_back(L'\n'); diff --git a/src/reader.cpp b/src/reader.cpp index a52101fec..4b7dc81a1 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -2755,13 +2755,13 @@ static eval_res_t reader_run_command(parser_t &parser, const wcstring &cmd) { } static parser_test_error_bits_t reader_shell_test(const parser_t &parser, const wcstring &bstr) { - parse_error_list_t errors; + auto errors = new_parse_error_list(); parser_test_error_bits_t res = - parse_util_detect_errors(bstr, &errors, true /* do accept incomplete */); + parse_util_detect_errors(bstr, &*errors, true /* do accept incomplete */); if (res & PARSER_TEST_ERROR) { wcstring error_desc; - parser.get_backtrace(bstr, errors, error_desc); + parser.get_backtrace(bstr, *errors, error_desc); // Ensure we end with a newline. Also add an initial newline, because it's likely the user // just hit enter and so there's junk on the current line. @@ -4719,11 +4719,11 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) { } // Parse into an ast and detect errors. - parse_error_list_t errors; - auto ast = ast::ast_t::parse(str, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(str, parse_flag_none, &*errors); bool errored = ast.errored(); if (!errored) { - errored = parse_util_detect_errors(ast, str, &errors); + errored = parse_util_detect_errors(ast, str, &*errors); } if (!errored) { // Construct a parsed source ref. 
@@ -4733,7 +4733,7 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) { return 0; } else { wcstring sb; - parser.get_backtrace(str, errors, sb); + parser.get_backtrace(str, *errors, sb); std::fwprintf(stderr, L"%ls", sb.c_str()); return 1; } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 942ceacc0..568407897 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -669,7 +669,7 @@ wcstring tok_command(const wcstring &str) { return {}; } wcstring text = t.text_of(*token); - if (variable_assignment_equals_pos(text).has_value()) { + if (variable_assignment_equals_pos(text)) { continue; } return text; @@ -885,23 +885,3 @@ move_word_state_machine_t::move_word_state_machine_t(move_word_style_t syl) : state(0), style(syl) {} void move_word_state_machine_t::reset() { state = 0; } - -// Return the location of the equals sign, or none if the string does -// not look like a variable assignment like FOO=bar. The detection -// works similar as in some POSIX shells: only letters and numbers qre -// allowed on the left hand side, no quotes or escaping. -maybe_t variable_assignment_equals_pos(const wcstring &txt) { - enum { init, has_some_variable_identifier } state = init; - // TODO bracket indexing - for (size_t i = 0; i < txt.size(); i++) { - wchar_t c = txt[i]; - if (state == init) { - if (!valid_var_name_char(c)) return {}; - state = has_some_variable_identifier; - } else { - if (c == '=') return {i}; - if (!valid_var_name_char(c)) return {}; - } - } - return {}; -} diff --git a/src/tokenizer.h b/src/tokenizer.h index 3fd12466f..475247614 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -10,6 +10,9 @@ #include "maybe.h" #include "parse_constants.h" #include "redirection.h" +#if INCLUDE_RUST_HEADERS +#include "tokenizer.rs.h" +#endif /// Token types. XXX Why this isn't parse_token_type_t, I'm not really sure. 
enum class token_type_t : uint8_t { @@ -208,7 +211,4 @@ class move_word_state_machine_t { void reset(); }; -/// The position of the equal sign in a variable assignment like foo=bar. -maybe_t variable_assignment_equals_pos(const wcstring &txt); - #endif