mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-04-27 23:11:15 -03:00
Having the prelude in wchar is not great. The wchar module was empty except for the prelude, and its prelude included things from wutil. Having a top-level prelude module in the main crate resolves this. It allows us to completely remove the wchar module, and a top-level prelude module makes more sense conceptually. Putting non-wchar things into the prelude also becomes more sensible, if we ever want to do that. Closes #12182
2424 lines
84 KiB
Rust
2424 lines
84 KiB
Rust
//! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code.
|
||
use crate::ast::{
|
||
self, Ast, Keyword, Kind, Leaf, Node, NodeVisitor, Token, Traversal, is_same_node,
|
||
};
|
||
use crate::builtins::shared::builtin_exists;
|
||
use crate::common::{
|
||
EscapeFlags, EscapeStringStyle, UnescapeFlags, UnescapeStringStyle, escape_string,
|
||
unescape_string, valid_var_name, valid_var_name_char,
|
||
};
|
||
use crate::expand::{
|
||
BRACE_BEGIN, BRACE_END, BRACE_SEP, ExpandFlags, ExpandResultCode, INTERNAL_SEPARATOR,
|
||
VARIABLE_EXPAND, VARIABLE_EXPAND_EMPTY, VARIABLE_EXPAND_SINGLE, expand_one,
|
||
expand_to_command_and_args,
|
||
};
|
||
use crate::future_feature_flags::{FeatureFlag, feature_test};
|
||
use crate::operation_context::OperationContext;
|
||
use crate::parse_constants::{
|
||
ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE_QUOTED1, ERROR_BRACKETED_VARIABLE1,
|
||
ERROR_NO_VAR_NAME, ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID,
|
||
ERROR_NOT_STATUS, INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG,
|
||
INVALID_PIPELINE_CMD_ERR_MSG, ParseError, ParseErrorCode, ParseErrorList, ParseKeyword,
|
||
ParseTokenType, ParseTreeFlags, ParserTestErrorBits, PipelinePosition, SourceRange,
|
||
StatementDecoration, UNKNOWN_BUILTIN_ERR_MSG, parse_error_offset_source_start,
|
||
};
|
||
use crate::prelude::*;
|
||
use crate::tokenizer::{
|
||
TOK_ACCEPT_UNFINISHED, TOK_SHOW_COMMENTS, Tok, TokenType, Tokenizer, comment_end,
|
||
is_token_delimiter, quote_end,
|
||
};
|
||
use crate::wcstringutil::count_newlines;
|
||
use crate::wcstringutil::truncate;
|
||
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
|
||
use std::ops::Range;
|
||
use std::{iter, ops};
|
||
|
||
/// Handles slices: the square brackets in an expression like $foo[5..4]
|
||
/// Return the length of the slice starting at `in`, or 0 if there is no slice, or None on error.
|
||
/// This never accepts incomplete slices.
|
||
pub fn parse_util_slice_length(input: &wstr) -> Option<usize> {
|
||
let openc = '[';
|
||
let closec = ']';
|
||
let mut escaped = false;
|
||
|
||
// Check for initial opening [
|
||
let mut chars = input.chars();
|
||
if chars.next() != Some(openc) {
|
||
return Some(0);
|
||
}
|
||
let mut bracket_count = 1;
|
||
|
||
let mut pos = 0;
|
||
while let Some(c) = chars.next() {
|
||
pos += 1;
|
||
if !escaped {
|
||
if ['\'', '"'].contains(&c) {
|
||
let oldpos = pos;
|
||
pos = quote_end(input, pos, c)?;
|
||
// We need to advance the iterator as well
|
||
if pos - oldpos > 0 {
|
||
// nth(0) advances by 1
|
||
chars.nth(pos - oldpos - 1)?;
|
||
} else {
|
||
// Quotes aren't over, slice is invalid
|
||
return None;
|
||
}
|
||
} else if c == openc {
|
||
bracket_count += 1;
|
||
} else if c == closec {
|
||
bracket_count -= 1;
|
||
if bracket_count == 0 {
|
||
// pos points at the closing ], so add 1.
|
||
return Some(pos + 1);
|
||
}
|
||
}
|
||
}
|
||
if c == '\\' {
|
||
escaped = !escaped;
|
||
} else {
|
||
escaped = false;
|
||
}
|
||
}
|
||
assert!(bracket_count > 0, "Should have unclosed brackets");
|
||
|
||
None
|
||
}
|
||
|
||
/// The location of a command substitution within a string.
///
/// `range` starts at the opening parenthesis and ends one past the last character of the
/// substitution. `num_closing` is the number of closing-parenthesis characters at the end of
/// `range`; it is zero when the substitution is unterminated.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct Parentheses {
    range: Range<usize>,
    num_closing: usize,
}

impl Parentheses {
    /// Offset of the opening parenthesis.
    pub fn start(&self) -> usize {
        self.range.start
    }

    /// Offset one past the end of the whole substitution.
    pub fn end(&self) -> usize {
        self.range.end
    }

    /// The one-character range covering the opening parenthesis.
    pub fn opening(&self) -> Range<usize> {
        let open = self.start();
        open..open + 1
    }

    /// The range covering the closing parenthesis; empty if there is none.
    pub fn closing(&self) -> Range<usize> {
        let end = self.end();
        end - self.num_closing..end
    }

    /// The range of the command text between the opening and closing parentheses.
    pub fn command(&self) -> Range<usize> {
        self.opening().end..self.closing().start
    }
}
|
||
|
||
/// Outcome of searching a string for a command substitution.
#[derive(Eq, PartialEq, Debug)]
pub enum MaybeParentheses {
    /// The search ran into a syntax error, such as unbalanced parentheses.
    Error,
    /// No command substitution was found.
    None,
    /// A command substitution was found at the contained location.
    CommandSubstitution(Parentheses),
}
|
||
|
||
/// Alternative API. Iterate over command substitutions.
|
||
///
|
||
/// \param str the string to search for subshells
|
||
/// \param inout_cursor_offset On input, the location to begin the search. On output, either the end
|
||
/// of the string, or just after the closed-paren.
|
||
/// \param out_contents On output, the contents of the command substitution
|
||
/// \param out_start On output, the offset of the start of the command substitution (open paren)
|
||
/// \param out_end On output, the offset of the end of the command substitution (close paren), or
|
||
/// the end of the string if it was incomplete
|
||
/// \param accept_incomplete whether to permit missing closing parenthesis
|
||
/// \param inout_is_quoted whether the cursor is in a double-quoted context.
|
||
/// \param out_has_dollar whether the command substitution has the optional leading $.
|
||
/// Return -1 on syntax error, 0 if no subshells exist and 1 on success
|
||
#[allow(clippy::too_many_arguments)]
|
||
pub fn parse_util_locate_cmdsubst_range(
|
||
s: &wstr,
|
||
inout_cursor_offset: &mut usize,
|
||
accept_incomplete: bool,
|
||
inout_is_quoted: Option<&mut bool>,
|
||
out_has_dollar: Option<&mut bool>,
|
||
) -> MaybeParentheses {
|
||
// Nothing to do if the offset is at or past the end of the string.
|
||
if *inout_cursor_offset >= s.len() {
|
||
return MaybeParentheses::None;
|
||
}
|
||
|
||
// Defer to the wonky version.
|
||
let ret = parse_util_locate_cmdsub(
|
||
s,
|
||
*inout_cursor_offset,
|
||
accept_incomplete,
|
||
inout_is_quoted,
|
||
out_has_dollar,
|
||
);
|
||
match &ret {
|
||
MaybeParentheses::Error | MaybeParentheses::None => (),
|
||
MaybeParentheses::CommandSubstitution(parens) => {
|
||
*inout_cursor_offset = parens.end();
|
||
}
|
||
}
|
||
ret
|
||
}
|
||
|
||
/// Find the beginning and end of the command substitution under the cursor. If no subshell is
|
||
/// found, the entire string is returned. If the current command substitution is not ended, i.e. the
|
||
/// closing parenthesis is missing, then the string from the beginning of the substitution to the
|
||
/// end of the string is returned.
|
||
///
|
||
/// \param buff the string to search for subshells
|
||
/// \param cursor_pos the position of the cursor
|
||
/// \param a the start of the searched string
|
||
/// \param b the end of the searched string
|
||
pub fn parse_util_cmdsubst_extent(buff: &wstr, cursor: usize) -> ops::Range<usize> {
|
||
// The tightest command substitution found so far.
|
||
let mut result = 0..buff.len();
|
||
let mut pos = 0;
|
||
loop {
|
||
let parens = match parse_util_locate_cmdsub(buff, pos, true, None, None) {
|
||
// No subshell found, all done.
|
||
MaybeParentheses::Error | MaybeParentheses::None => break,
|
||
MaybeParentheses::CommandSubstitution(parens) => parens,
|
||
};
|
||
|
||
let command = parens.command();
|
||
if command.start <= cursor && command.end >= cursor {
|
||
// This command substitution surrounds the cursor, so it's a tighter fit.
|
||
result = command;
|
||
// pos is where to begin looking for the next one. But if we reached the end there's no
|
||
// next one.
|
||
if result.start >= result.end {
|
||
break;
|
||
}
|
||
pos = result.start + 1;
|
||
} else if cursor < command.start {
|
||
// This command substitution starts at or after the cursor. Since it was the first
|
||
// command substitution in the string, we're done.
|
||
break;
|
||
} else {
|
||
// This command substitution ends before the cursor. Skip it.
|
||
assert!(command.end < cursor);
|
||
pos = parens.end();
|
||
assert!(pos <= buff.len());
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
fn parse_util_locate_cmdsub(
|
||
input: &wstr,
|
||
cursor: usize,
|
||
allow_incomplete: bool,
|
||
mut inout_is_quoted: Option<&mut bool>,
|
||
mut out_has_dollar: Option<&mut bool>,
|
||
) -> MaybeParentheses {
|
||
let input = input.as_char_slice();
|
||
|
||
let mut escaped = false;
|
||
let mut is_token_begin = true;
|
||
let mut syntax_error = false;
|
||
let mut paran_count = 0;
|
||
let mut quoted_cmdsubs = vec![];
|
||
|
||
let mut pos = cursor;
|
||
let mut last_dollar = None;
|
||
let mut paran_begin = None;
|
||
let mut paran_end = None;
|
||
enum Quote {
|
||
Real(char),
|
||
VirtualDouble,
|
||
}
|
||
fn process_opening_quote(
|
||
input: &[char],
|
||
inout_is_quoted: &mut Option<&mut bool>,
|
||
paran_count: i32,
|
||
quoted_cmdsubs: &mut Vec<i32>,
|
||
mut pos: usize,
|
||
last_dollar: &mut Option<usize>,
|
||
quote: Quote,
|
||
) -> Option<usize> {
|
||
let quote = match quote {
|
||
Quote::Real(q) => q,
|
||
Quote::VirtualDouble => {
|
||
pos = pos.saturating_sub(1);
|
||
'"'
|
||
}
|
||
};
|
||
let q_end = quote_end(input.into(), pos, quote)?;
|
||
// Found a valid closing quote.
|
||
if input[q_end] == '$' {
|
||
// The closing quote is another quoted command substitution.
|
||
*last_dollar = Some(q_end);
|
||
quoted_cmdsubs.push(paran_count);
|
||
}
|
||
// We want to report whether the outermost command substitution between
|
||
// paran_begin..paran_end is quoted.
|
||
if paran_count == 0 {
|
||
inout_is_quoted
|
||
.as_mut()
|
||
.map(|is_quoted| **is_quoted = input[q_end] == '$');
|
||
}
|
||
Some(q_end)
|
||
}
|
||
|
||
if inout_is_quoted
|
||
.as_ref()
|
||
.is_some_and(|is_quoted| **is_quoted)
|
||
&& !input.is_empty()
|
||
{
|
||
pos = process_opening_quote(
|
||
input,
|
||
&mut inout_is_quoted,
|
||
paran_count,
|
||
&mut quoted_cmdsubs,
|
||
pos,
|
||
&mut last_dollar,
|
||
Quote::VirtualDouble,
|
||
)
|
||
.map_or(input.len(), |pos| pos + 1);
|
||
}
|
||
|
||
while pos < input.len() {
|
||
let c = input[pos];
|
||
if !escaped {
|
||
if ['\'', '"'].contains(&c) {
|
||
match process_opening_quote(
|
||
input,
|
||
&mut inout_is_quoted,
|
||
paran_count,
|
||
&mut quoted_cmdsubs,
|
||
pos,
|
||
&mut last_dollar,
|
||
Quote::Real(c),
|
||
) {
|
||
Some(q_end) => pos = q_end,
|
||
None => break,
|
||
}
|
||
} else if c == '\\' {
|
||
escaped = true;
|
||
} else if c == '#' && is_token_begin {
|
||
pos = comment_end(input.into(), pos) - 1;
|
||
} else if c == '$' {
|
||
last_dollar = Some(pos);
|
||
} else if c == '(' {
|
||
if paran_count == 0 && paran_begin.is_none() {
|
||
paran_begin = Some(pos);
|
||
out_has_dollar
|
||
.as_mut()
|
||
.map(|has_dollar| **has_dollar = last_dollar == Some(pos.wrapping_sub(1)));
|
||
}
|
||
|
||
paran_count += 1;
|
||
} else if c == ')' {
|
||
paran_count -= 1;
|
||
|
||
if paran_count == 0 {
|
||
assert!(paran_end.is_none());
|
||
paran_end = Some(pos);
|
||
break;
|
||
}
|
||
|
||
if paran_count < 0 {
|
||
syntax_error = true;
|
||
break;
|
||
}
|
||
|
||
// Check if the ) did complete a quoted command substitution.
|
||
if quoted_cmdsubs.last() == Some(¶n_count) {
|
||
quoted_cmdsubs.pop();
|
||
// Quoted command substitutions temporarily close double quotes.
|
||
// In "foo$(bar)baz$(qux)", after the ), we need to act as if there was a double quote.
|
||
match process_opening_quote(
|
||
input,
|
||
&mut inout_is_quoted,
|
||
paran_count,
|
||
&mut quoted_cmdsubs,
|
||
pos,
|
||
&mut last_dollar,
|
||
Quote::VirtualDouble,
|
||
) {
|
||
Some(q_end) => pos = q_end,
|
||
None => break,
|
||
}
|
||
}
|
||
}
|
||
is_token_begin = is_token_delimiter(c, input.get(pos + 1).copied());
|
||
} else {
|
||
escaped = false;
|
||
is_token_begin = false;
|
||
}
|
||
pos += 1;
|
||
}
|
||
|
||
syntax_error |= paran_count < 0;
|
||
syntax_error |= paran_count > 0 && !allow_incomplete;
|
||
|
||
if syntax_error {
|
||
return MaybeParentheses::Error;
|
||
}
|
||
|
||
let Some(paran_begin) = paran_begin else {
|
||
return MaybeParentheses::None;
|
||
};
|
||
|
||
let end = if paran_count != 0 {
|
||
input.len()
|
||
} else {
|
||
paran_end.unwrap() + 1
|
||
};
|
||
|
||
let parens = Parentheses {
|
||
range: paran_begin..end,
|
||
num_closing: if paran_count == 0 { 1 } else { 0 },
|
||
};
|
||
|
||
MaybeParentheses::CommandSubstitution(parens)
|
||
}
|
||
|
||
/// Find the beginning and end of the process definition under the cursor
|
||
///
|
||
/// \param buff the string to search for subshells
|
||
/// \param cursor_pos the position of the cursor
|
||
/// \param a the start of the process
|
||
/// \param b the end of the process
|
||
/// \param tokens the tokens in the process
|
||
pub fn parse_util_process_extent(
|
||
buff: &wstr,
|
||
cursor_pos: usize,
|
||
out_tokens: Option<&mut Vec<Tok>>,
|
||
) -> ops::Range<usize> {
|
||
job_or_process_extent(true, buff, cursor_pos, out_tokens)
|
||
}
|
||
|
||
/// Find the beginning and end of the process definition under the cursor
|
||
///
|
||
/// \param buff the string to search for subshells
|
||
/// \param cursor_pos the position of the cursor
|
||
/// \param a the start of the process
|
||
/// \param b the end of the process
|
||
/// \param tokens the tokens in the process
|
||
pub fn parse_util_job_extent(
|
||
buff: &wstr,
|
||
cursor_pos: usize,
|
||
out_tokens: Option<&mut Vec<Tok>>,
|
||
) -> ops::Range<usize> {
|
||
job_or_process_extent(false, buff, cursor_pos, out_tokens)
|
||
}
|
||
|
||
/// Get the beginning and end of the job or process definition under the cursor.
|
||
fn job_or_process_extent(
|
||
process: bool,
|
||
buff: &wstr,
|
||
cursor_pos: usize,
|
||
mut out_tokens: Option<&mut Vec<Tok>>,
|
||
) -> ops::Range<usize> {
|
||
let mut finished = false;
|
||
|
||
let cmdsub_range = parse_util_cmdsubst_extent(buff, cursor_pos);
|
||
assert!(cursor_pos >= cmdsub_range.start);
|
||
let pos = cursor_pos - cmdsub_range.start;
|
||
|
||
let mut result = cmdsub_range.clone();
|
||
for token in Tokenizer::new(
|
||
&buff[cmdsub_range.clone()],
|
||
TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS,
|
||
) {
|
||
let tok_begin = token.offset();
|
||
if finished {
|
||
break;
|
||
}
|
||
match token.type_ {
|
||
TokenType::Pipe
|
||
| TokenType::End
|
||
| TokenType::Background
|
||
| TokenType::AndAnd
|
||
| TokenType::OrOr
|
||
| TokenType::LeftBrace
|
||
| TokenType::RightBrace
|
||
if (token.type_ != TokenType::Pipe || process) =>
|
||
{
|
||
if tok_begin >= pos {
|
||
finished = true;
|
||
result.end = cmdsub_range.start + tok_begin;
|
||
} else {
|
||
// Statement at cursor might start after this token.
|
||
result.start = cmdsub_range.start + tok_begin + token.length();
|
||
out_tokens.as_mut().map(|tokens| tokens.clear());
|
||
}
|
||
continue; // Do not add this to tokens
|
||
}
|
||
_ => (),
|
||
}
|
||
out_tokens.as_mut().map(|tokens| tokens.push(token));
|
||
}
|
||
result
|
||
}
|
||
|
||
/// Find the beginning and end of the token under the cursor and the token before the current token.
|
||
///
|
||
/// \param buff the string to search for subshells
|
||
/// \param cursor_pos the position of the cursor
|
||
pub fn parse_util_token_extent(buff: &wstr, cursor_pos: usize) -> (Range<usize>, Range<usize>) {
|
||
let cmdsubst_range = parse_util_cmdsubst_extent(buff, cursor_pos);
|
||
let cmdsubst_begin = cmdsubst_range.start;
|
||
|
||
// pos is equivalent to cursor_pos within the range of the command substitution {begin, end}.
|
||
let offset_within_cmdsubst = cursor_pos - cmdsubst_range.start;
|
||
|
||
let mut cur_begin = cmdsubst_begin + offset_within_cmdsubst;
|
||
let mut cur_end = cur_begin;
|
||
let mut prev_begin = cur_begin;
|
||
let mut prev_end = cur_begin;
|
||
|
||
assert!(cmdsubst_begin <= buff.len());
|
||
assert!(cmdsubst_range.end <= buff.len());
|
||
|
||
for token in Tokenizer::new(&buff[cmdsubst_range], TOK_ACCEPT_UNFINISHED) {
|
||
let tok_begin = token.offset();
|
||
let mut tok_end = tok_begin;
|
||
|
||
// Calculate end of token.
|
||
if token.type_ == TokenType::String {
|
||
tok_end += token.length();
|
||
}
|
||
|
||
// Cursor was before beginning of this token, means that the cursor is between two tokens,
|
||
// so we set it to a zero element string and break.
|
||
if tok_begin > offset_within_cmdsubst {
|
||
cur_begin = cmdsubst_begin + offset_within_cmdsubst;
|
||
cur_end = cur_begin;
|
||
break;
|
||
}
|
||
|
||
// If cursor is inside the token, this is the token we are looking for. If so, set
|
||
// cur_begin and cur_end and break.
|
||
if token.type_ == TokenType::String && tok_end >= offset_within_cmdsubst {
|
||
cur_begin = cmdsubst_begin + token.offset();
|
||
cur_end = cur_begin + token.length();
|
||
break;
|
||
}
|
||
|
||
// Remember previous string token.
|
||
if token.type_ == TokenType::String {
|
||
prev_begin = cmdsubst_begin + token.offset();
|
||
prev_end = prev_begin + token.length();
|
||
}
|
||
}
|
||
|
||
assert!(prev_begin <= buff.len());
|
||
assert!(prev_end >= prev_begin);
|
||
assert!(prev_end <= buff.len());
|
||
(cur_begin..cur_end, prev_begin..prev_end)
|
||
}
|
||
|
||
/// Get the line number at the specified character offset.
|
||
pub fn parse_util_lineno(s: &wstr, offset: usize) -> usize {
|
||
// Return the line number of position offset, starting with 1.
|
||
if s.is_empty() {
|
||
return 1;
|
||
}
|
||
|
||
let end = offset.min(s.len());
|
||
count_newlines(&s[..end]) + 1
|
||
}
|
||
|
||
/// Calculate the line number of the specified cursor position.
|
||
pub fn parse_util_get_line_from_offset(s: &wstr, pos: usize) -> isize {
|
||
// Return the line pos is on, or -1 if it's after the end.
|
||
if pos > s.len() {
|
||
return -1;
|
||
}
|
||
count_newlines(&s[..pos]).try_into().unwrap()
|
||
}
|
||
|
||
/// Get the offset of the first character on the specified line.
|
||
pub fn parse_util_get_offset_from_line(s: &wstr, line: i32) -> Option<usize> {
|
||
// Return the first position on line X, counting from 0.
|
||
if line < 0 {
|
||
return None;
|
||
}
|
||
if line == 0 {
|
||
return Some(0);
|
||
}
|
||
|
||
let mut count = 0;
|
||
for (pos, _) in s.chars().enumerate().filter(|(_, c)| *c == '\n') {
|
||
count += 1;
|
||
if count == line {
|
||
return Some(pos + 1);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
/// Return the total offset of the buffer for the cursor position nearest to the specified position.
|
||
pub fn parse_util_get_offset(s: &wstr, line: i32, line_offset: isize) -> Option<usize> {
|
||
let off = parse_util_get_offset_from_line(s, line)?;
|
||
let off2 = parse_util_get_offset_from_line(s, line + 1).unwrap_or(s.len() + 1);
|
||
|
||
let mut line_offset = line_offset as usize;
|
||
if line_offset >= off2 - off - 1 {
|
||
line_offset = off2 - off - 1;
|
||
}
|
||
|
||
Some(off + line_offset)
|
||
}
|
||
|
||
/// Return the given string, unescaping wildcard characters but not performing any other character
|
||
/// transformation.
|
||
pub fn parse_util_unescape_wildcards(s: &wstr) -> WString {
|
||
let mut result = WString::with_capacity(s.len());
|
||
let unesc_qmark = !feature_test(FeatureFlag::QuestionMarkNoGlob);
|
||
|
||
let mut i = 0;
|
||
while i < s.len() {
|
||
let c = s.char_at(i);
|
||
if c == '*' {
|
||
result.push(ANY_STRING);
|
||
} else if c == '?' && unesc_qmark {
|
||
result.push(ANY_CHAR);
|
||
} else if (c == '\\' && s.char_at(i + 1) == '*')
|
||
|| (unesc_qmark && c == '\\' && s.char_at(i + 1) == '?')
|
||
{
|
||
result.push(s.char_at(i + 1));
|
||
i += 1;
|
||
} else if c == '\\' && s.char_at(i + 1) == '\\' {
|
||
// Not a wildcard, but ensure the next iteration doesn't see this escaped backslash.
|
||
result.push_utfstr(L!("\\\\"));
|
||
i += 1;
|
||
} else {
|
||
result.push(c);
|
||
}
|
||
i += 1;
|
||
}
|
||
result
|
||
}
|
||
|
||
/// Return if the given string contains wildcard characters.
|
||
pub fn parse_util_contains_wildcards(s: &wstr) -> bool {
|
||
let unesc_qmark = !feature_test(FeatureFlag::QuestionMarkNoGlob);
|
||
|
||
let mut i = 0;
|
||
while i < s.len() {
|
||
let c = s.as_char_slice()[i];
|
||
#[allow(clippy::if_same_then_else)]
|
||
if c == '*' {
|
||
return true;
|
||
} else if unesc_qmark && c == '?' {
|
||
return true;
|
||
} else if c == '\\' {
|
||
if s.char_at(i + 1) == '*' {
|
||
i += 1;
|
||
} else if unesc_qmark && s.char_at(i + 1) == '?' {
|
||
i += 1;
|
||
} else if s.char_at(i + 1) == '\\' {
|
||
// Not a wildcard, but ensure the next iteration doesn't see this escaped backslash.
|
||
i += 1;
|
||
}
|
||
}
|
||
i += 1;
|
||
}
|
||
false
|
||
}
|
||
|
||
/// Escape any wildcard characters in the given string. e.g. convert
|
||
/// "a*b" to "a\*b".
|
||
pub fn parse_util_escape_wildcards(s: &wstr) -> WString {
|
||
let mut result = WString::with_capacity(s.len());
|
||
let unesc_qmark = !feature_test(FeatureFlag::QuestionMarkNoGlob);
|
||
|
||
for c in s.chars() {
|
||
if c == '*' {
|
||
result.push_str("\\*");
|
||
} else if unesc_qmark && c == '?' {
|
||
result.push_str("\\?");
|
||
} else if c == '\\' {
|
||
result.push_str("\\\\");
|
||
} else {
|
||
result.push(c);
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
/// Checks if the specified string is a help option.
|
||
pub fn parse_util_argument_is_help(s: &wstr) -> bool {
|
||
[L!("-h"), L!("--help")].contains(&s)
|
||
}
|
||
|
||
/// Returns true if the specified command is a builtin that may not be used in a pipeline.
|
||
fn parser_is_pipe_forbidden(word: &wstr) -> bool {
|
||
[
|
||
L!("exec"),
|
||
L!("case"),
|
||
L!("break"),
|
||
L!("return"),
|
||
L!("continue"),
|
||
]
|
||
.contains(&word)
|
||
}
|
||
|
||
// Return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
|
||
// there are no arguments.
|
||
fn get_first_arg(list: &ast::ArgumentOrRedirectionList) -> Option<&ast::Argument> {
|
||
for v in list.iter() {
|
||
if v.is_argument() {
|
||
return Some(v.argument());
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
|
||
/// For example, if wc is @, then the variable name was $@ and we suggest $argv.
|
||
fn error_for_character(c: char) -> WString {
|
||
match c {
|
||
'?' => wgettext!(ERROR_NOT_STATUS).to_owned(),
|
||
'#' => wgettext!(ERROR_NOT_ARGV_COUNT).to_owned(),
|
||
'@' => wgettext!(ERROR_NOT_ARGV_AT).to_owned(),
|
||
'*' => wgettext!(ERROR_NOT_ARGV_STAR).to_owned(),
|
||
_ if [
|
||
'$',
|
||
VARIABLE_EXPAND,
|
||
VARIABLE_EXPAND_SINGLE,
|
||
VARIABLE_EXPAND_EMPTY,
|
||
]
|
||
.contains(&c) =>
|
||
{
|
||
wgettext!(ERROR_NOT_PID).to_owned()
|
||
}
|
||
_ if [BRACE_END, '}', ',', BRACE_SEP].contains(&c) => {
|
||
wgettext!(ERROR_NO_VAR_NAME).to_owned()
|
||
}
|
||
_ => wgettext_fmt!(ERROR_BAD_VAR_CHAR1, c),
|
||
}
|
||
}
|
||
|
||
/// Attempts to escape the string 'cmd' using the given quote type, as determined by the quote
|
||
/// character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and
|
||
/// thus escaping should be with backslashes). Optionally do not escape tildes.
|
||
pub fn parse_util_escape_string_with_quote(
|
||
cmd: &wstr,
|
||
quote: Option<char>,
|
||
escape_flags: EscapeFlags,
|
||
) -> WString {
|
||
let Some(quote) = quote else {
|
||
return escape_string(cmd, EscapeStringStyle::Script(escape_flags));
|
||
};
|
||
// Here we are going to escape a string with quotes.
|
||
// A few characters cannot be represented inside quotes, e.g. newlines. In that case,
|
||
// terminate the quote and then re-enter it.
|
||
let mut result = WString::new();
|
||
result.reserve(cmd.len());
|
||
for c in cmd.chars() {
|
||
match c {
|
||
'\n' => {
|
||
for c in [quote, '\\', 'n', quote] {
|
||
result.push(c);
|
||
}
|
||
}
|
||
'\t' => {
|
||
for c in [quote, '\\', 't', quote] {
|
||
result.push(c);
|
||
}
|
||
}
|
||
'\x08' => {
|
||
for c in [quote, '\\', 'b', quote] {
|
||
result.push(c);
|
||
}
|
||
}
|
||
'\r' => {
|
||
for c in [quote, '\\', 'r', quote] {
|
||
result.push(c);
|
||
}
|
||
}
|
||
'\\' => {
|
||
result.push_str("\\\\");
|
||
}
|
||
'$' => {
|
||
if quote == '"' {
|
||
result.push('\\');
|
||
}
|
||
result.push('$');
|
||
}
|
||
_ => {
|
||
if c == quote {
|
||
result.push('\\');
|
||
}
|
||
result.push(c);
|
||
}
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
/// Given a string, parse it as fish code and then return the indents. The return value has the same
|
||
/// size as the string.
|
||
pub fn parse_util_compute_indents(src: &wstr) -> Vec<i32> {
|
||
compute_indents(src, 0)
|
||
}
|
||
|
||
fn compute_indents(src: &wstr, initial_indent: i32) -> Vec<i32> {
|
||
// Make a vector the same size as the input string, which contains the indents. Initialize them
|
||
// to 0.
|
||
let mut indents = vec![0; src.len()];
|
||
|
||
// Simple trick: if our source does not contain a newline, then all indents are 0.
|
||
if !src.chars().any(|c| c == '\n') {
|
||
return indents;
|
||
}
|
||
|
||
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
|
||
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
|
||
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
|
||
// were a case item list.
|
||
let ast = ast::parse(
|
||
src,
|
||
ParseTreeFlags::CONTINUE_AFTER_ERROR
|
||
| ParseTreeFlags::INCLUDE_COMMENTS
|
||
| ParseTreeFlags::ACCEPT_INCOMPLETE_TOKENS
|
||
| ParseTreeFlags::LEAVE_UNTERMINATED,
|
||
None,
|
||
);
|
||
{
|
||
let mut iv = IndentVisitor::new(src, &mut indents, initial_indent);
|
||
iv.visit(ast.top());
|
||
iv.record_line_continuations_until(iv.indents.len());
|
||
iv.indents[iv.last_leaf_end..].fill(iv.last_indent);
|
||
|
||
// All newlines now get the *next* indent.
|
||
// For example, in this code:
|
||
// if true
|
||
// stuff
|
||
// the newline "belongs" to the if statement as it ends its job.
|
||
// But when rendered, it visually belongs to the job list.
|
||
|
||
let mut idx = src.len();
|
||
let mut next_indent = iv.last_indent;
|
||
let src = src.as_char_slice();
|
||
while idx != 0 {
|
||
idx -= 1;
|
||
if src[idx] == '\n' {
|
||
let empty_middle_line = src.get(idx + 1) == Some(&'\n');
|
||
let is_trailing_unclosed = idx == src.len() - 1 && iv.unclosed;
|
||
if !empty_middle_line && !is_trailing_unclosed {
|
||
iv.indents[idx] = next_indent;
|
||
}
|
||
} else {
|
||
next_indent = iv.indents[idx];
|
||
}
|
||
}
|
||
// Add an extra level of indentation to continuation lines.
|
||
for mut idx in iv.line_continuations {
|
||
loop {
|
||
indents[idx] = indents[idx].wrapping_add(1);
|
||
idx += 1;
|
||
if idx == src.len() || src[idx] == '\n' {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
indents
|
||
}
|
||
|
||
/// The number of spaces used to render one level of indentation.
///
/// This isn't supposed to be configurable.
/// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
pub const SPACES_PER_INDENT: usize = 4;
|
||
|
||
pub fn apply_indents(src: &wstr, indents: &[i32]) -> WString {
|
||
let mut indented = WString::new();
|
||
for (i, c) in src.chars().enumerate() {
|
||
indented.push(c);
|
||
if c != '\n' || i + 1 == src.len() {
|
||
continue;
|
||
}
|
||
indented.extend(std::iter::repeat_n(
|
||
' ',
|
||
SPACES_PER_INDENT * usize::try_from(indents[i]).unwrap(),
|
||
));
|
||
}
|
||
indented
|
||
}
|
||
|
||
/// Visitor which computes indents. It visits all of our nodes; when we get a job_list or
/// case_item_list, it increments the indent while visiting its children.
struct IndentVisitor<'a> {
    /// The parent node of the node we are currently visiting, or None if we are the root.
    parent: Option<&'a dyn ast::Node>,

    /// The one-past-the-last index of the most recently encountered leaf node.
    /// We use this to populate the indents even if there's no tokens in the range.
    last_leaf_end: usize,

    /// The last indent which we assigned.
    last_indent: i32,

    /// Whether we have an unfinished quote or command substitution.
    unclosed: bool,

    /// The source we are indenting.
    src: &'a wstr,

    /// List of indents, which we populate.
    indents: &'a mut Vec<i32>,

    /// The current indent level. It starts out one below the initial indent, as our top-level
    /// node is a job list which will immediately increment it.
    indent: i32,

    /// List of locations of escaped newline characters.
    line_continuations: Vec<usize>,
}
|
||
|
||
impl<'a> IndentVisitor<'a> {
|
||
fn new(src: &'a wstr, indents: &'a mut Vec<i32>, initial_indent: i32) -> Self {
|
||
Self {
|
||
parent: None,
|
||
last_leaf_end: 0,
|
||
last_indent: initial_indent - 1,
|
||
unclosed: false,
|
||
src,
|
||
indents,
|
||
indent: initial_indent - 1,
|
||
line_continuations: vec![],
|
||
}
|
||
}
|
||
/// Return whether a maybe_newlines node contains at least one newline.
|
||
fn has_newline(&self, nls: &ast::MaybeNewlines) -> bool {
|
||
nls.source(self.src).chars().any(|c| c == '\n')
|
||
}
|
||
fn record_line_continuations_until(&mut self, offset: usize) {
|
||
let gap_text = &self.src[self.last_leaf_end..offset];
|
||
let gap_text = gap_text.as_char_slice();
|
||
let Some(escaped_nl) = gap_text.windows(2).position(|w| *w == ['\\', '\n']) else {
|
||
return;
|
||
};
|
||
if gap_text[..escaped_nl].contains(&'#') {
|
||
return;
|
||
}
|
||
let mut newline = escaped_nl + 1;
|
||
// The gap text might contain multiple newlines if there are multiple lines that
|
||
// don't contain an AST node, for example, comment lines, or lines containing only
|
||
// the escaped newline.
|
||
loop {
|
||
self.line_continuations.push(self.last_leaf_end + newline);
|
||
match gap_text[newline + 1..].iter().position(|c| *c == '\n') {
|
||
Some(nextnl) => newline = newline + 1 + nextnl,
|
||
None => break,
|
||
}
|
||
}
|
||
}
|
||
|
||
fn indent_leaf(&mut self, range: SourceRange) {
|
||
let node_src = &self.src[range.start()..range.end()];
|
||
// Common case optimization.
|
||
if node_src.contains('(') /*)*/ && !node_src.contains('\n') {
|
||
self.indents[range.start()..range.end()].fill(self.indent);
|
||
return;
|
||
}
|
||
|
||
let mut done = range.start();
|
||
let mut cursor = 0;
|
||
let mut is_double_quoted = false;
|
||
let mut was_double_quoted;
|
||
loop {
|
||
was_double_quoted = is_double_quoted;
|
||
let parens = match parse_util_locate_cmdsubst_range(
|
||
node_src,
|
||
&mut cursor,
|
||
/*accept_incomplete=*/ true,
|
||
Some(&mut is_double_quoted),
|
||
None,
|
||
) {
|
||
MaybeParentheses::Error => break,
|
||
MaybeParentheses::None => {
|
||
break;
|
||
}
|
||
MaybeParentheses::CommandSubstitution(parens) => parens,
|
||
};
|
||
|
||
let command = parens.command();
|
||
self.indent_string_part(done..range.start() + command.start, was_double_quoted);
|
||
let cmdsub_contents = &node_src[command.clone()];
|
||
let indents = compute_indents(cmdsub_contents, self.indent + 1);
|
||
self.indents[range.start() + command.start..range.start() + command.end]
|
||
.copy_from_slice(&indents);
|
||
|
||
done = range.start() + command.end;
|
||
if parens.closing().is_empty() {
|
||
self.unclosed = true;
|
||
}
|
||
}
|
||
self.indent_string_part(done..range.end(), was_double_quoted);
|
||
}
|
||
|
||
fn indent_string_part(&mut self, range: Range<usize>, is_double_quoted: bool) {
|
||
let mut start = range.start;
|
||
let mut quoted = false;
|
||
if is_double_quoted {
|
||
match quote_end(self.src, range.start, '"') {
|
||
Some(q_end) => {
|
||
// We may be (in) a multi-line string, so don't indent.
|
||
start = q_end + 1;
|
||
}
|
||
None => quoted = true,
|
||
}
|
||
}
|
||
let mut done = start;
|
||
if !quoted {
|
||
let part = &self.src[done..range.end];
|
||
let mut callback = |offset| {
|
||
if !quoted {
|
||
// Quote open event. Indent unquoted part, including the opening quote.
|
||
self.indents[done..start + offset + 1].fill(self.indent);
|
||
done = start + offset + 1;
|
||
} else {
|
||
// Quote close. Don't indent, in case it's a multiline string.
|
||
// Mark the first line as indented but only to make tests look prettier.
|
||
let first_line_length = self.src[start..start + offset]
|
||
.chars()
|
||
.take_while(|&c| c != '\n')
|
||
.count();
|
||
self.indents[start..start + first_line_length].fill(self.indent);
|
||
done = start + offset;
|
||
}
|
||
quoted = !quoted;
|
||
};
|
||
for _token in Tokenizer::with_quote_events(part, TOK_ACCEPT_UNFINISHED, &mut callback) {
|
||
}
|
||
}
|
||
if !quoted {
|
||
self.indents[done..range.end].fill(self.indent);
|
||
} else {
|
||
self.unclosed = true;
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<'a> NodeVisitor<'a> for IndentVisitor<'a> {
|
||
// Default implementation is to just visit children.
|
||
fn visit(&mut self, node: &'a dyn Node) {
|
||
let mut inc_dec = (0, 0);
|
||
match node.kind() {
|
||
Kind::JobList(_) | Kind::AndorJobList(_) => {
|
||
// Job lists are never unwound.
|
||
inc_dec = (1, 1);
|
||
}
|
||
|
||
// Increment indents for conditions in headers (#1665).
|
||
Kind::JobConjunction(_node) => {
|
||
let parent_kind = self.parent.unwrap().kind();
|
||
if matches!(parent_kind, Kind::IfClause(_) | Kind::WhileHeader(_)) {
|
||
inc_dec = (1, 1);
|
||
}
|
||
}
|
||
|
||
// Increment indents for JobContinuation if it contains a newline.
|
||
// This is a bit of a hack - it indents cases like:
|
||
// cmd1 |
|
||
// ....cmd2
|
||
// but avoids "double indenting" if there's no newline:
|
||
// cmd1 | while cmd2
|
||
// ....cmd3
|
||
// end
|
||
// See #7252.
|
||
Kind::JobContinuation(node) => {
|
||
if self.has_newline(&node.newlines) {
|
||
inc_dec = (1, 1);
|
||
}
|
||
}
|
||
|
||
// Likewise for && and ||.
|
||
Kind::JobConjunctionContinuation(node) => {
|
||
if self.has_newline(&node.newlines) {
|
||
inc_dec = (1, 1);
|
||
}
|
||
}
|
||
|
||
Kind::CaseItemList(_) => {
|
||
// Here's a hack. Consider:
|
||
// switch abc
|
||
// cas
|
||
//
|
||
// fish will see that 'cas' is not valid inside a switch statement because it is
|
||
// not "case". It will then unwind back to the top level job list, producing a
|
||
// parse tree like:
|
||
//
|
||
// job_list
|
||
// switch_job
|
||
// <err>
|
||
// normal_job
|
||
// cas
|
||
//
|
||
// And so we will think that the 'cas' job is at the same level as the switch.
|
||
// To address this, if we see that the switch statement was not closed, do not
|
||
// decrement the indent afterwards.
|
||
let Kind::SwitchStatement(switchs) = self.parent.unwrap().kind() else {
|
||
panic!("Expected switch statement");
|
||
};
|
||
let dec = if switchs.end.has_source() { 1 } else { 0 };
|
||
inc_dec = (1, dec);
|
||
}
|
||
|
||
Kind::Token(node) => {
|
||
let token_type = node.token_type();
|
||
let parent_kind = self.parent.unwrap().kind();
|
||
if matches!(parent_kind, Kind::BeginHeader(_)) && token_type == ParseTokenType::End
|
||
{
|
||
// The newline after "begin" is optional, so it is part of the header.
|
||
// The header is not in the indented block, so indent the newline here.
|
||
if node.source(self.src) == "\n" {
|
||
inc_dec = (1, 1);
|
||
}
|
||
}
|
||
}
|
||
|
||
_ => {}
|
||
}
|
||
let range = node.source_range();
|
||
if range.length() > 0 && node.as_leaf().is_some() {
|
||
self.record_line_continuations_until(range.start());
|
||
self.indents[self.last_leaf_end..range.start()].fill(self.last_indent);
|
||
}
|
||
|
||
self.indent += inc_dec.0;
|
||
|
||
// If we increased the indentation, apply it to the remainder of the string, even if the
|
||
// list is empty. For example (where _ represents the cursor):
|
||
//
|
||
// if foo
|
||
// _
|
||
//
|
||
// we want to indent the newline.
|
||
if inc_dec.0 != 0 {
|
||
self.last_indent = self.indent;
|
||
}
|
||
|
||
// If this is a leaf node, apply the current indentation.
|
||
if node.as_leaf().is_some() && range.length() != 0 {
|
||
let leading_spaces = self.src[..range.start()]
|
||
.chars()
|
||
.rev()
|
||
.take_while(|&c| c == ' ')
|
||
.count();
|
||
self.indents[range.start() - leading_spaces..range.start()].fill(self.indent);
|
||
self.indent_leaf(range);
|
||
self.last_leaf_end = range.end();
|
||
self.last_indent = self.indent;
|
||
}
|
||
|
||
let saved = self.parent.replace(node);
|
||
node.accept(self);
|
||
self.parent = saved;
|
||
self.indent -= inc_dec.1;
|
||
}
|
||
}
|
||
|
||
/// Given a string, detect parse errors in it. If allow_incomplete is set, then if the string is
|
||
/// incomplete (e.g. an unclosed quote), an error is not returned and the ParserTestErrorBits::INCOMPLETE bit
|
||
/// is set in the return value. If allow_incomplete is not set, then incomplete strings result in an
|
||
/// error.
|
||
pub fn parse_util_detect_errors(
|
||
buff_src: &wstr,
|
||
mut out_errors: Option<&mut ParseErrorList>,
|
||
allow_incomplete: bool, /*=false*/
|
||
) -> Result<(), ParserTestErrorBits> {
|
||
// Whether there's an unclosed quote or subshell, and therefore unfinished. This is only set if
|
||
// allow_incomplete is set.
|
||
let mut has_unclosed_quote_or_subshell = false;
|
||
|
||
let parse_flags = if allow_incomplete {
|
||
ParseTreeFlags::LEAVE_UNTERMINATED
|
||
} else {
|
||
ParseTreeFlags::empty()
|
||
};
|
||
|
||
// Parse the input string into an ast. Some errors are detected here.
|
||
let mut parse_errors = ParseErrorList::new();
|
||
let ast = ast::parse(buff_src, parse_flags, Some(&mut parse_errors));
|
||
if allow_incomplete {
|
||
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
|
||
// successfully.
|
||
parse_errors.retain(|parse_error| {
|
||
if [
|
||
ParseErrorCode::TokenizerUnterminatedQuote,
|
||
ParseErrorCode::TokenizerUnterminatedSubshell,
|
||
]
|
||
.contains(&parse_error.code)
|
||
{
|
||
// Remove this error, since we don't consider it a real error.
|
||
has_unclosed_quote_or_subshell = true;
|
||
false
|
||
} else {
|
||
true
|
||
}
|
||
});
|
||
}
|
||
|
||
// has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
|
||
assert!(!has_unclosed_quote_or_subshell || allow_incomplete);
|
||
if has_unclosed_quote_or_subshell {
|
||
// We do not bother to validate the rest of the tree in this case.
|
||
return Err(ParserTestErrorBits::INCOMPLETE);
|
||
}
|
||
|
||
// Early parse error, stop here.
|
||
if !parse_errors.is_empty() {
|
||
if let Some(errors) = out_errors.as_mut() {
|
||
errors.extend(parse_errors);
|
||
}
|
||
return Err(ParserTestErrorBits::ERROR);
|
||
}
|
||
|
||
// Defer to the tree-walking version.
|
||
parse_util_detect_errors_in_ast(&ast, buff_src, out_errors)
|
||
}
|
||
|
||
/// Like parse_util_detect_errors but accepts an already-parsed ast.
|
||
/// The top of the ast is assumed to be a job list.
|
||
pub fn parse_util_detect_errors_in_ast(
|
||
ast: &Ast,
|
||
buff_src: &wstr,
|
||
mut out_errors: Option<&mut ParseErrorList>,
|
||
) -> Result<(), ParserTestErrorBits> {
|
||
let mut res = ParserTestErrorBits::default();
|
||
|
||
// Whether we encountered a parse error.
|
||
let mut errored = false;
|
||
|
||
// Whether we encountered an unclosed block. We detect this via an 'end_command' block without
|
||
// source.
|
||
let mut has_unclosed_block = false;
|
||
|
||
// Whether we encounter a missing statement, i.e. a newline after a pipe. This is found by
|
||
// detecting job_continuations that have source for pipes but not the statement.
|
||
let mut has_unclosed_pipe = false;
|
||
|
||
// Whether we encounter a missing job, i.e. a newline after && or ||. This is found by
|
||
// detecting job_conjunction_continuations that have source for && or || but not the job.
|
||
let mut has_unclosed_conjunction = false;
|
||
|
||
// Expand all commands.
|
||
// Verify 'or' and 'and' not used inside pipelines.
|
||
// Verify return only within a function.
|
||
// Verify no variable expansions.
|
||
|
||
let mut traversal = ast::Traversal::new(ast.top());
|
||
while let Some(node) = traversal.next() {
|
||
match node.kind() {
|
||
Kind::JobContinuation(jc) => {
|
||
// Somewhat clumsy way of checking for a statement without source in a pipeline.
|
||
// See if our pipe has source but our statement does not.
|
||
if jc.pipe.has_source() && jc.statement.try_source_range().is_none() {
|
||
has_unclosed_pipe = true;
|
||
}
|
||
}
|
||
Kind::JobConjunction(job_conjunction) => {
|
||
errored |= detect_errors_in_job_conjunction(job_conjunction, &mut out_errors);
|
||
}
|
||
Kind::JobConjunctionContinuation(jcc) => {
|
||
// Somewhat clumsy way of checking for a job without source in a conjunction.
|
||
// See if our conjunction operator (&& or ||) has source but our job does not.
|
||
if jcc.conjunction.has_source() && jcc.job.try_source_range().is_none() {
|
||
has_unclosed_conjunction = true;
|
||
}
|
||
}
|
||
Kind::Argument(arg) => {
|
||
let arg_src = arg.source(buff_src);
|
||
res |= parse_util_detect_errors_in_argument(arg, arg_src, &mut out_errors)
|
||
.err()
|
||
.unwrap_or_default();
|
||
}
|
||
Kind::JobPipeline(job) => {
|
||
// Disallow background in the following cases:
|
||
//
|
||
// foo & ; and bar
|
||
// foo & ; or bar
|
||
// if foo & ; end
|
||
// while foo & ; end
|
||
// If it's not a background job, nothing to do.
|
||
if job.bg.is_some() {
|
||
errored |= detect_errors_in_backgrounded_job(&traversal, job, &mut out_errors);
|
||
}
|
||
}
|
||
Kind::DecoratedStatement(stmt) => {
|
||
errored |= detect_errors_in_decorated_statement(
|
||
buff_src,
|
||
&traversal,
|
||
stmt,
|
||
&mut out_errors,
|
||
);
|
||
}
|
||
Kind::BlockStatement(block) => {
|
||
// If our 'end' had no source, we are unsourced.
|
||
if !block.end.has_source() {
|
||
has_unclosed_block = true;
|
||
}
|
||
errored |= detect_errors_in_block_redirection_list(
|
||
node,
|
||
&block.args_or_redirs,
|
||
&mut out_errors,
|
||
);
|
||
}
|
||
Kind::BraceStatement(brace_statement) => {
|
||
// If our closing brace had no source, we are unsourced.
|
||
if !brace_statement.right_brace.has_source() {
|
||
has_unclosed_block = true;
|
||
}
|
||
errored |= detect_errors_in_block_redirection_list(
|
||
node,
|
||
&brace_statement.args_or_redirs,
|
||
&mut out_errors,
|
||
);
|
||
}
|
||
Kind::IfStatement(ifs) => {
|
||
// If our 'end' had no source, we are unsourced.
|
||
if !ifs.end.has_source() {
|
||
has_unclosed_block = true;
|
||
}
|
||
errored |= detect_errors_in_block_redirection_list(
|
||
node,
|
||
&ifs.args_or_redirs,
|
||
&mut out_errors,
|
||
);
|
||
}
|
||
Kind::SwitchStatement(switchs) => {
|
||
// If our 'end' had no source, we are unsourced.
|
||
if !switchs.end.has_source() {
|
||
has_unclosed_block = true;
|
||
}
|
||
errored |= detect_errors_in_block_redirection_list(
|
||
node,
|
||
&switchs.args_or_redirs,
|
||
&mut out_errors,
|
||
);
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
if errored {
|
||
res |= ParserTestErrorBits::ERROR;
|
||
}
|
||
|
||
if has_unclosed_block || has_unclosed_pipe || has_unclosed_conjunction {
|
||
res |= ParserTestErrorBits::INCOMPLETE;
|
||
}
|
||
if res == ParserTestErrorBits::default() {
|
||
Ok(())
|
||
} else {
|
||
Err(res)
|
||
}
|
||
}
|
||
|
||
/// Detect errors in the specified string when parsed as an argument list. Returns the text of an
|
||
/// error, or none if no error occurred.
|
||
pub fn parse_util_detect_errors_in_argument_list(
|
||
arg_list_src: &wstr,
|
||
prefix: &wstr,
|
||
) -> Result<(), WString> {
|
||
// Helper to return a description of the first error.
|
||
let get_error_text = |errors: &ParseErrorList| {
|
||
assert!(!errors.is_empty(), "Expected an error");
|
||
Err(errors[0].describe_with_prefix(
|
||
arg_list_src,
|
||
prefix,
|
||
false, /* not interactive */
|
||
false, /* don't skip caret */
|
||
))
|
||
};
|
||
|
||
// Parse the string as a freestanding argument list.
|
||
let mut errors = ParseErrorList::new();
|
||
let ast = ast::parse_argument_list(arg_list_src, ParseTreeFlags::empty(), Some(&mut errors));
|
||
if !errors.is_empty() {
|
||
return get_error_text(&errors);
|
||
}
|
||
|
||
// Get the root argument list and extract arguments from it.
|
||
// Test each of these.
|
||
let arg_list: &ast::FreestandingArgumentList = ast.top();
|
||
let args = &arg_list.arguments;
|
||
for arg in args.iter() {
|
||
let arg_src = arg.source(arg_list_src);
|
||
if parse_util_detect_errors_in_argument(arg, arg_src, &mut Some(&mut errors)).is_err() {
|
||
return get_error_text(&errors);
|
||
}
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Append a syntax error to the given error list.
///
/// Takes the (optional) error list, the source offset and length the error covers, and a
/// format string with its arguments. The message is built with `wgettext_fmt!` and handed to
/// `append_syntax_error_formatted!`, whose value (always `true`) is the value of this macro.
macro_rules! append_syntax_error {
    (
        $errors:expr, $source_location:expr,
        $source_length:expr, $fmt:expr
        $(, $arg:expr)* $(,)?
    ) => {
        append_syntax_error_formatted!(
            $errors,
            $source_location,
            $source_length,
            wgettext_fmt!($fmt $(, $arg)*)
        )
    };
}
|
||
|
||
/// Append a syntax error with an already-formatted message to the given error list.
///
/// `$errors` must offer `as_mut()` yielding an optional error list. If a list is present, a
/// `ParseError` with code `ParseErrorCode::Syntax` covering `$source_length` characters from
/// `$source_location` is pushed; otherwise the location and text are not even evaluated.
/// The macro always evaluates to `true`, so it can be assigned to an "errored" flag.
macro_rules! append_syntax_error_formatted {
    (
        $errors:expr, $source_location:expr,
        $source_length:expr, $text:expr
    ) => {{
        if let Some(error_list) = $errors.as_mut() {
            error_list.push(ParseError {
                source_start: $source_location,
                source_length: $source_length,
                code: ParseErrorCode::Syntax,
                text: $text,
                ..Default::default()
            });
        }
        true
    }};
}
|
||
|
||
/// Test if this argument contains any errors. Detected errors include syntax errors in command
|
||
/// substitutions, improperly escaped characters and improper use of the variable expansion
|
||
/// operator.
|
||
pub fn parse_util_detect_errors_in_argument(
|
||
arg: &ast::Argument,
|
||
arg_src: &wstr,
|
||
out_errors: &mut Option<&mut ParseErrorList>,
|
||
) -> Result<(), ParserTestErrorBits> {
|
||
let Some(source_range) = arg.try_source_range() else {
|
||
return Ok(());
|
||
};
|
||
|
||
let source_start = source_range.start();
|
||
let mut err = ParserTestErrorBits::default();
|
||
|
||
let check_subtoken =
|
||
|begin: usize, end: usize, out_errors: &mut Option<&mut ParseErrorList>| {
|
||
let Some(unesc) = unescape_string(
|
||
&arg_src[begin..end],
|
||
UnescapeStringStyle::Script(UnescapeFlags::SPECIAL),
|
||
) else {
|
||
if out_errors.is_some() {
|
||
let src = arg_src.as_char_slice();
|
||
if src.len() == 2
|
||
&& src[0] == '\\'
|
||
&& (src[1] == 'c'
|
||
|| src[1].to_lowercase().eq(['u'])
|
||
|| src[1].to_lowercase().eq(['x']))
|
||
{
|
||
append_syntax_error!(
|
||
out_errors,
|
||
source_start + begin,
|
||
end - begin,
|
||
"Incomplete escape sequence '%s'",
|
||
arg_src
|
||
);
|
||
return ParserTestErrorBits::ERROR;
|
||
}
|
||
append_syntax_error!(
|
||
out_errors,
|
||
source_start + begin,
|
||
end - begin,
|
||
"Invalid token '%s'",
|
||
arg_src
|
||
);
|
||
}
|
||
return ParserTestErrorBits::ERROR;
|
||
};
|
||
|
||
let mut err = ParserTestErrorBits::default();
|
||
// Check for invalid variable expansions.
|
||
let unesc = unesc.as_char_slice();
|
||
for (idx, c) in unesc.iter().enumerate() {
|
||
if ![VARIABLE_EXPAND, VARIABLE_EXPAND_SINGLE].contains(c) {
|
||
continue;
|
||
}
|
||
let next_char = unesc.get(idx + 1).copied().unwrap_or('\0');
|
||
if ![VARIABLE_EXPAND, VARIABLE_EXPAND_SINGLE, '('].contains(&next_char)
|
||
&& !valid_var_name_char(next_char)
|
||
{
|
||
err = ParserTestErrorBits::ERROR;
|
||
if let Some(out_errors) = out_errors {
|
||
let mut first_dollar = idx;
|
||
while first_dollar > 0
|
||
&& [VARIABLE_EXPAND, VARIABLE_EXPAND_SINGLE]
|
||
.contains(&unesc[first_dollar - 1])
|
||
{
|
||
first_dollar -= 1;
|
||
}
|
||
parse_util_expand_variable_error(
|
||
unesc.into(),
|
||
source_start,
|
||
first_dollar,
|
||
out_errors,
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
err
|
||
};
|
||
|
||
let mut cursor = 0;
|
||
let mut checked = 0;
|
||
|
||
let mut do_loop = true;
|
||
let mut is_quoted = false;
|
||
while do_loop {
|
||
let mut has_dollar = false;
|
||
match parse_util_locate_cmdsubst_range(
|
||
arg_src,
|
||
&mut cursor,
|
||
false,
|
||
Some(&mut is_quoted),
|
||
Some(&mut has_dollar),
|
||
) {
|
||
MaybeParentheses::Error => {
|
||
err |= ParserTestErrorBits::ERROR;
|
||
append_syntax_error!(out_errors, source_start, 1, "Mismatched parenthesis");
|
||
return Err(err);
|
||
}
|
||
MaybeParentheses::None => {
|
||
do_loop = false;
|
||
}
|
||
MaybeParentheses::CommandSubstitution(parens) => {
|
||
err |= check_subtoken(
|
||
checked,
|
||
parens.start() - if has_dollar { 1 } else { 0 },
|
||
out_errors,
|
||
);
|
||
let mut subst_errors = ParseErrorList::new();
|
||
if let Err(subst_err) = parse_util_detect_errors(
|
||
&arg_src[parens.command()],
|
||
Some(&mut subst_errors),
|
||
false,
|
||
) {
|
||
err |= subst_err;
|
||
}
|
||
|
||
// Our command substitution produced error offsets relative to its source. Tweak the
|
||
// offsets of the errors in the command substitution to account for both its offset
|
||
// within the string, and the offset of the node.
|
||
let error_offset = parens.start() + 1 + source_start;
|
||
parse_error_offset_source_start(&mut subst_errors, error_offset);
|
||
if let Some(out_errors) = out_errors {
|
||
out_errors.extend(subst_errors);
|
||
}
|
||
|
||
checked = parens.end();
|
||
}
|
||
}
|
||
}
|
||
|
||
err |= check_subtoken(checked, arg_src.len(), out_errors);
|
||
|
||
if err.is_empty() { Ok(()) } else { Err(err) }
|
||
}
|
||
|
||
fn detect_errors_in_job_conjunction(
|
||
job_conjunction: &ast::JobConjunction,
|
||
parse_errors: &mut Option<&mut ParseErrorList>,
|
||
) -> bool {
|
||
// Disallow background immediately before conjunction continuations. For example:
|
||
// foo & && bar
|
||
// foo & || baz
|
||
let continuations = &job_conjunction.continuations;
|
||
let jobs = iter::once(&job_conjunction.job)
|
||
.chain(continuations.iter().map(|continuation| &continuation.job));
|
||
for (job, continuation) in jobs.zip(continuations.iter()) {
|
||
if job.bg.is_some() {
|
||
let conjunction = &continuation.conjunction;
|
||
return append_syntax_error!(
|
||
parse_errors,
|
||
conjunction.source_range().start(),
|
||
conjunction.source_range().length(),
|
||
BOOL_AFTER_BACKGROUND_ERROR_MSG,
|
||
if conjunction.token_type() == ParseTokenType::AndAnd {
|
||
L!("&&")
|
||
} else {
|
||
L!("||")
|
||
}
|
||
);
|
||
}
|
||
}
|
||
|
||
false
|
||
}
|
||
|
||
/// Given that the job should be backgrounded, return true if we detect any errors.
|
||
fn detect_errors_in_backgrounded_job(
|
||
traversal: &Traversal,
|
||
job: &ast::JobPipeline,
|
||
parse_errors: &mut Option<&mut ParseErrorList>,
|
||
) -> bool {
|
||
let Some(source_range) = job.try_source_range() else {
|
||
return false;
|
||
};
|
||
|
||
let mut errored = false;
|
||
// Disallow background in the following cases:
|
||
// foo & ; and bar
|
||
// foo & ; or bar
|
||
// if foo & ; end
|
||
// while foo & ; end
|
||
let Kind::JobConjunction(job_conj) = traversal.parent(job).kind() else {
|
||
return false;
|
||
};
|
||
|
||
let job_conj_parent = traversal.parent(job_conj);
|
||
if matches!(
|
||
job_conj_parent.kind(),
|
||
Kind::IfClause(_) | Kind::WhileHeader(_)
|
||
) {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_range.start(),
|
||
source_range.length(),
|
||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG
|
||
);
|
||
} else if let Kind::JobList(jlist) = job_conj_parent.kind() {
|
||
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
|
||
// Find the index of ourselves in the job list.
|
||
let index = jlist
|
||
.iter()
|
||
.position(|job| is_same_node(job, job_conj))
|
||
.expect("Should have found the job in the list");
|
||
|
||
// Try getting the next job and check its decorator.
|
||
if let Some(next) = jlist.get(index + 1) {
|
||
if let Some(deco) = &next.decorator {
|
||
assert!(
|
||
[ParseKeyword::And, ParseKeyword::Or].contains(&deco.keyword()),
|
||
"Unexpected decorator keyword"
|
||
);
|
||
let deco_name = if deco.keyword() == ParseKeyword::And {
|
||
L!("and")
|
||
} else {
|
||
L!("or")
|
||
};
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
deco.source_range().start(),
|
||
deco.source_range().length(),
|
||
BOOL_AFTER_BACKGROUND_ERROR_MSG,
|
||
deco_name
|
||
);
|
||
}
|
||
}
|
||
}
|
||
errored
|
||
}
|
||
|
||
/// Given a source buffer `buff_src` and decorated statement `dst` within it, return true if there
|
||
/// is an error and false if not.
|
||
fn detect_errors_in_decorated_statement(
|
||
buff_src: &wstr,
|
||
traversal: &ast::Traversal,
|
||
dst: &ast::DecoratedStatement,
|
||
parse_errors: &mut Option<&mut ParseErrorList>,
|
||
) -> bool {
|
||
let mut errored = false;
|
||
let source_start = dst.source_range().start();
|
||
let source_length = dst.source_range().length();
|
||
let decoration = dst.decoration();
|
||
|
||
// Determine if the first argument is help.
|
||
let mut first_arg_is_help = false;
|
||
if let Some(arg) = get_first_arg(&dst.args_or_redirs) {
|
||
let arg_src = arg.source(buff_src);
|
||
first_arg_is_help = parse_util_argument_is_help(arg_src);
|
||
}
|
||
|
||
// Get the statement we are part of.
|
||
let Kind::Statement(st) = traversal.parent(dst).kind() else {
|
||
panic!();
|
||
};
|
||
|
||
// Walk up to the job.
|
||
let job = traversal
|
||
.parent_nodes()
|
||
.find_map(|n| match n.kind() {
|
||
Kind::JobPipeline(job) => Some(job),
|
||
_ => None,
|
||
})
|
||
.expect("should have found the job");
|
||
|
||
// Check our pipeline position.
|
||
let pipe_pos = if job.continuation.is_empty() {
|
||
PipelinePosition::None
|
||
} else if is_same_node(&job.statement, st) {
|
||
PipelinePosition::First
|
||
} else {
|
||
PipelinePosition::Subsequent
|
||
};
|
||
|
||
// Check that we don't try to pipe through exec.
|
||
let is_in_pipeline = pipe_pos != PipelinePosition::None;
|
||
if is_in_pipeline && decoration == StatementDecoration::Exec {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
INVALID_PIPELINE_CMD_ERR_MSG,
|
||
"exec"
|
||
);
|
||
}
|
||
|
||
// This is a somewhat stale check that 'and' and 'or' are not in pipelines, except at the
|
||
// beginning. We can't disallow them as commands entirely because we need to support 'and
|
||
// --help', etc.
|
||
if pipe_pos == PipelinePosition::Subsequent {
|
||
// We only reject it if we have no decoration.
|
||
// `echo foo | command time something`
|
||
// is entirely fair and valid.
|
||
// Other current decorations like "exec"
|
||
// are already forbidden.
|
||
if dst.decoration() == StatementDecoration::None {
|
||
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
|
||
// commands.
|
||
let command = dst.command.source(buff_src);
|
||
if [L!("and"), L!("or")].contains(&command) {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
INVALID_PIPELINE_CMD_ERR_MSG,
|
||
command
|
||
);
|
||
}
|
||
|
||
// Similarly for time (#8841).
|
||
if command == "time" {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
TIME_IN_PIPELINE_ERR_MSG
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
// $status specifically is invalid as a command,
|
||
// to avoid people trying `if $status`.
|
||
// We see this surprisingly regularly.
|
||
let com = dst.command.source(buff_src);
|
||
if com == "$status" {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
"$status is not valid as a command. See `help %s`",
|
||
help_section!("language#conditions")
|
||
);
|
||
}
|
||
|
||
let unexp_command = com;
|
||
if !unexp_command.is_empty() {
|
||
// Check that we can expand the command.
|
||
// Make a new error list so we can fix the offset for just those, then append later.
|
||
let mut new_errors = ParseErrorList::new();
|
||
let mut command = WString::new();
|
||
if matches!(
|
||
expand_to_command_and_args(
|
||
unexp_command,
|
||
&OperationContext::empty(),
|
||
&mut command,
|
||
None,
|
||
Some(&mut new_errors),
|
||
true, /* skip wildcards */
|
||
)
|
||
.result,
|
||
ExpandResultCode::error | ExpandResultCode::overflow
|
||
) {
|
||
errored = true;
|
||
}
|
||
|
||
// Check that pipes are sound.
|
||
if !errored && parser_is_pipe_forbidden(&command) && is_in_pipeline {
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
INVALID_PIPELINE_CMD_ERR_MSG,
|
||
command
|
||
);
|
||
}
|
||
|
||
// Check that we don't break or continue from outside a loop.
|
||
if !errored && (command == "break" || command == "continue") && !first_arg_is_help {
|
||
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
|
||
// stop the search; we can't break an outer loop from inside a function.
|
||
// This is a little funny because we can't tell if it's a 'for' or 'while'
|
||
// loop from the ancestor alone; we need the header. That is, we hit a
|
||
// block_statement, and have to check its header.
|
||
let mut found_loop = false;
|
||
for block in traversal.parent_nodes().filter_map(|anc| match anc.kind() {
|
||
Kind::BlockStatement(block) => Some(block),
|
||
_ => None,
|
||
}) {
|
||
match block.header {
|
||
ast::BlockStatementHeader::For(_) | ast::BlockStatementHeader::While(_) => {
|
||
// This is a loop header, so we can break or continue.
|
||
found_loop = true;
|
||
break;
|
||
}
|
||
ast::BlockStatementHeader::Function(_) => {
|
||
// This is a function header, so we cannot break or
|
||
// continue. We stop our search here.
|
||
found_loop = false;
|
||
break;
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
if !found_loop {
|
||
errored = if command == "break" {
|
||
append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
INVALID_BREAK_ERR_MSG
|
||
)
|
||
} else {
|
||
append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
INVALID_CONTINUE_ERR_MSG
|
||
)
|
||
}
|
||
}
|
||
}
|
||
|
||
// Check that we don't do an invalid builtin (issue #1252).
|
||
if !errored && decoration == StatementDecoration::Builtin {
|
||
let mut command = unexp_command.to_owned();
|
||
if expand_one(
|
||
&mut command,
|
||
ExpandFlags::FAIL_ON_CMDSUBST,
|
||
&OperationContext::empty(),
|
||
match parse_errors {
|
||
Some(pe) => Some(pe),
|
||
None => None,
|
||
},
|
||
) && !builtin_exists(unexp_command)
|
||
{
|
||
errored = append_syntax_error!(
|
||
parse_errors,
|
||
source_start,
|
||
source_length,
|
||
UNKNOWN_BUILTIN_ERR_MSG,
|
||
unexp_command
|
||
);
|
||
}
|
||
}
|
||
|
||
if let Some(parse_errors) = parse_errors {
|
||
// The expansion errors here go from the *command* onwards,
|
||
// so we need to offset them by the *command* offset,
|
||
// excluding the decoration.
|
||
parse_error_offset_source_start(&mut new_errors, dst.command.source_range().start());
|
||
parse_errors.extend(new_errors);
|
||
}
|
||
}
|
||
errored
|
||
}
|
||
|
||
// Given we have a trailing ArgumentOrRedirectionList, like `begin; end > /dev/null`, verify that
|
||
// there are no arguments in the list. The parent of the list is provided.
|
||
fn detect_errors_in_block_redirection_list(
|
||
parent: &dyn Node,
|
||
args_or_redirs: &ast::ArgumentOrRedirectionList,
|
||
out_errors: &mut Option<&mut ParseErrorList>,
|
||
) -> bool {
|
||
let Some(first_arg) = get_first_arg(args_or_redirs) else {
|
||
return false;
|
||
};
|
||
let r = first_arg.source_range();
|
||
if let Kind::BraceStatement(_) = parent.kind() {
|
||
append_syntax_error!(out_errors, r.start(), r.length(), RIGHT_BRACE_ARG_ERR_MSG);
|
||
} else {
|
||
append_syntax_error!(out_errors, r.start(), r.length(), END_ARG_ERR_MSG);
|
||
}
|
||
true
|
||
}
|
||
|
||
/// Given a string containing a variable expansion error, append an appropriate error to the errors
|
||
/// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos
|
||
/// is the offset of the offending dollar sign within the token.
|
||
pub fn parse_util_expand_variable_error(
|
||
token: &wstr,
|
||
global_token_pos: usize,
|
||
dollar_pos: usize,
|
||
errors: &mut ParseErrorList,
|
||
) {
|
||
let mut errors = Some(errors);
|
||
// Note that dollar_pos is probably VARIABLE_EXPAND or VARIABLE_EXPAND_SINGLE, not a literal
|
||
// dollar sign.
|
||
let token = token.as_char_slice();
|
||
let double_quotes = token[dollar_pos] == VARIABLE_EXPAND_SINGLE;
|
||
let start_error_count = errors.as_ref().unwrap().len();
|
||
let global_dollar_pos = global_token_pos + dollar_pos;
|
||
let global_after_dollar_pos = global_dollar_pos + 1;
|
||
let char_after_dollar = token.get(dollar_pos + 1).copied().unwrap_or('\0');
|
||
|
||
match char_after_dollar {
|
||
BRACE_BEGIN | '{' => {
|
||
// The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
|
||
// quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
|
||
// report a bracket error. Otherwise just complain about the ${.
|
||
let mut looks_like_variable = false;
|
||
let closing_bracket = token
|
||
.iter()
|
||
.skip(dollar_pos + 2)
|
||
.position(|c| {
|
||
*c == if char_after_dollar == '{' {
|
||
'}'
|
||
} else {
|
||
BRACE_END
|
||
}
|
||
})
|
||
.map(|p| p + dollar_pos + 2);
|
||
let mut var_name = L!("");
|
||
if let Some(var_end) = closing_bracket {
|
||
let var_start = dollar_pos + 2;
|
||
var_name = (&token[var_start..var_end]).into();
|
||
looks_like_variable = valid_var_name(var_name);
|
||
}
|
||
if looks_like_variable {
|
||
if double_quotes {
|
||
append_syntax_error!(
|
||
errors,
|
||
global_after_dollar_pos,
|
||
1,
|
||
ERROR_BRACKETED_VARIABLE_QUOTED1,
|
||
truncate(var_name, var_err_len, None)
|
||
);
|
||
} else {
|
||
append_syntax_error!(
|
||
errors,
|
||
global_after_dollar_pos,
|
||
1,
|
||
ERROR_BRACKETED_VARIABLE1,
|
||
truncate(var_name, var_err_len, None),
|
||
);
|
||
}
|
||
} else {
|
||
append_syntax_error!(errors, global_after_dollar_pos, 1, ERROR_BAD_VAR_CHAR1, '{');
|
||
}
|
||
}
|
||
INTERNAL_SEPARATOR => {
|
||
// e.g.: echo foo"$"baz
|
||
// These are only ever quotes, not command substitutions. Command substitutions are
|
||
// handled earlier.
|
||
append_syntax_error!(errors, global_dollar_pos, 1, ERROR_NO_VAR_NAME);
|
||
}
|
||
'\0' => {
|
||
append_syntax_error!(errors, global_dollar_pos, 1, ERROR_NO_VAR_NAME);
|
||
}
|
||
_ => {
|
||
let mut token_stop_char = char_after_dollar;
|
||
// Unescape (see issue #50).
|
||
if token_stop_char == ANY_CHAR {
|
||
token_stop_char = '?';
|
||
} else if [ANY_STRING, ANY_STRING_RECURSIVE].contains(&token_stop_char) {
|
||
token_stop_char = '*';
|
||
}
|
||
|
||
// Determine which error message to use. The format string may not consume all the
|
||
// arguments we pass but that's harmless.
|
||
append_syntax_error_formatted!(
|
||
errors,
|
||
global_after_dollar_pos,
|
||
1,
|
||
error_for_character(token_stop_char)
|
||
);
|
||
}
|
||
}
|
||
|
||
// We should have appended exactly one error.
|
||
assert!(errors.as_ref().unwrap().len() == start_error_count + 1);
|
||
}
|
||
|
||
localizable_consts!(
|
||
/// Error message for use of backgrounded commands before and/or.
|
||
pub(crate) BOOL_AFTER_BACKGROUND_ERROR_MSG
|
||
"The '%s' command can not be used immediately after a backgrounded job"
|
||
|
||
/// Error message for backgrounded commands as conditionals.
|
||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG
|
||
"Backgrounded commands can not be used as conditionals"
|
||
|
||
/// Error message for arguments to 'end'
|
||
END_ARG_ERR_MSG
|
||
"'end' does not take arguments. Did you forget a ';'?"
|
||
|
||
/// Error message for arguments to '}'
RIGHT_BRACE_ARG_ERR_MSG
|
||
"'}' does not take arguments. Did you forget a ';'?"
|
||
|
||
/// Error message when 'time' is in a pipeline.
|
||
TIME_IN_PIPELINE_ERR_MSG
|
||
"The 'time' command may only be at the beginning of a pipeline"
|
||
);
|
||
|
||
/// Maximum length of a variable name to show in error reports before truncation.
/// Passed to `truncate` when formatting the bracketed-variable error messages.
|
||
const var_err_len: usize = 16;
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::{
|
||
BOOL_AFTER_BACKGROUND_ERROR_MSG, parse_util_cmdsubst_extent, parse_util_compute_indents,
|
||
parse_util_detect_errors, parse_util_escape_string_with_quote, parse_util_process_extent,
|
||
parse_util_slice_length,
|
||
};
|
||
use crate::common::EscapeFlags;
|
||
use crate::parse_constants::{
|
||
ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE_QUOTED1, ERROR_BRACKETED_VARIABLE1,
|
||
ERROR_NO_VAR_NAME, ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR,
|
||
ERROR_NOT_PID, ERROR_NOT_STATUS,
|
||
};
|
||
use crate::prelude::*;
|
||
use crate::tests::prelude::*;
|
||
use pcre2::utf32::Regex;
|
||
|
||
#[test]
#[serial]
fn test_error_messages() {
    let _cleanup = test_init();

    /// Splits `format` at every `%c`, `%d` or `%s` specifier and returns the literal text
    /// around them, in order. The specifiers themselves are dropped. Pieces may be empty,
    /// e.g. when the format starts or ends with a specifier.
    fn separate_by_format_specifiers(format: &wstr) -> Vec<&wstr> {
        let specifier_re = Regex::new(L!(r"%[cds]").as_char_slice()).unwrap();
        let mut pieces = Vec::new();
        let mut cursor = 0;
        for found in specifier_re.find_iter(format.as_char_slice()) {
            let found = found.unwrap();
            pieces.push(&format[cursor..found.start()]);
            cursor = found.end();
        }
        // Whatever follows the last specifier (possibly nothing).
        pieces.push(&format[cursor..]);
        // Strip single quotes at the edges of each piece so that localized quote characters
        // do not cause a mismatch.
        pieces
            .into_iter()
            .map(|piece| piece.trim_matches('\''))
            .collect()
    }

    /// Returns true if `s` could have been produced by the format string `format`: every
    /// literal piece of the format must occur in `s`, in order and without overlapping.
    fn string_matches_format(s: &wstr, format: &wstr) -> bool {
        let pieces = separate_by_format_specifiers(format);
        assert!(!pieces.is_empty());
        let mut cursor = 0;
        pieces.into_iter().all(|piece| match s[cursor..].find(piece) {
            Some(found_at) => {
                // Continue searching right after this piece.
                cursor += found_at + piece.len();
                assert!(cursor <= s.len());
                true
            }
            None => false,
        })
    }

    // Parses `$src`, expects a failure, and checks that the first reported error text fits
    // the (localized) format string `$error_text_format`.
    macro_rules! validate {
        ($src:expr, $error_text_format:expr) => {{
            let mut reported = Vec::new();
            let outcome = parse_util_detect_errors(L!($src), Some(&mut reported), false);
            let expected_format = wgettext!($error_text_format);
            assert!(outcome.is_err());
            assert!(
                string_matches_format(&reported[0].text, expected_format),
                "command '{}' is expected to match error pattern '{}' but is '{}'",
                $src,
                $error_text_format.localize(),
                &reported[0].text
            );
        }};
    }

    validate!("echo $^", ERROR_BAD_VAR_CHAR1);
    validate!("echo foo${a}bar", ERROR_BRACKETED_VARIABLE1);
    validate!("echo foo\"${a}\"bar", ERROR_BRACKETED_VARIABLE_QUOTED1);
    validate!("echo foo\"${\"bar", ERROR_BAD_VAR_CHAR1);
    validate!("echo $?", ERROR_NOT_STATUS);
    validate!("echo $$", ERROR_NOT_PID);
    validate!("echo $#", ERROR_NOT_ARGV_COUNT);
    validate!("echo $@", ERROR_NOT_ARGV_AT);
    validate!("echo $*", ERROR_NOT_ARGV_STAR);
    validate!("echo $", ERROR_NO_VAR_NAME);
    validate!("echo foo\"$\"bar", ERROR_NO_VAR_NAME);
    validate!("echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME);
    validate!("echo foo $ bar", ERROR_NO_VAR_NAME);
    validate!("echo 1 & && echo 2", BOOL_AFTER_BACKGROUND_ERROR_MSG);
    validate!(
        "echo 1 && echo 2 & && echo 3",
        BOOL_AFTER_BACKGROUND_ERROR_MSG
    );
}
|
||
|
||
#[test]
fn test_parse_util_process_extent() {
    // Each case is (command line, cursor position, expected process extent).
    let cases = [
        (L!("for file in (path base\necho"), 22, 13..22),
        (L!("begin\n\n\nec"), 10, 6..10),
        (L!("begin; echo; end"), 12, 12..16),
    ];
    for (commandline, cursor, expected_range) in cases {
        assert_eq!(
            parse_util_process_extent(commandline, cursor, None),
            expected_range
        );
    }
}
|
||
|
||
#[test]
#[serial]
fn test_parse_util_cmdsubst_extent() {
    let _cleanup = test_init();
    let src = L!("echo (echo (echo hi");

    // A cursor inside the outermost command yields the whole string.
    let whole = 0..src.len();
    for cursor in 0..4 {
        assert_eq!(parse_util_cmdsubst_extent(src, cursor), whole);
    }

    // A cursor inside the first (unterminated) substitution: the extent starts right after
    // the first opening parenthesis.
    let first_subst_start = "echo (".chars().count();
    assert_eq!(
        parse_util_cmdsubst_extent(src, 8),
        first_subst_start..src.len()
    );

    // A cursor inside the nested substitution: the extent starts after the second
    // parenthesis.
    let nested_subst_start = "echo (echo (".chars().count();
    assert_eq!(
        parse_util_cmdsubst_extent(src, 17),
        nested_subst_start..src.len()
    );
}
|
||
|
||
#[test]
#[serial]
fn test_parse_util_slice_length() {
    let _cleanup = test_init();
    // Each case is (input, expected result); the unterminated slice yields `None`.
    let cases = [
        (L!("[2]"), Some(3)),
        (L!("[12]"), Some(4)),
        (L!("[\"foo\"]"), Some(7)),
        (L!("[\"foo\""), None),
    ];
    for (input, expected) in cases {
        assert_eq!(parse_util_slice_length(input), expected);
    }
}
|
||
|
||
#[test]
#[serial]
fn test_escape_quotes() {
    let _cleanup = test_init();

    // Maps the boolean `no_tilde` switch used by the cases below to escape flags.
    macro_rules! tilde_flags {
        ($no_tilde:expr) => {
            if $no_tilde {
                EscapeFlags::NO_TILDE
            } else {
                EscapeFlags::empty()
            }
        };
    }
    // Escapes `$cmd` for the given quote context and compares with `$expected`.
    macro_rules! validate {
        ($cmd:expr, $quote:expr, $no_tilde:expr, $expected:expr) => {
            assert_eq!(
                parse_util_escape_string_with_quote(L!($cmd), $quote, tilde_flags!($no_tilde)),
                L!($expected)
            );
        };
    }
    // Same as `validate!`, but additionally passes `EscapeFlags::NO_QUOTED`.
    macro_rules! validate_no_quoted {
        ($cmd:expr, $quote:expr, $no_tilde:expr, $expected:expr) => {
            assert_eq!(
                parse_util_escape_string_with_quote(
                    L!($cmd),
                    $quote,
                    EscapeFlags::NO_QUOTED | tilde_flags!($no_tilde)
                ),
                L!($expected)
            );
        };
    }

    // No quote context, default flags.
    validate!("abc~def", None, false, "'abc~def'");
    validate!("abc~def", None, true, "abc~def");
    validate!("~abc", None, false, "'~abc'");
    validate!("~abc", None, true, "~abc");

    // No quote context, NO_QUOTED.
    validate_no_quoted!("abc", None, false, "abc");
    validate_no_quoted!("abc~def", None, false, "abc\\~def");
    validate_no_quoted!("abc~def", None, true, "abc~def");
    validate_no_quoted!("abc\\~def", None, false, "abc\\\\\\~def");
    validate_no_quoted!("abc\\~def", None, true, "abc\\\\~def");
    validate_no_quoted!("~abc", None, false, "\\~abc");
    validate_no_quoted!("~abc", None, true, "~abc");
    validate_no_quoted!("~abc|def", None, false, "\\~abc\\|def");
    validate_no_quoted!("|abc~def", None, false, "\\|abc\\~def");
    validate_no_quoted!("|abc~def", None, true, "\\|abc~def");
    validate_no_quoted!("foo\nbar", None, false, "foo\\nbar");

    // Inside single quotes. Tildes are not expanded inside quotes, so no_tilde makes no
    // difference here.
    validate_no_quoted!("abc", Some('\''), false, "abc");
    validate_no_quoted!("abc\\def", Some('\''), false, "abc\\\\def");
    validate_no_quoted!("abc'def", Some('\''), false, "abc\\'def");
    validate_no_quoted!("~abc'def", Some('\''), false, "~abc\\'def");
    validate_no_quoted!("~abc'def", Some('\''), true, "~abc\\'def");
    validate_no_quoted!("foo\nba'r", Some('\''), false, "foo'\\n'ba\\'r");
    validate_no_quoted!("foo\\\\bar", Some('\''), false, "foo\\\\\\\\bar");

    // Inside double quotes.
    validate_no_quoted!("abc", Some('"'), false, "abc");
    validate_no_quoted!("abc\\def", Some('"'), false, "abc\\\\def");
    validate_no_quoted!("~abc'def", Some('"'), false, "~abc'def");
    validate_no_quoted!("~abc'def", Some('"'), true, "~abc'def");
    validate_no_quoted!("foo\nba'r", Some('"'), false, "foo\"\\n\"ba'r");
    validate_no_quoted!("foo\\\\bar", Some('"'), false, "foo\\\\\\\\bar");
}
|
||
|
||
#[test]
#[serial]
fn test_indents() {
    let _cleanup = test_init();

    /// A run of source text together with the indent expected for every character in it.
    struct Segment {
        /// Expected indent of each character of `text`.
        indent: i32,
        /// The piece of source text.
        text: &'static str,
    }

    /// Concatenates the segment texts into one source string, computes its indents, and
    /// checks that each character got the indent of the segment it came from.
    fn do_validate(segments: &[Segment]) {
        let mut source = WString::new();
        let mut expected_indents = Vec::new();
        for segment in segments {
            source.push_str(segment.text);
            // One expected entry per character of the segment.
            expected_indents
                .extend(std::iter::repeat(segment.indent).take(segment.text.chars().count()));
        }
        assert_eq!(parse_util_compute_indents(&source), expected_indents);
    }

    // Takes a flat list of `indent, "text"` pairs (commas between pairs and a trailing comma
    // are optional) and validates them as consecutive segments.
    macro_rules! validate {
        ( $( $(,)? $indent:literal, $text:literal )* $(,)? ) => {
            let segments = [
                $(
                    Segment{ indent: $indent, text: $text },
                )*
            ];
            do_validate(&segments);
        };
    }

    // The closure exists so that the attributes below apply to the whole table of cases.
    #[rustfmt::skip]
    #[allow(clippy::redundant_closure_call)]
    (|| {
        validate!(
            0, "if", 1, " foo",
            0, "\nend"
        );
        validate!(
            0, "if", 1, " foo",
            1, "\nfoo",
            0, "\nend"
        );

        validate!(
            0, "if", 1, " foo",
            1, "\nif", 2, " bar",
            1, "\nend",
            0, "\nend"
        );

        validate!(
            0, "if", 1, " foo",
            1, "\nif", 2, " bar",
            2, "\n",
            1, "\nend\n"
        );

        validate!(
            0, "if", 1, " foo",
            1, "\nif", 2, " bar",
            2, "\n"
        );

        validate!(
            0, "begin",
            1, "\nfoo",
            1, "\n"
        );

        // This case used to be listed twice verbatim; one copy is enough.
        validate!(
            0, "begin",
            1, "\n;",
            0, "end",
            0, "\nfoo", 0, "\n"
        );

        validate!(
            0, "if", 1, " foo",
            1, "\nif", 2, " bar",
            2, "\nbaz",
            1, "\nend", 1, "\n"
        );

        validate!(
            0, "switch foo",
            1, "\n"
        );

        validate!(
            0, "switch foo",
            1, "\ncase bar",
            1, "\ncase baz",
            2, "\nquux",
            2, "\nquux"
        );

        validate!(
            0, "switch foo",
            1, "\ncas" // parse error indentation handling
        );

        validate!(
            0, "while",
            1, " false",
            1, "\n# comment", // comment indentation handling
            1, "\ncommand",
            1, "\n# comment 2"
        );

        validate!(
            0, "begin",
            1, "\n", // "begin" is special because this newline belongs to the block header
            1, "\n"
        );

        // Continuation lines.
        validate!(
            0, "echo 'continuation line' \\",
            1, "\ncont",
            0, "\n"
        );
        validate!(
            0, "echo 'empty continuation line' \\",
            1, "\n"
        );
        validate!(
            0, "begin # continuation line in block",
            1, "\necho \\",
            2, "\ncont"
        );
        validate!(
            0, "begin # empty continuation line in block",
            1, "\necho \\",
            2, "\n",
            0, "\nend"
        );
        validate!(
            0, "echo 'multiple continuation lines' \\",
            1, "\nline1 \\",
            1, "\n# comment",
            1, "\n# more comment",
            1, "\nline2 \\",
            1, "\n"
        );
        validate!(
            0, "echo # inline comment ending in \\",
            0, "\nline"
        );
        validate!(
            0, "# line comment ending in \\",
            0, "\nline"
        );
        validate!(
            0, "echo 'multiple empty continuation lines' \\",
            1, "\n\\",
            1, "\n",
            0, "\n"
        );
        validate!(
            0, "echo 'multiple statements with continuation lines' \\",
            1, "\nline 1",
            0, "\necho \\",
            1, "\n"
        );
        // This is an edge case, probably okay to change the behavior here.
        validate!(
            0, "begin",
            1, " \\",
            2, "\necho 'continuation line in block header' \\",
            2, "\n",
            1, "\n",
            0, "\nend"
        );
        validate!(
            0, "if", 1, " true",
            1, "\n begin",
            2, "\n echo",
            1, "\n end",
            0, "\nend",
        );

        // Quotes and command substitutions.
        validate!(
            0, "if", 1, " foo \"",
            0, "\nquoted",
        );
        validate!(
            0, "if", 1, " foo \"",
            0, "\n",
        );
        validate!(
            0, "echo (",
            1, "\n", // )
        );
        validate!(
            0, "echo \"$(",
            1, "\n" // )
        );
        validate!(
            0, "echo (", // )
            1, "\necho \"",
            0, "\n"
        );
        validate!(
            0, "echo (", // )
            1, "\necho (", // )
            2, "\necho"
        );
        validate!(
            0, "if", 1, " true",
            1, "\n echo \"line1",
            0, "\nline2 ", 1, "$(",
            2, "\n echo line3",
            0, "\n) line4",
            0, "\nline5\"",
        );
        validate!(
            0, r#"echo "$()"'"#,
            0, "\n"
        );
        validate!(
            0, r#"""#,
            0, "\n",
            0, r#"$()"$() ""#
        );
    })();
}
|
||
}
|