mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-31 03:51:14 -03:00
Correct the positioning of the error caret
When an error occurs midway through a token (for example, at the unmatched paren in `abc(def`), make the caret point at the location of the error instead of at the beginning of the token.
This commit is contained in:
@@ -472,6 +472,26 @@ static void test_tok()
|
||||
err(L"Too few tokens returned from tokenizer");
|
||||
}
|
||||
}
|
||||
|
||||
/* Test some errors */
|
||||
{
|
||||
tok_t token;
|
||||
tokenizer_t t(L"abc\\", 0);
|
||||
do_test(t.next(&token));
|
||||
do_test(token.type == TOK_ERROR);
|
||||
do_test(token.error == TOK_UNTERMINATED_ESCAPE);
|
||||
do_test(token.error_offset == 3);
|
||||
}
|
||||
|
||||
{
|
||||
tok_t token;
|
||||
tokenizer_t t(L"abc defg(hij (klm)", 0);
|
||||
do_test(t.next(&token));
|
||||
do_test(t.next(&token));
|
||||
do_test(token.type == TOK_ERROR);
|
||||
do_test(token.error == TOK_UNTERMINATED_SUBSHELL);
|
||||
do_test(token.error_offset == 4);
|
||||
}
|
||||
|
||||
/* Test redirection_type_for_string */
|
||||
if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
|
||||
|
||||
@@ -641,7 +641,7 @@ public:
|
||||
void accept_tokens(parse_token_t token1, parse_token_t token2);
|
||||
|
||||
/* Report tokenizer errors */
|
||||
void report_tokenizer_error(parse_token_t token, int tok_err, const wcstring &tok_error);
|
||||
void report_tokenizer_error(const tok_t &tok);
|
||||
|
||||
/* Indicate if we hit a fatal error */
|
||||
bool has_fatal_error(void) const
|
||||
@@ -896,10 +896,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
|
||||
}
|
||||
}
|
||||
|
||||
void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, const wcstring &tok_error)
|
||||
void parse_ll_t::report_tokenizer_error(const tok_t &tok)
|
||||
{
|
||||
parse_error_code_t parse_error_code;
|
||||
switch (tok_err_code)
|
||||
switch (tok.error)
|
||||
{
|
||||
case TOK_UNTERMINATED_QUOTE:
|
||||
parse_error_code = parse_error_tokenizer_unterminated_quote;
|
||||
@@ -919,7 +919,7 @@ void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, c
|
||||
break;
|
||||
|
||||
}
|
||||
this->parse_error(token, parse_error_code, L"%ls", tok_error.c_str());
|
||||
this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls", tok.text.c_str());
|
||||
}
|
||||
|
||||
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token)
|
||||
@@ -1336,7 +1336,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
||||
/* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
|
||||
if (queue[1].type == parse_special_type_tokenizer_error)
|
||||
{
|
||||
parser.report_tokenizer_error(queue[1], tokenizer_token.error, tokenizer_token.text);
|
||||
parser.report_tokenizer_error(tokenizer_token);
|
||||
}
|
||||
|
||||
/* Handle errors */
|
||||
|
||||
@@ -21,7 +21,7 @@ segments.
|
||||
#include "tokenizer.h"
|
||||
|
||||
/* Wow what a hack */
|
||||
#define TOK_CALL_ERROR(t, e, x) do { (t)->call_error((e), (t)->squash_errors ? L"" : (x)); } while (0)
|
||||
#define TOK_CALL_ERROR(t, e, x, where) do { (t)->call_error((e), where, (t)->squash_errors ? L"" : (x)); } while (0)
|
||||
|
||||
/**
|
||||
Error string for unexpected end of string
|
||||
@@ -38,6 +38,12 @@ segments.
|
||||
*/
|
||||
#define SQUARE_BRACKET_ERROR _( L"Unexpected end of string, square brackets do not match" )
|
||||
|
||||
/**
|
||||
Error string for unterminated escape (backslash without continuation)
|
||||
*/
|
||||
#define UNTERMINATED_ESCAPE_ERROR _( L"Unexpected end of string, incomplete escape sequence" )
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Error string for invalid redirections
|
||||
@@ -52,14 +58,15 @@ segments.
|
||||
/**
|
||||
Set the latest tokens string to be the specified error message
|
||||
*/
|
||||
void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *error_message)
|
||||
void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message)
|
||||
{
|
||||
this->last_type = TOK_ERROR;
|
||||
this->error = error_type;
|
||||
this->global_error_offset = where ? where - this->orig_buff : 0;
|
||||
this->last_token = error_message;
|
||||
}
|
||||
|
||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(b), orig_buff(b), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
|
||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(b), orig_buff(b), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), global_error_offset(-1), squash_errors(false), continue_line_after_comment(false)
|
||||
{
|
||||
assert(b != NULL);
|
||||
|
||||
@@ -79,14 +86,23 @@ bool tokenizer_t::next(struct tok_t *result)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t current_pos = this->buff - this->orig_buff;
|
||||
|
||||
result->text = this->last_token;
|
||||
result->type = this->last_type;
|
||||
result->offset = last_pos;
|
||||
result->offset = this->last_pos;
|
||||
result->error = this->last_type == TOK_ERROR ? this->error : TOK_ERROR_NONE;
|
||||
assert(this->buff >= this->orig_buff);
|
||||
|
||||
/* Compute error offset */
|
||||
result->error_offset = 0;
|
||||
if (this->last_type == TOK_ERROR && this->global_error_offset >= this->last_pos && this->global_error_offset < current_pos)
|
||||
{
|
||||
result->error_offset = this->global_error_offset - this->last_pos;
|
||||
}
|
||||
|
||||
assert(this->buff >= this->orig_buff);
|
||||
size_t current_pos = this->buff - this->orig_buff;
|
||||
result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
|
||||
|
||||
this->tok_next();
|
||||
@@ -140,12 +156,15 @@ static int myal(wchar_t c)
|
||||
*/
|
||||
void tokenizer_t::read_string()
|
||||
{
|
||||
const wchar_t *start;
|
||||
long len;
|
||||
int do_loop=1;
|
||||
int paran_count=0;
|
||||
size_t paran_count=0;
|
||||
|
||||
// up to 96 open parens, before we give up on good error reporting
|
||||
const size_t paran_offsets_max = 96;
|
||||
size_t paran_offsets[paran_offsets_max];
|
||||
|
||||
start = this->buff;
|
||||
const wchar_t * const start = this->buff;
|
||||
bool is_first = true;
|
||||
|
||||
enum tok_mode_t
|
||||
@@ -162,12 +181,13 @@ void tokenizer_t::read_string()
|
||||
{
|
||||
if (*this->buff == L'\\')
|
||||
{
|
||||
const wchar_t *error_location = this->buff;
|
||||
this->buff++;
|
||||
if (*this->buff == L'\0')
|
||||
{
|
||||
if ((!this->accept_unfinished))
|
||||
{
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, QUOTE_ERROR);
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, UNTERMINATED_ESCAPE_ERROR, error_location);
|
||||
return;
|
||||
}
|
||||
else
|
||||
@@ -191,6 +211,7 @@ void tokenizer_t::read_string()
|
||||
case L'(':
|
||||
{
|
||||
paran_count=1;
|
||||
paran_offsets[0] = this->buff - this->orig_buff;
|
||||
mode = mode_subshell;
|
||||
break;
|
||||
}
|
||||
@@ -213,11 +234,12 @@ void tokenizer_t::read_string()
|
||||
}
|
||||
else
|
||||
{
|
||||
const wchar_t *error_loc = this->buff;
|
||||
this->buff += wcslen(this->buff);
|
||||
|
||||
if (! this->accept_unfinished)
|
||||
{
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR, error_loc);
|
||||
return;
|
||||
}
|
||||
do_loop = 0;
|
||||
@@ -239,6 +261,7 @@ void tokenizer_t::read_string()
|
||||
|
||||
case mode_array_brackets_and_subshell:
|
||||
case mode_subshell:
|
||||
{
|
||||
switch (*this->buff)
|
||||
{
|
||||
case L'\'':
|
||||
@@ -251,10 +274,11 @@ void tokenizer_t::read_string()
|
||||
}
|
||||
else
|
||||
{
|
||||
const wchar_t *error_loc = this->buff;
|
||||
this->buff += wcslen(this->buff);
|
||||
if ((!this->accept_unfinished))
|
||||
{
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR, error_loc);
|
||||
return;
|
||||
}
|
||||
do_loop = 0;
|
||||
@@ -264,9 +288,14 @@ void tokenizer_t::read_string()
|
||||
}
|
||||
|
||||
case L'(':
|
||||
if (paran_count < paran_offsets_max)
|
||||
{
|
||||
paran_offsets[paran_count] = this->buff - this->orig_buff;
|
||||
}
|
||||
paran_count++;
|
||||
break;
|
||||
case L')':
|
||||
assert(paran_count > 0);
|
||||
paran_count--;
|
||||
if (paran_count == 0)
|
||||
{
|
||||
@@ -278,12 +307,15 @@ void tokenizer_t::read_string()
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case mode_array_brackets:
|
||||
{
|
||||
switch (*this->buff)
|
||||
{
|
||||
case L'(':
|
||||
paran_count=1;
|
||||
paran_offsets[0] = this->buff - this->orig_buff;
|
||||
mode = mode_array_brackets_and_subshell;
|
||||
break;
|
||||
|
||||
@@ -296,6 +328,7 @@ void tokenizer_t::read_string()
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -312,12 +345,27 @@ void tokenizer_t::read_string()
|
||||
switch (mode)
|
||||
{
|
||||
case mode_subshell:
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
|
||||
{
|
||||
// Determine the innermost opening paran offset by interrogating paran_offsets
|
||||
assert(paran_count > 0);
|
||||
size_t offset_of_open_paran = 0;
|
||||
if (paran_count <= paran_offsets_max)
|
||||
{
|
||||
offset_of_open_paran = paran_offsets[paran_count - 1];
|
||||
}
|
||||
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR, this->orig_buff + offset_of_open_paran);
|
||||
break;
|
||||
}
|
||||
|
||||
case mode_array_brackets:
|
||||
case mode_array_brackets_and_subshell:
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, SQUARE_BRACKET_ERROR); // TOK_UNTERMINATED_SUBSHELL is a lie but nobody actually looks at it
|
||||
{
|
||||
size_t offset_of_bracket = 0;
|
||||
TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, SQUARE_BRACKET_ERROR, this->orig_buff + offset_of_bracket); // TOK_UNTERMINATED_SUBSHELL is a lie but nobody actually looks at it
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
assert(0 && "Unexpected mode in read_string");
|
||||
break;
|
||||
@@ -612,7 +660,7 @@ void tokenizer_t::tok_next()
|
||||
size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd);
|
||||
if (consumed == 0 || fd < 0)
|
||||
{
|
||||
TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR);
|
||||
TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -626,6 +674,7 @@ void tokenizer_t::tok_next()
|
||||
default:
|
||||
{
|
||||
/* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */
|
||||
const wchar_t *error_location = this->buff;
|
||||
size_t consumed = 0;
|
||||
enum token_type mode = TOK_NONE;
|
||||
int fd = -1;
|
||||
@@ -637,7 +686,7 @@ void tokenizer_t::tok_next()
|
||||
/* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd may be -1, indicating overflow; but we don't treat that as a tokenizer error. */
|
||||
if (mode == TOK_PIPE && fd == 0)
|
||||
{
|
||||
TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR);
|
||||
TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -79,13 +79,16 @@ struct tok_t
|
||||
/* If an error, this is the error code */
|
||||
enum tokenizer_error error;
|
||||
|
||||
/* If an error, this is the offset of the error within the token. A value of 0 means it occurred at 'offset' */
|
||||
size_t error_offset;
|
||||
|
||||
/* Offset of the token */
|
||||
size_t offset;
|
||||
|
||||
/* Length of the token */
|
||||
size_t length;
|
||||
|
||||
tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), offset(-1), length(-1) {}
|
||||
tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), error_offset(-1), offset(-1), length(-1) {}
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -119,13 +122,15 @@ class tokenizer_t
|
||||
bool show_blank_lines;
|
||||
/** Last error */
|
||||
tokenizer_error error;
|
||||
/** Last error offset, in "global" coordinates (relative to orig_buff) */
|
||||
size_t global_error_offset;
|
||||
/* Whether we are squashing errors */
|
||||
bool squash_errors;
|
||||
|
||||
/* Whether to continue the previous line after the comment */
|
||||
bool continue_line_after_comment;
|
||||
|
||||
void call_error(enum tokenizer_error error_type, const wchar_t *error_message);
|
||||
void call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message);
|
||||
void read_string();
|
||||
void read_comment();
|
||||
void tok_next();
|
||||
|
||||
Reference in New Issue
Block a user