From d5d9b9284ad990c69d1fa17294d00a81d4cfc9b6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 11 Dec 2013 18:34:28 -0800 Subject: [PATCH] Initial work towards rewriting detect_errors to use new parser. Low-level tests currently pass; high level tests fail. --- highlight.cpp | 3 +- parse_constants.h | 2 + parse_productions.cpp | 15 ++- parse_tree.cpp | 84 +++++++++++++++-- parse_tree.h | 26 ++++-- parser.cpp | 209 +++++++++++++++++++++++++++++++++++++++++- parser.h | 5 +- 7 files changed, 316 insertions(+), 28 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 23fe912b0..3c60150cd 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -2089,8 +2089,9 @@ const highlighter_t::color_array_t & highlighter_t::highlight() case symbol_decorated_statement: case symbol_if_statement: { - // Color the 'end' this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); + // Color the 'end' + this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND); } break; diff --git a/parse_constants.h b/parse_constants.h index d9f362120..706c31b84 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -47,6 +47,8 @@ enum parse_token_type_t symbol_redirection, symbol_optional_background, + + symbol_end_command, // Terminal types parse_token_type_string, diff --git a/parse_productions.cpp b/parse_productions.cpp index 9053c5da7..f79e945ae 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -187,7 +187,7 @@ RESOLVE(statement) PRODUCTIONS(if_statement) = { - {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) @@ -231,7 +231,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command} }; RESOLVE_ONLY(switch_statement) @@ -272,7 +272,7 @@ RESOLVE(argument_list) PRODUCTIONS(block_statement) = { - {symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) @@ -287,8 +287,6 @@ RESOLVE(block_header) { switch (token1.keyword) { - case parse_keyword_else: - return NO_PRODUCTION; case parse_keyword_for: return 0; case parse_keyword_while: @@ -443,6 +441,12 @@ RESOLVE(optional_background) } } +PRODUCTIONS(end_command) = +{ + {KEYWORD(parse_keyword_end)} +}; +RESOLVE_ONLY(end_command) + #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) { @@ -483,6 +487,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n TEST(argument) TEST(redirection) TEST(optional_background) + TEST(end_command) case parse_token_type_string: case parse_token_type_pipe: diff --git a/parse_tree.cpp b/parse_tree.cpp index 3521dedfe..b8cb348c0 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -110,6 +110,10 @@ wcstring token_type_description(parse_token_type_t type) return L"symbol_argument"; case symbol_redirection: return L"symbol_redirection"; + case symbol_optional_background: + return L"optional_background"; + case symbol_end_command: + return L"symbol_end_command"; case parse_token_type_string: @@ -124,8 +128,7 @@ wcstring token_type_description(parse_token_type_t type) return L"token_end"; case parse_token_type_terminate: return L"token_terminate"; - case symbol_optional_background: - return L"optional_background"; + case parse_special_type_parse_error: return L"parse_error"; @@ -1057,21 +1060,37 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, pars return result; } -static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) +const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const { - if (parent.type == type) result->push_back(&parent); - for (size_t i=0; i < parent.child_count; i++) + const parse_node_t *ancestor = &node; + while ((ancestor = this->get_parent(*ancestor))) { - const parse_node_t *child = tree.get_child(parent, i); - assert(child != NULL); - find_nodes_recursive(tree, *child, type, result); + if (ancestor->type == desired_type) + { + break; + } + } + return ancestor; +} + +static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count) +{ + if (result->size() < max_count) + { + if (parent.type == type) result->push_back(&parent); + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = tree.get_child(parent, i); + assert(child != NULL); + find_nodes_recursive(tree, *child, type, result, max_count); + } } } -parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const +parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const { parse_node_list_t result; - find_nodes_recursive(*this, parent, type, &result); + find_nodes_recursive(*this, parent, type, &result, max_count); return result; } @@ -1188,6 +1207,37 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co return result; } +bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const +{ + // Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false + bool result = false; + const parse_node_t *ancestor = &node; + + if (ancestor) + ancestor = this->get_parent(*ancestor, symbol_decorated_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor, symbol_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor); + + if (ancestor) + { + if (ancestor->type == symbol_job_continuation) + { + // Second or more in a pipeline + result = true; + } + else if (ancestor->type == symbol_job && include_first) + { + // Check to see if we have a job continuation that's not empty + const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation); + result = (continuation != NULL && continuation->child_count > 0); + } + } + + return result; +} + enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const { assert(redirection_node.type == symbol_redirection); @@ -1205,3 +1255,17 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi } return result; } + +const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) +{ + const parse_node_t *result = NULL; + if (node.type == symbol_block_statement) + { + const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header); + if (block_header != NULL) + { + result = this->get_child(*block_header, 0); + } + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 6c8f20f73..e65d1bafd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -159,15 +159,19 @@ class parse_node_tree_t : public std::vector { public: - /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. + */ + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + + /* Returns the first ancestor of the given type, or NULL. */ + const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const; - /* Find all the nodes of a given type underneath a given node */ + /* Find all the nodes of a given type underneath a given node, up to max_count of them */ typedef std::vector parse_node_list_t; - parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const; /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; @@ -186,8 +190,14 @@ class parse_node_tree_t : public std::vector /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */ bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; + /* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */ + bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; + + /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ + const parse_node_t *header_node_for_block_statement(const parse_node_t &node); }; /* Fish grammar: @@ -210,19 +220,19 @@ class parse_node_tree_t : public std::vector # A block is a conditional, loop, or begin/end - if_statement = if_clause else_clause arguments_or_redirections_list + if_statement = if_clause else_clause end_command arguments_or_redirections_list if_clause = job STATEMENT_TERMINATOR job_list else_clause = | else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list end_command case_item_list = | case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - block_statement = block_header job_list arguments_or_redirections_list + block_statement = block_header job_list end_command arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement @@ -252,6 +262,8 @@ class parse_node_tree_t : public std::vector terminator = | optional_background = | + + end_command = END */ diff --git a/parser.cpp b/parser.cpp index 221b93dac..6053b4a34 100644 --- a/parser.cpp +++ b/parser.cpp @@ -44,6 +44,7 @@ The fish parser. Contains functions for parsing and evaluating code. #include "path.h" #include "signal.h" #include "complete.h" +#include "parse_tree.h" /** Maximum number of function calls, i.e. recursion depth. @@ -550,14 +551,16 @@ void parser_t::allow_function() forbidden_function.pop_back(); } -void parser_t::error(int ec, int p, const wchar_t *str, ...) +void parser_t::error(int ec, size_t p, const wchar_t *str, ...) { va_list va; CHECK(str,); error_code = ec; - err_pos = p; + + assert(p <= INT_MAX); + err_pos = static_cast(p); va_start(va, str); err_buff = vformat_string(str, va); @@ -1148,7 +1151,7 @@ const wchar_t *parser_t::get_buffer() const } -int parser_t::is_help(const wchar_t *s, int min_match) const +int parser_t::is_help(const wchar_t *s, int min_match) { CHECK(s, 0); @@ -2889,6 +2892,21 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre return err; } +// Check if the first argument under the given node is --help +static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src) +{ + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1); + if (! arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + return is_help; +} + // helper type used in parser::test below struct block_info_t { @@ -2897,6 +2915,191 @@ struct block_info_t }; parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) +{ + ASSERT_IS_MAIN_THREAD(); + + if (! buff) + return PARSER_TEST_ERROR; + + const wcstring buff_src = buff; + parse_node_tree_t node_tree; + parse_error_list_t parse_errors; + + // Whether we encountered a parse error + bool errored = false; + long error_line = -1; + + // Whether we encountered an unclosed block + // We detect this via an 'end_command' block without source + bool has_unclosed_block = false; + + bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); + if (! parsed) + { + // report errors + if (out) + { + for (size_t i=0; i < parse_errors.size(); i++) + { + const parse_error_t &error = parse_errors.at(i); + this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str()); + } + } + errored = true; + error_line = __LINE__; + } + + // Expand all commands + // Verify 'or' and 'and' not used inside pipelines + // Verify pipes via parser_is_pipe_forbidden + // Verify return only within a function + + if (! errored) + { + const size_t node_tree_size = node_tree.size(); + for (size_t i=0; i < node_tree_size; i++) + { + const parse_node_t &node = node_tree.at(i); + if (node.type == symbol_end_command && ! node.has_source()) + { + // an 'end' without source is an unclosed block + has_unclosed_block = true; + } + else if (node.type == symbol_plain_statement) + { + wcstring command; + if (node_tree.command_for_plain_statement(node, buff_src, &command)) + { + // Check that we can expand the command + if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str()); + errored = true; + error_line = __LINE__; + } + + // Check that pipes are sound + bool is_boolean_command = contains(command, L"or", L"and"); + bool is_pipe_forbidden = parser_is_pipe_forbidden(command); + if (! errored && (is_boolean_command || is_pipe_forbidden)) + { + // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all + if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) + { + error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + + // Check that we don't return from outside a function + // But we allow it if it's 'return --help' + if (! errored && command == L"return") + { + const parse_node_t *ancestor = &node; + bool found_function = false; + while (ancestor != NULL) + { + const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) + { + found_function = true; + break; + } + ancestor = node_tree.get_parent(*ancestor); + + } + if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) + { + error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + + // Check that we don't return from outside a function + if (! errored && (command == L"break" || command == L"continue")) + { + // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. + // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. + bool found_loop = false, end_search = false; + const parse_node_t *ancestor = &node; + while (ancestor != NULL && ! end_search) + { + bool end_search = false; + const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (loop_or_function_header != NULL) + { + switch (loop_or_function_header->type) + { + case symbol_while_header: + case symbol_for_header: + // this is a loop header, so we can break or continue + found_loop = true; + end_search = true; + + case symbol_function_header: + // this is a function header, so we cannot break or continue. We stop our search here. + found_loop = false; + end_search = true; + break; + + default: + // most likely begin / end style block, which makes no difference + break; + } + } + ancestor = node_tree.get_parent(*ancestor); + } + + + + const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header); + if (function_node == NULL) + { + // Ok, this looks bad: return not in a function! + // But we allow it if it's 'return --help' + // Get the arguments + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument); + if (! arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(buff_src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + + // If it's not help, then it's an invalid return + if (! is_help) + { + error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + } + } + } + } + } + + parser_test_error_bits_t res = 0; + + if (errored) + res |= PARSER_TEST_ERROR; + + if (has_unclosed_block) + res |= PARSER_TEST_INCOMPLETE; + + error_code=0; + + + return res; + +} + +parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); diff --git a/parser.h b/parser.h index 39973665d..b2fbfe134 100644 --- a/parser.h +++ b/parser.h @@ -420,7 +420,7 @@ class parser_t \param p The character offset at which the error occured \param str The printf-style error message filter */ - void error(int ec, int p, const wchar_t *str, ...); + void error(int ec, size_t p, const wchar_t *str, ...); /** Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'. @@ -488,6 +488,7 @@ class parser_t \param prefix the prefix string to prepend to each error message written to the \c out buffer */ parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); /** Test if the specified string can be parsed as an argument list, @@ -524,7 +525,7 @@ class parser_t \param s the string to test \param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed. */ - int is_help(const wchar_t *s, int min_match) const; + static int is_help(const wchar_t *s, int min_match); /** Returns the file currently evaluated by the parser. This can be