From 14741518a7fc52f110dcd5ca71216b423520b789 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 6 Oct 2013 16:23:45 -0700 Subject: [PATCH] Command highlighting now works --- fish_tests.cpp | 17 +++-- highlight.cpp | 168 +++++++++++++++++++++++++++++++++++++----- highlight.h | 4 + parse_productions.cpp | 42 +++++------ parse_tree.cpp | 117 +++++++++++++++++++++++------ parse_tree.h | 25 ++++++- 6 files changed, 299 insertions(+), 74 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 010303191..894408591 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1922,11 +1922,10 @@ static void test_new_parser_fuzzing(void) { parse_t parser; parse_node_tree_t parse_tree; - parse_error_list_t errors; for (size_t i=0; i < len; i++) { const parser_fuzz_token_t &token = tokens[i]; - parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors); + parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL); } // keep going until we wrap @@ -1940,7 +1939,7 @@ static void test_new_parser_fuzzing(void) __attribute__((unused)) static void test_new_parser(void) { - say(L"Testing new parser!"); + say(L"Testing new parser"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; @@ -2050,7 +2049,7 @@ static void test_highlighting(void) if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); - err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + err(L"Wrong color at index %lu in text (expected %#x, actual %#x):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); } } } @@ -2079,10 +2078,12 @@ int main(int argc, char **argv) reader_init(); env_init(); - //test_new_parser_fuzzing(); - //test_new_parser_correctness(); - //test_highlighting(); - //test_new_parser(); + test_highlighting(); + return 0; + test_new_parser_fuzzing(); + test_new_parser_correctness(); + test_highlighting(); + test_new_parser(); test_format(); test_escape(); diff --git a/highlight.cpp b/highlight.cpp index dc221a2f2..9837d95cc 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -667,18 +667,19 @@ static void highlight_param(const wcstring &buffstr, std::vector &colors, w } } -static int has_expand_reserved(const wchar_t *str) +static bool has_expand_reserved(const wcstring &str) { - while (*str) + bool result = false; + for (size_t i=0; i < str.size(); i++) { - if (*str >= EXPAND_RESERVED && - *str <= EXPAND_RESERVED_END) + wchar_t wc = str.at(i); + if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END) { - return 1; + result = true; + break; } - str++; } - return 0; + return result; } /* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ @@ -712,7 +713,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command /* Command. First check that the command actually exists. */ wcstring local_cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! expanded || has_expand_reserved(cmd)) { /* We can't expand this cmd, ignore it */ } @@ -1027,7 +1028,7 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const */ cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! expanded || has_expand_reserved(cmd)) { color.at(tok_get_pos(&tok)) = HIGHLIGHT_ERROR; } @@ -1308,17 +1309,22 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); - -// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { - ASSERT_IS_BACKGROUND_THREAD(); - if (0) + if (1) { highlight_shell_magic(buff, color, pos, error, vars); - return; } + else + { + highlight_shell_classic(buff, color, pos, error, vars); + } +} + +// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + ASSERT_IS_BACKGROUND_THREAD(); const size_t length = buff.size(); assert(buff.size() == color.size()); @@ -1461,6 +1467,7 @@ static void color_node(const parse_node_t &node, int color, std::vector &co std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } +/* This function is a disaster badly in need of refactoring */ static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) { const size_t buff_len = buffstr.size(); @@ -1743,6 +1750,28 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c } } +// Indicates whether the source range of the given node forms a valid path in the given working_directory +static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) +{ + if (! node.has_source()) + return false; + + + /* Get the node source, unescape it, and then pass it to is_potential_path along with the working directory (as a one element list) */ + bool result = false; + wcstring token(src, node.source_start, node.source_length); + if (unescape_string(token, 1)) + { + /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */ + if (! token.empty() && token.at(0) == HOME_DIRECTORY) + token.at(0) = L'~'; + + const wcstring_list_t working_directory_list(1, working_directory); + result = is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE); + } + return result; +} + // Color all of the arguments of the given command static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) { @@ -1754,22 +1783,87 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); param.assign(src, child->source_start, child->source_length); - color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL); + color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); } } +/* Color all the children of the command with the given type */ static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) { for (node_offset_t idx=0; idx < parent.child_count; idx++) { const parse_node_t *child = tree.get_child(parent, idx); - if (child != NULL && child->type == type && child->has_source()) + if (child != NULL && child->type == type) { color_node(*child, color, color_array); } } } +/* Color a possibly decorated command */ +static void color_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &cmd_node, enum parse_statement_decoration_t decoration, std::vector &color_array, const wcstring &working_directory, const env_vars_snapshot_t &vars) +{ + if (! cmd_node.has_source()) + return; + + /* Get the source of the command */ + wcstring cmd(src, cmd_node.source_start, cmd_node.source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (! expanded || has_expand_reserved(cmd)) + { + color_node(cmd_node, HIGHLIGHT_ERROR, color_array); + return; + } + + /* Determine which types we check, based on the decoration */ + bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; + if (decoration == parse_statement_decoration_command) + { + builtin_ok = false; + function_ok = false; + abbreviation_ok = false; + command_ok = true; + implicit_cd_ok = false; + } + else if (decoration == parse_statement_decoration_builtin) + { + builtin_ok = true; + function_ok = false; + abbreviation_ok = false; + command_ok = false; + implicit_cd_ok = false; + } + + /* Check them */ + bool is_valid = false; + + /* Builtins */ + if (! is_valid && builtin_ok) + is_valid = builtin_exists(cmd); + + /* Functions */ + if (! is_valid && function_ok) + is_valid = function_exists_no_autoload(cmd, vars); + + /* Abbreviations */ + if (! is_valid && abbreviation_ok) + is_valid = expand_abbreviation(cmd, NULL); + + /* Regular commands */ + if (! is_valid && command_ok) + is_valid = path_get_path(cmd, NULL, vars); + + /* Implicit cd */ + if (! is_valid && implicit_cd_ok) + is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); + + /* Color the node */ + int color = is_valid ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR; + color_node(cmd_node, color, color_array); +} + void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); @@ -1780,7 +1874,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t if (length == 0) return; - std::fill(color.begin(), color.end(), -1); + std::fill(color.begin(), color.end(), 0); /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); @@ -1790,6 +1884,11 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t parse_t parser; parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); +#if 0 + const wcstring dump = parse_dump_tree(parse_tree, buff); + fprintf(stderr, "%ls\n", dump.c_str()); +#endif + /* Walk the node tree */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { @@ -1837,7 +1936,12 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_plain_statement: { // Color the command - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + const parse_node_t *cmd = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd != NULL) + { + enum parse_statement_decoration_t decoration = static_cast(node.tag); + color_command(buff, parse_tree, *cmd, decoration, color, working_directory, vars); + } // Color arguments const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); @@ -1867,6 +1971,32 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t break; } } + + if (pos <= buff.size()) + { + /* If the cursor is over an argument, and that argument is a valid path, underline it */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + /* See if this node contains the cursor */ + if (node.type == symbol_argument && node.source_contains_location(pos)) + { + /* See if this is a valid path */ + if (node_is_potential_path(buff, node, working_directory)) + { + /* It is, underline it. */ + for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) + { + /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ + if (! (color.at(i) & HIGHLIGHT_ERROR)) + { + color.at(i) |= HIGHLIGHT_VALID_PATH; + } + } + } + } + } + } } /** diff --git a/highlight.h b/highlight.h index ea8557918..eb123258c 100644 --- a/highlight.h +++ b/highlight.h @@ -134,5 +134,9 @@ enum typedef unsigned int path_flags_t; bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path = NULL); +/* For testing */ +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); + #endif diff --git a/parse_productions.cpp b/parse_productions.cpp index 3165a2f0d..4876ba58e 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -30,7 +30,7 @@ static bool production_is_valid(const production_options_t production_list, prod #define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) #define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } -#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) +#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) /* A job_list is a list of jobs, separated by semicolons or newlines */ @@ -167,20 +167,20 @@ RESOLVE(statement) PRODUCTIONS(if_statement) = { - {symbol_if_clause, symbol_else_clause, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) PRODUCTIONS(if_clause) = { - { PRODUCE_KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } + { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } }; RESOLVE_ONLY(if_clause) PRODUCTIONS(else_clause) = { { }, - { PRODUCE_KEYWORD(parse_keyword_else), symbol_else_continuation } + { KEYWORD(parse_keyword_else), symbol_else_continuation } }; RESOLVE(else_clause) { @@ -211,7 +211,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { PRODUCE_KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, PRODUCE_KEYWORD(parse_keyword_end)} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} }; RESOLVE_ONLY(switch_statement) @@ -230,7 +230,7 @@ RESOLVE(case_item_list) PRODUCTIONS(case_item) = { - {PRODUCE_KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} + {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} }; RESOLVE_ONLY(case_item) @@ -258,7 +258,7 @@ RESOLVE(argument_list) PRODUCTIONS(block_statement) = { - {symbol_block_header, parse_token_type_end, symbol_job_list, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) @@ -290,34 +290,34 @@ RESOLVE(block_header) PRODUCTIONS(for_header) = { - {PRODUCE_KEYWORD(parse_keyword_for), parse_token_type_string, PRODUCE_KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} + {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(for_header) PRODUCTIONS(while_header) = { - {PRODUCE_KEYWORD(parse_keyword_while), symbol_statement} + {KEYWORD(parse_keyword_while), symbol_statement} }; RESOLVE_ONLY(while_header) PRODUCTIONS(begin_header) = { - {PRODUCE_KEYWORD(parse_keyword_begin)} + {KEYWORD(parse_keyword_begin)} }; RESOLVE_ONLY(begin_header) PRODUCTIONS(function_header) = { - {PRODUCE_KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} + {KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} }; RESOLVE_ONLY(function_header) /* A boolean statement is AND or OR or NOT */ PRODUCTIONS(boolean_statement) = { - {PRODUCE_KEYWORD(parse_keyword_and), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_or), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_not), symbol_statement} + {KEYWORD(parse_keyword_and), symbol_statement}, + {KEYWORD(parse_keyword_or), symbol_statement}, + {KEYWORD(parse_keyword_not), symbol_statement} }; RESOLVE(boolean_statement) { @@ -336,19 +336,19 @@ RESOLVE(boolean_statement) PRODUCTIONS(decorated_statement) = { - {PRODUCE_KEYWORD(parse_keyword_command), symbol_plain_statement}, - {PRODUCE_KEYWORD(parse_keyword_builtin), symbol_plain_statement}, - {symbol_plain_statement} + {symbol_plain_statement}, + {KEYWORD(parse_keyword_command), symbol_plain_statement}, + {KEYWORD(parse_keyword_builtin), symbol_plain_statement}, }; RESOLVE(decorated_statement) { switch (token_keyword) { - case parse_keyword_command: - return 0; - case parse_keyword_builtin: - return 1; default: + return 0; + case parse_keyword_command: + return 1; + case parse_keyword_builtin: return 2; } } diff --git a/parse_tree.cpp b/parse_tree.cpp index b64c6d9f3..698297160 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -339,9 +339,12 @@ class parse_ll_t /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ bool fatal_errored; + /* Whether we should collect error messages or not */ + bool should_generate_error_messages; + /* List of errors we have encountered */ parse_error_list_t errors; - + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ bool top_node_handle_terminal_types(parse_token_t token); @@ -453,7 +456,7 @@ class parse_ll_t public: /* Constructor */ - parse_ll_t() : fatal_errored(false) + parse_ll_t() : fatal_errored(false), should_generate_error_messages(true) { this->symbol_stack.reserve(16); this->nodes.reserve(64); @@ -469,6 +472,12 @@ class parse_ll_t return this->fatal_errored; } + /* Indicate whether we want to generate error messages */ + void set_should_generate_error_messages(bool flag) + { + this->should_generate_error_messages = flag; + } + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ void reset_symbols(void); @@ -564,30 +573,36 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) { - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - - err.source_start = token.source_start; - err.source_length = token.source_length; - this->errors.push_back(err); this->fatal_errored = true; + if (this->should_generate_error_messages) + { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + } } void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { - wcstring desc = token_type_description(token.type); - parse_error_t error; - error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); - error.source_start = token.source_start; - error.source_start = token.source_length; - errors.push_back(error); fatal_errored = true; + if (this->should_generate_error_messages) + { + wcstring desc = token_type_description(token.type); + parse_error_t error; + error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + error.source_start = token.source_start; + error.source_start = token.source_length; + errors.push_back(error); + } } void parse_ll_t::reset_symbols(void) @@ -725,7 +740,14 @@ void parse_ll_t::accept_token(parse_token_t token) const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); if (production == NULL) { - this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + if (should_generate_error_messages) + { + this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + } + else + { + this->parse_error(token, NULL); + } // parse_error sets fatal_errored, which ends the loop } else @@ -737,7 +759,7 @@ void parse_ll_t::accept_token(parse_token_t token) // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { - this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str()); + this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); } } } @@ -793,11 +815,46 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } +// Set type-specific tags for nodes +// This is not in parse_ll_t because it knows about different node types +static void tag_nodes(const wcstring &src, parse_node_tree_t *tree) +{ + size_t count = tree->size(); + for (size_t i=0; i < count; i++) + { + const parse_node_t &node = tree->at(i); + switch (node.type) + { + case symbol_decorated_statement: + { + // Set a tag on the plain statement to indicate the decoration type + // The decoration types matches the production + bool is_decorated = (node.production_idx > 0); + + // Get the plain statement and set the tag equal to the production index we used + // This is an enum parse_statement_decoration_t + node_offset_t statement_idx = (is_decorated ? 1 : 0); + parse_node_t *plain_statement = tree->get_child(node, statement_idx, symbol_plain_statement); + if (plain_statement != NULL) + { + plain_statement->tag = static_cast(node.production_idx); + } + } + break; + + default: + break; + } + } +} + bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; + + this->parser->set_should_generate_error_messages(errors != NULL); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok)) @@ -835,7 +892,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Teach each node where its source range is this->parser->determine_node_ranges(); - + + // Tag nodes + #if 0 wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); @@ -845,6 +904,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Acquire the output from the parser this->parser->acquire_output(output, errors); + // Set node tags + tag_nodes(str, output); + // Indicate if we had a fatal error return ! this->parser->has_fatal_error(); } @@ -859,6 +921,9 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo token.keyword = keyword; token.source_start = -1; token.source_length = 0; + + bool wants_errors = (errors != NULL); + this->parser->set_should_generate_error_messages(wants_errors); this->parser->accept_token(token); @@ -889,6 +954,14 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } +/* Hackish non-const version of get_child */ +parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +{ + const parse_node_tree_t *const_this = this; + const parse_node_t *result = const_this->get_child(parent, which, expected_type); + return const_cast(result); +} + static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) { if (parent.type == type) result->push_back(&parent); diff --git a/parse_tree.h b/parse_tree.h index 18e3cffa2..f577a7def 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -124,7 +124,7 @@ enum /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */ parse_flag_continue_after_error = 1 << 0, - + /* Include comment tokens */ parse_flag_include_comments = 1 << 1 }; @@ -155,7 +155,7 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); -/** Base class for nodes of a parse tree */ +/** Class for nodes of a parse tree */ class parse_node_t { public: @@ -193,24 +193,40 @@ class parse_node_t return child_start + which; } + /* Indicate if this node has a range of source code associated with it */ bool has_source() const { return source_start != (size_t)(-1); } + + /* Indicate if this node's source range contains a given location. The funny math makes this modulo-overflow safe, though overflow is not expected. */ + bool source_contains_location(size_t where) const + { + return this->has_source() && where >= source_start && where - source_start < source_length; + } }; +/* The parse tree itself */ class parse_node_tree_t : public std::vector { public: /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; +/* Statement decorations, stored in the tag of plain_statement. This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; /* Fish grammar: @@ -259,9 +275,10 @@ class parse_node_tree_t : public std::vector boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" +# The tag of a plain statement indicates which mode to use - decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list optional_background + decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement + plain_statement = arguments_or_redirections_list optional_background arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list