From 45c9e3b0f176bbd5e487b95c4f20e6eec00ea7ca Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jun 2020 16:53:58 -0700 Subject: [PATCH 01/13] parsed_source_ref to always make a job_list Removed an unnecessary param in preparation for more changes. --- src/parse_tree.cpp | 6 +++--- src/parse_tree.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 1373b1229..ee0281bbf 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -1215,10 +1215,10 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } -parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors, - parse_token_type_t goal) { +parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, + parse_error_list_t *errors) { parse_node_tree_t tree; - if (!parse_tree_from_string(src, flags, &tree, errors, goal)) return {}; + if (!parse_tree_from_string(src, flags, &tree, errors, symbol_job_list)) return {}; return std::make_shared(std::move(src), std::move(tree)); } diff --git a/src/parse_tree.h b/src/parse_tree.h index 61082d227..3e98104fa 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -225,8 +225,8 @@ struct parsed_source_t { }; /// Return a shared pointer to parsed_source_t, or null on failure. using parsed_source_ref_t = std::shared_ptr; -parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors, - parse_token_type_t goal = symbol_job_list); +parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, + parse_error_list_t *errors); /// Error message for improper use of the exec builtin. 
#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline") From 4d4455007d3928e7c81b8c43c07c973e83c86175 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 15:27:10 -0700 Subject: [PATCH 02/13] Introduce a new fish ast This is the first commit of a series intended to replace the existing "parse tree" machinery. It adds a new abstract syntax tree and uses a more normal recursive descent parser. Initially there are no users of the new ast. The following commits will replace parse_tree -> ast for all usages. --- CMakeLists.txt | 2 +- src/ast.cpp | 1206 +++++++++++++++++++++++++++++++++++++++ src/ast.h | 1018 +++++++++++++++++++++++++++++++++ src/ast_node_types.inc | 60 ++ src/fish_indent.cpp | 7 + src/flog.h | 1 + src/parse_constants.h | 34 ++ src/parse_grammar.h | 2 +- src/parse_productions.h | 1 + src/parse_tree.cpp | 11 +- src/parse_tree.h | 38 +- 11 files changed, 2350 insertions(+), 30 deletions(-) create mode 100644 src/ast.cpp create mode 100644 src/ast.h create mode 100644 src/ast_node_types.inc diff --git a/CMakeLists.txt b/CMakeLists.txt index e5284f092..a360bde47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,7 +121,7 @@ set(FISH_SRCS src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp - src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp + src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp ) # Header files are just globbed. diff --git a/src/ast.cpp b/src/ast.cpp new file mode 100644 index 000000000..3f5013258 --- /dev/null +++ b/src/ast.cpp @@ -0,0 +1,1206 @@ +#include "config.h" // IWYU pragma: keep + +#include "ast.h" + +#include + +#include "common.h" +#include "flog.h" +#include "parse_constants.h" +#include "parse_tree.h" +#include "wutil.h" + +namespace { + +/// \return tokenizer flags corresponding to parse tree flags. 
+static tok_flags_t tokenizer_flags_from_parse_flags(parse_tree_flags_t flags) {
+    // Start with no tokenizer flags and translate each relevant parse flag below.
+    tok_flags_t tok_flags = 0;
+    // Note we do not need to respect parse_flag_show_blank_lines, no clients are interested in
+    // them.
+    if (flags & parse_flag_include_comments) tok_flags |= TOK_SHOW_COMMENTS;
+    if (flags & parse_flag_accept_incomplete_tokens) tok_flags |= TOK_ACCEPT_UNFINISHED;
+    if (flags & parse_flag_continue_after_error) tok_flags |= TOK_CONTINUE_AFTER_ERROR;
+    return tok_flags;
+}
+
+/// A token stream generates a sequence of parser tokens, permitting bounded lookahead
+/// (at most kMaxLookahead tokens). Comment tokens are never returned to the caller; their
+/// source ranges are recorded in comment_ranges instead.
+class token_stream_t {
+   public:
+    /// Construct a stream over \p src, tokenized according to \p flags.
+    /// Only a reference to \p src is stored (see src_), so \p src must outlive this object.
+    explicit token_stream_t(const wcstring &src, parse_tree_flags_t flags)
+        : src_(src), tok_(src_.c_str(), tokenizer_flags_from_parse_flags(flags)) {}
+
+    /// \return the token at the given index, without popping it. If the token stream is exhausted,
+    /// it will have parse_token_type_terminate. idx = 0 means the next token, idx = 1 means the
+    /// next-next token, and so forth.
+    /// We must have that idx < kMaxLookahead.
+    const parse_token_t &peek(size_t idx = 0) {
+        assert(idx < kMaxLookahead && "Trying to look too far ahead");
+        // Lazily pull tokens from the tokenizer until the requested index is buffered.
+        while (idx >= count_) {
+            lookahead_.at(mask(start_ + count_)) = next_from_tok();
+            count_ += 1;
+        }
+        return lookahead_.at(mask(start_ + idx));
+    }
+
+    /// Pop the next token.
+    parse_token_t pop() {
+        // Nothing buffered: bypass the lookahead buffer and read straight from the tokenizer.
+        if (count_ == 0) {
+            return next_from_tok();
+        }
+        // Otherwise take the oldest buffered token and advance the start of the ring.
+        parse_token_t result = std::move(lookahead_[start_]);
+        start_ = mask(start_ + 1);
+        count_ -= 1;
+        return result;
+    }
+
+    /// Provide the original source code.
+    const wcstring &source() const { return src_; }
+
+    /// Any comment nodes are collected here.
+    /// These are only collected if parse_flag_include_comments is set.
+    // NOTE(review): the element type of this vector is missing in this copy of the patch
+    // (template arguments appear to have been stripped); next_from_tok() pushes res.range()
+    // into it. Confirm against the upstream commit.
+    std::vector comment_ranges;
+
+   private:
+    // Helper to mask our circular buffer.
+    static constexpr size_t mask(size_t idx) { return idx % kMaxLookahead; }
+
+    // Read tokens from the tokenizer until a non-comment token is found and return it.
+    // The range of every comment token skipped along the way is appended to comment_ranges.
+    parse_token_t next_from_tok() {
+        for (;;) {
+            // NOTE(review): the template argument of maybe_t is missing in this copy of the
+            // patch - confirm against the upstream commit.
+            maybe_t tokenizer_tok{};
+            parse_token_t res = next_parse_token(&tok_, &tokenizer_tok, &storage_);
+            if (res.type == parse_special_type_comment) {
+                comment_ranges.push_back(res.range());
+                continue;
+            }
+            return res;
+        }
+    }
+
+    // The maximum number of lookahead supported.
+    static constexpr size_t kMaxLookahead = 2;
+
+    // We implement a queue with a simple circular buffer.
+    // Note that peek() returns an address, so we must not move elements which are peek'd.
+    // This prevents using vector (which may reallocate).
+    // Deque would work but is too heavyweight for just 2 items.
+    // NOTE(review): the std::array template arguments are missing in this copy of the patch;
+    // presumably an array of kMaxLookahead parse_token_t values, judging by peek()'s return
+    // type and the two initializers - confirm against the upstream commit.
+    std::array lookahead_ = {
+        {token_type_invalid, token_type_invalid}};
+
+    // Starting index in our lookahead.
+    // The "first" token is at this index.
+    size_t start_ = 0;
+
+    // Number of items in our lookahead.
+    size_t count_ = 0;
+
+    // A reference to the original source.
+    const wcstring &src_;
+
+    // The tokenizer to generate new tokens.
+    tokenizer_t tok_;
+
+    // Temporary storage.
+    wcstring storage_;
+};
+
+}  // namespace
+
+namespace ast {
+
+/// Given a node which we believe to be some sort of block statement, attempt to return a source
+/// range for the block's keyword (for, if, etc) and a user-presentable description. This is used to
+/// provide better error messages. \return an empty source range paired with a null description if
+/// we couldn't find it. Note at this point the parse tree is incomplete; in particular parent
+/// nodes are not set.
+// NOTE(review): the template arguments of std::pair and of every as() call below are missing in
+// this copy of the patch (they appear to have been stripped). Judging by the make_pair calls the
+// result pairs a source_range_t with a wide string literal - confirm against the upstream commit.
+static std::pair find_block_open_keyword(const node_t *node) {
+    const node_t *cursor = node;
+    while (cursor != nullptr) {
+        switch (cursor->type) {
+            case type_t::block_statement:
+                // A block statement's keyword lives in its header; continue the search there.
+                cursor = cursor->as()->header.contents.get();
+                break;
+            case type_t::for_header: {
+                const auto *h = cursor->as();
+                return std::make_pair(h->kw_for.range, L"for loop");
+            }
+            case type_t::while_header: {
+                const auto *h = cursor->as();
+                return std::make_pair(h->kw_while.range, L"while loop");
+            }
+            case type_t::function_header: {
+                const auto *h = cursor->as();
+                return std::make_pair(h->kw_function.range, L"function definition");
+            }
+            case type_t::begin_header: {
+                const auto *h = cursor->as();
+                return std::make_pair(h->kw_begin.range, L"begin");
+            }
+            case type_t::if_statement: {
+                // The 'if' keyword is stored on the statement's leading if clause.
+                const auto *h = cursor->as();
+                return std::make_pair(h->if_clause.kw_if.range, L"if statement");
+            }
+            case type_t::switch_statement: {
+                const auto *h = cursor->as();
+                return std::make_pair(h->kw_switch.range, L"switch statement");
+            }
+            default:
+                // Not a block-like node: report "not found" as an empty range and null text.
+                return std::make_pair(source_range_t{}, nullptr);
+        }
+    }
+    // Reached only if the cursor became null (a null input, or a block header with no contents).
+    return std::make_pair(source_range_t{}, nullptr);
+}
+
+/// \return the decoration for this statement.
+/// A statement without a decoration keyword yields parse_statement_decoration_none; otherwise
+/// the 'command', 'builtin' or 'exec' keyword maps to the matching decoration value.
+parse_statement_decoration_t decorated_statement_t::decoration() const {
+    if (!opt_decoration) {
+        return parse_statement_decoration_none;
+    }
+    switch (opt_decoration->kw) {
+        case parse_keyword_t::kw_command:
+            return parse_statement_decoration_command;
+        case parse_keyword_t::kw_builtin:
+            return parse_statement_decoration_builtin;
+        case parse_keyword_t::kw_exec:
+            return parse_statement_decoration_exec;
+        default:
+            // Any other keyword here is treated as a programming error; when asserts are
+            // compiled out we fall back to "no decoration".
+            assert(0 && "Unexpected keyword in statement decoration");
+            return parse_statement_decoration_none;
+    }
+}
+
+/// \return a string literal name for an ast type.
+const wchar_t *ast_type_to_string(type_t type) {
+    switch (type) {
+// Each ELEM(T) expands to a case returning the stringified name T as a wide literal.
+// The cases themselves come from ast_node_types.inc, which presumably invokes ELEM once per
+// node type - verify against that file.
+#define ELEM(T)     \
+    case type_t::T: \
+        return L"" #T;
+#include "ast_node_types.inc"
+    }
+    assert(0 && "unreachable");
+    return L"(unknown)";
+}
+
+/// \return a description of this node: its type name, followed by the quoted token type or
+/// keyword description when the corresponding try_as() cast succeeds.
+// NOTE(review): the template arguments of both try_as() calls are missing in this copy of the
+// patch; from the fields used (n->type, n->kw) they presumably select the token and keyword
+// node bases respectively - confirm against the upstream commit.
+wcstring node_t::describe() const {
+    wcstring res = ast_type_to_string(this->type);
+    if (const auto *n = this->try_as()) {
+        append_format(res, L" '%ls'", token_type_description(n->type));
+    } else if (const auto *n = this->try_as()) {
+        append_format(res, L" '%ls'", keyword_description(n->kw));
+    }
+    return res;
+}
+
+node_t::~node_t() = default;
+
+/// From C++14.
+// NOTE(review): the template parameter list and the std::enable_if arguments are missing in this
+// copy of the patch - confirm against the upstream commit.
+template
+using enable_if_t = typename std::enable_if::type;
+
+/// Visitor which accumulates the union of the source ranges of the nodes it visits, and records
+/// whether any visited node was marked unsourced.
+// NOTE(review): the template headers and the enable_if_t conditions that distinguish the two
+// visit() overloads are missing in this copy of the patch - confirm against the upstream commit.
+struct source_range_visitor_t {
+    // Overload for nodes that carry their own 'unsourced' flag and 'range'.
+    template
+    enable_if_t visit(const Node &node) {
+        if (node.unsourced) any_unsourced = true;
+        // Union with our range.
+        // Zero-length ranges are ignored; the first non-empty range seeds the total.
+        if (node.range.length > 0) {
+            if (total.length == 0) {
+                total = node.range;
+            } else {
+                // Compute the new end before moving the start, since length depends on both.
+                auto end =
+                    std::max(total.start + total.length, node.range.start + node.range.length);
+                total.start = std::min(total.start, node.range.start);
+                total.length = end - total.start;
+            }
+        }
+    }
+
+    // Other node types recurse.
+    template
+    enable_if_t visit(const Node &node) {
+        node_visitor(*this).accept_children_of(node);
+    }
+
+    // Total range we have encountered.
+    source_range_t total{0, 0};
+
+    // Whether any node was found to be unsourced.
+    bool any_unsourced{false};
+};
+
+/// \return the union of the source ranges found beneath this node, or none() if any visited node
+/// was unsourced. If no visited node had a non-empty range, the result is the range {0, 0}.
+// NOTE(review): the template argument of the maybe_t return type is missing in this copy of the
+// patch; the function returns v.total, a source_range_t - confirm against the upstream commit.
+maybe_t node_t::try_source_range() const {
+    source_range_visitor_t v;
+    node_visitor(v).accept(this);
+    if (v.any_unsourced) return none();
+    return v.total;
+}
+
+// Helper to describe a list of keywords.
+// TODO: these need to be localized properly.
+static wcstring keywords_user_presentable_description(std::initializer_list kws) { + assert(kws.size() > 0 && "Should not be empty list"); + if (kws.size() == 1) { + return format_string(L"keyword '%ls'", keyword_description(*kws.begin())); + } + size_t idx = 0; + wcstring res = L"keywords "; + for (parse_keyword_t kw : kws) { + const wchar_t *optor = (idx++ ? L" or " : L""); + append_format(res, L"%ls'%ls'", optor, keyword_description(kw)); + } + return res; +} + +// Helper to describe a list of token types. +// TODO: these need to be localized properly. +static wcstring token_types_user_presentable_description( + std::initializer_list types) { + assert(types.size() > 0 && "Should not be empty list"); + if (types.size() == 1) { + return token_type_user_presentable_description(*types.begin()); + } + size_t idx = 0; + wcstring res; + for (parse_token_type_t type : types) { + const wchar_t *optor = (idx++ ? L" or " : L""); + append_format(res, L"%ls%ls", optor, token_type_user_presentable_description(type).c_str()); + } + return res; +} + +class ast_t::populator_t { + template + using unique_ptr = std::unique_ptr; + + public: + // Populate \p ast from \p src and \p flags, returning errors (if not null). + populator_t(ast_t *ast, const wcstring &src, parse_tree_flags_t flags, type_t top_type, + parse_error_list_t *out_errors) + : ast_(ast), + flags_(flags), + tokens_(src, flags), + top_type_(top_type), + out_errors_(out_errors) { + assert((top_type == type_t::job_list || top_type == type_t::freestanding_argument_list) && + "Invalid top type"); + if (top_type == type_t::job_list) { + unique_ptr list = allocate(); + this->populate_list(*list, true /* exhaust_stream */); + this->ast_->top_ = std::move(list); + } else { + unique_ptr list = + allocate(); + this->populate_list(list->arguments, true /* exhaust_stream */); + this->ast_->top_ = std::move(list); + } + // Chomp trailing extras, etc. + chomp_extras(type_t::job_list); + + // Acquire any comments. 
+ this->ast_->extras_.comments = std::move(tokens_.comment_ranges); + + assert(this->ast_->top_ && "Should have parsed a node"); + } + + // Given a node type, allocate it and invoke its default constructor. + // \return the resulting Node pointer. It is never null. + template + unique_ptr allocate() { + unique_ptr node = make_unique(); + FLOGF(ast_construction, L"%*smake %ls %p", spaces(), "", ast_type_to_string(Node::AstType), + node.get()); + return node; + } + + // Given a node type, allocate it, invoke its default constructor, + // and then visit it as a field. + // \return the resulting Node pointer. It is never null. + template + unique_ptr allocate_visit() { + unique_ptr node = allocate(); + this->visit_node_field(*node); + return node; + } + + /// Helper for FLOGF. This returns a number of spaces appropriate for a '%*c' format. + int spaces() const { return static_cast(visit_stack_.size() * 2); } + + /// The status of our parser. + enum class status_t { + // Parsing is going just fine, thanks for asking. + ok, + + // We have exhausted the token stream, but the caller was OK with an incomplete parse tree. + // All further leaf nodes should have the unsourced flag set. + unsourcing, + + // We encountered an parse error and are "unwinding." + // Do not consume any tokens until we get back to a list type which stops unwinding. + unwinding, + }; + + /// \return the parser's status. + status_t status() { + if (unwinding_) { + return status_t::unwinding; + } else if ((flags_ & parse_flag_leave_unterminated) && + peek_type() == parse_token_type_terminate) { + return status_t::unsourcing; + } + return status_t::ok; + } + + /// \return whether the status is unwinding. + /// This is more efficient than checking the status directly. + bool is_unwinding() { return unwinding_; } + + /// \return whether any leaf nodes we visit should be marked as unsourced. 
+ bool unsource_leaves() { + status_t s = status(); + return s == status_t::unsourcing || s == status_t::unwinding; + } + + /// \return whether we permit an incomplete parse tree. + bool allow_incomplete() const { return flags_ & parse_flag_leave_unterminated; } + + /// This indicates a bug in fish code. + void internal_error(const char *func, const wchar_t *fmt, ...) const { + va_list va; + va_start(va, fmt); + wcstring msg = vformat_string(fmt, va); + va_end(va); + + FLOG(debug, "Internal parse error from", func, "- this indicates a bug in fish.", msg); + FLOG(debug, "Encountered while parsing:<<<\n%ls\n>>>", tokens_.source().c_str()); + abort(); + } + + /// \return whether a list type \p type allows arbitrary newlines in it. + bool list_type_chomps_newlines(type_t type) const { + switch (type) { + case type_t::argument_list: + // Hackish. If we are producing a freestanding argument list, then it allows + // semicolons, for hysterical raisins. + return top_type_ == type_t::freestanding_argument_list; + + case type_t::argument_or_redirection_list: + // No newlines inside arguments. + return false; + + case type_t::variable_assignment_list: + // No newlines inside variable assignment lists. + return false; + + case type_t::job_list: + // Like echo a \n \n echo b + return true; + + case type_t::case_item_list: + // Like switch foo \n \n \n case a \n end + return true; + + case type_t::andor_job_list: + // Like while true ; \n \n and true ; end + return true; + + case type_t::elseif_clause_list: + // Like if true ; \n \n else if false; end + return true; + + case type_t::job_conjunction_continuation_list: + // This would be like echo a && echo b \n && echo c + // We could conceivably support this but do not now. + return false; + + case type_t::job_continuation_list: + // This would be like echo a \n | echo b + // We could conceivably support this but do not now. 
+ return false; + + default: + internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type)); + return false; + } + } + + /// \return whether a list type \p type allows arbitrary semicolons in it. + bool list_type_chomps_semis(type_t type) const { + switch (type) { + case type_t::argument_list: + // Hackish. If we are producing a freestanding argument list, then it allows + // semicolons, for hysterical raisins. + // That is, this is OK: complete -c foo -a 'x ; y ; z' + // But this is not: foo x ; y ; z + return top_type_ == type_t::freestanding_argument_list; + + case type_t::argument_or_redirection_list: + case type_t::variable_assignment_list: + return false; + + case type_t::job_list: + // Like echo a ; ; echo b + return true; + + case type_t::case_item_list: + // Like switch foo ; ; ; case a \n end + // This is historically allowed. + return true; + + case type_t::andor_job_list: + // Like while true ; ; ; and true ; end + return true; + + case type_t::elseif_clause_list: + // Like if true ; ; ; else if false; end + return false; + + case type_t::job_conjunction_continuation_list: + // Like echo a ; ; && echo b. Not supported. + return false; + + case type_t::job_continuation_list: + // This would be like echo a ; | echo b + // Not supported. + // We could conceivably support this but do not now. + return false; + + default: + internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type)); + return false; + } + } + + // Chomp extra comments, semicolons, etc. for a given list type. + void chomp_extras(type_t type) { + bool chomp_semis = list_type_chomps_semis(type); + bool chomp_newlines = list_type_chomps_newlines(type); + for (;;) { + const auto &peek = this->tokens_.peek(); + if (chomp_newlines && peek.type == parse_token_type_end && peek.is_newline) { + // Just skip this newline, no need to save it. 
+ this->tokens_.pop(); + } else if (chomp_semis && peek.type == parse_token_type_end && !peek.is_newline) { + auto tok = this->tokens_.pop(); + // Perhaps save this extra semi. + if (flags_ & parse_flag_show_extra_semis) { + ast_->extras_.semis.push_back(tok.range()); + } + } else { + break; + } + } + } + + /// \return whether a list type should recover from errors.s + /// That is, whether we should stop unwinding when we encounter this type. + bool list_type_stops_unwind(type_t type) const { + return type == type_t::job_list && (flags_ & parse_flag_continue_after_error); + } + + /// Report an error based on \p fmt for the source range \p range. + void parse_error_impl(source_range_t range, parse_error_code_t code, const wchar_t *fmt, + va_list va) { + ast_->any_error_ = true; + + // Ignore additional parse errors while unwinding. + // These may come about e.g. from `true | and`. + if (unwinding_) return; + unwinding_ = true; + + FLOGF(ast_construction, L"%*sparse error - begin unwinding", spaces(), ""); + // TODO: can store this conditionally dependent on flags. + if (range.start != SOURCE_OFFSET_INVALID) { + ast_->extras_.errors.push_back(range); + } + + if (out_errors_) { + parse_error_t err; + err.text = vformat_string(fmt, va); + err.code = code; + err.source_start = range.start; + err.source_length = range.length; + out_errors_->push_back(std::move(err)); + } + } + + /// Report an error based on \p fmt for the source range \p range. + void parse_error(source_range_t range, parse_error_code_t code, const wchar_t *fmt, ...) { + va_list va; + va_start(va, fmt); + parse_error_impl(range, code, fmt, va); + va_end(va); + } + + /// Report an error based on \p fmt for the source range \p range. + void parse_error(const parse_token_t &token, parse_error_code_t code, const wchar_t *fmt, ...) { + va_list va; + va_start(va, fmt); + parse_error_impl(token.range(), code, fmt, va); + va_end(va); + } + + // \return a reference to a non-comment token at index \p idx. 
+ const parse_token_t &peek_token(size_t idx = 0) { return tokens_.peek(idx); } + + // \return the type of a non-comment token. + parse_token_type_t peek_type(size_t idx = 0) { return peek_token(idx).type; } + + // Consume the next token, chomping any comments. + // It is an error to call this unless we know there is a non-terminate token available. + // \return the token. + parse_token_t consume_any_token() { + parse_token_t tok = tokens_.pop(); + assert(tok.type != parse_special_type_comment && "Should not be a comment"); + assert(tok.type != parse_token_type_terminate && + "Cannot consume terminate token, caller should check status first"); + return tok; + } + + // Consume the next token which is expected to be of the given type. + source_range_t consume_token_type(parse_token_type_t type) { + assert(type != parse_token_type_terminate && + "Should not attempt to consume terminate token"); + auto tok = consume_any_token(); + if (tok.type != type) { + parse_error(tok, parse_error_generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(type).c_str(), + tok.user_presentable_description().c_str()); + return source_range_t{0, 0}; + } + return tok.range(); + } + + // The next token could not be parsed at the top level. + // For example a trailing end like `begin ; end ; end` + // Or an unexpected redirection like `>` + // Consume it and add an error. + void consume_excess_token_generating_error() { + auto tok = consume_any_token(); + + // In the rare case that we are parsing a freestanding argument list and not a job list, + // generate a generic error. 
+ // TODO: this is a crummy message if we get a tokenizer error, for example: + // complete -c foo -a "'abc" + if (this->top_type_ == type_t::freestanding_argument_list) { + this->parse_error( + tok, parse_error_generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(parse_token_type_string).c_str(), + tok.user_presentable_description().c_str()); + return; + } + + assert(this->top_type_ == type_t::job_list); + switch (tok.type) { + case parse_token_type_string: + // There are three keywords which end a job list. + switch (tok.keyword) { + case parse_keyword_t::kw_end: + this->parse_error(tok, parse_error_unbalancing_end, + _(L"'end' outside of a block")); + break; + case parse_keyword_t::kw_else: + this->parse_error(tok, parse_error_unbalancing_else, + _(L"'else' builtin not inside of if block")); + break; + case parse_keyword_t::kw_case: + this->parse_error(tok, parse_error_unbalancing_case, + _(L"'case' builtin not inside of switch block")); + break; + default: + internal_error(__FUNCTION__, + L"Token %ls should not have prevented parsing a job list", + tok.user_presentable_description().c_str()); + break; + } + break; + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_andand: + case parse_token_type_oror: + parse_error(tok, parse_error_generic, _(L"Expected a string, but found %ls"), + tok.user_presentable_description().c_str()); + break; + + case parse_special_type_tokenizer_error: + parse_error(tok, parse_error_from_tokenizer_error(tok.tok_error), L"%ls", + tokenizer_get_error_message(tok.tok_error)); + break; + + case parse_token_type_end: + internal_error(__FUNCTION__, L"End token should never be excess"); + break; + case parse_token_type_terminate: + internal_error(__FUNCTION__, L"Terminate token should never be excess"); + break; + default: + internal_error(__FUNCTION__, L"Unexpected excess token type: %ls", + 
tok.user_presentable_description().c_str()); + break; + } + } + + // Our can_parse implementations are for optional values and for lists. + // A true return means we should descend into the production, false means stop. + // Note that the argument is always nullptr and should be ignored. It is provided strictly for + // overloading purposes. + bool can_parse(job_conjunction_t *) { + const auto &token = peek_token(); + if (token.type != parse_token_type_string) return false; + switch (peek_token().keyword) { + case parse_keyword_t::kw_end: + case parse_keyword_t::kw_else: + case parse_keyword_t::kw_case: + // These end a job list. + return false; + case parse_keyword_t::none: + default: + return true; + } + } + + bool can_parse(argument_t *) { return peek_type() == parse_token_type_string; } + bool can_parse(redirection_t *) { return peek_type() == parse_token_type_redirection; } + bool can_parse(argument_or_redirection_t *) { + return can_parse((argument_t *)nullptr) || can_parse((redirection_t *)nullptr); + } + + bool can_parse(variable_assignment_t *) { + // We can parse a variable_assignment if our token is a variable assignment and the next + // token is a string. If the next token is not a string, then we have either a bare + // assignment like `foo=bar` or perhaps `foo=bar | `, etc. In that case we want to allow + // statement to see this assignment so it can produce an error. + return peek_token(0).may_be_variable_assignment && peek_type(1) == parse_token_type_string; + } + + template + bool can_parse(token_t *tok) { + return tok->allows_token(peek_token().type); + } + + // Note we have specific overloads for our keyword nodes, as they need custom logic. + bool can_parse(job_conjunction_t::decorator_t *) { + // This is for a job conjunction like `and stuff` + // But if it's `and --help` then we treat it as an ordinary command. 
+ return job_conjunction_t::decorator_t::allows_keyword(peek_token(0).keyword) && + !peek_token(1).is_help_argument; + } + + bool can_parse(decorated_statement_t::decorator_t *) { + // Here the keyword is 'command' or 'builtin' or 'exec'. + // `command stuff` executes a command called stuff. + // `command -n` passes the -n argument to the 'command' builtin. + // `command` by itself is a command. + if (!decorated_statement_t::decorator_t::allows_keyword(peek_token(0).keyword)) { + return false; + } + // Is it like `command --stuff` or `command` by itself? + auto tok1 = peek_token(1); + return tok1.type == parse_token_type_string && !tok1.is_dash_prefix_string(); + } + + bool can_parse(keyword_t *) { + // Time keyword is only the time builtin if the next argument doesn't have a dash. + return keyword_t::allows_keyword(peek_token(0).keyword) && + !peek_token(1).is_dash_prefix_string(); + } + + bool can_parse(job_continuation_t *) { return peek_type() == parse_token_type_pipe; } + + bool can_parse(job_conjunction_continuation_t *) { + auto type = peek_type(); + return type == parse_token_type_andand || type == parse_token_type_oror; + } + + bool can_parse(andor_job_t *) { + switch (peek_token().keyword) { + case parse_keyword_t::kw_and: + case parse_keyword_t::kw_or: { + // Check that the argument to and/or is a string that's not help. Otherwise it's + // either 'and + // --help' or a naked 'and', and not part of this list. 
+ const auto &nexttok = peek_token(1); + return nexttok.type == parse_token_type_string && !nexttok.is_help_argument; + } + default: + return false; + } + } + + bool can_parse(elseif_clause_t *) { + return peek_token(0).keyword == parse_keyword_t::kw_else && + peek_token(1).keyword == parse_keyword_t::kw_if; + } + + bool can_parse(else_clause_t *) { return peek_token().keyword == parse_keyword_t::kw_else; } + bool can_parse(case_item_t *) { return peek_token().keyword == parse_keyword_t::kw_case; } + + // Given that we are a list of type ListNodeType, whose contents type is ContentsNode, populate + // as many elements as we can. + // If exhaust_stream is set, then keep going until we get parse_token_type_terminate. + template + void populate_list(list_t &list, bool exhaust_stream = false) { + // Do not attempt to parse a list if we are unwinding. + if (is_unwinding()) { + assert(!exhaust_stream && + "exhaust_stream should only be set at top level, and so we should not be " + "unwinding"); + // Mark in the list that it was unwound. + FLOGF(ast_construction, L"%*sunwinding %ls", spaces(), "", + ast_type_to_string(ListType)); + assert(list.empty() && "Should be an empty list"); + return; + } + + for (;;) { + // If we are unwinding, then either we recover or we break the loop, dependent on the + // loop type. + if (is_unwinding()) { + if (!list_type_stops_unwind(ListType)) { + break; + } + // We are going to stop unwinding. + // Rather hackish. Just chomp until we get to a string or end node. + for (auto type = peek_type(); + type != parse_token_type_string && type != parse_token_type_terminate && + type != parse_token_type_end; + type = peek_type()) { + parse_token_t tok = tokens_.pop(); + ast_->extras_.errors.push_back(tok.range()); + FLOGF(ast_construction, L"%*schomping range %u-%u", spaces(), "", + tok.source_start, tok.source_length); + } + FLOGF(ast_construction, L"%*sdone unwinding", spaces(), ""); + unwinding_ = false; + } + + // Chomp semis and newlines. 
+ chomp_extras(ListType); + + // Now try parsing a node. + if (auto node = this->try_parse()) { + list.contents.push_back(std::move(node)); + } else if (exhaust_stream && peek_type() != parse_token_type_terminate) { + // We aren't allowed to stop. Produce an error and keep going. + consume_excess_token_generating_error(); + } else { + // We either stop once we can't parse any more of this contents node, or we + // exhausted the stream as requested. + break; + } + } + + FLOGF(ast_construction, L"%*s%ls size: %lu", spaces(), "", ast_type_to_string(ListType), + (unsigned long)list.count()); + } + + /// Allocate and populate a statement contents pointer. + /// This must never return null. + statement_t::contents_ptr_t allocate_populate_statement_contents() { + // In case we get a parse error, we still need to return something non-null. Use a decorated + // statement; all of its leaf nodes will end up unsourced. + auto got_error = [this] { + assert(unwinding_ && "Should have produced an error"); + return this->allocate_visit(); + }; + + using pkt = parse_keyword_t; + const auto &token1 = peek_token(0); + if (token1.type == parse_token_type_terminate && allow_incomplete()) { + // This may happen if we just have a 'time' prefix. + // Construct a decorated statement, which will be unsourced. + return this->allocate_visit(); + } else if (token1.type != parse_token_type_string) { + // We may be unwinding already; do not produce another error. + // For example in `true | and`. + parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"), + token1.user_presentable_description().c_str()); + return got_error(); + } else if (token1.may_be_variable_assignment) { + // Here we have a variable assignment which we chose to not parse as a variable + // assignment because there was no string after it. 
+ parse_error(token1, parse_error_bare_variable_assignment, L""); + return got_error(); + } + + // The only block-like builtin that takes any parameters is 'function'. So go to decorated + // statements if the subsequent token looks like '--'. The logic here is subtle: + // + // If we are 'begin', then we expect to be invoked with no arguments. + // If we are 'function', then we are a non-block if we are invoked with -h or --help + // If we are anything else, we require an argument, so do the same thing if the subsequent + // token is a statement terminator. + if (token1.type == parse_token_type_string) { + const auto &token2 = peek_token(1); + // If we are a function, then look for help arguments. Otherwise, if the next token + // looks like an option (starts with a dash), then parse it as a decorated statement. + if (token1.keyword == pkt::kw_function && token2.is_help_argument) { + return allocate_visit(); + } else if (token1.keyword != pkt::kw_function && token2.has_dash_prefix) { + return allocate_visit(); + } + + // Likewise if the next token doesn't look like an argument at all. This corresponds to + // e.g. a "naked if". + bool naked_invocation_invokes_help = + (token1.keyword != pkt::kw_begin && token1.keyword != pkt::kw_end); + if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || + token2.type == parse_token_type_terminate)) { + return allocate_visit(); + } + } + + switch (token1.keyword) { + case pkt::kw_not: + case pkt::kw_exclam: + return allocate_visit(); + case pkt::kw_for: + case pkt::kw_while: + case pkt::kw_function: + case pkt::kw_begin: + return allocate_visit(); + case pkt::kw_if: + return allocate_visit(); + case pkt::kw_switch: + return allocate_visit(); + + case pkt::kw_end: + // 'end' is forbidden as a command. + // For example, `if end` or `while end` will produce this error. + // We still have to descend into the decorated statement because + // we can't leave our pointer as null. 
+ parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"), + token1.user_presentable_description().c_str()); + return got_error(); + + default: + return allocate_visit(); + } + } + + /// Allocate and populate a block statement header. + /// This must never return null. + block_statement_t::header_ptr_t allocate_populate_block_header() { + switch (peek_token().keyword) { + case parse_keyword_t::kw_for: + return allocate_visit(); + case parse_keyword_t::kw_while: + return allocate_visit(); + case parse_keyword_t::kw_function: + return allocate_visit(); + case parse_keyword_t::kw_begin: + return allocate_visit(); + default: + internal_error(__FUNCTION__, L"should not have descended into block_header"); + DIE("Unreachable"); + } + } + + template + unique_ptr try_parse() { + if (!can_parse((AstNode *)nullptr)) return nullptr; + return allocate_visit(); + } + + void visit_node_field(argument_t &arg) { + if (unsource_leaves()) { + arg.unsourced = true; + return; + } + arg.range = consume_token_type(parse_token_type_string); + } + + void visit_node_field(variable_assignment_t &varas) { + if (unsource_leaves()) { + varas.unsourced = true; + return; + } + if (!peek_token().may_be_variable_assignment) { + internal_error(__FUNCTION__, + L"Should not have created variable_assignment_t from this token"); + } + varas.range = consume_token_type(parse_token_type_string); + } + + void visit_node_field(job_continuation_t &node) { + // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`. + const auto &tok = peek_token(1); + if (tok.keyword == parse_keyword_t::kw_and || tok.keyword == parse_keyword_t::kw_or) { + const wchar_t *cmdname = (tok.keyword == parse_keyword_t::kw_and ? L"and" : L"or"); + parse_error(tok, parse_error_andor_in_pipeline, EXEC_ERR_MSG, cmdname); + } + node.accept(*this); + } + + // Visit branch nodes by just calling accept() to visit their fields. 
+ template + enable_if_t visit_node_field(Node &node) { + // This field is a direct embedding of an AST value. + node.accept(*this); + } + + template + void visit_pointer_field(Node *&node) { + // This field is a pointer embedding of an ast node. + // Allocate and populate it. + node = allocate_visit(); + } + + // Overload for token fields. + template + void visit_node_field(token_t &token) { + if (unsource_leaves()) { + token.unsourced = true; + return; + } + + if (!token.allows_token(peek_token().type)) { + const auto &peek = peek_token(); + parse_error(peek, parse_error_generic, L"Expected %ls, but found %ls", + token_types_user_presentable_description({TokTypes...}).c_str(), + peek.user_presentable_description().c_str()); + token.unsourced = true; + return; + } + parse_token_t tok = consume_any_token(); + token.type = tok.type; + token.range = tok.range(); + } + + // Overload for keyword fields. + template + void visit_node_field(keyword_t &keyword) { + if (unsource_leaves()) { + keyword.unsourced = true; + return; + } + + if (!keyword.allows_keyword(peek_token().keyword)) { + keyword.unsourced = true; + const auto &peek = peek_token(); + + // Special error reporting for keyword_t. 
+ bool specially_handled = false; + std::array allowed = {{KWs...}}; + if (allowed.size() == 1 && allowed[0] == parse_keyword_t::kw_end) { + assert(!visit_stack_.empty() && "Visit stack should not be empty"); + auto p = find_block_open_keyword(visit_stack_.back()); + source_range_t kw_range = p.first; + const wchar_t *kw_name = p.second; + if (kw_name) { + this->parse_error(kw_range, parse_error_generic, + L"Missing end to balance this %ls", kw_name); + } + } + if (!specially_handled) { + parse_error(peek, parse_error_generic, L"Expected %ls, but found %ls", + keywords_user_presentable_description({KWs...}).c_str(), + peek.user_presentable_description().c_str()); + } + return; + } + parse_token_t tok = consume_any_token(); + keyword.kw = tok.keyword; + keyword.range = tok.range(); + } + + // Overload for maybe_newlines + void visit_node_field(maybe_newlines_t &nls) { + if (unsource_leaves()) { + nls.unsourced = true; + return; + } + // TODO: it would be nice to have the start offset be the current position in the token + // stream, even if there are no newlines. + nls.range = {0, 0}; + while (peek_token().is_newline) { + auto r = consume_token_type(parse_token_type_end); + if (nls.range.length == 0) { + nls.range = r; + } else { + nls.range.length = r.start + r.length - nls.range.start; + } + } + } + + template + void visit_optional_field(optional_t &ptr) { + // This field is an optional node. + ptr.contents = this->try_parse(); + } + + template + void visit_list_field(list_t &list) { + // This field is an embedding of an array of (pointers to) ContentsNode. + // Parse as many as we can. + populate_list(list); + } + + // We currently only have a handful of union pointer types. + // Handle them directly. 
+ void visit_union_field(statement_t::contents_ptr_t &ptr) { + ptr = this->allocate_populate_statement_contents(); + assert(ptr && "Statement contents must never be null"); + } + + void visit_union_field(argument_or_redirection_t::contents_ptr_t &ptr) { + if (auto arg = try_parse()) { + ptr.contents = std::move(arg); + } else if (auto redir = try_parse()) { + ptr.contents = std::move(redir); + } else { + internal_error(__FUNCTION__, L"Unable to parse argument or redirection"); + } + assert(ptr && "Statement contents must never be null"); + } + + void visit_union_field(block_statement_t::header_ptr_t &ptr) { + ptr = this->allocate_populate_block_header(); + assert(ptr && "Header pointer must never be null"); + } + + void will_visit_fields_of(const node_t &node) { + FLOGF(ast_construction, L"%*swill_visit %ls %p", spaces(), "", node.describe().c_str(), + (const void *)&node); + visit_stack_.push_back(&node); + } + + void did_visit_fields_of(const node_t &node) { + assert(!visit_stack_.empty() && visit_stack_.back() == &node && + "Node was not at the top of the visit stack"); + visit_stack_.pop_back(); + FLOGF(ast_construction, L"%*sdid_visit %ls %p", spaces(), "", node.describe().c_str(), + (const void *)&node); + } + + // The ast which we are populating. + ast_t *const ast_; + + // Flags controlling parsing. + parse_tree_flags_t flags_{}; + + // Stream of tokens which we consume. + token_stream_t tokens_; + + // The type which we are attempting to parse, typically job_list but may be + // freestanding_argument_list. + const type_t top_type_; + + // If set, we are unwinding due to error recovery. + bool unwinding_{false}; + + // A stack containing the nodes whose fields we are visiting. + std::vector visit_stack_{}; + + // If non-null, populate with errors. + parse_error_list_t *out_errors_{}; +}; + +// Set the parent fields of all nodes in the tree rooted at \p node. 
+static void set_parents(const node_t *top) { + struct parent_setter_t { + void visit(const node_t &node) { + const_cast(node).parent = parent_; + const node_t *saved = parent_; + parent_ = &node; + node_visitor(*this).accept_children_of(&node); + parent_ = saved; + } + + const node_t *parent_{nullptr}; + }; + struct parent_setter_t ps; + node_visitor(ps).accept(top); +} + +// static +ast_t ast_t::parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags, + parse_error_list_t *out_errors, type_t top) { + ast_t ast; + + // Populate our ast. + populator_t pop(&ast, src, parse_flags, top, out_errors); + + // Set all parent nodes. + // It turns out to be more convenient to do this after the parse phase. + set_parents(ast.top()); + + return ast; +} + +// static +ast_t ast_t::parse(const wcstring &src, parse_tree_flags_t flags, parse_error_list_t *out_errors) { + return parse_from_top(src, flags, out_errors, type_t::job_list); +} + +// static +ast_t ast_t::parse_argument_list(const wcstring &src, parse_tree_flags_t flags, + parse_error_list_t *out_errors) { + return parse_from_top(src, flags, out_errors, type_t::freestanding_argument_list); +} + +// \return the depth of a node, i.e. number of parent links. +static int get_depth(const node_t *node) { + int result = 0; + for (const node_t *cursor = node->parent; cursor; cursor = cursor->parent) { + result += 1; + } + return result; +} + +wcstring ast_t::dump(const wcstring &orig) const { + wcstring result; + + // Return a string that repeats "| " \p amt times. + auto pipespace = [](int amt) { + std::string result; + result.reserve(amt * 2); + for (int i = 0; i < amt; i++) result.append("! 
"); + return result; + }; + + traversal_t tv = this->walk(); + while (const auto *node = tv.next()) { + int depth = get_depth(node); + // dot-| padding + append_format(result, L"%s", pipespace(depth).c_str()); + if (const auto *n = node->try_as()) { + append_format(result, L"argument"); + if (auto argsrc = n->try_source(orig)) { + append_format(result, L": '%ls'", argsrc->c_str()); + } + } else if (const auto *n = node->try_as()) { + append_format(result, L"keyword: %ls", keyword_description(n->kw)); + } else if (const auto *n = node->try_as()) { + wcstring desc; + switch (n->type) { + case parse_token_type_string: + desc = format_string(L"string"); + if (auto strsource = n->try_source(orig)) { + append_format(desc, L": '%ls'", strsource->c_str()); + } + break; + case parse_token_type_redirection: + desc = L"redirection"; + if (auto strsource = n->try_source(orig)) { + append_format(desc, L": '%ls'", strsource->c_str()); + } + break; + case parse_token_type_end: + desc = L"<;>"; + break; + case token_type_invalid: + // This may occur with errors, e.g. we expected to see a string but saw a + // redirection. + desc = L""; + break; + default: + desc = token_type_user_presentable_description(n->type); + break; + } + append_format(result, L"%ls", desc.c_str()); + } else { + append_format(result, L"%ls", node->describe().c_str()); + } + append_format(result, L"\n"); + } + return result; +} +} // namespace ast diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 000000000..26152c8d5 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,1018 @@ +// Programmatic representation of fish grammar. + +#ifndef FISH_AST_H +#define FISH_AST_H + +#include +#include +#include + +#include "flog.h" +#include "parse_constants.h" +#include "tokenizer.h" + +namespace ast { + +/** + * This defines the fish abstract syntax tree. + * The fish ast is a tree data structure. 
The nodes of the tree + * are divided into three types: + * + * - leaf nodes refer to a range of source, and have no child nodes. + * - branch nodes have ONLY child nodes, and no other fields. + * - list nodes contain a list of some other node type (branch or leaf). + * + * Most clients will be interested in visiting the nodes of an ast. + * See node_visitation_t below. + */ + +struct node_t; + +// Our node categories. +// Note these are not stored directly in a node; they are provided in the Category static constexpr +// variable in each node. +enum class category_t : uint8_t { + branch, + leaf, + list, +}; + +// Declare our type enum. +// For each member of our ast, this creates an enum value. +// For example this creates `type_t::job_list`. +enum class type_t : uint8_t { +#define ELEM(T) T, +#include "ast_node_types.inc" +}; + +// Helper to return a string description of a type. +const wchar_t *ast_type_to_string(type_t type); + +// Forward declare all AST structs. +#define ELEM(T) struct T##_t; +#include "ast_node_types.inc" + +/* + * A FieldVisitor is something which can visit the fields of an ast node. + * This is used during ast construction. + * + * To trigger field visitation, use the accept() function: + * MyFieldVisitor v; + * node->accept(v); + * + * Example FieldVisitor: + * + * struct MyFieldVisitor { + * + * /// will_visit (did_visit) is called before (after) a node's fields are visited. + * void will_visit_fields_of(node_t &node); + * void did_visit_fields_of(node_t &node); + * + * /// These are invoked with the concrete type of each node, + * /// so they may be overloaded to distinguish node types. + * /// Example: + * void will_visit_fields_of(job_t &job); + * + * /// The visitor needs to be prepared for the following four field types. + * /// Naturally the vistor may overload visit_field to carve this + * /// arbitrarily finely. + * + * /// A field may be a "direct embedding" of a node. 
+ * /// That is, an ast node may have another node as a member. + * template + * void visit_node_field(Node &node); + + * /// A field may be a list_t of (pointers to) some other node type. + * template + * void visit_list_field(list_t &list); + * + * /// A field may be a unique_ptr to another node. + * /// Every such pointer must be non-null after construction. + * template + * void visit_pointer_field(std::unique_ptr &ptr); + * + * /// A field may be optional, meaning it may or may not exist. + * template + * void visit_optional_field(optional_t &opt); + * + * /// A field may be a union pointer, meaning it points to one of + * /// a fixed set of node types. A union pointer is never null + * /// after construction. + * template + * void visit_union_field(union_ptr_t &union_ptr); + * }; + */ + +// A union pointer field is a pointer to one of a fixed set of node types. +// It is never null after construction. +template +struct union_ptr_t { + std::unique_ptr contents{}; + + /// \return a pointer to the node contents. + const node_t *get() const { + assert(contents && "Null pointer"); + return contents.get(); + } + + /// \return whether we have non-null contents. + explicit operator bool() const { return contents != nullptr; } + + const node_t *operator->() const { return get(); } + + /// \return whether this union pointer can hold the given node. + static inline bool allows_node(const node_t &node); + + union_ptr_t() = default; + + template + /* implicit */ union_ptr_t(std::unique_ptr n) : contents(std::move(n)) { + // TODO: this could be made statically type safe. + assert(contents != nullptr && allows_node(*contents) && + "union_ptr constructed from invalid node type"); + } +}; + +// A pointer to something, or nullptr if not present. 
+template +struct optional_t { + std::unique_ptr contents{}; + + explicit operator bool() const { return contents != nullptr; } + + AstNode *operator->() const { + assert(contents && "Null pointer"); + return contents.get(); + } + + const AstNode &operator*() const { + assert(contents && "Null pointer"); + return *contents; + } + + bool has_value() const { return contents != nullptr; } +}; + + +namespace horrible_template_goop { + +// void if B is true, SFINAE'd away otherwise. +template +using only_if_t = typename std::enable_if::type; + +template +only_if_t visit_1_field(FieldVisitor &v, Field &field) { + v.visit_node_field(field); +} + +template +only_if_t visit_1_field(FieldVisitor &v, Field &field) { + v.visit_list_field(field); +} + +template +void visit_1_field(FieldVisitor &v, Field *&field) { + v.visit_pointer_field(field); +} + +template +void visit_1_field(FieldVisitor &v, optional_t &field) { + v.visit_optional_field(field); +} + +template +void visit_1_field(FieldVisitor &v, union_ptr_t &field) { + v.visit_union_field(field); +} + +// Call the field visit methods on visitor \p v passing field \p field. +template +void accept_field_visitor(FieldVisitor &v, bool /*reverse*/, Field &field) { + visit_1_field(v, field); +} + +// Call visit_field on visitor \p v, for the field \p field and also \p rest. +template +void accept_field_visitor(FieldVisitor &v, bool reverse, Field &field, Rest &... rest) { + if (!reverse) visit_1_field(v, field); + accept_field_visitor(v, reverse, rest...); + if (reverse) visit_1_field(v, field); +} + +} // namespace horrible_template_goop + +#define FIELDS(...) \ + template \ + void accept(FieldVisitor &visitor, bool reversed = false) { \ + visitor.will_visit_fields_of(*this); \ + horrible_template_goop::accept_field_visitor(visitor, reversed, __VA_ARGS__); \ + visitor.did_visit_fields_of(*this); \ + } + +/// node_t is the base node of all AST nodes. +/// It is not a template: it is possible to work concretely with this type. 
+struct node_t { + /// The type of this node. + const type_t type; + + /// The category of this node. + const category_t category; + + /// The parent node, or null if this is root. + const node_t *parent{nullptr}; + + constexpr explicit node_t(type_t t, category_t c) : type(t), category(c) {} + + /// Disallow copying, etc. + node_t(const node_t &) = delete; + node_t(node_t &&) = delete; + void operator=(const node_t &) = delete; + void operator=(node_t &&) = delete; + + /// Cast to a concrete node type, aborting on failure. + /// Example usage: + /// if (node->type == type_t::job_list) node->as()->... + template + To *as() { + assert(this->type == To::AstType && "Invalid type conversion"); + return static_cast(this); + } + + template + const To *as() const { + assert(this->type == To::AstType && "Invalid type conversion"); + return static_cast(this); + } + + /// Try casting to a concrete node type, except returns nullptr on failure. + /// Example ussage: + /// if (const auto *job_list = node->try_as()) job_list->... + template + To *try_as() { + if (this->type == To::AstType) return as(); + return nullptr; + } + + template + const To *try_as() const { + if (this->type == To::AstType) return as(); + return nullptr; + } + + /// Base accept() function which trampolines to overriding implementations for each node type. + /// This may be used when you don't know what the type of a particular node is. + template + void base_accept(FieldVisitor &v, bool reverse = false); + + /// \return a helpful string description of this node. + wcstring describe() const; + + /// \return the source range for this node, or none if unsourced. + /// This may return none if the parse was incomplete or had an error. + maybe_t try_source_range() const; + + /// \return the source range for this node, or an empty range {0, 0} if unsourced. 
+ source_range_t source_range() const { + if (auto r = try_source_range()) return *r; + return source_range_t{0, 0}; + } + + /// \return the source code for this node, or none if unsourced. + maybe_t try_source(const wcstring &orig) const { + if (auto r = try_source_range()) return orig.substr(r->start, r->length); + return none(); + } + + /// \return the source code for this node, or an empty string if unsourced. + wcstring source(const wcstring &orig) const { + wcstring res{}; + if (auto s = try_source(orig)) res = s.acquire(); + return res; + } + + // We are a pure virtual class. + // Note that it is NOT necessary to declare virtual destructors for all subclasses - these will + // be made virtual automatically. + virtual ~node_t() = 0; +}; + +// Base class for all "branch" nodes: nodes with at least one ast child. +template +struct branch_t : public node_t { + static constexpr type_t AstType = Type; + static constexpr category_t Category = category_t::branch; + + branch_t() : node_t(Type, Category) {} +}; + +// Base class for all "leaf" nodes: nodes with no ast children. +// It declares an empty visit method to avoid requiring the CHILDREN macro. +template +struct leaf_t : public node_t { + static constexpr type_t AstType = Type; + static constexpr category_t Category = category_t::leaf; + + // Whether this node is "unsourced." This happens if for whatever reason we are unable to parse + // the node, either because we had a parse error and recovered, or because we accepted + // incomplete and the token stream was exhausted. + bool unsourced{false}; + + // The source range. + source_range_t range{0, 0}; + + // Convenience helper to return whether we are not unsourced. 
+ bool has_source() const { return !unsourced; } + + template + void accept(FieldVisitor &visitor, bool /* reverse */ = false) { + visitor.will_visit_fields_of(*this); + visitor.did_visit_fields_of(*this); + } + + leaf_t() : node_t(Type, Category) {} +}; + +// A simple fixed-size array, possibly empty. +template +struct list_t : public node_t { + static constexpr type_t AstType = ListType; + static constexpr category_t Category = category_t::list; + + // A list wraps a "contents pointer" which is just a unique_ptr that converts to a reference. + // This enables more natural iteration: + // for (const argument_t &arg : argument_list) ... + struct contents_ptr_t { + std::unique_ptr ptr; + /* implicit */ contents_ptr_t(std::unique_ptr v) : ptr(std::move(v)) {} + + const ContentsNode *get() const { + assert(ptr && "Null pointer"); + return ptr.get(); + } + + /* implicit */ operator const ContentsNode &() const { return *get(); } + }; + std::vector contents{}; + + /// \return a node at a given index, or nullptr if out of range. + const ContentsNode *at(size_t idx, bool reverse = false) const { + if (idx >= count()) return nullptr; + return contents[reverse ? count() - idx - 1 : idx].get(); + } + + /// \return our count. + size_t count() const { return contents.size(); } + + /// \return whether we are empty. + bool empty() const { return contents.size() == 0; } + + /// Iteration support. + using iterator = typename decltype(contents)::const_iterator; + iterator begin() const { return contents.begin(); } + iterator end() const { return contents.end(); } + + // list types pretend their child nodes are direct embeddings. + // This isn't used during AST construction because we need to construct the list. + // It is used by node_visitation_t. 
+ template + void accept(FieldVisitor &visitor, bool reverse = false) { + visitor.will_visit_fields_of(*this); + for (size_t i = 0; i < count(); i++) visitor.visit_node_field(*this->at(i, reverse)); + visitor.did_visit_fields_of(*this); + } + + list_t() : node_t(ListType, Category) {} +}; + +// Fully define all list types, as they are very uniform. +// This is where types like job_list_t come from. +#define ELEM(T) +#define ELEMLIST(ListT, ContentsT) \ + struct ListT##_t final : public list_t {}; +#include "ast_node_types.inc" + +struct keyword_base_t : public leaf_t { + // The keyword which was parsed. + parse_keyword_t kw; +}; + +// A keyword node is a node which contains a keyword, which must be one of the provided values. +template +struct keyword_t final : public keyword_base_t { + static bool allows_keyword(parse_keyword_t); +}; + +struct token_base_t : public leaf_t { + // The token type which was parsed. + parse_token_type_t type{token_type_invalid}; +}; + +// A token node is a node which contains a token, which must be one of the provided values. +template +struct token_t final : public token_base_t { + /// \return whether a token type is allowed in this token_t, i.e. is a member of our Toks list. + static bool allows_token(parse_token_type_t); +}; + +// Zero or more newlines. +struct maybe_newlines_t final : public leaf_t {}; + +// A single newline or semicolon, terminating statements. +// Note this is not a separate type, it is just a convenience typedef. +using semi_nl_t = token_t; + +// Convenience typedef for string nodes. +using string_t = token_t; + +// An argument is just a node whose source range determines its contents. +// This is a separate type because it is sometimes useful to find all arguments. +struct argument_t final : public leaf_t {}; + +// A redirection has an operator like > or 2>, and a target like /dev/null or &1. +// Note that pipes are not redirections. 
+struct redirection_t final : public branch_t { + token_t oper; + string_t target; + + FIELDS(oper, target) +}; + +// A variable_assignment_t contains a source range like FOO=bar. +struct variable_assignment_t final : public leaf_t {}; + +// An argument or redirection holds either an argument or redirection. +struct argument_or_redirection_t final : public branch_t { + using contents_ptr_t = union_ptr_t; + contents_ptr_t contents{}; + + /// \return whether this represents an argument. + bool is_argument() const { return contents->type == type_t::argument; } + + /// \return whether this represents a redirection + bool is_redirection() const { return contents->type == type_t::redirection; } + + /// \return this as an argument, assuming it wraps one. + const argument_t &argument() const { + assert(is_argument() && "Is not an argument"); + return *this->contents.contents->as(); + } + + /// \return this as an argument, assuming it wraps one. + const redirection_t &redirection() const { + assert(is_redirection() && "Is not a redirection"); + return *this->contents.contents->as(); + } + + FIELDS(contents); +}; + +// A statement is a normal command, or an if / while / etc +struct statement_t final : public branch_t { + using contents_ptr_t = union_ptr_t; + contents_ptr_t contents{}; + + FIELDS(contents) +}; + +// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases +// like if statements, where we require a command). +struct job_t final : public branch_t { + // Maybe the time keyword. + optional_t> time; + + // A (possibly empty) list of variable assignments. + variable_assignment_list_t variables; + + // The statement. + statement_t statement; + + // Piped remainder. + job_continuation_list_t continuation; + + // Maybe backgrounded. + optional_t> bg; + + FIELDS(time, variables, statement, continuation, bg) +}; + +// A job_conjunction is a job followed by a && or || continuations. 
+struct job_conjunction_t final : public branch_t { + // The job conjunction decorator. + using decorator_t = keyword_t; + optional_t decorator{}; + + // The job itself. + job_t job; + + // The rest of the job conjunction, with && or ||s. + job_conjunction_continuation_list_t continuations; + + // A terminating semicolon or newline. + // This is marked optional because it may not be present, for example the command `echo foo` may + // not have a terminating newline. It will only fail to be present if we ran out of tokens. + optional_t semi_nl; + + FIELDS(decorator, job, continuations, semi_nl) +}; + +struct for_header_t final : public branch_t { + // 'for' + keyword_t kw_for; + + // var_name + string_t var_name; + + // 'in' + keyword_t kw_in; + + // list of arguments + argument_list_t args; + + // newline or semicolon + semi_nl_t semi_nl; + + FIELDS(kw_for, var_name, kw_in, args, semi_nl) +}; + +struct while_header_t final : public branch_t { + // 'while' + keyword_t kw_while; + + job_conjunction_t condition{}; + andor_job_list_t andor_tail{}; + + FIELDS(kw_while, condition, andor_tail) +}; + +struct function_header_t final : public branch_t { + // functions require at least one argument. + keyword_t kw_function; + argument_t first_arg; + argument_list_t args; + semi_nl_t semi_nl; + + FIELDS(kw_function, first_arg, args, semi_nl) +}; + +struct begin_header_t final : public branch_t { + keyword_t kw_begin; + + // Note that 'begin' does NOT require a semi or nl afterwards. + // This is valid: begin echo hi; end + optional_t semi_nl; + + FIELDS(kw_begin, semi_nl) +}; + +struct block_statement_t final : public branch_t { + // A header like for, while, etc. + using header_ptr_t = + union_ptr_t; + header_ptr_t header; + + // List of jobs in this block. + job_list_t jobs; + + // The 'end' node. + keyword_t end; + + // Arguments and redirections associated with the block. 
+ argument_or_redirection_list_t args_or_redirs; + + FIELDS(header, jobs, end, args_or_redirs) +}; + +// Represents an 'if', either as the first part of an if statement or after an 'else'. +struct if_clause_t final : public branch_t { + // The 'if' keyword. + keyword_t kw_if; + + // The 'if' condition. + job_conjunction_t condition{}; + + // 'and/or' tail. + andor_job_list_t andor_tail{}; + + // The body to execute if the condition is true. + job_list_t body; + + FIELDS(kw_if, condition, andor_tail, body) +}; + +struct elseif_clause_t final : public branch_t { + // The 'else' keyword. + keyword_t kw_else; + + // The 'if' clause following it. + if_clause_t if_clause; + + FIELDS(kw_else, if_clause) +}; + +struct else_clause_t final : public branch_t { + // else ; body + keyword_t kw_else; + semi_nl_t semi_nl; + job_list_t body; + + FIELDS(kw_else, semi_nl, body) +}; + +struct if_statement_t final : public branch_t { + // if part + if_clause_t if_clause; + + // else if list + elseif_clause_list_t elseif_clauses; + + // else part + optional_t else_clause; + + // literal end + keyword_t end; + + // block args / redirs + argument_or_redirection_list_t args_or_redirs; + + FIELDS(if_clause, elseif_clauses, else_clause, end, args_or_redirs) +}; + +struct case_item_t final : public branch_t { + // case ; body + keyword_t kw_case; + argument_list_t arguments; + semi_nl_t semi_nl; + job_list_t body; + FIELDS(kw_case, arguments, semi_nl, body) +}; + +struct switch_statement_t final : public branch_t { + // switch ; body ; end args_redirs + keyword_t kw_switch; + argument_t argument; + semi_nl_t semi_nl; + case_item_list_t cases; + keyword_t end; + argument_or_redirection_list_t args_or_redirs; + + FIELDS(kw_switch, argument, semi_nl, cases, end, args_or_redirs) +}; + +// A decorated_statement is a command with a list of arguments_or_redirections, possibly with +// "builtin" or "command" or "exec" +struct decorated_statement_t final : public branch_t { + // An optional 
decoration (command, builtin, exec, etc). + using pk = parse_keyword_t; + using decorator_t = keyword_t; + optional_t opt_decoration; + + // Command to run. + string_t command; + + // Args and redirs + argument_or_redirection_list_t args_or_redirs; + + // Helper to return the decoration. + parse_statement_decoration_t decoration() const; + + FIELDS(opt_decoration, command, args_or_redirs) +}; + +// A not statement like `not true` or `! true` +struct not_statement_t final : public branch_t { + // Keyword, either not or exclam. + keyword_t kw; + + variable_assignment_list_t variables; + optional_t> time{}; + statement_t contents{}; + + FIELDS(kw, variables, time, contents) +}; + +struct job_continuation_t final : public branch_t { + token_t pipe; + maybe_newlines_t newlines; + variable_assignment_list_t variables; + statement_t statement; + + FIELDS(pipe, newlines, variables, statement) +}; + +struct job_conjunction_continuation_t final + : public branch_t { + // The && or || token. + token_t conjunction; + + // The job itself. + job_t job; + + FIELDS(conjunction, job) +}; + +// An andor_job just wraps a job, but requires that the job have an 'and' or 'or' job_decorator. +// Note this is only used for andor_job_list; jobs that are not part of an andor_job_list are not +// instances of this. +struct andor_job_t final : public branch_t { + job_conjunction_t job; + + FIELDS(job) +}; + +// A freestanding_argument_list is equivalent to a normal argument list, except it may contain +// TOK_END (newlines, and even semicolons, for historical reasons). +// In practice the tok_ends are ignored by fish code so we do not bother to store them. 
+struct freestanding_argument_list_t final : public branch_t { + argument_list_t arguments; + FIELDS(arguments) +}; + +template +void node_t::base_accept(FieldVisitor &v, bool reverse) { + switch (this->type) { +#define ELEM(T) \ + case type_t::T: \ + this->as()->accept(v, reverse); \ + break; + +#include "ast_node_types.inc" + } +} + +// static +template +bool token_t::allows_token(parse_token_type_t type) { + for (parse_token_type_t t : {Toks...}) { + if (type == t) return true; + } + return false; +} + +// static +template +bool keyword_t::allows_keyword(parse_keyword_t kw) { + for (parse_keyword_t k : {KWs...}) { + if (k == kw) return true; + } + return false; +} + +// static +template +bool union_ptr_t::allows_node(const node_t &node) { + for (type_t t : {Nodes::AstType...}) { + if (t == node.type) return true; + } + return false; +} + +/** + * A node visitor is like a field visitor, but adapted to only visit actual nodes, as const + * references. It calls the visit() function of its visitor with a const reference to each node + * found under a given node. + * + * Example: + * struct MyNodeVisitor { + * template + * void visit(const Node &n) {...} + * }; + */ +template +class node_visitation_t { + public: + explicit node_visitation_t(NodeVisitor &v, bool reverse = false) : v_(v), reverse_(reverse) {} + + // Visit the (direct) child nodes of a given node. + template + void accept_children_of(const Node &n) { + // We play fast and loose with const to avoid having to duplicate our FIELDS macros. + const_cast(n).accept(*this, reverse_); + } + + // Visit the (direct) child nodes of a given node. + void accept_children_of(const node_t *n) { + const_cast(n)->base_accept(*this, reverse_); + } + + // Invoke visit() on our visitor for a given node, resolving that node's type. 
+ void accept(const node_t *n) { + assert(n && "Node should not be null"); + switch (n->type) { +#define ELEM(T) \ + case type_t::T: \ + v_.visit(*(n->as())); \ + break; +#include "ast_node_types.inc" + } + } + + // Here is our field visit implementations which adapt to the node visiting. + + // Direct embeddings. + template + void visit_node_field(const Node &node) { + v_.visit(node); + } + + // Pointer embeddings. + template + void visit_pointer_field(const Node *ptr) { + v_.visit(*ptr); + } + + // List embeddings. + template + void visit_list_field(const List &list) { + v_.visit(list); + } + + // Optional pointers get visited if not null. + template + void visit_optional_field(optional_t &node) { + if (node.contents) v_.visit(*node.contents); + } + + // Define our custom implementations of non-node fields. + // Union pointers just dispatch to the generic one. + template + void visit_union_field(union_ptr_t &ptr) { + assert(ptr && "Should not have null ptr"); + this->accept(ptr.contents.get()); + } + + void will_visit_fields_of(node_t &) {} + void did_visit_fields_of(node_t &) {} + + node_visitation_t(node_visitation_t &&) = default; + + // We cannot be copied. + node_visitation_t(const node_visitation_t &) = delete; + void operator=(const node_visitation_t &) = delete; + void operator=(node_visitation_t &&) = delete; + + private: + // Our adapted visitor. + NodeVisitor &v_; + + // Whether to iterate in reverse order. + const bool reverse_; +}; + +// Type-deducing helper. +template +node_visitation_t node_visitor(NodeVisitor &nv, bool reverse = false) { + return node_visitation_t(nv, reverse); +} + +// A way to visit nodes iteratively. +// This is pre-order. Each node is visited before its children. 
+// Example: +// traversal_t tv(start); +// while (const node_t *node = tv.next()) {...} +class traversal_t { + public: + // Construct starting with a node + traversal_t(const node_t *n) { + assert(n && "Should not have null node"); + push(n); + } + + // \return the next node, or nullptr if exhausted. + const node_t *next() { + if (stack_.empty()) return nullptr; + const node_t *node = stack_.back(); + stack_.pop_back(); + + // We want to visit in reverse order so the first child ends up on top of the stack. + node_visitor(*this, true /* reverse */).accept_children_of(node); + return node; + } + + private: + // Callback for node_visitation_t. + void visit(const node_t &node) { push(&node); } + + // Construct an empty visitor, used for iterator support. + traversal_t() = default; + + // \return whether we are finished visiting. + bool finished() const { return stack_.empty(); } + + // Append a node. + void push(const node_t *n) { + assert(n && "Should not push null node"); + stack_.push_back(n); + } + + // Stack of nodes. + std::vector stack_{}; + + friend class ast_t; + friend class node_visitation_t; +}; + +/// The ast type itself. +class ast_t { + public: + using source_range_list_t = std::vector; + + /// Construct an ast by parsing \p src as a job list. + /// The ast attempts to produce \p type as the result. + /// \p type may only be job_list or freestanding_argument_list. + static ast_t parse(const wcstring &src, parse_tree_flags_t flags = parse_flag_none, + parse_error_list_t *out_errors = nullptr); + + /// Like parse(), but constructs a freestanding_argument_list. + static ast_t parse_argument_list(const wcstring &src, + parse_tree_flags_t flags = parse_flag_none, + parse_error_list_t *out_errors = nullptr); + + /// \return a traversal, allowing iteration over the nodes. + traversal_t walk() const { return traversal_t{top()}; } + + /// \return the top node. This has the type requested in the 'parse' method. 
+ const node_t *top() const { return top_.get(); } + + /// \return whether any errors were encountered during parsing. + bool errored() const { return any_error_; } + + /// \return a textual representation of the tree. + /// Pass the original source as \p orig. + wcstring dump(const wcstring &orig) const; + + /// Extra source ranges. + /// These are only generated if the corresponding flags are set. + struct extras_t { + /// Set of comments, sorted by offset. + source_range_list_t comments; + + /// Set of semicolons, sorted by offset. + source_range_list_t semis; + + /// Set of error ranges, sorted by offset. + source_range_list_t errors; + }; + + /// Access the set of extraneous source ranges. + const extras_t &extras() const { return extras_; } + + /// Iterator support. + class iterator { + public: + using iterator_category = std::input_iterator_tag; + using difference_type = void; + using value_type = node_t; + using pointer = const node_t *; + using reference = const node_t &; + + bool operator==(const iterator &rhs) { return current_ == rhs.current_; } + bool operator!=(const iterator &rhs) { return !(*this == rhs); } + + iterator &operator++() { + current_ = v_.next(); + return *this; + } + + const node_t &operator*() const { return *current_; } + + private: + explicit iterator(const node_t *start) : v_(start), current_(v_.next()) {} + iterator() = default; + + traversal_t v_{}; + const node_t *current_{}; + friend ast_t; + }; + + iterator begin() const { return iterator{top()}; } + iterator end() const { return iterator{}; } + + ast_t(ast_t &&) = default; + ast_t &operator=(ast_t &&) = default; + ast_t(const ast_t &) = delete; + void operator=(const ast_t &) = delete; + + private: + ast_t() = default; + + // Shared parsing code that takes the top type. + static ast_t parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags, + parse_error_list_t *out_errors, type_t top); + + // The top node. + // Its type depends on what was requested to parse. 
+ std::unique_ptr top_{}; + + /// Whether any errors were encountered during parsing. + bool any_error_{false}; + + /// Extra fields. + extras_t extras_{}; + + class populator_t; + friend populator_t; +}; + +} // namespace ast +#endif // FISH_AST_H diff --git a/src/ast_node_types.inc b/src/ast_node_types.inc new file mode 100644 index 000000000..b0ac3ea98 --- /dev/null +++ b/src/ast_node_types.inc @@ -0,0 +1,60 @@ +// Define ELEM and optionally ELEMLIST before including this file. +// ELEM is for ordinary nodes. +// ELEMLIST(x, y) marks list nodes and the type they contain. +#ifndef ELEMLIST +#define ELEMLIST(x, y) ELEM(x) +#endif + +ELEM(keyword_base) +ELEM(token_base) +ELEM(maybe_newlines) + +ELEM(argument) +ELEMLIST(argument_list, argument) + +ELEM(redirection) +ELEM(argument_or_redirection) +ELEMLIST(argument_or_redirection_list, argument_or_redirection) + +ELEM(variable_assignment) +ELEMLIST(variable_assignment_list, variable_assignment) + +ELEM(job) +ELEM(job_conjunction) +// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed. 
+ELEMLIST(job_list, job_conjunction) +ELEM(job_conjunction_continuation) +ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation) + +ELEM(job_continuation) +ELEMLIST(job_continuation_list, job_continuation) + +ELEM(andor_job) +ELEMLIST(andor_job_list, andor_job) + +ELEM(statement) + +ELEM(not_statement) + +ELEM(block_statement) +ELEM(for_header) +ELEM(while_header) +ELEM(function_header) +ELEM(begin_header) + +ELEM(if_statement) +ELEM(if_clause) +ELEM(elseif_clause) +ELEMLIST(elseif_clause_list, elseif_clause) +ELEM(else_clause) + +ELEM(switch_statement) +ELEM(case_item) +ELEMLIST(case_item_list, case_item) + +ELEM(decorated_statement) + +ELEM(freestanding_argument_list) + +#undef ELEM +#undef ELEMLIST diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 3b6ba0e6a..17197f84c 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include #include +#include "ast.h" #include "color.h" #include "common.h" #include "env.h" @@ -404,6 +405,12 @@ static wcstring prettify(const wcstring &src, bool do_indent) { if (dump_parse_tree) { const wcstring dump = parse_dump_tree(parse_tree, src); std::fwprintf(stderr, L"%ls\n", dump.c_str()); + + auto ast = + ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments | + parse_flag_show_extra_semis); + wcstring ast_dump = ast.dump(src); + std::fwprintf(stderr, L"%ls\n", ast_dump.c_str()); } // We may have a forest of disconnected trees on a parse failure. 
We have to handle all nodes diff --git a/src/flog.h b/src/flog.h index 7512ea307..dcf5fd3cf 100644 --- a/src/flog.h +++ b/src/flog.h @@ -67,6 +67,7 @@ class category_list_t { category_t parse_productions{L"parse-productions", L"Resolving tokens"}; category_t parse_productions_chatty{L"parse-productions-chatty", L"Resolving tokens (chatty messages)"}; + category_t ast_construction{L"ast-construction", L"Parsing fish AST"}; category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"}; diff --git a/src/parse_constants.h b/src/parse_constants.h index 6e654a7f6..e41992c50 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -13,6 +13,17 @@ exit_without_destructors(-1); \ } while (0) +// A range of source code. +struct source_range_t { + uint32_t start; + uint32_t length; + + uint32_t end() const { + assert(start + length >= start && "Overflow"); + return start + length; + } +}; + // IMPORTANT: If the following enum table is modified you must also update token_enum_map below. enum parse_token_type_t : uint8_t { token_type_invalid = 1, @@ -193,6 +204,26 @@ enum parse_error_code_t { parse_error_andor_in_pipeline, // "and" or "or" after a pipe }; +enum { + parse_flag_none = 0, + + /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of + /// disconnected trees. This is intended to be used by syntax highlighting. + parse_flag_continue_after_error = 1 << 0, + /// Include comment tokens. + parse_flag_include_comments = 1 << 1, + /// Indicate that the tokenizer should accept incomplete tokens */ + parse_flag_accept_incomplete_tokens = 1 << 2, + /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished' + /// tree where some nodes may have no productions. + parse_flag_leave_unterminated = 1 << 3, + /// Indicate that the parser should generate job_list entries for blank lines. + parse_flag_show_blank_lines = 1 << 4, + /// Indicate that extra semis should be generated. 
+ parse_flag_show_extra_semis = 1 << 5, +}; +typedef unsigned int parse_tree_flags_t; + enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 }; typedef unsigned int parser_test_error_bits_t; @@ -214,6 +245,9 @@ struct parse_error_t { }; typedef std::vector parse_error_list_t; +wcstring token_type_user_presentable_description(parse_token_type_t type, + parse_keyword_t keyword = parse_keyword_t::none); + // Special source_start value that means unknown. #define SOURCE_LOCATION_UNKNOWN (static_cast(-1)) diff --git a/src/parse_grammar.h b/src/parse_grammar.h index d5e9e6710..c89fd8816 100644 --- a/src/parse_grammar.h +++ b/src/parse_grammar.h @@ -255,7 +255,7 @@ DEF_ALT(variable_assignments) { // A string token like VAR=value DEF(variable_assignment) produces_single{BODY(variable_assignment)}; -// A statement is a normal command, or an if / while / and etc +// A statement is a normal command, or an if / while / etc DEF_ALT(statement) { using nots = single; using block = single; diff --git a/src/parse_productions.h b/src/parse_productions.h index 3a6e4b257..02a9592f1 100644 --- a/src/parse_productions.h +++ b/src/parse_productions.h @@ -4,6 +4,7 @@ #include +#include "ast.h" #include "parse_constants.h" struct parse_token_t; diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index ee0281bbf..b3eadcd8d 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -30,7 +30,7 @@ static bool production_is_empty(const production_element_t *production) { return *production == token_type_invalid; } -static parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) { +parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) { switch (err) { case tokenizer_error_t::none: return parse_error_none; @@ -168,8 +168,7 @@ const wchar_t *keyword_description(parse_keyword_t type) { return L"unknown_keyword"; } -static wcstring token_type_user_presentable_description( - parse_token_type_t type, parse_keyword_t keyword = parse_keyword_t::none) { 
+wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) { if (keyword != parse_keyword_t::none) { return format_string(L"keyword '%ls'", keyword_description(keyword)); } @@ -1078,8 +1077,7 @@ static inline bool is_help_argument(const wcstring &txt) { } /// Return a new parse token, advancing the tokenizer. -static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t *out_token, - wcstring *storage) { +parse_token_t next_parse_token(tokenizer_t *tok, maybe_t *out_token, wcstring *storage) { *out_token = tok->next(); if (!out_token->has_value()) { return kTerminalToken; @@ -1098,7 +1096,8 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t *o result.is_help_argument = result.has_dash_prefix && is_help_argument(text); result.is_newline = (result.type == parse_token_type_end && text == L"\n"); result.preceding_escaped_nl = token.preceding_escaped_nl; - result.may_be_variable_assignment = bool(variable_assignment_equals_pos(text)); + result.may_be_variable_assignment = variable_assignment_equals_pos(text).has_value(); + result.tok_error = token.error; // These assertions are totally bogus. Basically our tokenizer works in size_t but we work in // uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just diff --git a/src/parse_tree.h b/src/parse_tree.h index 3e98104fa..34525db6a 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -25,11 +26,6 @@ typedef uint32_t source_offset_t; constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast(-1); -struct source_range_t { - uint32_t start; - uint32_t length; -}; - /// A struct representing the token type that we use internally. 
struct parse_token_t { enum parse_token_type_t type; // The type of the token as represented by the parser @@ -41,38 +37,36 @@ struct parse_token_t { bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline. bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token. bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar + tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error. source_offset_t source_start{SOURCE_OFFSET_INVALID}; source_offset_t source_length{0}; + /// \return the source range. + source_range_t range() const { + return source_range_t{source_start, source_length}; + } + + /// \return whether we are a string with the dash prefix set. + bool is_dash_prefix_string() const { + return type == parse_token_type_string && has_dash_prefix; + } + wcstring describe() const; wcstring user_presentable_description() const; constexpr parse_token_t(parse_token_type_t type) : type(type) {} }; -enum { - parse_flag_none = 0, - - /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of - /// disconnected trees. This is intended to be used by syntax highlighting. - parse_flag_continue_after_error = 1 << 0, - /// Include comment tokens. - parse_flag_include_comments = 1 << 1, - /// Indicate that the tokenizer should accept incomplete tokens */ - parse_flag_accept_incomplete_tokens = 1 << 2, - /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished' - /// tree where some nodes may have no productions. - parse_flag_leave_unterminated = 1 << 3, - /// Indicate that the parser should generate job_list entries for blank lines. - parse_flag_show_blank_lines = 1 << 4 -}; -typedef unsigned int parse_tree_flags_t; +/// Return a new parse token, advancing the tokenizer. 
+parse_token_t next_parse_token(tokenizer_t *tok, maybe_t *out_token, wcstring *storage); wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src); const wchar_t *token_type_description(parse_token_type_t type); const wchar_t *keyword_description(parse_keyword_t type); +parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err); + // Node flags. enum { /// Flag indicating that the node has associated comment nodes. From 8d9725c301ad6f52887bd11c15adba6c801bb294 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 15:27:15 -0700 Subject: [PATCH 03/13] Adopt the new AST in highlighting This switches syntax highlighting from parsing with parse_tree to the new ast. --- src/fish_tests.cpp | 6 + src/highlight.cpp | 706 +++++++++++++++++++++------------------------ 2 files changed, 342 insertions(+), 370 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index e0f52fe47..9ad95fd97 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4862,6 +4862,12 @@ static void test_highlighting() { {L")", highlight_role_t::error}, }); + highlight_tests.push_back({ + {L"echo", highlight_role_t::command}, + {L"stuff", highlight_role_t::param}, + {L"# comment", highlight_role_t::comment}, + }); + auto &vars = parser_t::principal_parser().vars(); // Verify variables and wildcards in commands using /bin/cat. 
vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"}); diff --git a/src/highlight.cpp b/src/highlight.cpp index 3b45f934d..656f0122c 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -16,6 +16,7 @@ #include #include +#include "ast.h" #include "builtin.h" #include "color.h" #include "common.h" @@ -31,14 +32,11 @@ #include "parse_util.h" #include "parser.h" #include "path.h" -#include "tnode.h" #include "tokenizer.h" #include "wcstringutil.h" #include "wildcard.h" #include "wutil.h" // IWYU pragma: keep -namespace g = grammar; - #define CURSOR_POSITION_INVALID static_cast(-1) static const wchar_t *get_highlight_var_name(highlight_role_t role) { @@ -338,12 +336,11 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d // Given a plain statement node in a parse tree, get the command and return it, expanded // appropriately for commands. If we succeed, return true. -static bool plain_statement_get_expanded_command(const wcstring &src, - tnode_t stmt, - const operation_context_t &ctx, - wcstring *out_cmd) { +static bool statement_get_expanded_command(const wcstring &src, + const ast::decorated_statement_t &stmt, + const operation_context_t &ctx, wcstring *out_cmd) { // Get the command. Try expanding it. If we cannot, it's an error. 
- maybe_t cmd = command_for_plain_statement(stmt, src); + maybe_t cmd = stmt.command.source(src); if (!cmd) return false; expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr); return err == expand_result_t::ok; @@ -384,6 +381,9 @@ rgb_color_t highlight_get_color(const highlight_spec_t &highlight, bool is_backg return result; } +static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration, + const wcstring &working_directory, const environment_t &vars); + static bool has_expand_reserved(const wcstring &str) { bool result = false; for (auto wc : str) { @@ -399,27 +399,22 @@ static bool has_expand_reserved(const wcstring &str) { // command (as a string), if any. This is used to validate autosuggestions. static bool autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx, wcstring *out_expanded_command, wcstring *out_arg) { - // Parse the buffer. - parse_node_tree_t parse_tree; - parse_tree_from_string(buff, - parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, - &parse_tree, nullptr); + auto ast = ast::ast_t::parse( + buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens); // Find the first statement. - tnode_t first_statement{}; - for (const auto &node : parse_tree) { - if (node.type == symbol_plain_statement) { - first_statement = tnode_t(&parse_tree, &node); - break; - } + const ast::decorated_statement_t *first_statement = nullptr; + if (const ast::job_conjunction_t *jc = ast.top()->as()->at(0)) { + first_statement = jc->job.statement.contents->try_as(); } if (first_statement && - plain_statement_get_expanded_command(buff, first_statement, ctx, out_expanded_command)) { - // Find the first argument. 
- auto args_and_redirs = first_statement.child<1>(); - if (auto arg = args_and_redirs.next_in_list()) { - *out_arg = arg.get_source(buff); + statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) { + // Check if the first argument or redirection is, in fact, an argument. + if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) { + if (arg_or_redir && arg_or_redir->is_argument()) { + *out_arg = arg_or_redir->argument().source(buff); + } } return true; } @@ -775,31 +770,56 @@ class highlighter_t { const bool io_ok; // Working directory. const wcstring working_directory; + // The ast we produced. + ast::ast_t ast; // The resulting colors. using color_array_t = std::vector; color_array_t color_array; - // The parse tree of the buff. - parse_node_tree_t parse_tree; + + // Flags we use for AST parsing. + static constexpr parse_tree_flags_t ast_flags = + parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated | + parse_flag_show_extra_semis; + // Color a command. - void color_command(tnode_t node); - // Color an argument. - void color_argument(tnode_t node); + void color_command(const ast::string_t &node); + // Color a node as if it were an argument. + void color_as_argument(const ast::node_t &node); // Color a redirection. - void color_redirection(tnode_t node); - // Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect - // invalid directories. - void color_arguments(const std::vector> &args, bool cmd_is_cd = false); - // Color the redirections of the given node. - void color_redirections(tnode_t list); + void color_redirection(const ast::redirection_t &node); // Color all the children of the command with the given type. 
- void color_children(const parse_node_t &parent, parse_token_type_t type, - highlight_spec_t color); + void color_children(const ast::node_t &parent, ast::type_t type, highlight_spec_t color); // Colors the source range of a node with a given color. - void color_node(const parse_node_t &node, highlight_spec_t color); + void color_node(const ast::node_t &node, highlight_spec_t color); + // Colors a range with a given color. + void color_range(source_range_t range, highlight_spec_t color); // return whether a plain statement is 'cd'. - bool is_cd(tnode_t stmt) const; + bool is_cd(const ast::decorated_statement_t &stmt) const; + + /// \return a substring of our buffer. + wcstring get_source(source_range_t r) const; public: + // Visit the children of a node. + void visit_children(const ast::node_t &node) { + ast::node_visitor(*this).accept_children_of(&node); + } + + // AST visitor implementations. + void visit(const ast::keyword_base_t &kw); + void visit(const ast::token_base_t &tok); + void visit(const ast::redirection_t &redir); + void visit(const ast::variable_assignment_t &varas); + void visit(const ast::semi_nl_t &semi_nl); + void visit(const ast::decorated_statement_t &stmt); + + // Visit an argument, perhaps knowing that our command is cd. + void visit(const ast::argument_t &arg, bool cmd_is_cd = false); + + // Default implementation is to just visit children. + void visit(const ast::node_t &node) { visit_children(node); } + // Constructor highlighter_t(const wcstring &str, size_t pos, const operation_context_t &ctx, wcstring wd, bool can_do_io) @@ -808,52 +828,44 @@ class highlighter_t { ctx(ctx), io_ok(can_do_io), working_directory(std::move(wd)), - color_array(str.size()) { - // Parse the tree. 
- parse_tree_from_string(buff, - parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_accept_incomplete_tokens, - &this->parse_tree, nullptr); - } + ast(ast::ast_t::parse(buff, ast_flags)) {} // Perform highlighting, returning an array of colors. color_array_t highlight(); }; -void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) { - // Can only color nodes with valid source ranges. - if (!node.has_source() || node.source_length == 0) return; - - // Fill the color array with our color in the corresponding range. - size_t source_end = node.source_start + node.source_length; - assert(source_end >= node.source_start); - assert(source_end <= color_array.size()); - - std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, - color); +wcstring highlighter_t::get_source(source_range_t r) const { + assert(r.start + r.length >= r.start && "Overflow"); + assert(r.start + r.length <= this->buff.size() && "Out of range"); + return this->buff.substr(r.start, r.length); } -void highlighter_t::color_command(tnode_t node) { - auto source_range = node.source_range(); - if (!source_range) return; +void highlighter_t::color_node(const ast::node_t &node, highlight_spec_t color) { + color_range(node.source_range(), color); +} - const wcstring cmd_str = node.get_source(this->buff); +void highlighter_t::color_range(source_range_t range, highlight_spec_t color) { + assert(range.start + range.length <= this->color_array.size() && "Range out of bounds"); + std::fill_n(this->color_array.begin() + range.start, range.length, color); +} + +void highlighter_t::color_command(const ast::string_t &node) { + source_range_t source_range = node.source_range(); + const wcstring cmd_str = get_source(source_range); // Get an iterator to the colors associated with the argument. 
- const size_t arg_start = source_range->start; + const size_t arg_start = source_range.start; const color_array_t::iterator colors = color_array.begin() + arg_start; color_string_internal(cmd_str, highlight_role_t::command, colors); } // node does not necessarily have type symbol_argument here. -void highlighter_t::color_argument(tnode_t node) { +void highlighter_t::color_as_argument(const ast::node_t &node) { auto source_range = node.source_range(); - if (!source_range) return; - - const wcstring arg_str = node.get_source(this->buff); + const wcstring arg_str = get_source(source_range); // Get an iterator to the colors associated with the argument. - const size_t arg_start = source_range->start; + const size_t arg_start = source_range.start; const color_array_t::iterator arg_colors = color_array.begin() + arg_start; // Color this argument without concern for command substitutions. @@ -905,15 +917,13 @@ void highlighter_t::color_argument(tnode_t node) { /// Indicates whether the source range of the given node forms a valid path in the given /// working_directory. -static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, - const operation_context_t &ctx, - const wcstring &working_directory) { - if (!node.has_source()) return false; - +static bool range_is_potential_path(const wcstring &src, const source_range_t &range, + const operation_context_t &ctx, + const wcstring &working_directory) { // Get the node source, unescape it, and then pass it to is_potential_path along with the // working directory (as a one element list). bool result = false; - wcstring token(src, node.source_start, node.source_length); + wcstring token = src.substr(range.start, range.length); if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) { // Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. // Put it back. 
@@ -925,172 +935,257 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -bool highlighter_t::is_cd(tnode_t stmt) const { - bool cmd_is_cd = false; - if (this->io_ok && stmt.has_source()) { - wcstring cmd_str; - if (plain_statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) { - cmd_is_cd = (cmd_str == L"cd"); - } +bool highlighter_t::is_cd(const ast::decorated_statement_t &stmt) const { + wcstring cmd_str; + if (this->io_ok && statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) { + return cmd_str == L"cd"; } - return cmd_is_cd; + return false; } -// Color all of the arguments of the given node list, which should be argument_list or -// argument_or_redirection_list. -void highlighter_t::color_arguments(const std::vector> &args, bool cmd_is_cd) { - // Find all the arguments of this list. - for (tnode_t arg : args) { - this->color_argument(arg.child<0>()); +void highlighter_t::visit(const ast::keyword_base_t &kw) { + highlight_role_t role = highlight_role_t::normal; + switch (kw.kw) { + case parse_keyword_t::kw_begin: + case parse_keyword_t::kw_builtin: + case parse_keyword_t::kw_case: + case parse_keyword_t::kw_command: + case parse_keyword_t::kw_else: + case parse_keyword_t::kw_end: + case parse_keyword_t::kw_exec: + case parse_keyword_t::kw_for: + case parse_keyword_t::kw_function: + case parse_keyword_t::kw_if: + case parse_keyword_t::kw_in: + case parse_keyword_t::kw_switch: + case parse_keyword_t::kw_while: + role = highlight_role_t::command; + break; - if (cmd_is_cd) { - // Mark this as an error if it's not 'help' and not a valid cd path. 
- wcstring param = arg.get_source(this->buff); - if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) { - bool is_help = string_prefixes_string(param, L"--help") || - string_prefixes_string(param, L"-h"); - if (!is_help && this->io_ok && - !is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) { - this->color_node(arg, highlight_role_t::error); - } + case parse_keyword_t::kw_and: + case parse_keyword_t::kw_or: + case parse_keyword_t::kw_not: + case parse_keyword_t::kw_exclam: + case parse_keyword_t::kw_time: + role = highlight_role_t::operat; + break; + + case parse_keyword_t::none: + break; + } + color_node(kw, role); +} + +void highlighter_t::visit(const ast::token_base_t &tok) { + maybe_t role = highlight_role_t::normal; + switch (tok.type) { + case parse_token_type_end: + case parse_token_type_pipe: + case parse_token_type_background: + role = highlight_role_t::statement_terminator; + break; + + case parse_token_type_andand: + case parse_token_type_oror: + role = highlight_role_t::operat; + break; + + case parse_token_type_string: + // Assume all strings are params. This handles e.g. the variable in a for header or a + // function header. Other strings (like arguments to commands) need more complex + // handling, which occurs in their respective overrides of visit(). + role = highlight_role_t::param; + + default: + break; + } + if (role) color_node(tok, *role); +} + +void highlighter_t::visit(const ast::semi_nl_t &semi_nl) { + color_node(semi_nl, highlight_role_t::statement_terminator); +} + +void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd) { + color_as_argument(arg); + if (cmd_is_cd && io_ok) { + // Mark this as an error if it's not 'help' and not a valid cd path. 
+ wcstring param = arg.source(this->buff); + if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) { + bool is_help = + string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); + if (!is_help && this->io_ok && + !is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) { + this->color_node(arg, highlight_role_t::error); } } } } -void highlighter_t::color_redirection(tnode_t redirection_node) { - if (!redirection_node.has_source()) return; +void highlighter_t::visit(const ast::variable_assignment_t &varas) { + color_as_argument(varas); + // TODO: Color the '=' in the variable assignment as an operator, for fun. + // if (auto where = variable_assignment_equals_pos(varas.source(this->buff))) { + // this->color_array.at(*where) = highlight_role_t::operat; + // } +} - tnode_t redir_prim = redirection_node.child<0>(); // like 2> - tnode_t redir_target = redirection_node.child<1>(); // like &1 or file path +void highlighter_t::visit(const ast::decorated_statement_t &stmt) { + // Color any decoration. + if (stmt.opt_decoration) this->visit(*stmt.opt_decoration); - if (redir_prim) { - wcstring target; - const maybe_t redirect = - redirection_for_node(redirection_node, this->buff, &target); + // Color the command's source code. + // If we get no source back, there's nothing to color. + maybe_t cmd = stmt.command.try_source(this->buff); + if (!cmd.has_value()) return; - // We may get a missing redirection type if the redirection is invalid. - auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection - : highlight_role_t::error; - this->color_node(redir_prim, hl); + wcstring expanded_cmd; + bool is_valid_cmd = false; + if (!this->io_ok) { + // We cannot check if the command is invalid, so just assume it's valid. + is_valid_cmd = true; + } else if (variable_assignment_equals_pos(*cmd)) { + is_valid_cmd = true; + } else { + // Check to see if the command is valid. + // Try expanding it. If we cannot, it's an error. 
+ bool expanded = statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd); + if (expanded && !has_expand_reserved(expanded_cmd)) { + is_valid_cmd = + command_is_valid(expanded_cmd, stmt.decoration(), working_directory, ctx.vars); + } + } - // Check if the argument contains a command substitution. If so, highlight it as a param - // even though it's a command redirection, and don't try to do any other validation. - if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) { - this->color_argument(redir_target); + // Color our statement. + if (is_valid_cmd) { + this->color_command(stmt.command); + } else { + this->color_node(stmt.command, highlight_role_t::error); + } + + // Color arguments and redirections. + // Except if our command is 'cd' we have special logic for how arguments are colored. + bool is_cd = (expanded_cmd == L"cd"); + for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) { + if (v.is_argument()) { + this->visit(v.argument(), is_cd); } else { - // No command substitution, so we can highlight the target file or fd. For example, - // disallow redirections into a non-existent directory. - bool target_is_valid = true; + this->visit(v.redirection()); + } + } +} - if (!redirect || !redirect->is_valid()) { - // not a valid redirection - target_is_valid = false; - } else if (!this->io_ok) { - // I/O is disallowed, so we don't have much hope of catching anything but gross - // errors. Assume it's valid. - target_is_valid = true; - } else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) { - // Could not be expanded. - target_is_valid = false; - } else { - // Ok, we successfully expanded our target. Now verify that it works with this - // redirection. We will probably need it as a path (but not in the case of fd - // redirections). Note that the target is now unescaped. 
- const wcstring target_path = - path_apply_working_directory(target, this->working_directory); - switch (redirect->mode) { - case redirection_mode_t::fd: { - if (target == L"-") { - target_is_valid = true; - } else { - int fd = fish_wcstoi(target.c_str()); - target_is_valid = !errno && fd >= 0; - } - break; +void highlighter_t::visit(const ast::redirection_t &redir) { + maybe_t oper = + pipe_or_redir_t::from_string(redir.oper.source(this->buff)); // like 2> + wcstring target = redir.target.source(this->buff); // like &1 or file path + + assert(oper.has_value() && + "Should have successfully parsed a pipe_or_redir_t since it was in our ast"); + + // Color the > part. + // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) + // If so, color the whole thing invalid and stop. + if (!oper->is_valid()) { + this->color_node(redir, highlight_role_t::error); + return; + } + + // Color the operator part like 2>. + this->color_node(redir.oper, highlight_role_t::redirection); + + // Color the target part. + // Check if the argument contains a command substitution. If so, highlight it as a param + // even though it's a command redirection, and don't try to do any other validation. + if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) { + this->color_as_argument(redir.target); + } else { + // No command substitution, so we can highlight the target file or fd. For example, + // disallow redirections into a non-existent directory. + bool target_is_valid = true; + if (!this->io_ok) { + // I/O is disallowed, so we don't have much hope of catching anything but gross + // errors. Assume it's valid. + target_is_valid = true; + } else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) { + // Could not be expanded. + target_is_valid = false; + } else { + // Ok, we successfully expanded our target. Now verify that it works with this + // redirection. We will probably need it as a path (but not in the case of fd + // redirections). 
Note that the target is now unescaped. + const wcstring target_path = + path_apply_working_directory(target, this->working_directory); + switch (oper->mode) { + case redirection_mode_t::fd: { + if (target == L"-") { + target_is_valid = true; + } else { + int fd = fish_wcstoi(target.c_str()); + target_is_valid = !errno && fd >= 0; } - case redirection_mode_t::input: { - // Input redirections must have a readable non-directory. - struct stat buf = {}; - target_is_valid = !waccess(target_path, R_OK) && - !wstat(target_path, &buf) && !S_ISDIR(buf.st_mode); - break; + break; + } + case redirection_mode_t::input: { + // Input redirections must have a readable non-directory. + struct stat buf = {}; + target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) && + !S_ISDIR(buf.st_mode); + break; + } + case redirection_mode_t::overwrite: + case redirection_mode_t::append: + case redirection_mode_t::noclob: { + // Test whether the file exists, and whether it's writable (possibly after + // creating it). access() returns failure if the file does not exist. + bool file_exists = false, file_is_writable = false; + int err = 0; + + struct stat buf = {}; + if (wstat(target_path, &buf) < 0) { + err = errno; } - case redirection_mode_t::overwrite: - case redirection_mode_t::append: - case redirection_mode_t::noclob: { - // Test whether the file exists, and whether it's writable (possibly after - // creating it). access() returns failure if the file does not exist. - bool file_exists = false, file_is_writable = false; - int err = 0; - struct stat buf = {}; - if (wstat(target_path, &buf) < 0) { - err = errno; - } + if (string_suffixes_string(L"/", target)) { + // Redirections to things that are directories is definitely not + // allowed. + file_exists = false; + file_is_writable = false; + } else if (err == 0) { + // No err. We can write to it if it's not a directory and we have + // permission. 
+ file_exists = true; + file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK); + } else if (err == ENOENT) { + // File does not exist. Check if its parent directory is writable. + wcstring parent = wdirname(target_path); - if (string_suffixes_string(L"/", target)) { - // Redirections to things that are directories is definitely not - // allowed. - file_exists = false; - file_is_writable = false; - } else if (err == 0) { - // No err. We can write to it if it's not a directory and we have - // permission. - file_exists = true; - file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK); - } else if (err == ENOENT) { - // File does not exist. Check if its parent directory is writable. - wcstring parent = wdirname(target_path); + // Ensure that the parent ends with the path separator. This will ensure + // that we get an error if the parent directory is not really a + // directory. + if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/'); - // Ensure that the parent ends with the path separator. This will ensure - // that we get an error if the parent directory is not really a - // directory. - if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/'); - - // Now the file is considered writable if the parent directory is - // writable. - file_exists = false; - file_is_writable = (0 == waccess(parent, W_OK)); - } else { - // Other errors we treat as not writable. This includes things like - // ENOTDIR. - file_exists = false; - file_is_writable = false; - } - - // NOCLOB means that we must not overwrite files that exist. - target_is_valid = - file_is_writable && - !(file_exists && redirect->mode == redirection_mode_t::noclob); - break; + // Now the file is considered writable if the parent directory is + // writable. + file_exists = false; + file_is_writable = (0 == waccess(parent, W_OK)); + } else { + // Other errors we treat as not writable. This includes things like + // ENOTDIR. 
+ file_exists = false; + file_is_writable = false; } + + // NOCLOB means that we must not overwrite files that exist. + target_is_valid = + file_is_writable && + !(file_exists && oper->mode == redirection_mode_t::noclob); + break; } } - - if (redir_target) { - auto hl = target_is_valid ? highlight_role_t::redirection : highlight_role_t::error; - this->color_node(redir_target, hl); - } - } - } -} - -/// Color all of the redirections of the given command. -void highlighter_t::color_redirections(tnode_t list) { - for (const auto &node : list.descendants()) { - this->color_redirection(node); - } -} - -/// Color all the children of the command with the given type. -void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, - highlight_spec_t color) { - for (node_offset_t idx = 0; idx < parent.child_count; idx++) { - const parse_node_t *child = this->parse_tree.get_child(parent, idx); - if (child != nullptr && child->type == type) { - this->color_node(*child, color); } + this->color_node(redir.target, + target_is_valid ? highlight_role_t::redirection : highlight_role_t::error); } } @@ -1145,171 +1240,42 @@ highlighter_t::color_array_t highlighter_t::highlight() { ASSERT_IS_BACKGROUND_THREAD(); } - const size_t length = buff.size(); - assert(this->buff.size() == this->color_array.size()); - if (length == 0) return color_array; - - // Start out at zero. + this->color_array.resize(this->buff.size()); std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{}); - // Walk the node tree. - for (const parse_node_t &node : parse_tree) { - if (ctx.check_cancel()) return std::move(color_array); - switch (node.type) { - // Color direct string descendants, e.g. 'for' and 'in'. 
- case symbol_while_header: - case symbol_begin_header: - case symbol_function_header: - case symbol_if_clause: - case symbol_else_clause: - case symbol_case_item: - case symbol_decorated_statement: - case symbol_if_statement: { - this->color_children(node, parse_token_type_string, highlight_role_t::command); - break; - } - case symbol_switch_statement: { - tnode_t switchn(&parse_tree, &node); - auto literal_switch = switchn.child<0>(); - auto switch_arg = switchn.child<1>(); - this->color_node(literal_switch, highlight_role_t::command); - this->color_node(switch_arg, highlight_role_t::param); - break; - } - case symbol_for_header: { - tnode_t fhead(&parse_tree, &node); - // Color the 'for' and 'in' as commands. - auto literal_for = fhead.child<0>(); - auto literal_in = fhead.child<2>(); - this->color_node(literal_for, highlight_role_t::command); - this->color_node(literal_in, highlight_role_t::command); + this->visit_children(*ast.top()); + if (ctx.check_cancel()) return std::move(color_array); - // Color the variable name as a parameter. 
- this->color_argument(fhead.child<1>()); - break; - } - - case parse_token_type_andand: - case parse_token_type_oror: - this->color_node(node, highlight_role_t::operat); - break; - - case symbol_not_statement: - this->color_children(node, parse_token_type_string, highlight_role_t::operat); - break; - - case symbol_job_decorator: - this->color_node(node, highlight_role_t::operat); - break; - - case symbol_variable_assignment: { - tnode_t variable_assignment = {&parse_tree, &node}; - this->color_argument(variable_assignment.child<0>()); - break; - } - - case parse_token_type_pipe: - case parse_token_type_background: - case parse_token_type_end: - case symbol_optional_background: { - this->color_node(node, highlight_role_t::statement_terminator); - break; - } - case symbol_optional_time: { - this->color_node(node, highlight_role_t::operat); - break; - } - case symbol_plain_statement: { - tnode_t stmt(&parse_tree, &node); - // Get the decoration from the parent. - enum parse_statement_decoration_t decoration = get_decoration(stmt); - - // Color the command. - tnode_t cmd_node = stmt.child<0>(); - maybe_t cmd = cmd_node.get_source(buff); - if (!cmd) { - break; // not much as we can do without a node that has source text - } - - bool is_valid_cmd = false; - if (!this->io_ok) { - // We cannot check if the command is invalid, so just assume it's valid. - is_valid_cmd = true; - } else if (variable_assignment_equals_pos(*cmd)) { - is_valid_cmd = true; - } else { - wcstring expanded_cmd; - // Check to see if the command is valid. - // Try expanding it. If we cannot, it's an error. 
- bool expanded = - plain_statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd); - if (expanded && !has_expand_reserved(expanded_cmd)) { - is_valid_cmd = - command_is_valid(expanded_cmd, decoration, working_directory, ctx.vars); - } - } - if (!is_valid_cmd) { - this->color_node(*cmd_node, highlight_role_t::error); - } else { - this->color_command(cmd_node); - } - break; - } - // Only work on root lists, so that we don't re-color child lists. - case symbol_arguments_or_redirections_list: { - tnode_t list(&parse_tree, &node); - if (argument_list_is_root(list)) { - bool cmd_is_cd = is_cd(list.try_get_parent()); - this->color_arguments(list.descendants(), cmd_is_cd); - this->color_redirections(list); - } - break; - } - case symbol_argument_list: { - tnode_t list(&parse_tree, &node); - if (argument_list_is_root(list)) { - this->color_arguments(list.descendants()); - } - break; - } - case symbol_end_command: { - this->color_node(node, highlight_role_t::command); - break; - } - case parse_special_type_parse_error: - case parse_special_type_tokenizer_error: { - this->color_node(node, highlight_role_t::error); - break; - } - case parse_special_type_comment: { - this->color_node(node, highlight_role_t::comment); - break; - } - default: { - break; - } - } + // Color every comment. + const auto &extras = ast.extras(); + for (const source_range_t &r : extras.comments) { + this->color_range(r, highlight_role_t::comment); } - if (!this->io_ok || this->cursor_pos > this->buff.size()) { - return std::move(color_array); + // Color every extra semi. + for (const source_range_t &r : extras.semis) { + this->color_range(r, highlight_role_t::statement_terminator); } - // If the cursor is over an argument, and that argument is a valid path, underline it. - for (const auto &node : parse_tree) { - // Must be an argument with source. - if (node.type != symbol_argument || !node.has_source()) continue; + // Color every error range. 
+ for (const source_range_t &r : extras.errors) { + this->color_range(r, highlight_role_t::error); + } - if (ctx.check_cancel()) return std::move(color_array); - - // Underline every valid path. - if (node_is_potential_path(buff, node, ctx, working_directory)) { - // It is, underline it. - for (size_t i = node.source_start; i < node.source_start + node.source_length; i++) { + // Underline every valid path. + if (io_ok) { + for (const ast::node_t &node : ast) { + const ast::argument_t *arg = node.try_as(); + if (!arg || arg->unsourced) continue; + if (ctx.check_cancel()) break; + if (range_is_potential_path(buff, arg->range, ctx, working_directory)) { // Don't color highlight_role_t::error because it looks dorky. For example, // trying to cd into a non-directory would show an underline and also red. - if (this->color_array.at(i).foreground != highlight_role_t::error) { - this->color_array.at(i).valid_path = true; + for (size_t i = arg->range.start, end = arg->range.start + arg->range.length; + i < end; i++) { + if (this->color_array.at(i).foreground != highlight_role_t::error) { + this->color_array.at(i).valid_path = true; + } } } } From 0d4f9c6220854a74afe6ecbfda2d79c53a293aca Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 17:28:31 -0700 Subject: [PATCH 04/13] Adopt the new AST in abbreviation expansion This switches abbreviation expansion from parsing with parse_tree to the new ast. --- src/reader.cpp | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/reader.cpp b/src/reader.cpp index 6ec992276..063e3b379 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -44,6 +44,7 @@ #include #include +#include "ast.h" #include "color.h" #include "common.h" #include "complete.h" @@ -935,33 +936,29 @@ maybe_t reader_expand_abbreviation_in_command(const wcstring &cmdline, s const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; // Parse this subcmd. 
- parse_node_tree_t parse_tree; - parse_tree_from_string(subcmd, - parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, - &parse_tree, nullptr); + using namespace ast; + auto ast = + ast_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens | + parse_flag_leave_unterminated); // Look for plain statements where the cursor is at the end of the command. - using namespace grammar; - tnode_t matching_cmd_node; - for (const parse_node_t &node : parse_tree) { - // Only interested in plain statements with source. - if (node.type != symbol_plain_statement || !node.has_source()) continue; + const ast::string_t *matching_cmd_node = nullptr; + for (const node_t &n : ast) { + const decorated_statement_t *stmt = n.try_as(); + if (!stmt) continue; - // Get the command node. Skip it if we can't or it has no source. - tnode_t statement(&parse_tree, &node); - tnode_t cmd_node = statement.child<0>(); + // Skip if we have a decoration. + if (stmt->opt_decoration) continue; - // Skip decorated statements. - if (get_decoration(statement) != parse_statement_decoration_none) continue; - - auto msource = cmd_node.source_range(); + // See if the command's source range contains our cursor, including at the end. + auto msource = stmt->command.try_source_range(); if (!msource) continue; // Now see if its source range contains our cursor, including at the end. if (subcmd_cursor_pos >= msource->start && subcmd_cursor_pos <= msource->start + msource->length) { // Success! - matching_cmd_node = cmd_node; + matching_cmd_node = &stmt->command; break; } } @@ -969,11 +966,12 @@ maybe_t reader_expand_abbreviation_in_command(const wcstring &cmdline, s // Now if we found a command node, expand it. 
maybe_t result{}; if (matching_cmd_node) { - const wcstring token = matching_cmd_node.get_source(subcmd); + assert(!matching_cmd_node->unsourced && "Should not be unsourced"); + const wcstring token = matching_cmd_node->source(subcmd); if (auto abbreviation = expand_abbreviation(token, vars)) { // There was an abbreviation! Replace the token in the full command. Maintain the // relative position of the cursor. - source_range_t r = *matching_cmd_node.source_range(); + source_range_t r = matching_cmd_node->source_range(); result = edit_t(subcmd_offset + r.start, r.length, std::move(*abbreviation)); } } From 46c4ec8d68527584b861e9627617e2ecafc6126b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 17:32:31 -0700 Subject: [PATCH 05/13] Adopt the new AST in completion argument lists This switches completion argument list expansion from parsing with parse_tree to the new ast. --- src/parser.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/parser.cpp b/src/parser.cpp index 3fbc65200..c48ec3868 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -11,6 +11,7 @@ #include #include +#include "ast.h" #include "common.h" #include "env.h" #include "event.h" @@ -328,19 +329,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src, expand_flags_t eflags, const operation_context_t &ctx) { // Parse the string as an argument list. - parse_node_tree_t tree; - if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, nullptr /* errors */, - symbol_freestanding_argument_list)) { + auto ast = ast::ast_t::parse_argument_list(arg_list_src); + if (ast.errored()) { // Failed to parse. Here we expect to have reported any errors in test_args. return {}; } // Get the root argument list and extract arguments from it. 
completion_list_t result; - assert(!tree.empty()); - tnode_t arg_list(&tree, &tree.at(0)); - while (auto arg = arg_list.next_in_list()) { - const wcstring arg_src = arg.get_source(arg_list_src); + const ast::freestanding_argument_list_t *list = + ast.top()->as(); + for (const ast::argument_t &arg : list->arguments) { + wcstring arg_src = arg.source(arg_list_src); if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) { break; // failed to expand a string } From 358d7072a2ccad80fcaa4d2d9d10425d8b3ff49f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 17:43:54 -0700 Subject: [PATCH 06/13] Adopt the new AST in bash history import This switches bash history importing from parsing with parse_tree to the new ast. --- src/history.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/history.cpp b/src/history.cpp index 8a8f070cc..0e2aa1385 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -29,6 +29,7 @@ #include #include +#include "ast.h" #include "common.h" #include "env.h" #include "fallback.h" // IWYU pragma: keep @@ -1096,8 +1097,7 @@ void history_impl_t::populate_from_config_path() { static bool should_import_bash_history_line(const wcstring &line) { if (line.empty()) return false; - parse_node_tree_t parse_tree; - if (!parse_tree_from_string(line, parse_flag_none, &parse_tree, nullptr)) return false; + if (ast::ast_t::parse(line).errored()) return false; // In doing this test do not allow incomplete strings. Hence the "false" argument. parse_error_list_t errors; From 7bea5ffa2eb0223d79f65a6a7ce4025e1c000463 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jun 2020 18:22:11 -0700 Subject: [PATCH 07/13] Adopt the new AST in parse_util_compute_indents This switches parse_util_compute_indents from parsing with parse_tree to the new ast. It also reworks the parse_util_compute_indents tests, because parse_util_compute_indents will be the backing for fish_indent. 
--- src/fish_tests.cpp | 154 +++++++++++++++-------- src/parse_util.cpp | 299 +++++++++++++++++++-------------------------- 2 files changed, 230 insertions(+), 223 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 9ad95fd97..1b1292299 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1268,75 +1268,121 @@ static void test_cancellation() { parser.clear_cancel(); } +namespace indent_tests { +// A struct which is either text or a new indent. +struct segment_t { + // The indent to set + int indent{0}; + const char *text{nullptr}; + + /* implicit */ segment_t(int indent) : indent(indent) {} + /* implicit */ segment_t(const char *text) : text(text) {} +}; + +using test_t = std::vector; +using test_list_t = std::vector; + +// Add a new test to a test list based on a series of ints and texts. +template +void add_test(test_list_t *v, const Types &... types) { + segment_t segments[] = {types...}; + v->emplace_back(std::begin(segments), std::end(segments)); +} +} // namespace indent_tests + static void test_indents() { say(L"Testing indents"); + using namespace indent_tests; - // Here are the components of our source and the indents we expect those to be. - struct indent_component_t { - const wchar_t *txt; - int indent; - }; + test_list_t tests; + add_test(&tests, // + 0, "if", 1, " foo", // + 0, "\nend"); - const indent_component_t components1[] = {{L"if foo", 0}, {L"end", 0}, {NULL, -1}}; + add_test(&tests, // + 0, "if", 1, " foo", // + 1, "\nfoo", // + 0, "\nend"); - const indent_component_t components2[] = {{L"if foo", 0}, - {L"", 1}, // trailing newline! - {NULL, -1}}; + add_test(&tests, // + 0, "if", 1, " foo", // + 1, "\nif", 2, " bar", // + 1, "\nend", // + 0, "\nend"); - const indent_component_t components3[] = {{L"if foo", 0}, - {L"foo", 1}, - {L"end", 0}, // trailing newline! 
- {NULL, -1}}; + add_test(&tests, // + 0, "if", 1, " foo", // + 1, "\nif", 2, " bar", // + 1, "\n", // FIXME: this should be 2 but parse_util_compute_indents has a bug + 1, "\nend\n"); - const indent_component_t components4[] = {{L"if foo", 0}, {L"if bar", 1}, {L"end", 1}, - {L"end", 0}, {L"", 0}, {NULL, -1}}; + add_test(&tests, // + 0, "if", 1, " foo", // + 1, "\nif", 2, " bar", // + 2, "\n"); - const indent_component_t components5[] = {{L"if foo", 0}, {L"if bar", 1}, {L"", 2}, {NULL, -1}}; + add_test(&tests, // + 0, "begin", // + 1, "\nfoo", // + 1, "\n"); - const indent_component_t components6[] = {{L"begin", 0}, {L"foo", 1}, {L"", 1}, {NULL, -1}}; + add_test(&tests, // + 0, "begin", // + 1, "\n;", // + 0, "end", // + 0, "\nfoo", 0, "\n"); - const indent_component_t components7[] = {{L"begin", 0}, {L";", 1}, {L"end", 0}, - {L"foo", 0}, {L"", 0}, {NULL, -1}}; + add_test(&tests, // + 0, "begin", // + 1, "\n;", // + 0, "end", // + 0, "\nfoo", 0, "\n"); - const indent_component_t components8[] = {{L"if foo", 0}, {L"if bar", 1}, {L"baz", 2}, - {L"end", 1}, {L"", 1}, {NULL, -1}}; + add_test(&tests, // + 0, "if", 1, " foo", // + 1, "\nif", 2, " bar", // + 2, "\nbaz", // + 1, "\nend", 1, "\n"); - const indent_component_t components9[] = {{L"switch foo", 0}, {L"", 1}, {NULL, -1}}; + add_test(&tests, // + 0, "switch foo", // + 1, "\n" // + ); - const indent_component_t components10[] = { - {L"switch foo", 0}, {L"case bar", 1}, {L"case baz", 1}, {L"quux", 2}, {L"", 2}, {NULL, -1}}; + add_test(&tests, // + 0, "switch foo", // + 1, "\ncase bar", // + 1, "\ncase baz", // + 2, "\nquux", // + 2, "\nquux" // + ); - const indent_component_t components11[] = {{L"switch foo", 0}, - {L"cas", 1}, // parse error indentation handling - {NULL, -1}}; + add_test(&tests, // + 0, "switch foo", // + 1, "\ncas" // parse error indentation handling + ); - const indent_component_t components12[] = {{L"while false", 0}, - {L"# comment", 1}, // comment indentation handling - {L"command", 1}, // 
comment indentation handling - {L"# comment2", 1}, // comment indentation handling - {NULL, -1}}; + add_test(&tests, // + 0, "while", 1, " false", // + 1, "\n# comment", // comment indentation handling + 1, "\ncommand", // + 1, "\n# comment 2" // + ); - const indent_component_t *tests[] = {components1, components2, components3, components4, - components5, components6, components7, components8, - components9, components10, components11, components12}; - for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { - const indent_component_t *components = tests[which]; - // Count how many we have. - size_t component_count = 0; - while (components[component_count].txt != NULL) { - component_count++; - } - - // Generate the expected indents. + int test_idx = 0; + for (const test_t &test : tests) { + // Construct the input text and expected indents. wcstring text; std::vector expected_indents; - for (size_t i = 0; i < component_count; i++) { - if (i > 0) { - text.push_back(L'\n'); - expected_indents.push_back(components[i].indent); + int current_indent = 0; + for (const segment_t &segment : test) { + if (!segment.text) { + current_indent = segment.indent; + } else { + wcstring tmp = str2wcstring(segment.text); + text.append(tmp); + expected_indents.insert(expected_indents.end(), tmp.size(), current_indent); } - text.append(components[i].txt); - expected_indents.resize(text.size(), components[i].indent); } do_test(expected_indents.size() == text.size()); @@ -1350,11 +1396,13 @@ static void test_indents() { do_test(expected_indents.size() == indents.size()); for (size_t i = 0; i < text.size(); i++) { if (expected_indents.at(i) != indents.at(i)) { - err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i, - which + 1, expected_indents.at(i), indents.at(i), text.c_str()); - break; // don't keep showing errors for the rest of the line + err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual " + L"%d):\n%ls\n", + i, 
text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str()); + break; // don't keep showing errors for the rest of the test } } + test_idx++; } } diff --git a/src/parse_util.cpp b/src/parse_util.cpp index f9789f733..77f1c0c63 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -14,6 +14,7 @@ #include #include +#include "ast.h" #include "builtin.h" #include "common.h" #include "expand.h" @@ -565,121 +566,16 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, return result; } -/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of -/// indents the same size as the original source string. Set the indent correspdonding to the node's -/// source range, if appropriate. -/// -/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false ' -/// then we have an if node with an empty job list (without source) but we want the last line to be -/// indented anyways. -/// -/// switch statements also indent. -/// -/// max_visited_node_idx is the largest index we visited. -static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, - int node_indent, parse_token_type_t parent_type, - std::vector *indents, int *trailing_indent, - node_offset_t *max_visited_node_idx) { - // Guard against incomplete trees. - if (node_idx > tree.size()) return; - - // Update max_visited_node_idx. - if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx; - - // We could implement this by utilizing the fish grammar. But there's an easy trick instead: - // almost everything that wraps a job list should be indented by 1. So just find all of the job - // lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other - // exception is job_list itself: a job_list is a job and a job_list, and we want that child list - // to be indented the same as the parent. 
So just find all job_lists whose parent is not a - // job_list, and increment their indent by 1. We also want to treat andor_job_list like - // job_lists. - const parse_node_t &node = tree.at(node_idx); - const parse_token_type_t node_type = node.type; - - // Increment the indent if we are either a root job_list, or root case_item_list. - const bool is_root_job_list = node_type != parent_type && (node_type == symbol_job_list || - node_type == symbol_andor_job_list); - const bool is_root_case_item_list = - node_type == symbol_case_item_list && parent_type != symbol_case_item_list; - if (is_root_job_list || is_root_case_item_list) { - node_indent += 1; - } - - // If we have source, store the trailing indent unconditionally. If we do not have source, store - // the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job - // lists from affecting the trailing indent. For example, code like this: - // - // if foo - // - // will be parsed as this: - // - // job_list - // job - // if_statement - // job [if] - // job_list [empty] - // job_list [empty] - // - // There's two "terminal" job lists, and we want the innermost one. - // - // Note we are relying on the fact that nodes are in the same order as the source, i.e. an - // in-order traversal of the node tree also traverses the source from beginning to end. - if (node.has_source() || node_indent > *trailing_indent) { - *trailing_indent = node_indent; - } - - // Store the indent into the indent array. - if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) { - if (node.has_source()) { - // A normal non-empty node. Store the indent unconditionally. - indents->at(node.source_start) = node_indent; - } else { - // An empty node. We have a source offset but no source length. This can come about when - // a node is legitimately empty: - // - // while true; end - // - // The job_list inside the while loop is empty. 
It still has a source offset (at the end - // of the while statement) but no source extent. We still need to capture that indent, - // because there may be comments inside: - // - // while true - // # loop forever - // end - // - // The 'loop forever' comment must be indented, by virtue of storing the indent. - // - // Now consider what happens if we remove the end: - // - // while true - // # loop forever - // - // Now both the job_list and end_command are unmaterialized. However, we want the indent - // to be of the job_list and not the end_command. Therefore, we only store the indent - // if it's bigger. - if (node_indent > indents->at(node.source_start)) { - indents->at(node.source_start) = node_indent; - } - } - } - - // Recursive to all our children. - for (node_offset_t idx = 0; idx < node.child_count; idx++) { - // Note we pass our type to our child, which becomes its parent node type. - compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, - trailing_indent, max_visited_node_idx); - } -} - std::vector parse_util_compute_indents(const wcstring &src) { // Make a vector the same size as the input string, which contains the indents. Initialize them - // to -1. + // to 0. + static wcstring ssss; + ssss = src; const size_t src_size = src.size(); - std::vector indents(src_size, -1); + std::vector indents(src_size, 0); // Simple trick: if our source does not contain a newline, then all indents are 0. if (src.find('\n') == wcstring::npos) { - std::fill(indents.begin(), indents.end(), 0); return indents; } @@ -687,78 +583,141 @@ std::vector parse_util_compute_indents(const wcstring &src) { // the last node we visited becomes the input indent of the next. I.e. in the case of 'switch // foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it // were a case item list. 
- parse_node_tree_t tree; - parse_tree_from_string(src, - parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_accept_incomplete_tokens, - &tree, nullptr /* errors */); + using namespace ast; + auto ast = + ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated); - // Start indenting at the first node. If we have a parse error, we'll have to start indenting - // from the top again. - node_offset_t start_node_idx = 0; - int last_trailing_indent = 0; + // Visit all of our nodes. When we get a job_list or case_item_list, increment indent while + // visiting its children. + struct indent_visitor_t { + explicit indent_visitor_t(std::vector &indents) : indents(indents) {} - while (start_node_idx < tree.size()) { - // The indent that we'll get for the last line. - int trailing_indent = 0; + void visit(const node_t &node) { + int inc = 0; + int dec = 0; + switch (node.type) { + case type_t::job_list: + case type_t::andor_job_list: + // Job lists are never unwound. + inc = 1; + dec = 1; + break; - // Biggest offset we visited. - node_offset_t max_visited_node_idx = 0; + // Increment indents for conditions in headers (#1665). + case type_t::job_conjunction: + if (node.parent->type == type_t::while_header || + node.parent->type == type_t::if_clause) { + inc = 1; + dec = 1; + } + break; - // Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which - // will prevent the really-root job list from indenting. - compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list, - &indents, &trailing_indent, &max_visited_node_idx); + // Increment indents for piped remainders. + case type_t::job_continuation_list: + if (node.as()->count() > 0) { + inc = 1; + dec = 1; + } + break; - // We may have more to indent. The trailing indent becomes our current indent. Start at the - // node after the last we visited. 
- last_trailing_indent = trailing_indent; - start_node_idx = max_visited_node_idx + 1; - } + case type_t::case_item_list: + // Here's a hack. Consider: + // switch abc + // cas + // + // fish will see that 'cas' is not valid inside a switch statement because it is + // not "case". It will then unwind back to the top level job list, producing a + // parse tree like: + // + // job_list + // switch_job + // + // normal_job + // cas + // + // And so we will think that the 'cas' job is at the same level as the switch. + // To address this, if we see that the switch statement was not closed, do not + // decrement the indent afterwards. + inc = 1; + dec = node.parent->as()->end.unsourced ? 0 : 1; + break; - // Handle comments. Each comment node has a parent (which is whatever the top of the symbol - // stack was when the comment was encountered). So the source range of the comment has the same - // indent as its parent. - const size_t tree_size = tree.size(); - for (node_offset_t i = 0; i < tree_size; i++) { - const parse_node_t &node = tree.at(i); - if (node.type == parse_special_type_comment && node.has_source() && - node.parent < tree_size) { - const parse_node_t &parent = tree.at(node.parent); - if (parent.source_start != SOURCE_OFFSET_INVALID) { - indents.at(node.source_start) = indents.at(parent.source_start); + default: + break; } - } - } + indent += inc; - // Now apply the indents. The indents array has -1 for places where the indent does not change, - // so start at each value and extend it along the run of -1s. - int last_indent = 0; - for (size_t i = 0; i < src_size; i++) { - int this_indent = indents.at(i); - if (this_indent < 0) { - indents.at(i) = last_indent; + // If we increased the indentation, apply it to the remainder of the string, even if the + // list is empty. For example (where _ represents the cursor): + // + // if foo + // _ + // + // we want to indent the newline. 
+ if (inc) { + std::fill(indents.begin() + last_leaf_end, indents.end(), indent); + last_indent = indent; + } + + // If this is a leaf node, apply the current indentation. + if (node.category == category_t::leaf) { + auto range = node.source_range(); + if (range.length > 0) { + // Fill to the end. + // Later nodes will come along and overwrite these. + std::fill(indents.begin() + range.start, indents.end(), indent); + last_leaf_end = range.start + range.length; + last_indent = indent; + } + } + + + node_visitor(*this).accept_children_of(&node); + indent -= dec; + } + + // The one-past-the-last index of the most recently encountered leaf node. + // We use this to populate the indents even if there's no tokens in the range. + size_t last_leaf_end{0}; + + // The last indent which we assigned. + int last_indent{-1}; + + // List of indents, which we populate. + std::vector &indents; + + // Initialize our starting indent to -1, as our top-level node is a job list which + // will immediately increment it. + int indent{-1}; + }; + + indent_visitor_t iv(indents); + node_visitor(iv).accept(ast.top()); + + // All newlines now get the *next* indent. + // For example, in this code: + // if true + // stuff + // the newline "belongs" to the if statement as it ends its job. + // But when rendered, it visually belongs to the job list. + + // FIXME: if there's a middle newline, we will indent it wrongly. + // For example: + // if true + // + // end + // Here the middle newline should be indented by 1. + + size_t idx = src_size; + int next_indent = iv.last_indent; + while (idx--) { + if (src.at(idx) == L'\n') { + indents.at(idx) = next_indent; } else { - // New indent level. - last_indent = this_indent; - // Make all whitespace before a token have the new level. This avoid using the wrong - // indentation level if a new line starts with whitespace.
- size_t prev_char_idx = i; - while (prev_char_idx--) { - if (!std::wcschr(L" \n\t\r", src.at(prev_char_idx))) break; - indents.at(prev_char_idx) = last_indent; - } + next_indent = indents.at(idx); } } - - // Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly - // indented even if it is empty. - size_t suffix_idx = src_size; - while (suffix_idx--) { - if (!std::wcschr(L" \n\t\r", src.at(suffix_idx))) break; - indents.at(suffix_idx) = last_trailing_indent; - } - return indents; } From 202fdfa54a35be3a253fd16c79ccb1b087a135b6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 1 Jul 2020 21:06:58 -0700 Subject: [PATCH 08/13] Adopt the new AST in parse_util_detect_errors This switches parse_util_detect_errors from parsing with parse_tree to the new ast. --- src/fish_tests.cpp | 20 ++-- src/parse_tree.h | 1 + src/parse_util.cpp | 282 ++++++++++++++++++++++++--------------------- src/parse_util.h | 9 +- 4 files changed, 172 insertions(+), 140 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 1b1292299..08e14bfce 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -40,6 +40,7 @@ #include #include +#include "ast.h" #include "autoload.h" #include "builtin.h" #include "color.h" @@ -978,15 +979,18 @@ static void test_debounce_timeout() { } static parser_test_error_bits_t detect_argument_errors(const wcstring &src) { - parse_node_tree_t tree; - if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL, symbol_argument_list)) { + using namespace ast; + auto ast = ast_t::parse_argument_list(src, parse_flag_none); + if (ast.errored()) { return PARSER_TEST_ERROR; } - - assert(!tree.empty()); //!OCLINT(multiple unary operator) - tnode_t arg_list{&tree, &tree.at(0)}; - auto first_arg = arg_list.next_in_list(); - return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src)); + const ast::argument_t *first_arg = + ast.top()->as()->arguments.at(0); + if (!first_arg) { + err(L"Failed to 
parse an argument"); + return 0; + } + return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src)); } /// Test the parser. @@ -1084,7 +1088,7 @@ static void test_parser() { } if (parse_util_detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) { - err(L"unterminated multiline subhsell not reported properly"); + err(L"unterminated multiline subshell not reported properly"); } if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) { diff --git a/src/parse_tree.h b/src/parse_tree.h index 34525db6a..8f8d54f74 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -42,6 +42,7 @@ struct parse_token_t { source_offset_t source_length{0}; /// \return the source range. + /// Note the start may be invalid. source_range_t range() const { return source_range_t{source_start, source_length}; } diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 77f1c0c63..8a74c905c 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -749,17 +749,13 @@ bool parse_util_argument_is_help(const wchar_t *s) { return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0; } -/// Check if the first argument under the given node is --help. -static bool first_argument_is_help(tnode_t statement, - const wcstring &src) { - bool is_help = false; - auto arg_nodes = get_argument_nodes(statement.child<1>()); - if (!arg_nodes.empty()) { - // Check the first argument only. - wcstring first_arg_src = arg_nodes.front().get_source(src); - is_help = parse_util_argument_is_help(first_arg_src.c_str()); +// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if +// there are no arguments. 
+const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) { + for (const ast::argument_or_redirection_t &v : list) { + if (v.is_argument()) return &v.argument(); } - return is_help; + return nullptr; } /// Given a wide character immediately after a dollar sign, return the appropriate error message. @@ -915,11 +911,13 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse /// Test if this argument contains any errors. Detected errors include syntax errors in command /// substitutions, improperly escaped characters and improper use of the variable expansion /// operator. -parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t node, +parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors) { - assert(node.has_source() && "argument has no source"); - auto source_start = node.source_range()->start; + maybe_t source_range = arg.try_source_range(); + if (!source_range.has_value()) return 0; + + size_t source_start = source_range->start; int err = 0; wchar_t *paran_begin, *paran_end; int do_loop = 1; @@ -1013,10 +1011,10 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t job, +static bool detect_errors_in_backgrounded_job(const ast::job_t &job, parse_error_list_t *parse_errors) { - namespace g = grammar; - auto source_range = job.source_range(); + using namespace ast; + auto source_range = job.try_source_range(); if (!source_range) return false; bool errored = false; @@ -1025,54 +1023,77 @@ static bool detect_errors_in_backgrounded_job(tnode_t job, // foo & ; or bar // if foo & ; end // while foo & ; end - auto job_conj = job.try_get_parent(); - if (job_conj.try_get_parent()) { - errored = append_syntax_error(parse_errors, source_range->start, - BACKGROUND_IN_CONDITIONAL_ERROR_MSG); - } else if (job_conj.try_get_parent()) { - errored = append_syntax_error(parse_errors, source_range->start, - 
BACKGROUND_IN_CONDITIONAL_ERROR_MSG); - } else if (auto jlist = job_conj.try_get_parent()) { - // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. - // Fetch the job list and then advance it by one. - auto first_jconj = jlist.next_in_list(); - assert(first_jconj == job.try_get_parent() && - "Expected first job to be the node we found"); - (void)first_jconj; + const job_conjunction_t *job_conj = job.parent->try_as(); + if (!job_conj) return false; - // Try getting the next job's decorator. - if (auto next_job_dec = jlist.next_in_list()) { - // The next job is indeed a boolean statement. - parse_job_decoration_t bool_type = bool_statement_type(next_job_dec); - if (bool_type == parse_job_decoration_and) { - errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start, - BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and"); - } else if (bool_type == parse_job_decoration_or) { - errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start, - BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or"); + if (job_conj->parent->try_as()) { + errored = append_syntax_error(parse_errors, source_range->start, + BACKGROUND_IN_CONDITIONAL_ERROR_MSG); + } else if (job_conj->parent->try_as()) { + errored = append_syntax_error(parse_errors, source_range->start, + BACKGROUND_IN_CONDITIONAL_ERROR_MSG); + } else if (const ast::job_list_t *jlist = job_conj->parent->try_as()) { + // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. + // Find the index of ourselves in the job list. + size_t index; + for (index = 0; index < jlist->count(); index++) { + if (jlist->at(index) == job_conj) break; + } + assert(index < jlist->count() && "Should have found the job in the list"); + + // Try getting the next job and check its decorator. 
+ if (const job_conjunction_t *next = jlist->at(index + 1)) { + if (const keyword_base_t *deco = next->decorator.contents.get()) { + assert( + (deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) && + "Unexpected decorator keyword"); + const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or"); + errored = append_syntax_error(parse_errors, deco->source_range().start, + BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name); } } } return errored; } -static bool detect_errors_in_plain_statement(const wcstring &buff_src, - const parse_node_tree_t &node_tree, - tnode_t pst, - parse_error_list_t *parse_errors) { - using namespace grammar; +static bool detect_errors_in_decorated_statement(const wcstring &buff_src, + const ast::decorated_statement_t &dst, + parse_error_list_t *parse_errors) { + using namespace ast; bool errored = false; - auto source_start = pst.source_range()->start; + auto source_start = dst.source_range().start; + const parse_statement_decoration_t decoration = dst.decoration(); - // In a few places below, we want to know if we are in a pipeline. - tnode_t st = pst.try_get_parent().try_get_parent(); - pipeline_position_t pipe_pos = get_pipeline_position(st); - bool is_in_pipeline = (pipe_pos != pipeline_position_t::none); + // Determine if the first argument is help. + bool first_arg_is_help = false; + if (const auto *arg = get_first_arg(dst.args_or_redirs)) { + wcstring arg_src = arg->source(buff_src); + first_arg_is_help = parse_util_argument_is_help(arg_src.c_str()); + } - // We need to know the decoration. - const enum parse_statement_decoration_t decoration = get_decoration(pst); + // Get the statement we are part of. + const statement_t *st = dst.parent->as(); + + // Walk up to the job. 
+ const ast::job_t *job = nullptr; + for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) { + assert(cursor && "Reached root without finding a job"); + job = cursor->try_as(); + } + assert(job && "Should have found the job"); + + // Check our pipeline position. + pipeline_position_t pipe_pos; + if (job->continuation.empty()) { + pipe_pos = pipeline_position_t::none; + } else if (&job->statement == st) { + pipe_pos = pipeline_position_t::first; + } else { + pipe_pos = pipeline_position_t::subsequent; + } // Check that we don't try to pipe through exec. + bool is_in_pipeline = (pipe_pos != pipeline_position_t::none); if (is_in_pipeline && decoration == parse_statement_decoration_exec) { errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, L"exec"); } @@ -1083,14 +1104,14 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src, if (pipe_pos == pipeline_position_t::subsequent) { // check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted // commands. - wcstring command = pst.child<0>().get_source(buff_src); + wcstring command = dst.command.source(buff_src); if (command == L"and" || command == L"or") { errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, command.c_str()); } } - if (maybe_t unexp_command = command_for_plain_statement(pst, buff_src)) { + if (maybe_t unexp_command = dst.command.try_source(buff_src)) { wcstring command; // Check that we can expand the command. if (expand_to_command_and_args(*unexp_command, operation_context_t::empty(), &command, @@ -1107,40 +1128,40 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src, // Check that we don't return from outside a function. But we allow it if it's // 'return --help'. - if (!errored && command == L"return") { + if (!errored && command == L"return" && !first_arg_is_help) { + // See if we are in a function. 
bool found_function = false; - for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr; - ancestor = node_tree.get_parent(*ancestor)) { - auto fh = tnode_t::try_create(&node_tree, ancestor) - .child<0>() - .try_get_child(); - if (fh) { - found_function = true; - break; + for (const node_t *cursor = &dst; cursor != nullptr; cursor = cursor->parent) { + if (const auto *bs = cursor->try_as()) { + if (bs->header->type == type_t::function_header) { + found_function = true; + break; + } } } - if (!found_function && !first_argument_is_help(pst, buff_src)) { + + if (!found_function) { errored = append_syntax_error(parse_errors, source_start, INVALID_RETURN_ERR_MSG); } } // Check that we don't break or continue from outside a loop. - if (!errored && (command == L"break" || command == L"continue")) { + if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) { // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, // stop the search; we can't break an outer loop from inside a function. // This is a little funny because we can't tell if it's a 'for' or 'while' // loop from the ancestor alone; we need the header. That is, we hit a // block_statement, and have to check its header. bool found_loop = false; - for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr; - ancestor = node_tree.get_parent(*ancestor)) { - tnode_t bh = - tnode_t::try_create(&node_tree, ancestor).child<0>(); - if (bh.try_get_child() || bh.try_get_child()) { + for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) { + const auto *block = ancestor->try_as(); + if (!block) continue; + if (block->header->type == type_t::for_header || + block->header->type == type_t::while_header) { // This is a loop header, so we can break or continue. 
found_loop = true; break; - } else if (bh.try_get_child()) { + } else if (block->header->type == type_t::function_header) { // This is a function header, so we cannot break or // continue. We stop our search here. found_loop = false; @@ -1148,7 +1169,7 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src, } } - if (!found_loop && !first_argument_is_help(pst, buff_src)) { + if (!found_loop) { errored = append_syntax_error( parse_errors, source_start, (command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG)); @@ -1167,12 +1188,22 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src, return errored; } +// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that +// there are no arguments in the list. +static bool detect_errors_in_block_redirection_list( + const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) { + if (const auto *first_arg = get_first_arg(args_or_redirs)) { + return append_syntax_error(out_errors, first_arg->source_range().start, + BACKGROUND_IN_CONDITIONAL_ERROR_MSG); + } + return false; +} + parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, bool allow_incomplete, parsed_source_ref_t *out_pstree) { namespace g = grammar; - parse_node_tree_t node_tree; parse_error_list_t parse_errors; parser_test_error_bits_t res = 0; @@ -1192,12 +1223,15 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // allow_incomplete is set. bool has_unclosed_quote_or_subshell = false; - // Parse the input string into a parse tree. Some errors are detected here. - bool parsed = parse_tree_from_string( - buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree, - &parse_errors); + const parse_tree_flags_t parse_flags = + allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none; + // Parse the input string into an ast. 
Some errors are detected here. + using namespace ast; + auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors); if (allow_incomplete) { + // Issue #1238: If the only error was unterminated quote, then consider this to have parsed + // successfully. size_t idx = parse_errors.size(); while (idx--) { if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote || @@ -1209,19 +1243,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, } } - // Issue #1238: If the only error was unterminated quote, then consider this to have parsed - // successfully. A better fix would be to have parse_tree_from_string return this information - // directly (but it would be a shame to munge up its nice bool return). - if (parse_errors.empty() && has_unclosed_quote_or_subshell) { - parsed = true; - } - - if (!parsed) { - errored = true; - } - // has_unclosed_quote_or_subshell may only be set if allow_incomplete is true. assert(!has_unclosed_quote_or_subshell || allow_incomplete); + if (has_unclosed_quote_or_subshell) { + // We do not bother to validate the rest of the tree in this case. + return PARSER_TEST_INCOMPLETE; + } + + errored = !parse_errors.empty(); // Expand all commands. // Verify 'or' and 'and' not used inside pipelines. @@ -1230,21 +1259,17 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Verify no variable expansions. if (!errored) { - for (const parse_node_t &node : node_tree) { - if (node.type == symbol_end_command && !node.has_source()) { - // An 'end' without source is an unclosed block. - has_unclosed_block = true; - } else if (node.type == symbol_statement && !node.has_source()) { - // Check for a statement without source in a pipeline, i.e. unterminated pipeline. 
- auto pipe_pos = get_pipeline_position({&node_tree, &node}); - if (pipe_pos != pipeline_position_t::none) { + for (const node_t &node : ast) { + if (const job_continuation_t *jc = node.try_as()) { + // Somewhat clumsy way of checking for a statement without source in a pipeline. + // See if our pipe has source but our statement does not. + if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) { has_unclosed_pipe = true; } - } else if (node.type == symbol_argument) { - tnode_t arg{&node_tree, &node}; - const wcstring arg_src = node.get_source(buff_src); - res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors); - } else if (node.type == symbol_job) { + } else if (const argument_t *arg = node.try_as()) { + wcstring arg_src = arg->source(buff_src); + res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors); + } else if (const ast::job_t *job = node.try_as()) { // Disallow background in the following cases: // // foo & ; and bar @@ -1252,25 +1277,27 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // if foo & ; end // while foo & ; end // If it's not a background job, nothing to do. - auto job = tnode_t{&node_tree, &node}; - if (job_node_is_background(job)) { - errored |= detect_errors_in_backgrounded_job(job, &parse_errors); + if (job->bg) { + errored |= detect_errors_in_backgrounded_job(*job, &parse_errors); } - } else if (node.type == symbol_arguments_or_redirections_list) { - // verify no arguments to the end command of if, switch, begin (#986). 
- auto list = tnode_t{&node_tree, &node}; - if (list.try_get_parent() || - list.try_get_parent() || - list.try_get_parent()) { - if (auto arg = list.next_in_list()) { - errored = append_syntax_error(&parse_errors, arg.source_range()->start, - END_ARG_ERR_MSG); - } - } - } else if (node.type == symbol_plain_statement) { - tnode_t pst{&node_tree, &node}; + } else if (const ast::decorated_statement_t *stmt = + node.try_as()) { + errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &parse_errors); + } else if (const auto *block = node.try_as()) { + // If our 'end' had no source, we are unsourced. + if (block->end.unsourced) has_unclosed_block = true; errored |= - detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors); + detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors); + } else if (const auto *ifs = node.try_as()) { + // If our 'end' had no source, we are unsourced. + if (ifs->end.unsourced) has_unclosed_block = true; + errored |= + detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors); + } else if (const auto *switchs = node.try_as()) { + // If our 'end' had no source, we are unsourced. + if (switchs->end.unsourced) has_unclosed_block = true; + errored |= + detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors); } } } @@ -1285,7 +1312,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, } if (out_pstree != nullptr) { - *out_pstree = std::make_shared(buff_src, std::move(node_tree)); + // TODO: legacy + *out_pstree = parse_source(buff_src, parse_flags, nullptr); } return res; @@ -1300,25 +1328,21 @@ maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_ false /* don't skip caret */); }; - // Parse the string as an argument list. + // Parse the string as a freestanding argument list. 
+ using namespace ast; parse_error_list_t errors; - parse_node_tree_t tree; - if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors, - symbol_freestanding_argument_list)) { - // Failed to parse. + auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors); + if (!errors.empty()) { return get_error_text(errors); } // Get the root argument list and extract arguments from it. // Test each of these. - assert(!tree.empty() && "Should have parsed a tree"); - tnode_t arg_list(&tree, &tree.at(0)); - while (auto arg = arg_list.next_in_list()) { - const wcstring arg_src = arg.get_source(arg_list_src); + for (const argument_t &arg : ast.top()->as()->arguments) { + const wcstring arg_src = arg.source(arg_list_src); if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) { return get_error_text(errors); } } - return none(); } diff --git a/src/parse_util.h b/src/parse_util.h index d7857b742..fd348ab9b 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -10,6 +10,10 @@ #include "parse_tree.h" #include "tokenizer.h" +namespace ast { +struct argument_t; +} + /// Find the beginning and end of the first subshell in the specified string. /// /// \param in the string to search for subshells @@ -141,10 +145,9 @@ maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_ /// Test if this argument contains any errors. Detected errors include syntax errors in command /// substitutions, improperly escaped characters and improper use of the variable expansion /// operator. This does NOT currently detect unterminated quotes. -class parse_node_t; + parser_test_error_bits_t parse_util_detect_errors_in_argument( - tnode_t node, const wcstring &arg_src, - parse_error_list_t *out_errors = nullptr); + const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors = nullptr); /// Given a string containing a variable expansion error, append an appropriate error to the errors /// list. 
The global_token_pos is the offset of the token in the larger source, and the dollar_pos From 6b24edccf620f91119997f1451de8fd78924da67 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 21 Jun 2020 16:45:26 -0700 Subject: [PATCH 09/13] Adopt the new AST in add_pending_with_file_detection This switches add_pending_with_file_detection from parsing with parse_tree to the new ast. --- src/history.cpp | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/history.cpp b/src/history.cpp index 0e2aa1385..d069a8278 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -1274,38 +1274,33 @@ void history_t::add_pending_with_file_detection(const wcstring &str, // Find all arguments that look like they could be file paths. bool needs_sync_write = false; - parse_node_tree_t tree; - parse_tree_from_string(str, parse_flag_none, &tree, nullptr); + using namespace ast; + auto ast = ast_t::parse(str); path_list_t potential_paths; - for (const parse_node_t &node : tree) { - if (!node.has_source()) { - continue; - } - - if (node.type == symbol_argument) { - wcstring potential_path = node.get_source(str); + for (const node_t &node : ast) { + if (const argument_t *arg = node.try_as()) { + wcstring potential_path = arg->source(str); bool unescaped = unescape_string_in_place(&potential_path, UNESCAPE_DEFAULT); if (unescaped && string_could_be_path(potential_path)) { potential_paths.push_back(potential_path); } - } else if (node.type == symbol_plain_statement) { + } else if (const decorated_statement_t *stmt = node.try_as()) { // Hack hack hack - if the command is likely to trigger an exit, then don't do // background file detection, because we won't be able to write it to our history file // before we exit. // Also skip it for 'echo'. 
This is because echo doesn't take file paths, but also // because the history file test wants to find the commands in the history file // immediately after running them, so it can't tolerate the asynchronous file detection. - if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) { + if (stmt->decoration() == parse_statement_decoration_exec) { needs_sync_write = true; } - if (maybe_t command = command_for_plain_statement({&tree, &node}, str)) { - unescape_string_in_place(&*command, UNESCAPE_DEFAULT); - if (*command == L"exit" || *command == L"reboot" || *command == L"restart" || - *command == L"echo") { - needs_sync_write = true; - } + wcstring command = stmt->command.source(str); + unescape_string_in_place(&command, UNESCAPE_DEFAULT); + if (command == L"exit" || command == L"reboot" || command == L"restart" || + command == L"echo") { + needs_sync_write = true; } } } From 886603b2ca6aa49fd49b42b51eae057cc50aae32 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jun 2020 14:48:16 -0700 Subject: [PATCH 10/13] Adopt the new AST in fish_indent This switches fish_indent from parsing with parse_tree to the new ast. This is the most difficult transition because the new ast retains less lexical information than the old parse tree. The strategy is: 1. Use parse_util_compute_indents to compute indenting for each token. 2. Compute the "gap text" between the text of significant tokens. This contains whitespace, comments, etc. 3. "Fix up" the gap text while leaving the significant tokens alone. 
--- src/fish_indent.cpp | 738 +++++++++++++++++++++++++++------------ tests/checks/indent.fish | 13 +- 2 files changed, 519 insertions(+), 232 deletions(-) diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 17197f84c..491f99e49 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -44,6 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "operation_context.h" #include "output.h" #include "parse_constants.h" +#include "parse_util.h" #include "print_help.h" #include "tnode.h" #include "wutil.h" // IWYU pragma: keep @@ -52,8 +53,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // See discussion at https://github.com/fish-shell/fish-shell/pull/6790 #define SPACES_PER_INDENT 4 -// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc. -using indent_t = unsigned int; static bool dump_parse_tree = false; static int ret = 0; @@ -84,232 +83,539 @@ static wcstring read_file(FILE *f) { return result; } -struct prettifier_t { +namespace { +/// From C++14. +template +using enable_if_t = typename std::enable_if::type; + +/// \return the number of escaping backslashes before a character. +/// \p idx may be "one past the end." +size_t count_preceding_backslashes(const wcstring &text, size_t idx) { + assert(idx <= text.size() && "Out of bounds"); + size_t backslashes = 0; + while (backslashes < idx && text.at(idx - backslashes - 1) == L'\\') { + backslashes++; + } + return backslashes; +} + +/// \return whether a character at a given index is escaped. +/// A character is escaped if it has an odd number of backslashes. +bool char_is_escaped(const wcstring &text, size_t idx) { + return count_preceding_backslashes(text, idx) % 2 == 1; +} + +using namespace ast; +struct pretty_printer_t { + // Note: this got somewhat more complicated after introducing the new AST, because that AST no + // longer encodes detailed lexical information (e.g. 
every newline). This feels more complex + // than necessary and would probably benefit from a more layered approach where we identify + // certain runs, weight line breaks, have a cost model, etc. + pretty_printer_t(const wcstring &src, bool do_indent) + : source(src), + indents(do_indent ? parse_util_compute_indents(source) : std::vector(src.size(), 0)), + ast(ast_t::parse(src, parse_flags())), + do_indent(do_indent), + gaps(compute_gaps()), + preferred_semi_locations(compute_preferred_semi_locations()) { + assert(indents.size() == source.size() && "indents and source should be same length"); + } + // Original source. const wcstring &source; + // The indents of our string. + // This has the same length as 'source' and describes the indentation level. + const std::vector indents; + + // The parsed ast. + const ast_t ast; + // The prettifier output. wcstring output; + // The indent of the source range which we are currently emitting. + int current_indent{0}; + // Whether to indent, or just insert spaces. const bool do_indent; - // Whether we are at the beginning of a new line. - bool has_new_line = true; + // Whether the next gap text should hide the first newline. + bool gap_text_mask_newline{false}; - // Whether the last token was a semicolon. - bool last_was_semicolon = false; + // The "gaps": a sorted set of ranges between tokens. + // These contain whitespace, comments, semicolons, and other lexical elements which are not + // present in the ast. + const std::vector gaps; - // Whether we need to append a continuation new line before continuing. - bool needs_continuation_newline = false; + // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines. + // This is computed ahead of time for convenience. + const std::vector preferred_semi_locations; - // Additional indentation due to line continuation (escaped newline) - uint32_t line_continuation_indent = 0; + // Flags we support. 
+ using gap_flags_t = uint32_t; + enum { + default_flags = 0, - prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {} + // Whether to allow line splitting via escaped newlines. + // For example, in argument lists: + // + // echo a \ + // b + // + // If this is not set, then split-lines will be joined. + allow_escaped_newlines = 1 << 0, - void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent, - parse_token_type_t parent_type); + // Whether to require a space before this token. + // This is used when emitting semis: + // echo a; echo b; + // No space required between 'a' and ';', or 'b' and ';'. + skip_space = 1 << 1, + }; - void maybe_prepend_escaped_newline(const parse_node_t &node) { - if (node.has_preceding_escaped_newline()) { - output.append(L" \\"); - append_newline(true); + // \return gap text flags for the gap text that comes *before* a given node type. + static gap_flags_t gap_text_flags_before_node(const node_t &node) { + gap_flags_t result = default_flags; + switch (node.type) { + // Allow escaped newlines in argument and redirection lists. + case type_t::argument: + case type_t::redirection: + result |= allow_escaped_newlines; + break; + + case type_t::token_base: + // Allow escaped newlines before && and ||, and also pipes. + switch (node.as()->type) { + case parse_token_type_andand: + case parse_token_type_oror: + case parse_token_type_pipe: + result |= allow_escaped_newlines; + break; + default: + break; + } + break; + + default: + break; + } + return result; + } + + // \return whether we are at the start of a new line. + bool at_line_start() const { return output.empty() || output.back() == L'\n'; } + + // \return whether we have a space before the output. + // This ignores escaped spaces and escaped newlines. + bool has_preceding_space() const { + long idx = static_cast(output.size()) - 1; + // Skip escaped newlines. + // This is historical. 
Example: + // + // cmd1 \ + // | cmd2 + // + // we want the pipe to "see" the space after cmd1. + // TODO: this is too tricky, we should factor this better. + while (idx >= 0 && output.at(idx) == L'\n') { + size_t backslashes = count_preceding_backslashes(source, idx); + if (backslashes % 2 == 0) { + // Not escaped. + return false; + } + idx -= (1 + backslashes); + } + return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx); + } + + // Entry point. Prettify our source code and return it. + wcstring prettify() { + output = wcstring{}; + node_visitor(*this).accept(ast.top()); + + // Trailing gap text. + emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags); + + // Replace all trailing newlines with just a single one. + while (!output.empty() && at_line_start()) { + output.pop_back(); + } + emit_newline(); + + wcstring result = std::move(output); + return result; + } + + // \return a substring of source. + wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); } + + // Return the gap ranges from our ast. + std::vector compute_gaps() const { + auto range_compare = [](source_range_t r1, source_range_t r2) { + if (r1.start != r2.start) return r1.start < r2.start; + return r1.length < r2.length; + }; + // Collect the token ranges into a list. + std::vector tok_ranges; + for (const node_t &node : ast) { + if (node.category == category_t::leaf) { + auto r = node.source_range(); + if (r.length > 0) tok_ranges.push_back(r); + } + } + // Place a zero length range at end to aid in our inverting. + tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0}); + + // Our tokens should be sorted. + assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare)); + + // For each range, add a gap range between the previous range and this range. 
+ std::vector gaps; + uint32_t prev_end = 0; + for (source_range_t tok_range : tok_ranges) { + assert(tok_range.start >= prev_end && + "Token range should not overlap or be out of order"); + if (tok_range.start >= prev_end) { + gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end}); + } + prev_end = tok_range.start + tok_range.length; + } + return gaps; + } + + // Return sorted list of semi-preferring semi_nl nodes. + std::vector compute_preferred_semi_locations() const { + std::vector result; + auto mark_as_semi = [&result](const optional_t &n) { + if (n && n->has_source()) result.push_back(n->range.start); + }; + + // andor_job_lists get semis if they are short enough. + for (const auto &node : ast) { + // See if we have a condition and an andor_job_list. + const optional_t *condition = nullptr; + const andor_job_list_t *andors = nullptr; + if (const auto *ifc = node.try_as()) { + condition = &ifc->condition.semi_nl; + andors = &ifc->andor_tail; + } else if (const auto *wc = node.try_as()) { + condition = &wc->condition.semi_nl; + andors = &wc->andor_tail; + } + + // This describes the heuristic of when to place and_or job lists on separate lines. + // That is, do we want: + // if true; and false + // or do we want: + // if true + // and false + // Lists with two or fewer get semis. + // Note the effective count is then three, because this list does not include the main + // condition. + if (andors && andors->count() > 0 && andors->count() <= 2) { + if (condition) mark_as_semi(*condition); + // Mark all but last of the andor list. + for (uint32_t i = 0; i + 1 < andors->count(); i++) { + mark_as_semi(andors->at(i)->job.semi_nl); + } + } + } + + // `x ; and y` gets semis if it has them already, and they are on the same line. 
+ for (const auto &node : ast) { + if (const auto *job_list = node.try_as()) { + const semi_nl_t *prev_job_semi_nl = nullptr; + for (const job_conjunction_t &job : *job_list) { + // Set up prev_job_semi_nl for the next iteration to make control flow easier. + const semi_nl_t *prev = prev_job_semi_nl; + prev_job_semi_nl = job.semi_nl.contents.get(); + + // Is this an 'and' or 'or' job? + if (!job.decorator) continue; + + // Now see if we want to mark 'prev' as allowing a semi. + // Did we have a previous semi_nl which was a newline? + if (!prev || substr(prev->range) != L";") continue; + + // Is there a newline between them? + assert(prev->range.start <= job.decorator->range.start && + "Ranges out of order"); + auto start = source.begin() + prev->range.start; + auto end = source.begin() + job.decorator->range.end(); + if (std::find(start, end, L'\n') == end) { + // We're going to allow the previous semi_nl to be a semi. + result.push_back(prev->range.start); + } + } + } + } + std::sort(result.begin(), result.end()); + return result; + } + + // Emit a space or indent as necessary, depending on the previous output. + void emit_space_or_indent(gap_flags_t flags = default_flags) { + if (at_line_start()) { + output.append(SPACES_PER_INDENT * current_indent, L' '); + } else if (!(flags & skip_space) && !has_preceding_space()) { + output.append(1, L' '); } } - void append_newline(bool is_continuation = false) { - output.push_back('\n'); - has_new_line = true; - needs_continuation_newline = false; - line_continuation_indent = is_continuation ? 1 : 0; + // Emit "gap text:" newlines and comments from the original source. + // Gap text may be a few things: + // + // 1. Just a space is common. We will trim the spaces to be empty. + // + // Here the gap text is the comment, followed by the newline: + // + // echo abc # arg + // echo def + // + // 2. It may also be an escaped newline: + // Here the gap text is a space, backslash, newline, space. 
+ // + // echo \ + // hi + // + // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe: + // + // begin | stuff + // + // We do not handle errors here - instead our caller does. + void emit_gap_text(const wcstring &gap_text, gap_flags_t flags) { + // Common case: if we are only spaces, do nothing. + if (gap_text.find_first_not_of(L' ') == wcstring::npos) return; + + // Look to see if there is an escaped newline. + // Emit it if either we allow it, or it comes before the first comment. + // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap + // text - we already know it has no semantic significance. + size_t escaped_nl = gap_text.find(L"\\\n"); + if (escaped_nl != wcstring::npos) { + size_t comment_idx = gap_text.find(L'#'); + if ((flags & allow_escaped_newlines) || + (comment_idx != wcstring::npos && escaped_nl < comment_idx)) { + // Emit a space before the escaped newline. + if (!at_line_start() && !has_preceding_space()) { + output.append(L" "); + } + output.append(L"\\\n"); + } + } + + // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we + // always emit one. + bool needs_nl = false; + + tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); + while (maybe_t tok = tokenizer.next()) { + wcstring tok_text = tokenizer.text_of(*tok); + + if (needs_nl) { + emit_newline(); + needs_nl = false; + if (tok_text == L"\n") continue; + } else if (gap_text_mask_newline) { + // We only respect mask_newline the first time through the loop. + gap_text_mask_newline = false; + if (tok_text == L"\n") continue; + } + + if (tok->type == token_type_t::comment) { + emit_space_or_indent(); + output.append(tok_text); + needs_nl = true; + } else if (tok->type == token_type_t::end) { + // This may be either a newline or semicolon. + // Semicolons found here are not part of the ast and can simply be removed. 
+ // Newlines are preserved unless mask_newline is set. + if (tok_text == L"\n") { + emit_newline(); + } + } else { + fprintf(stderr, + "Gap text should only have comments and newlines - instead found token " + "type %d with text: %ls\n", + (int)tok->type, tok_text.c_str()); + DIE("Gap text should only have comments and newlines"); + } + } + if (needs_nl) emit_newline(); } - // Append whitespace as necessary. If we have a newline, append the appropriate indent. - // Otherwise, append a space. - void append_whitespace(indent_t node_indent) { - if (needs_continuation_newline) { - append_newline(true); + /// \return the gap text ending at a given index into the string, or empty if none. + source_range_t gap_text_to(uint32_t end) const { + auto where = std::lower_bound( + gaps.begin(), gaps.end(), end, + [](source_range_t r, uint32_t end) { return r.start + r.length < end; }); + if (where == gaps.end() || where->start + where->length != end) { + // Not found. + return source_range_t{0, 0}; + } else { + return *where; } - if (!has_new_line) { - output.push_back(L' '); - } else if (do_indent) { - output.append((node_indent + line_continuation_indent) * SPACES_PER_INDENT, L' '); + } + + /// \return whether a range \p r overlaps an error range from our ast. + bool range_contained_error(source_range_t r) const { + const auto &errs = ast.extras().errors; + auto range_is_before = [](source_range_t x, source_range_t y) { + return x.start + x.length <= y.start; + }; + assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) && + "Error ranges should be sorted"); + return std::binary_search(errs.begin(), errs.end(), r, range_is_before); + } + + // Emit the gap text before a source range. + void emit_gap_text_before(source_range_t r, gap_flags_t flags) { + assert(r.start <= source.size() && "source out of bounds"); + uint32_t start = r.start; + if (start < indents.size()) current_indent = indents.at(start); + + // Find the gap text which ends at start. 
+ source_range_t range = gap_text_to(start); + if (range.length > 0) { + // If this range contained an error, append the gap text without modification. + // For example in: echo foo " + // We don't want to mess with the quote. + if (range_contained_error(range)) { + output.append(substr(range)); + } else { + emit_gap_text(substr(range), flags); + } } + // Always clear gap_text_mask_newline after emitting even empty gap text. + gap_text_mask_newline = false; + } + + /// Given a string \p input, remove unnecessary quotes, etc. + wcstring clean_text(const wcstring &input) { + // Unescape the string - this leaves special markers around if there are any + // expansions or anything. We specifically tell it to not compute backslash-escapes + // like \U or \x, because we want to leave them intact. + wcstring unescaped = input; + unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES); + + // Remove INTERNAL_SEPARATOR because that's a quote. + auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; }; + unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end()); + + // If no non-"good" char is left, use the unescaped version. + // This can be extended to other characters, but giving the precise list is tough, + // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes + // people feel more at ease. + auto goodchars = [](wchar_t ch) { + return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/'; + }; + if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() && + !unescaped.empty()) { + return unescaped; + } else { + return input; + } + } + + // Emit a range of original text. This indents as needed, and also inserts preceding gap text. + // If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such + // lines. 
+ void emit_text(source_range_t r, gap_flags_t flags) { + emit_gap_text_before(r, flags); + current_indent = indents.at(r.start); + if (r.length > 0) { + emit_space_or_indent(flags); + output.append(clean_text(substr(r))); + } + } + + template + void emit_node_text(const leaf_t &node) { + emit_text(node.range, gap_text_flags_before_node(node)); + } + + // Emit one newline. + void emit_newline() { output.push_back(L'\n'); } + + // Emit a semicolon. + void emit_semi() { output.push_back(L';'); } + + // For branch and list nodes, default is to visit their children. + template + enable_if_t visit(const Node &node) { + node_visitor(*this).accept_children_of(node); + } + + template + enable_if_t visit(const Node &node) { + node_visitor(*this).accept_children_of(node); + } + + // Leaf nodes we just visit their text. + void visit(const keyword_base_t &node) { emit_node_text(node); } + void visit(const token_base_t &node) { emit_node_text(node); } + void visit(const argument_t &node) { emit_node_text(node); } + void visit(const variable_assignment_t &node) { emit_node_text(node); } + + void visit(const semi_nl_t &node) { + // These are semicolons or newlines which are part of the ast. That means it includes e.g. + // ones terminating a job or 'if' header, but not random semis in job lists. We respect + // preferred_semi_locations to decide whether or not these should stay as newlines or + // become semicolons. + + // Check if we should prefer a semicolon. + bool prefer_semi = node.range.length > 0 && + std::binary_search(preferred_semi_locations.begin(), + preferred_semi_locations.end(), node.range.start); + emit_gap_text_before(node.range, gap_text_flags_before_node(node)); + + // Don't emit anything if the gap text put us on a newline (because it had a comment). + if (!at_line_start()) { + prefer_semi ? emit_semi() : emit_newline(); + + // If it was a semi but we emitted a newline, swallow a subsequent newline. 
+ if (!prefer_semi && substr(node.range) == L";") { + gap_text_mask_newline = true; + } + } + } + + void visit(const redirection_t &node) { + // No space between a redirection operator and its target (#2899). + emit_text(node.oper.range, default_flags); + emit_text(node.target.range, skip_space); + } + + void visit(const maybe_newlines_t &node) { + // Our newlines may have comments embedded in them, example: + // cmd | + // # something + // cmd2 + // Treat it as gap text. + if (node.range.length > 0) { + auto flags = gap_text_flags_before_node(node); + current_indent = indents.at(node.range.start); + emit_gap_text_before(node.range, flags); + wcstring text = source.substr(node.range.start, node.range.length); + emit_gap_text(text, flags); + } + } + + void visit(const begin_header_t &node) { + // 'begin' does not require a newline after it, but we insert one. + node_visitor(*this).accept_children_of(node); + if (!at_line_start()) { + emit_newline(); + } + } + + // The flags we use to parse. + static parse_tree_flags_t parse_flags() { + return parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_leave_unterminated | parse_flag_show_blank_lines; } }; - -// Dump a parse tree node in a form helpful to someone debugging the behavior of this program. 
-static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) { - wchar_t nextc = L' '; - wchar_t prevc = L' '; - wcstring source_txt; - if (node.source_start != SOURCE_OFFSET_INVALID && node.source_length != SOURCE_OFFSET_INVALID) { - int nextc_idx = node.source_start + node.source_length; - if (static_cast(nextc_idx) < source.size()) { - nextc = source[node.source_start + node.source_length]; - } - if (node.source_start > 0) prevc = source[node.source_start - 1]; - source_txt = source.substr(node.source_start, node.source_length); - } - wchar_t prevc_str[4] = {prevc, 0, 0, 0}; - wchar_t nextc_str[4] = {nextc, 0, 0, 0}; - if (prevc < L' ') { - prevc_str[0] = L'\\'; - prevc_str[1] = L'c'; - prevc_str[2] = prevc + '@'; - } - if (nextc < L' ') { - nextc_str[0] = L'\\'; - nextc_str[1] = L'c'; - nextc_str[2] = nextc + '@'; - } - std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n", - node.source_start, node.source_length, node_indent, - keyword_description(node.keyword), token_type_description(node.type), prevc_str, - source_txt.c_str(), nextc_str); -} - -void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, - indent_t node_indent, parse_token_type_t parent_type) { - // Use an explicit stack to avoid stack overflow. - struct pending_node_t { - node_offset_t index; - indent_t indent; - parse_token_type_t parent_type; - }; - std::stack pending_node_stack; - - pending_node_stack.push({node_idx, node_indent, parent_type}); - while (!pending_node_stack.empty()) { - pending_node_t args = pending_node_stack.top(); - pending_node_stack.pop(); - auto node_idx = args.index; - auto node_indent = args.indent; - auto parent_type = args.parent_type; - - const parse_node_t &node = tree.at(node_idx); - const parse_token_type_t node_type = node.type; - const parse_token_type_t prev_node_type = - node_idx > 0 ? 
tree.at(node_idx - 1).type : token_type_invalid; - - // Increment the indent if we are either a root job_list, or root case_item_list, or in an - // if or while header (#1665). - const bool is_root_job_list = - node_type == symbol_job_list && parent_type != symbol_job_list; - const bool is_root_case_list = - node_type == symbol_case_item_list && parent_type != symbol_case_item_list; - const bool is_if_while_header = - (node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) && - (parent_type == symbol_if_clause || parent_type == symbol_while_header); - - if (is_root_job_list || is_root_case_list || is_if_while_header) { - node_indent += 1; - } - - if (dump_parse_tree) dump_node(node_indent, node, source); - - // Prepend any escaped newline, but only for certain cases. - // We allow it to split arguments (including at the end - this is like trailing commas in - // lists, makes for better diffs), to separate pipelines (but it has to be *before* the - // pipe, so the pipe symbol is the first thing on the new line after the indent) and to - // separate &&/|| job lists (`and` and `or` are handled separately below, as they *allow* - // semicolons) - // TODO: Handle - // foo | \ - // bar - // so it just removes the escape - pipes don't need it. This was changed in some fish - // version, figure out which it was and if it is worth supporting. 
- if (prev_node_type == symbol_arguments_or_redirections_list || - prev_node_type == symbol_argument_list || node_type == parse_token_type_andand || - node_type == parse_token_type_pipe || node_type == parse_token_type_end) { - maybe_prepend_escaped_newline(node); - } - - // handle comments, which come before the text - if (node.has_comments()) { - auto comment_nodes = tree.comment_nodes_for_node(node); - for (const auto &comment : comment_nodes) { - maybe_prepend_escaped_newline(*comment.node()); - append_whitespace(node_indent); - auto source_range = comment.source_range(); - output.append(source, source_range->start, source_range->length); - needs_continuation_newline = true; - } - } - - if (node_type == parse_token_type_end) { - // For historical reasons, semicolon also get "TOK_END". - // We need to distinguish between them, because otherwise `a;;;;` gets extra lines - // instead of the semicolons. Semicolons are just ignored, unless they are followed by a - // command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a - // newline. - last_was_semicolon = false; - if (node.get_source(source) == L"\n") { - append_newline(); - } else if (!has_new_line) { - // The semicolon is only useful if we haven't just had a newline. - last_was_semicolon = true; - } - } else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) || - node_type == parse_special_type_parse_error) { - if (last_was_semicolon) { - // We keep the semicolon for `; and` and `; or`, - // others we turn into newlines. - if (node.keyword != parse_keyword_t::kw_and && - node.keyword != parse_keyword_t::kw_or) { - append_newline(); - } else { - output.push_back(L';'); - } - last_was_semicolon = false; - } - - if (node.has_source()) { - // Some type representing a particular token. 
- if (prev_node_type != parse_token_type_redirection) { - append_whitespace(node_indent); - } - wcstring unescaped{source, node.source_start, node.source_length}; - // Unescape the string - this leaves special markers around if there are any - // expansions or anything. We specifically tell it to not compute backslash-escapes - // like \U or \x, because we want to leave them intact. - unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES); - - // Remove INTERNAL_SEPARATOR because that's a quote. - auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; }; - unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), - unescaped.end()); - - // If no non-"good" char is left, use the unescaped version. - // This can be extended to other characters, but giving the precise list is tough, - // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes - // people feel more at ease. - auto goodchars = [](wchar_t ch) { - return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/'; - }; - if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == - unescaped.end() && - !unescaped.empty()) { - output.append(unescaped); - } else { - output.append(source, node.source_start, node.source_length); - } - has_new_line = false; - } - } - - // Put all children in stack in reversed order - // This way they will be processed in correct order. - for (node_offset_t idx = node.child_count; idx > 0; idx--) { - // Note: We pass our type to our child, which becomes its parent node type. - // Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe - // because we won't execute this call in that case since node.child_count should be - // zero. 
- pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type}); - } - } -} +} // namespace static const char *highlight_role_to_string(highlight_role_t role) { #define TEST_ROLE(x) \ @@ -395,17 +701,7 @@ static std::string make_pygments_csv(const wcstring &src) { // Entry point for prettification. static wcstring prettify(const wcstring &src, bool do_indent) { - parse_node_tree_t parse_tree; - int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_leave_unterminated | parse_flag_show_blank_lines); - if (!parse_tree_from_string(src, parse_flags, &parse_tree, nullptr)) { - return src; // we return the original string on failure - } - if (dump_parse_tree) { - const wcstring dump = parse_dump_tree(parse_tree, src); - std::fwprintf(stderr, L"%ls\n", dump.c_str()); - auto ast = ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments | parse_flag_show_extra_semis); @@ -413,17 +709,9 @@ static wcstring prettify(const wcstring &src, bool do_indent) { std::fwprintf(stderr, L"%ls\n", ast_dump.c_str()); } - // We may have a forest of disconnected trees on a parse failure. We have to handle all nodes - // that have no parent, and all parse errors. - prettifier_t prettifier{src, do_indent}; - for (node_offset_t i = 0; i < parse_tree.size(); i++) { - const parse_node_t &node = parse_tree.at(i); - if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) { - // A root node. 
- prettifier.prettify_node(parse_tree, i, 0, symbol_job_list); - } - } - return std::move(prettifier.output); + pretty_printer_t printer{src, do_indent}; + wcstring output = printer.prettify(); + return output; } /// Given a string and list of colors of the same size, return the string with HTML span elements diff --git a/tests/checks/indent.fish b/tests/checks/indent.fish index e3a41f93f..f6d1ff98d 100644 --- a/tests/checks/indent.fish +++ b/tests/checks/indent.fish @@ -49,7 +49,7 @@ end' | $fish_indent #CHECK: c #CHECK: echo thing #CHECK: end - + echo 'echo foo | echo banana' | $fish_indent #CHECK: echo foo | @@ -57,12 +57,11 @@ echo banana' | $fish_indent echo 'echo foo \\ ;' | $fish_indent -#CHECK: echo foo \ -#CHECK: +#CHECK: echo foo echo 'echo foo \\ ' | $fish_indent -#CHECK: echo foo \ +#CHECK: echo foo echo -n ' begin @@ -201,9 +200,9 @@ end; echo alpha " #CHECK: begin #CHECK: {{ }}echo hi #CHECK: else +#CHECK: #CHECK: {{^}}echo bye -#CHECK: end -#CHECK: echo alpha " +#CHECK: end; echo alpha " # issue 1665 echo -n ' @@ -285,7 +284,7 @@ echo bye #CHECK: #CHECK: echo hi | #CHECK: -#CHECK: echo bye +#CHECK: {{ }}echo bye echo 'a;;;;;;' | $fish_indent #CHECK: a From 6c6088f45c65a70e1639eba1b564f6050b866343 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 2 Jul 2020 00:12:28 -0700 Subject: [PATCH 11/13] Adopt the new AST in fish_tests This switches fish_tests from parse_tree to the new AST. 
--- src/fish_tests.cpp | 99 +++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 08e14bfce..e61046d5a 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4350,12 +4350,12 @@ static void test_new_parser_correctness() { {L"true || false; and true", true}, {L"true || ||", false}, {L"|| true", false}, - {L"true || \n\n false", true}, + {L"true || \n\n false", false}, }; for (const auto &test : parser_tests) { - parse_node_tree_t parse_tree; - bool success = parse_tree_from_string(test.src, parse_flag_none, &parse_tree, NULL); + auto ast = ast::ast_t::parse(test.src); + bool success = !ast.errored(); if (success && !test.ok) { err(L"\"%ls\" should NOT have parsed, but did", test.src); } else if (!success && test.ok) { @@ -4384,7 +4384,7 @@ static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_co } static void test_new_parser_fuzzing() { - say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + say(L"Fuzzing parser"); const wcstring fuzzes[] = { L"if", L"else", L"for", L"in", L"while", L"begin", L"function", L"switch", L"case", L"end", L"and", L"or", L"not", L"command", @@ -4395,7 +4395,6 @@ static void test_new_parser_fuzzing() { wcstring src; src.reserve(128); - parse_node_tree_t node_tree; parse_error_list_t errors; double start = timef(); @@ -4409,7 +4408,7 @@ static void test_new_parser_fuzzing() { unsigned long permutation = 0; while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src)) { - parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors); + ast::ast_t::parse(src); } if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation); } @@ -4421,33 +4420,36 @@ static void test_new_parser_fuzzing() { // true if successful. 
static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, enum parse_statement_decoration_t *out_deco) { + using namespace ast; out_cmd->clear(); out_joined_args->clear(); *out_deco = parse_statement_decoration_none; - parse_node_tree_t tree; - if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) { - return false; - } + auto ast = ast_t::parse(src); + if (ast.errored()) return false; // Get the statement. Should only have one. - tnode_t job_list{&tree, &tree.at(0)}; - auto stmts = job_list.descendants(); - if (stmts.size() != 1) { - say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str()); - return false; + const decorated_statement_t *statement = nullptr; + for (const auto &n : ast) { + if (const auto *tmp = n.try_as()) { + if (statement) { + say(L"More than one decorated statement found in '%ls'", src.c_str()); + return false; + } + statement = tmp; + } } - tnode_t stmt = stmts.at(0); // Return its decoration and command. - *out_deco = get_decoration(stmt); - *out_cmd = *command_for_plain_statement(stmt, src); + *out_deco = statement->decoration(); + *out_cmd = statement->command.source(src); // Return arguments separated by spaces. bool first = true; - for (auto arg_node : stmt.descendants()) { + for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) { + if (!arg.is_argument()) continue; if (!first) out_joined_args->push_back(L' '); - out_joined_args->append(arg_node.get_source(src)); + out_joined_args->append(arg.source(src)); first = false; } @@ -4456,19 +4458,22 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o // Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is // not (issue #1240). 
-template +template static void check_function_help(const wchar_t *src) { - parse_node_tree_t tree; - if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) { + using namespace ast; + auto ast = ast_t::parse(src); + if (ast.errored()) { err(L"Failed to parse '%ls'", src); } - tnode_t node{&tree, &tree.at(0)}; - auto node_list = node.descendants(); - if (node_list.size() == 0) { - err(L"Failed to find node of type '%ls'", token_type_description(Type::token)); - } else if (node_list.size() > 1) { - err(L"Found too many nodes of type '%ls'", token_type_description(Type::token)); + int count = 0; + for (const node_t &node : ast) { + count += (node.type == Type); + } + if (count == 0) { + err(L"Failed to find node of type '%ls'", ast_type_to_string(Type)); + } else if (count > 1) { + err(L"Found too many nodes of type '%ls'", ast_type_to_string(Type)); } } @@ -4515,30 +4520,32 @@ static void test_new_parser_ll2() { test.src.c_str(), (int)test.deco, (int)deco, (long)__LINE__); } - check_function_help(L"function -h"); - check_function_help(L"function --help"); - check_function_help(L"function --foo; end"); - check_function_help(L"function foo; end"); + check_function_help(L"function -h"); + check_function_help(L"function --help"); + check_function_help(L"function --foo; end"); + check_function_help(L"function foo; end"); } static void test_new_parser_ad_hoc() { + using namespace ast; // Very ad-hoc tests for issues encountered. say(L"Testing new parser ad hoc tests"); // Ensure that 'case' terminates a job list. const wcstring src = L"switch foo ; case bar; case baz; end"; - parse_node_tree_t parse_tree; - bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL); - if (!success) { + auto ast = ast_t::parse(src); + if (ast.errored()) { err(L"Parsing failed"); } - // Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd + // Expect two case_item_lists. The bug was that we'd // try to run a command 'case'. 
- tnode_t root{&parse_tree, &parse_tree.at(0)}; - auto node_list = root.descendants(); - if (node_list.size() != 3) { - err(L"Expected 3 case item nodes, found %lu", node_list.size()); + int count = 0; + for (const auto &n : ast) { + count += (n.type == type_t::case_item); + } + if (count != 2) { + err(L"Expected 2 case item nodes, found %d", count); } } @@ -4559,7 +4566,7 @@ static void test_new_parser_errors() { {L"if true ; end ; else", parse_error_unbalancing_else}, {L"case", parse_error_unbalancing_case}, - {L"if true ; case ; end", parse_error_unbalancing_case}, + {L"if true ; case ; end", parse_error_generic}, }; for (const auto &test : tests) { @@ -4567,15 +4574,17 @@ static void test_new_parser_errors() { parse_error_code_t expected_code = test.code; parse_error_list_t errors; - parse_node_tree_t parse_tree; - bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors); - if (success) { + auto ast = ast::ast_t::parse(src, parse_flag_none, &errors); + if (!ast.errored()) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); } if (errors.size() != 1) { err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", src.c_str(), errors.size()); + for (const auto &err : errors) { + fprintf(stderr, "%ls\n", err.describe(src, false).c_str()); + } } else if (errors.at(0).code != expected_code) { err(L"Source '%ls' was expected to produce error code %lu, but instead produced error " L"code %lu", From 3534c07584fec4fc7d69842ce4dc32ca8c407f16 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 3 Jul 2020 11:16:51 -0700 Subject: [PATCH 12/13] Adopt the new AST in parse_execution parse_execution is what turns a parsed tree into jobs, etc. Switch it from parse_tree to the new AST. 
--- src/builtin_function.cpp | 5 +- src/builtin_function.h | 7 +- src/exec.cpp | 6 +- src/fish_tests.cpp | 2 + src/function.cpp | 17 +- src/function.h | 6 +- src/parse_execution.cpp | 756 ++++++++++++++++++++++----------------- src/parse_execution.h | 89 ++--- src/parse_tree.cpp | 14 +- src/parse_tree.h | 15 +- src/parse_util.cpp | 4 +- src/parser.cpp | 16 +- src/parser.h | 4 +- src/proc.h | 8 +- 14 files changed, 530 insertions(+), 419 deletions(-) diff --git a/src/builtin_function.cpp b/src/builtin_function.cpp index 28b40706d..5300b3e07 100644 --- a/src/builtin_function.cpp +++ b/src/builtin_function.cpp @@ -200,8 +200,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring /// Define a function. Calls into `function.cpp` to perform the heavy lifting of defining a /// function. int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args, - const parsed_source_ref_t &source, - tnode_t func_node) { + const parsed_source_ref_t &source, const ast::block_statement_t &func_node) { assert(source && "Missing source in builtin_function"); // The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with // that property. This is needed because this builtin has a different signature than the other @@ -252,7 +251,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis props->shadow_scope = opts.shadow_scope; props->named_arguments = std::move(opts.named_arguments); props->parsed_source = source; - props->func_node = func_node; + props->func_node = &func_node; // Populate inherit_vars. 
for (const wcstring &name : opts.inherit_vars) { diff --git a/src/builtin_function.h b/src/builtin_function.h index 9499a9a9f..4da1a378c 100644 --- a/src/builtin_function.h +++ b/src/builtin_function.h @@ -8,7 +8,10 @@ class parser_t; struct io_streams_t; +namespace ast { +struct block_statement_t; +} + int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args, - const parsed_source_ref_t &source, - tnode_t func_node); + const parsed_source_ref_t &source, const ast::block_statement_t &func_node); #endif diff --git a/src/exec.cpp b/src/exec.cpp index bca3f90cd..45ce0cae1 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -623,10 +623,10 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job, if (p->type == process_type_t::block_node) { const parsed_source_ref_t &source = p->block_node_source; - tnode_t node = p->internal_block_node; + const ast::statement_t *node = p->internal_block_node; assert(source && node && "Process is missing node info"); return [=](parser_t &parser) { - return parser.eval_node(source, node, io_chain, job_group).status; + return parser.eval_node(source, *node, io_chain, job_group).status; }; } else { assert(p->type == process_type_t::function); @@ -638,7 +638,7 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job, auto argv = move_to_sharedptr(p->get_argv_array().to_list()); return [=](parser_t &parser) { // Pull out the job list from the function. 
- tnode_t body = props->func_node.child<1>(); + const ast::job_list_t &body = props->func_node->jobs; const block_t *fb = function_prepare_environment(parser, *argv, *props); auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group); function_restore_environment(parser, fb); diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index e61046d5a..5aa171a8b 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4567,6 +4567,8 @@ static void test_new_parser_errors() { {L"case", parse_error_unbalancing_case}, {L"if true ; case ; end", parse_error_generic}, + + {L"true | and", parse_error_andor_in_pipeline}, }; for (const auto &test : tests) { diff --git a/src/function.cpp b/src/function.cpp index e841d55fc..7ec28275e 100644 --- a/src/function.cpp +++ b/src/function.cpp @@ -224,17 +224,14 @@ bool function_get_definition(const wcstring &name, wcstring &out_definition) { const function_info_t *func = funcset->get_info(name); if (!func || !func->props) return false; // We want to preserve comments that the AST attaches to the header (#5285). - // Take everything from the end of the header to the end of the body. + // Take everything from the end of the header to the 'end' keyword. 
const auto &props = func->props; - namespace g = grammar; - tnode_t header = props->func_node.child<0>(); - tnode_t jobs = props->func_node.child<1>(); - auto header_src = header.source_range(); - auto jobs_src = jobs.source_range(); - if (header_src && jobs_src) { + auto header_src = props->func_node->header->try_source_range(); + auto end_kw_src = props->func_node->end.try_source_range(); + if (header_src && end_kw_src) { uint32_t body_start = header_src->start + header_src->length; - uint32_t body_end = jobs_src->start + jobs_src->length; - assert(body_start <= jobs_src->start && "job list must come after header"); + uint32_t body_end = end_kw_src->start; + assert(body_start <= body_end && "end keyword should come after header"); out_definition = wcstring(props->parsed_source->src, body_start, body_end - body_start); } return true; @@ -313,7 +310,7 @@ int function_get_definition_lineno(const wcstring &name) { // return one plus the number of newlines at offsets less than the start of our function's // statement (which includes the header). // TODO: merge with line_offset_of_character_at_offset? - auto source_range = func->props->func_node.source_range(); + auto source_range = func->props->func_node->try_source_range(); assert(source_range && "Function has no source range"); uint32_t func_start = source_range->start; const wcstring &source = func->props->parsed_source->src; diff --git a/src/function.h b/src/function.h index 3f612efab..2de5f3081 100644 --- a/src/function.h +++ b/src/function.h @@ -15,6 +15,10 @@ class parser_t; +namespace ast { +struct block_statement_t; +} + /// A function's constant properties. These do not change once initialized. struct function_properties_t { /// Parsed source containing the function. @@ -23,7 +27,7 @@ struct function_properties_t { /// Node containing the function statement, pointing into parsed_source. /// We store block_statement, not job_list, so that comments attached to the header are /// preserved. 
- tnode_t func_node; + const ast::block_statement_t *func_node; /// List of all named arguments for this function. wcstring_list_t named_arguments; diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index ef3b77f6b..22f4669e4 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -25,6 +25,7 @@ #include #include +#include "ast.h" #include "builtin.h" #include "builtin_function.h" #include "common.h" @@ -44,40 +45,74 @@ #include "proc.h" #include "reader.h" #include "timer.h" -#include "tnode.h" #include "tokenizer.h" #include "trace.h" #include "util.h" #include "wildcard.h" #include "wutil.h" -namespace g = grammar; - /// These are the specific statement types that support redirections. -static constexpr bool type_is_redirectable_block(parse_token_type_t type) { - return type == symbol_block_statement || type == symbol_if_statement || - type == symbol_switch_statement; +static constexpr bool type_is_redirectable_block(ast::type_t type) { + using t = ast::type_t; + return type == t::block_statement || type == t::if_statement || type == t::switch_statement; } -static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) { +static bool specific_statement_type_is_redirectable_block(const ast::node_t &node) { return type_is_redirectable_block(node.type); } /// Get the name of a redirectable block, for profiling purposes. 
-static wcstring profiling_cmd_name_for_redirectable_block(const parse_node_t &node, - const parse_node_tree_t &tree, - const wcstring &src) { +static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &node, + const parsed_source_t &pstree) { + using namespace ast; assert(specific_statement_type_is_redirectable_block(node)); - assert(node.has_source()); + + auto source_range = node.try_source_range(); + assert(source_range.has_value() && "No source range for block"); + + size_t src_end = 0; + switch (node.type) { + case type_t::block_statement: { + const node_t *block_header = node.as()->header.get(); + switch (block_header->type) { + case type_t::for_header: + src_end = block_header->as()->semi_nl.source_range().start; + break; + + case type_t::while_header: + src_end = block_header->as()->condition.source_range().end(); + break; + + case type_t::function_header: + src_end = block_header->as()->semi_nl.source_range().start; + break; + + case type_t::begin_header: + src_end = block_header->as()->kw_begin.source_range().end(); + break; + + default: + DIE("Unexpected block header type"); + } + } break; + + case type_t::if_statement: + src_end = node.as()->if_clause.condition.source_range().end(); + break; + + case type_t::switch_statement: + src_end = node.as()->semi_nl.source_range().start; + break; + + default: + DIE("Not a redirectable block type"); + break; + } + + assert(src_end >= source_range->start && "Invalid source end"); // Get the source for the block, and cut it at the next statement terminator. 
- const size_t src_start = node.source_start; - - auto term = tree.find_child(node); - assert(term.has_source() && term.source_range()->start >= src_start); - size_t src_len = term.source_range()->start - src_start; - - wcstring result = wcstring(src, src_start, src_len); + wcstring result = pstree.src.substr(source_range->start, src_end - source_range->start); result.append(L"..."); return result; } @@ -98,12 +133,13 @@ parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree, // Utilities -wcstring parse_execution_context_t::get_source(const parse_node_t &node) const { - return node.get_source(pstree->src); +wcstring parse_execution_context_t::get_source(const ast::node_t &node) const { + return node.source(pstree->src); } -tnode_t parse_execution_context_t::infinite_recursive_statement_in_job_list( - tnode_t job_list, wcstring *out_func_name) const { +const ast::decorated_statement_t * +parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::job_list_t &jobs, + wcstring *out_func_name) const { // This is a bit fragile. It is a test to see if we are inside of function call, but not inside // a block in that function call. If, in the future, the rules for what block scopes are pushed // on function invocation changes, then this check will break. @@ -111,60 +147,67 @@ tnode_t parse_execution_context_t::infinite_recursive_statem bool is_within_function_call = (current && parent && current->type() == block_type_t::top && parent->is_function_call()); if (!is_within_function_call) { - return {}; + return nullptr; } // Get the function name of the immediate block. const wcstring &forbidden_function_name = parent->function_name; // Get the first job in the job list. 
- tnode_t first_job = job_list.try_get_child().child<0>(); - if (!first_job) { - return {}; - } + const ast::job_conjunction_t *jc = jobs.at(0); + if (!jc) return nullptr; + const ast::job_t *job = &jc->job; - // Here's the statement node we find that's infinite recursive. - tnode_t infinite_recursive_statement; + // Helper to return if a statement is infinitely recursive in this function. + auto statement_recurses = + [&](const ast::statement_t &stat) -> const ast::decorated_statement_t * { + // Ignore non-decorated statements like `if`, etc. + const ast::decorated_statement_t *dc = + stat.contents.contents->try_as(); + if (!dc) return nullptr; - // Ignore the jobs variable assigment and "time" prefixes. - tnode_t statement = first_job.child<2>(); - tnode_t continuation = first_job.child<3>(); - const null_environment_t nullenv{}; - while (statement) { - // Get the list of plain statements. // Ignore statements with decorations like 'builtin' or 'command', since those // are not infinite recursion. In particular that is what enables 'wrapper functions'. - tnode_t plain_statement = - statement.try_get_child() - .try_get_child(); - if (plain_statement) { - maybe_t cmd = command_for_plain_statement(plain_statement, pstree->src); - if (cmd && - expand_one(*cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) && - cmd == forbidden_function_name) { - // This is it. - infinite_recursive_statement = plain_statement; - if (out_func_name != nullptr) { - *out_func_name = forbidden_function_name; - } + if (dc->decoration() != parse_statement_decoration_none) return nullptr; + + // Check the command. + wcstring cmd = dc->command.source(pstree->src); + bool forbidden = + !cmd.empty() && + expand_one(cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) && + cmd == forbidden_function_name; + return forbidden ? dc : nullptr; + }; + + const ast::decorated_statement_t *infinite_recursive_statement = nullptr; + + // Check main statement. 
+ infinite_recursive_statement = statement_recurses(jc->job.statement); + + // Check piped remainder. + if (!infinite_recursive_statement) { + for (const ast::job_continuation_t &c : job->continuation) { + if (const auto *s = statement_recurses(c.statement)) { + infinite_recursive_statement = s; break; } } - statement = continuation.next_in_list(); } + if (infinite_recursive_statement && out_func_name) { + *out_func_name = forbidden_function_name; + } + // may be null return infinite_recursive_statement; } process_type_t parse_execution_context_t::process_type_for_command( - tnode_t statement, const wcstring &cmd) const { + const ast::decorated_statement_t &statement, const wcstring &cmd) const { enum process_type_t process_type = process_type_t::external; // Determine the process type, which depends on the statement decoration (command, builtin, // etc). - enum parse_statement_decoration_t decoration = get_decoration(statement); - - switch (decoration) { + switch (statement.decoration()) { case parse_statement_decoration_exec: process_type = process_type_t::exec; break; @@ -209,31 +252,33 @@ maybe_t parse_execution_context_t::check_end_execution() } /// Return whether the job contains a single statement, of block type, with no redirections. -bool parse_execution_context_t::job_is_simple_block(tnode_t job_node) const { - tnode_t statement = job_node.child<2>(); - +bool parse_execution_context_t::job_is_simple_block(const ast::job_t &job) const { + using namespace ast; // Must be no pipes. - if (job_node.child<3>().try_get_child()) { + if (!job.continuation.empty()) { return false; } - // Helper to check if an argument or redirection list has no redirections. - auto is_empty = [](tnode_t lst) -> bool { - return !lst.next_in_list(); + // Helper to check if an argument_or_redirection_list_t has no redirections. 
+ auto no_redirs = [](const argument_or_redirection_list_t &list) -> bool { + for (const argument_or_redirection_t &val : list) { + if (val.is_redirection()) return false; + } + return true; }; // Check if we're a block statement with redirections. We do it this obnoxious way to preserve // type safety (in case we add more specific statement types). - const parse_node_t &specific_statement = statement.get_child_node<0>(); - switch (specific_statement.type) { - case symbol_block_statement: - return is_empty(statement.require_get_child().child<3>()); - case symbol_switch_statement: - return is_empty(statement.require_get_child().child<5>()); - case symbol_if_statement: - return is_empty(statement.require_get_child().child<3>()); - case symbol_not_statement: - case symbol_decorated_statement: + const node_t &ss = *job.statement.contents.contents; + switch (ss.type) { + case type_t::block_statement: + return no_redirs(ss.as()->args_or_redirs); + case type_t::switch_statement: + return no_redirs(ss.as()->args_or_redirs); + case type_t::if_statement: + return no_redirs(ss.as()->args_or_redirs); + case type_t::not_statement: + case type_t::decorated_statement: // not block statements return false; default: @@ -243,14 +288,19 @@ bool parse_execution_context_t::job_is_simple_block(tnode_t job_node) co } end_execution_reason_t parse_execution_context_t::run_if_statement( - tnode_t statement, const block_t *associated_block) { + const ast::if_statement_t &statement, const block_t *associated_block) { + using namespace ast; + using job_list_t = ast::job_list_t; end_execution_reason_t result = end_execution_reason_t::ok; // We have a sequence of if clauses, with a final else, resulting in a single job list that we // execute. 
- tnode_t job_list_to_execute; - tnode_t if_clause = statement.child<0>(); - tnode_t else_clause = statement.child<1>(); + const job_list_t *job_list_to_execute = nullptr; + const if_clause_t *if_clause = &statement.if_clause; + + // Index of the *next* elseif_clause to test. + const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses; + size_t next_elseif_idx = 0; // We start with the 'if'. trace_if_enabled(*parser, L"if"); @@ -262,59 +312,54 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( } // An if condition has a job and a "tail" of andor jobs, e.g. "foo ; and bar; or baz". - tnode_t condition_head = if_clause.child<1>(); - tnode_t condition_boolean_tail = if_clause.child<3>(); - // Check the condition and the tail. We treat end_execution_reason_t::error here as failure, // in accordance with historic behavior. - end_execution_reason_t cond_ret = run_job_conjunction(condition_head, associated_block); + end_execution_reason_t cond_ret = + run_job_conjunction(if_clause->condition, associated_block); if (cond_ret == end_execution_reason_t::ok) { - cond_ret = run_job_list(condition_boolean_tail, associated_block); + cond_ret = run_job_list(if_clause->andor_tail, associated_block); } const bool take_branch = (cond_ret == end_execution_reason_t::ok) && parser->get_last_status() == EXIT_SUCCESS; if (take_branch) { // Condition succeeded. - job_list_to_execute = if_clause.child<4>(); + job_list_to_execute = &if_clause->body; break; } - auto else_cont = else_clause.try_get_child(); - if (!else_cont) { - // 'if' condition failed, no else clause, return 0, we're done. - parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK)); - break; + + // See if we have an elseif. + const auto *elseif_clause = elseif_clauses.at(next_elseif_idx++); + if (elseif_clause) { + trace_if_enabled(*parser, L"else if"); + if_clause = &elseif_clause->if_clause; } else { - // We have an 'else continuation' (either else-if or else). 
- if (auto maybe_if_clause = else_cont.try_get_child()) { - // it's an 'else if', go to the next one. - if_clause = maybe_if_clause; - else_clause = else_cont.try_get_child(); - assert(else_clause && "Expected to have an else clause"); - trace_if_enabled(*parser, L"else if"); - } else { - // It's the final 'else', we're done. - job_list_to_execute = else_cont.try_get_child(); - assert(job_list_to_execute && "Should have a job list"); - trace_if_enabled(*parser, L"else"); - break; - } + break; } } - // Execute any job list we got. - if (job_list_to_execute) { + if (!job_list_to_execute) { + // our ifs and elseifs failed. + // Check our else body. + if (statement.else_clause) { + trace_if_enabled(*parser, L"else"); + job_list_to_execute = &statement.else_clause->body; + } + } + + if (!job_list_to_execute) { + // 'if' condition failed, no else clause, return 0, we're done. + // No job list means no successful conditions, so return 0 (issue #1443). + parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK)); + } else { + // Execute the job list we got. block_t *ib = parser->push_block(block_t::if_block()); - run_job_list(job_list_to_execute, ib); + run_job_list(*job_list_to_execute, ib); if (auto ret = check_end_execution()) { result = *ret; } parser->pop_block(ib); - } else { - // No job list means no successful conditions, so return 0 (issue #1443). - parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK)); } - trace_if_enabled(*parser, L"end if"); // It's possible there's a last-minute cancellation (issue #1297). @@ -327,7 +372,7 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( } end_execution_reason_t parse_execution_context_t::run_begin_statement( - tnode_t contents) { + const ast::job_list_t &contents) { // Basic begin/end block. 
Push a scope block, run jobs, pop it trace_if_enabled(*parser, L"begin"); block_t *sb = parser->push_block(block_t::scope_block(block_type_t::begin)); @@ -339,10 +384,12 @@ end_execution_reason_t parse_execution_context_t::run_begin_statement( // Define a function. end_execution_reason_t parse_execution_context_t::run_function_statement( - tnode_t statement, tnode_t header) { + const ast::block_statement_t &statement, const ast::function_header_t &header) { + using namespace ast; // Get arguments. wcstring_list_t arguments; - argument_node_list_t arg_nodes = header.descendants(); + ast_args_list_t arg_nodes = get_argument_nodes(header.args); + arg_nodes.insert(arg_nodes.begin(), &header.first_arg); end_execution_reason_t result = this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob); @@ -362,48 +409,46 @@ end_execution_reason_t parse_execution_context_t::run_function_statement( } end_execution_reason_t parse_execution_context_t::run_block_statement( - tnode_t statement, const block_t *associated_block) { - tnode_t bheader = statement.child<0>(); - tnode_t contents = statement.child<1>(); - + const ast::block_statement_t &statement, const block_t *associated_block) { + const ast::node_t &bh = *statement.header.contents; + const ast::job_list_t &contents = statement.jobs; end_execution_reason_t ret = end_execution_reason_t::ok; - if (auto header = bheader.try_get_child()) { - ret = run_for_statement(header, contents); - } else if (auto header = bheader.try_get_child()) { - ret = run_while_statement(header, contents, associated_block); - } else if (auto header = bheader.try_get_child()) { - ret = run_function_statement(statement, header); - } else if (auto header = bheader.try_get_child()) { + if (const auto *fh = bh.try_as()) { + ret = run_for_statement(*fh, contents); + } else if (const auto *wh = bh.try_as()) { + ret = run_while_statement(*wh, contents, associated_block); + } else if (const auto *fh = bh.try_as()) { + ret = 
run_function_statement(statement, *fh); + } else if (bh.try_as()) { ret = run_begin_statement(contents); } else { - FLOGF(error, L"Unexpected block header: %ls\n", bheader.node()->describe().c_str()); + FLOGF(error, L"Unexpected block header: %ls\n", bh.describe().c_str()); PARSER_DIE(); } return ret; } end_execution_reason_t parse_execution_context_t::run_for_statement( - tnode_t header, tnode_t block_contents) { + const ast::for_header_t &header, const ast::job_list_t &block_contents) { // Get the variable name: `for var_name in ...`. We expand the variable name. It better result // in just one. - tnode_t var_name_node = header.child<1>(); - wcstring for_var_name = get_source(var_name_node); + wcstring for_var_name = header.var_name.source(get_source()); if (!expand_one(for_var_name, expand_flags_t{}, ctx)) { - return report_error(STATUS_EXPAND_ERROR, var_name_node, + return report_error(STATUS_EXPAND_ERROR, header.var_name, FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str()); } // Get the contents to iterate over. 
wcstring_list_t arguments; - end_execution_reason_t ret = this->expand_arguments_from_nodes( - get_argument_nodes(header.child<3>()), &arguments, nullglob); + ast_args_list_t arg_nodes = get_argument_nodes(header.args); + end_execution_reason_t ret = this->expand_arguments_from_nodes(arg_nodes, &arguments, nullglob); if (ret != end_execution_reason_t::ok) { return ret; } auto var = parser->vars().get(for_var_name, ENV_DEFAULT); if (var && var->read_only()) { - return report_error(STATUS_INVALID_ARGS, var_name_node, + return report_error(STATUS_INVALID_ARGS, header.var_name, L"You cannot use read-only variable '%ls' in a for loop", for_var_name.c_str()); } @@ -416,7 +461,7 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( assert(retval == ENV_OK); if (!valid_var_name(for_var_name)) { - return report_error(STATUS_INVALID_ARGS, var_name_node, BUILTIN_ERR_VARNAME, L"for", + return report_error(STATUS_INVALID_ARGS, header.var_name, BUILTIN_ERR_VARNAME, L"for", for_var_name.c_str()); } @@ -454,17 +499,16 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( } end_execution_reason_t parse_execution_context_t::run_switch_statement( - tnode_t statement) { + const ast::switch_statement_t &statement) { // Get the switch variable. - tnode_t switch_value_n = statement.child<1>(); - const wcstring switch_value = get_source(switch_value_n); + const wcstring switch_value = get_source(statement.argument); // Expand it. We need to offset any errors by the position of the string. 
completion_list_t switch_values_expanded; parse_error_list_t errors; auto expand_ret = expand_string(switch_value, &switch_values_expanded, expand_flag::no_descriptions, ctx, &errors); - parse_error_offset_source_start(&errors, switch_value_n.source_range()->start); + parse_error_offset_source_start(&errors, statement.argument.range.start); switch (expand_ret.result) { case expand_result_t::error: @@ -474,12 +518,12 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( return end_execution_reason_t::cancelled; case expand_result_t::wildcard_no_match: - return report_error(STATUS_UNMATCHED_WILDCARD, switch_value_n, WILDCARD_ERR_MSG, - get_source(switch_value_n).c_str()); + return report_error(STATUS_UNMATCHED_WILDCARD, statement.argument, WILDCARD_ERR_MSG, + get_source(statement.argument).c_str()); case expand_result_t::ok: if (switch_values_expanded.size() > 1) { - return report_error(STATUS_INVALID_ARGS, switch_value_n, + return report_error(STATUS_INVALID_ARGS, statement.argument, _(L"switch: Expected at most one argument, got %lu\n"), switch_values_expanded.size()); } @@ -497,9 +541,8 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( block_t *sb = parser->push_block(block_t::switch_block()); // Expand case statements. - tnode_t case_item_list = statement.child<3>(); - tnode_t matching_case_item{}; - while (auto case_item = case_item_list.next_in_list()) { + const ast::case_item_t *matching_case_item = nullptr; + for (const ast::case_item_t &case_item : statement.cases) { if (auto ret = check_end_execution()) { result = *ret; break; @@ -508,7 +551,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // Expand arguments. A case item list may have a wildcard that fails to expand to // anything. We also report case errors, but don't stop execution; i.e. a case item that // contains an unexpandable process will report and then fail to match. 
- auto arg_nodes = get_argument_nodes(case_item.child<1>()); + ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments); wcstring_list_t case_args; end_execution_reason_t case_result = this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob); @@ -520,7 +563,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // If this matched, we're done. if (match) { - matching_case_item = case_item; + matching_case_item = &case_item; break; } } @@ -531,8 +574,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( if (matching_case_item) { // Success, evaluate the job list. assert(result == end_execution_reason_t::ok && "Expected success"); - auto job_list = matching_case_item.child<3>(); - result = this->run_job_list(job_list, sb); + result = this->run_job_list(matching_case_item->body, sb); } parser->pop_block(sb); @@ -540,7 +582,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( } end_execution_reason_t parse_execution_context_t::run_while_statement( - tnode_t header, tnode_t contents, + const ast::while_header_t &header, const ast::job_list_t &contents, const block_t *associated_block) { end_execution_reason_t ret = end_execution_reason_t::ok; @@ -555,10 +597,6 @@ end_execution_reason_t parse_execution_context_t::run_while_statement( // affordance for the first condition. bool first_cond_check = true; - // The conditions of the while loop. - tnode_t condition_head = header.child<1>(); - tnode_t condition_boolean_tail = header.child<3>(); - trace_if_enabled(*parser, L"while"); // Run while the condition is true. @@ -571,9 +609,9 @@ end_execution_reason_t parse_execution_context_t::run_while_statement( // Check the condition. 
end_execution_reason_t cond_ret = - this->run_job_conjunction(condition_head, associated_block); + this->run_job_conjunction(header.condition, associated_block); if (cond_ret == end_execution_reason_t::ok) { - cond_ret = run_job_list(condition_boolean_tail, associated_block); + cond_ret = run_job_list(header.andor_tail, associated_block); } // If the loop condition failed to execute, then exit the loop without modifying the exit @@ -623,13 +661,15 @@ end_execution_reason_t parse_execution_context_t::run_while_statement( } // Reports an error. Always returns end_execution_reason_t::error. -end_execution_reason_t parse_execution_context_t::report_error(int status, const parse_node_t &node, +end_execution_reason_t parse_execution_context_t::report_error(int status, const ast::node_t &node, const wchar_t *fmt, ...) const { + auto r = node.source_range(); + // Create an error. parse_error_list_t error_list = parse_error_list_t(1); parse_error_t *error = &error_list.at(0); - error->source_start = node.source_start; - error->source_length = node.source_length; + error->source_start = r.start; + error->source_length = r.length; error->code = parse_error_syntax; // hackish va_list va; @@ -662,9 +702,27 @@ end_execution_reason_t parse_execution_context_t::report_errors( return end_execution_reason_t::error; } +// static +parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( + const ast::argument_list_t &args) { + ast_args_list_t result; + for (const ast::argument_t &arg : args) result.push_back(&arg); + return result; +} + +// static +parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( + const ast::argument_or_redirection_list_t &args) { + ast_args_list_t result; + for (const ast::argument_or_redirection_t &v : args) { + if (v.is_argument()) result.push_back(&v.argument()); + } + return result; +} + /// Handle the case of command not found. 
end_execution_reason_t parse_execution_context_t::handle_command_not_found( - const wcstring &cmd_str, tnode_t statement, int err_code) { + const wcstring &cmd_str, const ast::decorated_statement_t &statement, int err_code) { // We couldn't find the specified command. This is a non-fatal error. We want to set the exit // status to 127, which is the standard number used by other shells like bash and zsh. @@ -677,7 +735,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // error messages. wcstring_list_t event_args; { - auto args = get_argument_nodes(statement.child<1>()); + ast_args_list_t args = get_argument_nodes(statement.args_or_redirs); end_execution_reason_t arg_result = this->expand_arguments_from_nodes(args, &event_args, failglob); @@ -696,7 +754,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( } end_execution_reason_t parse_execution_context_t::expand_command( - tnode_t statement, wcstring *out_cmd, + const ast::decorated_statement_t &statement, wcstring *out_cmd, wcstring_list_t *out_args) const { // Here we're expanding a command, for example $HOME/bin/stuff or $randomthing. The first // completion becomes the command itself, everything after becomes arguments. Command @@ -704,8 +762,8 @@ end_execution_reason_t parse_execution_context_t::expand_command( parse_error_list_t errors; // Get the unexpanded command string. We expect to always get it here. - wcstring unexp_cmd = *command_for_plain_statement(statement, pstree->src); - size_t pos_of_command_token = statement.child<0>().source_range()->start; + wcstring unexp_cmd = get_source(statement.command); + size_t pos_of_command_token = statement.command.range.start; // Expand the string to produce completions, and report errors. expand_result_t expand_err = @@ -715,7 +773,7 @@ end_execution_reason_t parse_execution_context_t::expand_command( // excluding prefixes such as " " or "if ". 
// This means that the error positions are relative to the beginning // of the token; we need to make them relative to the original source. - for (auto &error : errors) error.source_start += pos_of_command_token; + parse_error_offset_source_start(&errors, pos_of_command_token); return report_errors(STATUS_ILLEGAL_CMD, errors); } else if (expand_err == expand_result_t::wildcard_no_match) { return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG, @@ -734,7 +792,7 @@ end_execution_reason_t parse_execution_context_t::expand_command( /// Creates a 'normal' (non-block) process. end_execution_reason_t parse_execution_context_t::populate_plain_process( - job_t *job, process_t *proc, tnode_t statement) { + job_t *job, process_t *proc, const ast::decorated_statement_t &statement) { assert(job != nullptr); assert(proc != nullptr); @@ -765,11 +823,9 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( const int no_cmd_err_code = errno; // If the specified command does not exist, and is undecorated, try using an implicit cd. - if (!has_command && get_decoration(statement) == parse_statement_decoration_none) { + if (!has_command && statement.decoration() == parse_statement_decoration_none) { // Implicit cd requires an empty argument and redirection list. - tnode_t args = statement.child<1>(); - if (args_from_cmd_expansion.empty() && !args.try_get_child() && - !args.try_get_child()) { + if (statement.args_or_redirs.empty()) { // Ok, no arguments or redirections; check to see if the command is a directory. 
use_implicit_cd = path_as_implicit_cd(cmd, parser->vars().get_pwd_slash(), parser->vars()) @@ -804,7 +860,8 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( cmd_args.push_back(cmd); cmd_args.insert(cmd_args.end(), args_from_cmd_expansion.begin(), args_from_cmd_expansion.end()); - argument_node_list_t arg_nodes = statement.descendants(); + + ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs); end_execution_reason_t arg_result = this->expand_arguments_from_nodes(arg_nodes, &cmd_args, glob_behavior); if (arg_result != end_execution_reason_t::ok) { @@ -812,7 +869,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( } // The set of IO redirections that we construct for the process. - auto reason = this->determine_redirections(statement.child<1>(), &redirections); + auto reason = this->determine_redirections(statement.args_or_redirs, &redirections); if (reason != end_execution_reason_t::ok) { return reason; } @@ -832,23 +889,23 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( // Determine the list of arguments, expanding stuff. Reports any errors caused by expansion. If we // have a wildcard that could not be expanded, report the error and continue. end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( - const argument_node_list_t &argument_nodes, wcstring_list_t *out_arguments, + const ast_args_list_t &argument_nodes, wcstring_list_t *out_arguments, globspec_t glob_behavior) { // Get all argument nodes underneath the statement. We guess we'll have that many arguments (but // may have more or fewer, if there are wildcards involved). out_arguments->reserve(out_arguments->size() + argument_nodes.size()); completion_list_t arg_expanded; - for (const auto &arg_node : argument_nodes) { + for (const ast::argument_t *arg_node : argument_nodes) { // Expect all arguments to have source. 
- assert(arg_node.has_source()); - const wcstring arg_str = arg_node.get_source(pstree->src); + assert(arg_node->has_source()); + const wcstring arg_str = get_source(*arg_node); // Expand this string. parse_error_list_t errors; arg_expanded.clear(); auto expand_ret = expand_string(arg_str, &arg_expanded, expand_flag::no_descriptions, ctx, &errors); - parse_error_offset_source_start(&errors, arg_node.source_range()->start); + parse_error_offset_source_start(&errors, arg_node->range.start); switch (expand_ret.result) { case expand_result_t::error: { return this->report_errors(expand_ret.status, errors); @@ -862,8 +919,8 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( // For no_exec, ignore the error - this might work at runtime. if (no_exec()) return end_execution_reason_t::ok; // Report the unmatched wildcard error and stop processing. - return report_error(STATUS_UNMATCHED_WILDCARD, arg_node, WILDCARD_ERR_MSG, - get_source(arg_node).c_str()); + return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node, WILDCARD_ERR_MSG, + get_source(*arg_node).c_str()); } break; } @@ -892,41 +949,42 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( } end_execution_reason_t parse_execution_context_t::determine_redirections( - tnode_t node, redirection_spec_list_t *out_redirections) { + const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections) { // Get all redirection nodes underneath the statement. 
- while (auto redirect_node = node.next_in_list()) { - wcstring target; // file path or target fd - auto redirect = redirection_for_node(redirect_node, pstree->src, &target); + for (const ast::argument_or_redirection_t &arg_or_redir : list) { + if (!arg_or_redir.is_redirection()) continue; + const ast::redirection_t &redir_node = arg_or_redir.redirection(); - if (!redirect || !redirect->is_valid()) { + maybe_t oper = pipe_or_redir_t::from_string(get_source(redir_node.oper)); + if (!oper || !oper->is_valid()) { // TODO: figure out if this can ever happen. If so, improve this error message. - return report_error(STATUS_INVALID_ARGS, redirect_node, _(L"Invalid redirection: %ls"), - redirect_node.get_source(pstree->src).c_str()); + return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection: %ls"), + get_source(redir_node).c_str()); } // PCA: I can't justify this skip_variables flag. It was like this when I got here. + wcstring target = get_source(redir_node.target); bool target_expanded = expand_one(target, no_exec() ? expand_flag::skip_variables : expand_flags_t{}, ctx); if (!target_expanded || target.empty()) { // TODO: Improve this error message. - return report_error(STATUS_INVALID_ARGS, redirect_node, + return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection target: %ls"), target.c_str()); } // Make a redirection spec from the redirect token. - assert(redirect && redirect->is_valid() && "expected to have a valid redirection"); - - redirection_spec_t spec{redirect->fd, redirect->mode, std::move(target)}; + assert(oper && oper->is_valid() && "expected to have a valid redirection"); + redirection_spec_t spec{oper->fd, oper->mode, std::move(target)}; // Validate this spec. 
if (spec.mode == redirection_mode_t::fd && !spec.is_close() && !spec.get_target_as_fd()) { const wchar_t *fmt = _(L"Requested redirection to '%ls', which is not a valid file descriptor"); - return report_error(STATUS_INVALID_ARGS, redirect_node, fmt, spec.target.c_str()); + return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec.target.c_str()); } out_redirections->push_back(std::move(spec)); - if (redirect->stderr_merge) { + if (oper->stderr_merge) { // This was a redirect like &> which also modifies stderr. // Also redirect stderr to stdout. out_redirections->push_back(get_stderr_merge()); @@ -936,57 +994,70 @@ end_execution_reason_t parse_execution_context_t::determine_redirections( } end_execution_reason_t parse_execution_context_t::populate_not_process( - job_t *job, process_t *proc, tnode_t not_statement) { + job_t *job, process_t *proc, const ast::not_statement_t ¬_statement) { auto &flags = job->mut_flags(); flags.negate = !flags.negate; - auto optional_time = not_statement.require_get_child(); - if (optional_time.tag() == parse_optional_time_time) { + if (not_statement.time) { flags.has_time_prefix = true; if (!job->mut_flags().foreground) { return this->report_error(STATUS_INVALID_ARGS, not_statement, ERROR_TIME_BACKGROUND); } } - return this->populate_job_process( - job, proc, not_statement.require_get_child(), - not_statement.require_get_child()); + return this->populate_job_process(job, proc, not_statement.contents, not_statement.variables); } template end_execution_reason_t parse_execution_context_t::populate_block_process( - job_t *job, process_t *proc, tnode_t statement, - tnode_t specific_statement) { + job_t *job, process_t *proc, const ast::statement_t &statement, + const Type &specific_statement) { + using namespace ast; // We handle block statements by creating process_type_t::block_node, that will bounce back to // us when it's time to execute them. 
UNUSED(job); - static_assert(Type::token == symbol_block_statement || Type::token == symbol_if_statement || - Type::token == symbol_switch_statement, + static_assert(Type::AstType == type_t::block_statement || + Type::AstType == type_t::if_statement || + Type::AstType == type_t::switch_statement, "Invalid block process"); - assert(statement && "statement missing"); - assert(specific_statement && "specific_statement missing"); - // The set of IO redirections that we construct for the process. - // TODO: fix this ugly find_child. - auto arguments = specific_statement.template find_child(); + // Get the argument or redirections list. + // TODO: args_or_redirs should be available without resolving the statement type. + const argument_or_redirection_list_t *args_or_redirs = nullptr; + + // Upcast to permit dropping the 'template' keyword. + const node_t &ss = specific_statement; + switch (Type::AstType) { + case type_t::block_statement: + args_or_redirs = &ss.as()->args_or_redirs; + break; + case type_t::if_statement: + args_or_redirs = &ss.as()->args_or_redirs; + break; + case type_t::switch_statement: + args_or_redirs = &ss.as()->args_or_redirs; + break; + default: + DIE("Unexpected block node type"); + } + assert(args_or_redirs && "Should have args_or_redirs"); + redirection_spec_list_t redirections; - auto reason = this->determine_redirections(arguments, &redirections); + auto reason = this->determine_redirections(*args_or_redirs, &redirections); if (reason == end_execution_reason_t::ok) { proc->type = process_type_t::block_node; proc->block_node_source = pstree; - proc->internal_block_node = statement; + proc->internal_block_node = &statement; proc->set_redirection_specs(std::move(redirections)); } return reason; } end_execution_reason_t parse_execution_context_t::apply_variable_assignments( - process_t *proc, tnode_t variable_assignments, + process_t *proc, const ast::variable_assignment_list_t &variable_assignment_list, const block_t **block) { - 
variable_assignment_node_list_t assignment_list = - get_variable_assignment_nodes(variable_assignments); - if (assignment_list.empty()) return end_execution_reason_t::ok; + if (variable_assignment_list.empty()) return end_execution_reason_t::ok; *block = parser->push_block(block_t::variable_assignment_block()); - for (const auto &variable_assignment : assignment_list) { - const wcstring &source = variable_assignment.get_source(pstree->src); + for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) { + const wcstring &source = get_source(variable_assignment); auto equals_pos = variable_assignment_equals_pos(source); assert(equals_pos); const wcstring variable_name = source.substr(0, *equals_pos); @@ -996,8 +1067,7 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function auto expand_ret = expand_string(expression, &expression_expanded, expand_flag::no_descriptions, ctx, &errors); - parse_error_offset_source_start( - &errors, variable_assignment.source_range()->start + *equals_pos + 1); + parse_error_offset_source_start(&errors, variable_assignment.range.start + *equals_pos + 1); switch (expand_ret.result) { case expand_result_t::error: return this->report_errors(expand_ret.status, errors); @@ -1024,10 +1094,11 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( } end_execution_reason_t parse_execution_context_t::populate_job_process( - job_t *job, process_t *proc, tnode_t statement, - tnode_t variable_assignments) { + job_t *job, process_t *proc, const ast::statement_t &statement, + const ast::variable_assignment_list_t &variable_assignments) { + using namespace ast; // Get the "specific statement" which is boolean / block / if / switch / decorated. 
- const parse_node_t &specific_statement = statement.get_child_node<0>(); + const node_t &specific_statement = *statement.contents.contents; const block_t *block = nullptr; end_execution_reason_t result = @@ -1038,27 +1109,26 @@ end_execution_reason_t parse_execution_context_t::populate_job_process( if (result != end_execution_reason_t::ok) return result; switch (specific_statement.type) { - case symbol_not_statement: { - result = this->populate_not_process(job, proc, {&tree(), &specific_statement}); + case type_t::not_statement: { + result = + this->populate_not_process(job, proc, *specific_statement.as()); break; } - case symbol_block_statement: - result = this->populate_block_process( - job, proc, statement, tnode_t(&tree(), &specific_statement)); + case type_t::block_statement: + result = this->populate_block_process(job, proc, statement, + *specific_statement.as()); break; - case symbol_if_statement: - result = this->populate_block_process( - job, proc, statement, tnode_t(&tree(), &specific_statement)); + case type_t::if_statement: + result = this->populate_block_process(job, proc, statement, + *specific_statement.as()); break; - case symbol_switch_statement: - result = this->populate_block_process( - job, proc, statement, tnode_t(&tree(), &specific_statement)); + case type_t::switch_statement: + result = this->populate_block_process(job, proc, statement, + *specific_statement.as()); break; - case symbol_decorated_statement: { - // Get the plain statement. It will pull out the decoration itself. 
- tnode_t dec_stat{&tree(), &specific_statement}; - auto plain_statement = dec_stat.find_child(); - result = this->populate_plain_process(job, proc, plain_statement); + case type_t::decorated_statement: { + result = this->populate_plain_process(job, proc, + *specific_statement.as()); break; } default: { @@ -1073,47 +1143,36 @@ end_execution_reason_t parse_execution_context_t::populate_job_process( } end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( - job_t *j, tnode_t job_node, const block_t *associated_block) { + job_t *j, const ast::job_t &job_node, const block_t *associated_block) { UNUSED(associated_block); // Tell the job what its command is. j->set_command(get_source(job_node)); - // We are going to construct process_t structures for every statement in the job. Get the first - // statement. - tnode_t optional_time = job_node.child<0>(); - tnode_t variable_assignments = job_node.child<1>(); - tnode_t statement = job_node.child<2>(); - + // We are going to construct process_t structures for every statement in the job. // Create processes. Each one may fail. process_list_t processes; processes.emplace_back(new process_t()); - if (optional_time.tag() == parse_optional_time_time) { + if (job_node.time) { j->mut_flags().has_time_prefix = true; - if (job_node_is_background(job_node)) { + if (job_node.bg) { return this->report_error(STATUS_INVALID_ARGS, job_node, ERROR_TIME_BACKGROUND); } } - end_execution_reason_t result = - this->populate_job_process(j, processes.back().get(), statement, variable_assignments); + end_execution_reason_t result = this->populate_job_process( + j, processes.back().get(), job_node.statement, job_node.variables); - // Construct process_ts for job continuations (pipelines), by walking the list until we hit the - // terminal (empty) job continuation. 
- tnode_t job_cont = job_node.child<3>(); - assert(job_cont); - while (auto pipe = job_cont.try_get_child()) { + // Construct process_ts for job continuations (pipelines). + for (const ast::job_continuation_t &jc : job_node.continuation) { if (result != end_execution_reason_t::ok) { break; } - auto variable_assignments = job_cont.require_get_child(); - auto statement = job_cont.require_get_child(); - // Handle the pipe, whose fd may not be the obvious stdout. - auto parsed_pipe = pipe_or_redir_t::from_string(get_source(pipe)); + auto parsed_pipe = pipe_or_redir_t::from_string(get_source(jc.pipe)); assert(parsed_pipe.has_value() && parsed_pipe->is_pipe && "Failed to parse valid pipe"); if (!parsed_pipe->is_valid()) { - result = report_error(STATUS_INVALID_ARGS, pipe, ILLEGAL_FD_ERR_MSG, - get_source(pipe).c_str()); + result = report_error(STATUS_INVALID_ARGS, jc.pipe, ILLEGAL_FD_ERR_MSG, + get_source(jc.pipe).c_str()); break; } processes.back()->pipe_write_fd = parsed_pipe->fd; @@ -1127,12 +1186,7 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( // Store the new process (and maybe with an error). processes.emplace_back(new process_t()); - result = - this->populate_job_process(j, processes.back().get(), statement, variable_assignments); - - // Get the next continuation. 
- job_cont = job_cont.require_get_child(); - assert(job_cont); + result = this->populate_job_process(j, processes.back().get(), jc.statement, jc.variables); } // Inform our processes of who is first and last @@ -1158,7 +1212,7 @@ static bool remove_job(parser_t &parser, job_t *job) { return false; } -end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_node, +end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_t &job_node, const block_t *associated_block) { if (auto ret = check_end_execution()) { return *ret; @@ -1180,7 +1234,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_ scoped_push saved_eval_level(&parser->eval_level, parser->eval_level + 1); // Save the node index. - scoped_push> saved_node(&executing_job_node, job_node); + scoped_push saved_node(&executing_job_node, &job_node); // Profiling support. long long start_time = 0, parse_time = 0, exec_time = 0; @@ -1194,34 +1248,33 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_ // However, if there are no redirections, then we can just jump into the block directly, which // is significantly faster. if (job_is_simple_block(job_node)) { - tnode_t optional_time = job_node.child<0>(); + bool do_time = job_node.time.has_value(); // If no-exec has been given, there is nothing to time. 
- cleanup_t timer = push_timer(optional_time.tag() == parse_optional_time_time && !no_exec()); - tnode_t variable_assignments = job_node.child<1>(); + cleanup_t timer = push_timer(do_time && !no_exec()); const block_t *block = nullptr; end_execution_reason_t result = - this->apply_variable_assignments(nullptr, variable_assignments, &block); + this->apply_variable_assignments(nullptr, job_node.variables, &block); cleanup_t scope([&]() { if (block) parser->pop_block(block); }); - tnode_t statement = job_node.child<2>(); - const parse_node_t &specific_statement = statement.get_child_node<0>(); - assert(specific_statement_type_is_redirectable_block(specific_statement)); + const ast::node_t *specific_statement = job_node.statement.contents.get(); + assert(specific_statement_type_is_redirectable_block(*specific_statement)); if (result == end_execution_reason_t::ok) { - switch (specific_statement.type) { - case symbol_block_statement: { - result = - this->run_block_statement({&tree(), &specific_statement}, associated_block); + switch (specific_statement->type) { + case ast::type_t::block_statement: { + result = this->run_block_statement( + *specific_statement->as(), associated_block); break; } - case symbol_if_statement: { - result = - this->run_if_statement({&tree(), &specific_statement}, associated_block); + case ast::type_t::if_statement: { + result = this->run_if_statement(*specific_statement->as(), + associated_block); break; } - case symbol_switch_statement: { - result = this->run_switch_statement({&tree(), &specific_statement}); + case ast::type_t::switch_statement: { + result = this->run_switch_statement( + *specific_statement->as()); break; } default: { @@ -1240,8 +1293,8 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_ profile_item->level = parser->eval_level; profile_item->parse = 0; profile_item->exec = static_cast(exec_time - start_time); - profile_item->cmd = profiling_cmd_name_for_redirectable_block( - specific_statement, 
this->tree(), this->pstree->src); + profile_item->cmd = + profiling_cmd_name_for_redirectable_block(*specific_statement, *this->pstree); profile_item->skipped = false; } @@ -1258,7 +1311,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_ job_t::properties_t props{}; props.wants_terminal = wants_job_control && !ld.is_event; - props.initial_background = job_node_is_background(job_node); + props.initial_background = job_node.bg.has_value(); props.skip_notification = ld.is_subshell || ld.is_block || ld.is_event || !parser->is_interactive(); props.from_event_handler = ld.is_event; @@ -1329,29 +1382,36 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t job_ } end_execution_reason_t parse_execution_context_t::run_job_conjunction( - tnode_t job_expr, const block_t *associated_block) { - end_execution_reason_t result = end_execution_reason_t::ok; - tnode_t cursor = job_expr; - // continuation is the parent of the cursor - tnode_t continuation; - while (cursor) { - if (auto reason = check_end_execution()) { - result = *reason; - break; + const ast::job_conjunction_t &job_expr, const block_t *associated_block) { + if (auto reason = check_end_execution()) { + return *reason; + } + end_execution_reason_t result = run_1_job(job_expr.job, associated_block); + + for (const ast::job_conjunction_continuation_t &jc : job_expr.continuations) { + if (result != end_execution_reason_t::ok) { + return result; } + if (auto reason = check_end_execution()) { + return *reason; + } + // Check the conjunction type. bool skip = false; - if (continuation) { - // Check the conjunction type. - parse_job_decoration_t conj = bool_statement_type(continuation); - assert((conj == parse_job_decoration_and || conj == parse_job_decoration_or) && - "Unexpected conjunction"); - skip = should_skip(conj); + switch (jc.conjunction.type) { + case parse_token_type_andand: + // AND. Skip if the last job failed. 
+ skip = parser->get_last_status() != 0; + break; + case parse_token_type_oror: + // OR. Skip if the last job succeeded. + skip = parser->get_last_status() == 0; + break; + default: + DIE("Unexpected job conjunction type"); } if (!skip) { - result = run_1_job(cursor.child<0>(), associated_block); + result = run_1_job(jc.job, associated_block); } - continuation = cursor.child<1>(); - cursor = continuation.try_get_child(); } return result; } @@ -1369,66 +1429,86 @@ bool parse_execution_context_t::should_skip(parse_job_decoration_t type) const { } } -template -end_execution_reason_t parse_execution_context_t::run_job_list(tnode_t job_list, - const block_t *associated_block) { - // We handle both job_list and andor_job_list uniformly. - static_assert(Type::token == symbol_job_list || Type::token == symbol_andor_job_list, - "Not a job list"); - - end_execution_reason_t result = end_execution_reason_t::ok; - while (auto job_conj = job_list.template next_in_list()) { - if (auto reason = check_end_execution()) { - result = *reason; - break; - } - - // Maybe skip the job if it has a leading and/or. - // Skipping is treated as success. - if (should_skip(get_decorator(job_conj))) { - result = end_execution_reason_t::ok; - } else { - result = this->run_job_conjunction(job_conj, associated_block); +end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction( + const ast::job_conjunction_t &jc, const block_t *associated_block) { + // Test this job conjunction if it has an 'and' or 'or' decorator. + // If it passes, then run it. + if (auto reason = check_end_execution()) { + return *reason; + } + // Maybe skip the job if it has a leading and/or. + bool skip = false; + if (jc.decorator.has_value()) { + switch (jc.decorator->kw) { + case parse_keyword_t::kw_and: + // AND. Skip if the last job failed. + skip = parser->get_last_status() != 0; + break; + case parse_keyword_t::kw_or: + // OR. Skip if the last job succeeded. 
+ skip = parser->get_last_status() == 0; + break; + default: + DIE("Unexpected keyword"); } } + // Skipping is treated as success. + if (skip) { + return end_execution_reason_t::ok; + } else { + return this->run_job_conjunction(jc, associated_block); + } +} +end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_list_t &job_list_node, + const block_t *associated_block) { + auto result = end_execution_reason_t::ok; + for (const ast::job_conjunction_t &jc : job_list_node) { + result = test_and_run_1_job_conjunction(jc, associated_block); + } // Returns the result of the last job executed or skipped. return result; } -end_execution_reason_t parse_execution_context_t::eval_node(tnode_t statement, +end_execution_reason_t parse_execution_context_t::run_job_list( + const ast::andor_job_list_t &job_list_node, const block_t *associated_block) { + auto result = end_execution_reason_t::ok; + for (const ast::andor_job_t &aoj : job_list_node) { + result = test_and_run_1_job_conjunction(aoj.job, associated_block); + } + // Returns the result of the last job executed or skipped. + return result; +} + +end_execution_reason_t parse_execution_context_t::eval_node(const ast::statement_t &statement, const block_t *associated_block) { - assert(statement && "Empty node in eval_node"); - assert(statement.matches_node_tree(tree()) && "statement has unexpected tree"); + // Note we only expect block-style statements here. No not statements. 
enum end_execution_reason_t status = end_execution_reason_t::ok; - if (auto block = statement.try_get_child()) { - status = this->run_block_statement(block, associated_block); - } else if (auto ifstat = statement.try_get_child()) { - status = this->run_if_statement(ifstat, associated_block); - } else if (auto switchstat = statement.try_get_child()) { - status = this->run_switch_statement(switchstat); + const ast::node_t *contents = statement.contents.get(); + if (const auto *block = contents->try_as()) { + status = this->run_block_statement(*block, associated_block); + } else if (const auto *ifstat = contents->try_as()) { + status = this->run_if_statement(*ifstat, associated_block); + } else if (const auto *switchstat = contents->try_as()) { + status = this->run_switch_statement(*switchstat); } else { - FLOGF(error, L"Unexpected node %ls found in %s", statement.node()->describe().c_str(), + FLOGF(error, L"Unexpected node %ls found in %s", statement.describe().c_str(), __FUNCTION__); abort(); } return status; } -end_execution_reason_t parse_execution_context_t::eval_node(tnode_t job_list, +end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_t &job_list, const block_t *associated_block) { - // Apply this block IO for the duration of this function. - assert(job_list && "Empty node in eval_node"); - assert(job_list.matches_node_tree(tree()) && "job_list has unexpected tree"); assert(associated_block && "Null block"); // Check for infinite recursion: a function which immediately calls itself.. wcstring func_name; - auto infinite_recursive_node = - this->infinite_recursive_statement_in_job_list(job_list, &func_name); - if (infinite_recursive_node) { + if (const auto *infinite_recursive_node = + this->infinite_recursive_statement_in_job_list(job_list, &func_name)) { // We have an infinite recursion. 
- return this->report_error(STATUS_CMD_ERROR, infinite_recursive_node, + return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); } @@ -1439,14 +1519,14 @@ end_execution_reason_t parse_execution_context_t::eval_node(tnode_t return this->run_job_list(job_list, associated_block); } -int parse_execution_context_t::line_offset_of_node(tnode_t node) { +int parse_execution_context_t::line_offset_of_node(const ast::job_t *node) { // If we're not executing anything, return -1. if (!node) { return -1; } // If for some reason we're executing a node without source, return -1. - auto range = node.source_range(); + auto range = node->try_source_range(); if (!range) { return -1; } @@ -1501,7 +1581,7 @@ int parse_execution_context_t::get_current_line_number() { int parse_execution_context_t::get_current_source_offset() const { int result = -1; if (executing_job_node) { - if (auto range = executing_job_node.source_range()) { + if (auto range = executing_job_node->try_source_range()) { result = static_cast(range->start); } } diff --git a/src/parse_execution.h b/src/parse_execution.h index 8ac778a0f..fe91ba902 100644 --- a/src/parse_execution.h +++ b/src/parse_execution.h @@ -4,6 +4,7 @@ #include +#include "ast.h" #include "common.h" #include "io.h" #include "parse_constants.h" @@ -38,7 +39,7 @@ class parse_execution_context_t { const operation_context_t &ctx; // The currently executing job node, used to indicate the line number. - tnode_t executing_job_node{}; + const ast::job_t *executing_job_node{}; // Cached line number information. size_t cached_lineno_offset = 0; @@ -59,88 +60,94 @@ class parse_execution_context_t { // Report an error, setting $status to \p status. Always returns // 'end_execution_reason_t::error'. - end_execution_reason_t report_error(int status, const parse_node_t &node, const wchar_t *fmt, + end_execution_reason_t report_error(int status, const ast::node_t &node, const wchar_t *fmt, ...) 
const; end_execution_reason_t report_errors(int status, const parse_error_list_t &error_list) const; /// Command not found support. end_execution_reason_t handle_command_not_found(const wcstring &cmd, - tnode_t statement, + const ast::decorated_statement_t &statement, int err_code); // Utilities - wcstring get_source(const parse_node_t &node) const; - tnode_t infinite_recursive_statement_in_job_list( - tnode_t job_list, wcstring *out_func_name) const; + wcstring get_source(const ast::node_t &node) const; + const ast::decorated_statement_t *infinite_recursive_statement_in_job_list( + const ast::job_list_t &job_list, wcstring *out_func_name) const; // Expand a command which may contain variables, producing an expand command and possibly // arguments. Prints an error message on error. - end_execution_reason_t expand_command(tnode_t statement, + end_execution_reason_t expand_command(const ast::decorated_statement_t &statement, wcstring *out_cmd, wcstring_list_t *out_args) const; /// Return whether we should skip a job with the given bool statement type. bool should_skip(parse_job_decoration_t type) const; /// Indicates whether a job is a simple block (one block, no redirections). - bool job_is_simple_block(tnode_t job) const; + bool job_is_simple_block(const ast::job_t &job) const; - enum process_type_t process_type_for_command(tnode_t statement, + enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement, const wcstring &cmd) const; end_execution_reason_t apply_variable_assignments( - process_t *proc, tnode_t variable_assignments, + process_t *proc, const ast::variable_assignment_list_t &variable_assignments, const block_t **block); // These create process_t structures from statements. 
end_execution_reason_t populate_job_process( - job_t *job, process_t *proc, tnode_t statement, - tnode_t variable_assignments); + job_t *job, process_t *proc, const ast::statement_t &statement, + const ast::variable_assignment_list_t &variable_assignments_list_t); end_execution_reason_t populate_not_process(job_t *job, process_t *proc, - tnode_t not_statement); + const ast::not_statement_t ¬_statement); end_execution_reason_t populate_plain_process(job_t *job, process_t *proc, - tnode_t statement); + const ast::decorated_statement_t &statement); template end_execution_reason_t populate_block_process(job_t *job, process_t *proc, - tnode_t statement, - tnode_t specific_statement); + const ast::statement_t &statement, + const Type &specific_statement); // These encapsulate the actual logic of various (block) statements. - end_execution_reason_t run_block_statement(tnode_t statement, + end_execution_reason_t run_block_statement(const ast::block_statement_t &statement, const block_t *associated_block); - end_execution_reason_t run_for_statement(tnode_t header, - tnode_t contents); - end_execution_reason_t run_if_statement(tnode_t statement, + end_execution_reason_t run_for_statement(const ast::for_header_t &header, + const ast::job_list_t &contents); + end_execution_reason_t run_if_statement(const ast::if_statement_t &statement, const block_t *associated_block); - end_execution_reason_t run_switch_statement(tnode_t statement); - end_execution_reason_t run_while_statement(tnode_t header, - tnode_t contents, + end_execution_reason_t run_switch_statement(const ast::switch_statement_t &statement); + end_execution_reason_t run_while_statement(const ast::while_header_t &header, + const ast::job_list_t &contents, const block_t *associated_block); - end_execution_reason_t run_function_statement(tnode_t statement, - tnode_t header); - end_execution_reason_t run_begin_statement(tnode_t contents); + end_execution_reason_t run_function_statement(const ast::block_statement_t 
&statement, + const ast::function_header_t &header); + end_execution_reason_t run_begin_statement(const ast::job_list_t &contents); enum globspec_t { failglob, nullglob }; - using argument_node_list_t = std::vector>; - end_execution_reason_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes, + using ast_args_list_t = std::vector; + + static ast_args_list_t get_argument_nodes(const ast::argument_list_t &args); + static ast_args_list_t get_argument_nodes(const ast::argument_or_redirection_list_t &args); + + end_execution_reason_t expand_arguments_from_nodes(const ast_args_list_t &argument_nodes, wcstring_list_t *out_arguments, globspec_t glob_behavior); // Determines the list of redirections for a node. - end_execution_reason_t determine_redirections( - tnode_t node, - redirection_spec_list_t *out_redirections); + end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list, + redirection_spec_list_t *out_redirections); - end_execution_reason_t run_1_job(tnode_t job, const block_t *associated_block); - end_execution_reason_t run_job_conjunction(tnode_t job_expr, + end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block); + end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc, + const block_t *associated_block); + end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr, const block_t *associated_block); - template - end_execution_reason_t run_job_list(tnode_t job_list_node, + end_execution_reason_t run_job_list(const ast::job_list_t &job_list_node, const block_t *associated_block); - end_execution_reason_t populate_job_from_job_node(job_t *j, tnode_t job_node, + end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node, + const block_t *associated_block); + end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node, const block_t *associated_block); // Returns the line number 
of the node. Not const since it touches cached_lineno_offset. - int line_offset_of_node(tnode_t node); + int line_offset_of_node(const ast::job_t *node); int line_offset_of_character_at_offset(size_t offset); public: @@ -159,14 +166,14 @@ class parse_execution_context_t { /// Returns the source string. const wcstring &get_source() const { return pstree->src; } - /// Return the parse tree. - const parse_node_tree_t &tree() const { return pstree->tree; } + /// Return the parsed ast. + const ast::ast_t &ast() const { return *pstree->ast; } /// Start executing at the given node. Returns 0 if there was no error, 1 if there was an /// error. - end_execution_reason_t eval_node(tnode_t statement, + end_execution_reason_t eval_node(const ast::statement_t &statement, const block_t *associated_block); - end_execution_reason_t eval_node(tnode_t job_list, + end_execution_reason_t eval_node(const ast::job_list_t &job_list, const block_t *associated_block); }; diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index b3eadcd8d..8337800b7 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -1214,11 +1214,19 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } +parsed_source_t::parsed_source_t(wcstring s, ast::ast_t &&ast) + : src(std::move(s)), ast(make_unique(std::move(ast))) {} + +parsed_source_t::~parsed_source_t() = default; + parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors) { - parse_node_tree_t tree; - if (!parse_tree_from_string(src, flags, &tree, errors, symbol_job_list)) return {}; - return std::make_shared(std::move(src), std::move(tree)); + using namespace ast; + ast_t ast = ast_t::parse(src, flags, errors); + if (ast.errored() && !(flags & parse_flag_continue_after_error)) { + return nullptr; + } + return std::make_shared(std::move(src), std::move(ast)); } const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent, diff --git a/src/parse_tree.h 
b/src/parse_tree.h index 8f8d54f74..5e10d9e1a 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -206,19 +206,26 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, parse_token_type_t goal = symbol_job_list); +namespace ast { +class ast_t; +} + /// A type wrapping up a parse tree and the original source behind it. struct parsed_source_t { wcstring src; - parse_node_tree_t tree; + std::unique_ptr ast; - parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {} + parsed_source_t(wcstring s, ast::ast_t &&ast); + ~parsed_source_t(); parsed_source_t(const parsed_source_t &) = delete; void operator=(const parsed_source_t &) = delete; - parsed_source_t(parsed_source_t &&) = default; - parsed_source_t &operator=(parsed_source_t &&) = default; + parsed_source_t(parsed_source_t &&) = delete; + parsed_source_t &operator=(parsed_source_t &&) = delete; }; + /// Return a shared pointer to parsed_source_t, or null on failure. +/// If parse_flag_continue_after_error is not set, this will return null on any error. using parsed_source_ref_t = std::shared_ptr; parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors); diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 8a74c905c..4ee438946 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -1311,9 +1311,9 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, *out_errors = std::move(parse_errors); } + // \return the ast to our caller if requested. 
if (out_pstree != nullptr) { - // TODO: legacy - *out_pstree = parse_source(buff_src, parse_flags, nullptr); + *out_pstree = std::make_shared(buff_src, std::move(ast)); } return res; diff --git a/src/parser.cpp b/src/parser.cpp index c48ec3868..12f670548 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -656,10 +656,10 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io, eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io, const job_group_ref_t &job_group, enum block_type_t block_type) { assert(block_type == block_type_t::top || block_type == block_type_t::subst); - if (!ps->tree.empty()) { - // Execute the first node. - tnode_t start{&ps->tree, &ps->tree.front()}; - return this->eval_node(ps, start, io, job_group, block_type); + const auto *job_list = ps->ast->top()->as(); + if (!job_list->empty()) { + // Execute the top job list. + return this->eval_node(ps, *job_list, io, job_group, block_type); } else { auto status = proc_status_t::from_exit_code(get_last_status()); bool break_expand = false; @@ -669,11 +669,11 @@ eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io, } template -eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t node, +eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io, const job_group_ref_t &job_group, block_type_t block_type) { static_assert( - std::is_same::value || std::is_same::value, + std::is_same::value || std::is_same::value, "Unexpected node type"); // Handle cancellation requests. If our block stack is currently empty, then we already did // successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is @@ -725,9 +725,9 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t node, } // Explicit instantiations. TODO: use overloads instead? 
-template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t, +template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::statement_t &, const io_chain_t &, const job_group_ref_t &, block_type_t); -template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t, +template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::job_list_t &, const io_chain_t &, const job_group_ref_t &, block_type_t); void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, diff --git a/src/parser.h b/src/parser.h index be8b001aa..4987c706d 100644 --- a/src/parser.h +++ b/src/parser.h @@ -300,9 +300,9 @@ class parser_t : public std::enable_shared_from_this { block_type_t block_type = block_type_t::top); /// Evaluates a node. - /// The node type must be grammar::statement or grammar::job_list. + /// The node type must be ast_t::statement_t or ast::job_list_t. template - eval_res_t eval_node(const parsed_source_ref_t &ps, tnode_t node, const io_chain_t &block_io, + eval_res_t eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io, const job_group_ref_t &job_group, block_type_t block_type = block_type_t::top); diff --git a/src/proc.h b/src/proc.h index eaae95e1c..c17dc216a 100644 --- a/src/proc.h +++ b/src/proc.h @@ -44,6 +44,10 @@ enum class job_control_t { none, }; +namespace ast { +struct statement_t; +} + /// A proc_status_t is a value type that encapsulates logic around exited vs stopped vs signaled, /// etc. class proc_status_t { @@ -261,10 +265,10 @@ class process_t { /// Type of process. process_type_t type{process_type_t::external}; - /// For internal block processes only, the node offset of the statement. + /// For internal block processes only, the node of the statement. /// This is always either block, ifs, or switchs, never boolean or decorated. 
parsed_source_ref_t block_node_source{}; - tnode_t internal_block_node{}; + const ast::statement_t *internal_block_node{}; struct concrete_assignment { wcstring variable_name; From 0c22f67bdee555c3a1f5a18bb546ab2de4085c86 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 2 Jul 2020 14:51:45 -0700 Subject: [PATCH 13/13] Remove the old parser bits Now that everything has been migrated to the new AST, remove as much of the parse_tree bits as possible --- CMakeLists.txt | 4 +- src/complete.cpp | 1 - src/fish_indent.cpp | 1 - src/fish_tests.cpp | 1 - src/flog.h | 3 - src/function.h | 1 - src/history.cpp | 1 - src/parse_constants.h | 72 +-- src/parse_execution.cpp | 22 +- src/parse_execution.h | 5 +- src/parse_grammar.h | 401 -------------- src/parse_grammar_elements.inc | 37 -- src/parse_productions.cpp | 466 ---------------- src/parse_productions.h | 50 -- src/parse_tree.cpp | 956 +-------------------------------- src/parse_tree.h | 132 ----- src/parse_util.cpp | 2 - src/parse_util.h | 1 - src/parser.cpp | 1 - src/proc.h | 1 - src/reader.cpp | 1 - src/tnode.cpp | 152 ------ src/tnode.h | 278 ---------- 23 files changed, 14 insertions(+), 2575 deletions(-) delete mode 100644 src/parse_grammar.h delete mode 100644 src/parse_grammar_elements.inc delete mode 100644 src/parse_productions.cpp delete mode 100644 src/parse_productions.h delete mode 100644 src/tnode.cpp delete mode 100644 src/tnode.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a360bde47..1d0430429 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,10 +114,10 @@ set(FISH_SRCS src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp - src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp + src/parse_execution.cpp src/parse_tree.cpp src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp 
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp - src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp + src/signal.cpp src/tinyexpr.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp diff --git a/src/complete.cpp b/src/complete.cpp index 8f694512f..ba245581a 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -45,7 +45,6 @@ #include "path.h" #include "proc.h" #include "reader.h" -#include "tnode.h" #include "util.h" #include "wcstringutil.h" #include "wildcard.h" diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 491f99e49..e52625152 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -46,7 +46,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "parse_constants.h" #include "parse_util.h" #include "print_help.h" -#include "tnode.h" #include "wutil.h" // IWYU pragma: keep // The number of spaces per indent isn't supposed to be configurable. 
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 5aa171a8b..d4f094910 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -76,7 +76,6 @@ #include "signal.h" #include "termsize.h" #include "timer.h" -#include "tnode.h" #include "tokenizer.h" #include "topic_monitor.h" #include "utf8.h" diff --git a/src/flog.h b/src/flog.h index dcf5fd3cf..eed0c0771 100644 --- a/src/flog.h +++ b/src/flog.h @@ -64,9 +64,6 @@ class category_list_t { category_t exec_fork{L"exec-fork", L"Calls to fork()"}; category_t output_invalid{L"output-invalid", L"Trying to print invalid output"}; - category_t parse_productions{L"parse-productions", L"Resolving tokens"}; - category_t parse_productions_chatty{L"parse-productions-chatty", - L"Resolving tokens (chatty messages)"}; category_t ast_construction{L"ast-construction", L"Parsing fish AST"}; category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"}; diff --git a/src/function.h b/src/function.h index 2de5f3081..ba7b2e05a 100644 --- a/src/function.h +++ b/src/function.h @@ -11,7 +11,6 @@ #include "env.h" #include "event.h" #include "parse_tree.h" -#include "tnode.h" class parser_t; diff --git a/src/history.cpp b/src/history.cpp index d069a8278..39fd62965 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -45,7 +45,6 @@ #include "parser.h" #include "path.h" #include "reader.h" -#include "tnode.h" #include "wcstringutil.h" #include "wildcard.h" // IWYU pragma: keep #include "wutil.h" // IWYU pragma: keep diff --git a/src/parse_constants.h b/src/parse_constants.h index e41992c50..454067f9d 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -6,7 +6,6 @@ #include "common.h" -#define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() \ do { \ FLOG(error, L"Parser dying!"); \ @@ -27,44 +26,7 @@ struct source_range_t { // IMPORTANT: If the following enum table is modified you must also update token_enum_map below. 
enum parse_token_type_t : uint8_t { token_type_invalid = 1, - // Non-terminal tokens - symbol_job_list, - symbol_job_conjunction, - symbol_job_conjunction_continuation, - symbol_job_decorator, - symbol_job, - symbol_job_continuation, - symbol_statement, - symbol_block_statement, - symbol_block_header, - symbol_for_header, - symbol_while_header, - symbol_begin_header, - symbol_function_header, - symbol_if_statement, - symbol_if_clause, - symbol_else_clause, - symbol_else_continuation, - symbol_switch_statement, - symbol_case_item_list, - symbol_case_item, - symbol_not_statement, - symbol_decorated_statement, - symbol_plain_statement, - symbol_variable_assignment, - symbol_variable_assignments, - symbol_arguments_or_redirections_list, - symbol_andor_job_list, - symbol_argument_list, - // Freestanding argument lists are parsed from the argument list supplied to 'complete -a'. - // They are not generated by parse trees rooted in symbol_job_list. - symbol_freestanding_argument_list, - symbol_argument, - symbol_redirection, - symbol_optional_background, - symbol_optional_newlines, - symbol_optional_time, - symbol_end_command, + // Terminal types. 
parse_token_type_string, parse_token_type_pipe, @@ -79,13 +41,6 @@ enum parse_token_type_t : uint8_t { parse_special_type_parse_error, parse_special_type_tokenizer_error, parse_special_type_comment, - - LAST_TOKEN_TYPE = parse_special_type_comment, - FIRST_TERMINAL_TYPE = parse_token_type_string, - LAST_TERMINAL_TYPE = parse_token_type_terminate, - LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string, - LAST_PARSE_TOKEN_TYPE = parse_token_type_end }; const enum_map token_enum_map[] = { @@ -100,9 +55,6 @@ const enum_map token_enum_map[] = { {parse_token_type_andand, L"parse_token_type_andand"}, {parse_token_type_oror, L"parse_token_type_oror"}, {parse_token_type_terminate, L"parse_token_type_terminate"}, -// Define all symbols -#define ELEM(sym) {symbol_##sym, L"symbol_" #sym}, -#include "parse_grammar_elements.inc" {token_type_invalid, L"token_type_invalid"}, {token_type_invalid, nullptr}}; #define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map) @@ -158,7 +110,7 @@ const enum_map keyword_enum_map[] = {{parse_keyword_t::kw_excla // Node tag values. -// Statement decorations, stored in node tag. +// Statement decorations. enum parse_statement_decoration_t { parse_statement_decoration_none, parse_statement_decoration_command, @@ -166,19 +118,6 @@ enum parse_statement_decoration_t { parse_statement_decoration_exec, }; -// Job decorations, stored in node tag. -enum parse_job_decoration_t { - parse_job_decoration_none, - parse_job_decoration_and, - parse_job_decoration_or, -}; - -// Whether a statement is backgrounded. -enum parse_optional_background_t { parse_no_background, parse_background }; - -// Whether a job is prefixed with "time". -enum parse_optional_time_t { parse_optional_time_no_time, parse_optional_time_time }; - // Parse error code list. 
enum parse_error_code_t { parse_error_none, @@ -255,6 +194,13 @@ wcstring token_type_user_presentable_description(parse_token_type_t type, /// errors in a substring of a larger source buffer. void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt); +// The location of a pipeline. +enum class pipeline_position_t { + none, // not part of a pipeline + first, // first command in a pipeline + subsequent // second or further command in a pipeline +}; + /// Maximum number of function calls. #define FISH_MAX_STACK_DEPTH 128 diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index 22f4669e4..71b1a36a3 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -1,11 +1,4 @@ -// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.) -// -// A note on error handling: fish has two kind of errors, fatal parse errors non-fatal runtime -// errors. A fatal error prevents execution of the entire file, while a non-fatal error skips that -// job. -// -// Non-fatal errors are printed as soon as they are encountered; otherwise you would have to wait -// for the execution to finish to see them. +// Provides the "linkage" between an ast and actual execution structures (job_t, etc.) #include "config.h" // IWYU pragma: keep #include "parse_execution.h" @@ -1416,19 +1409,6 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction( return result; } -bool parse_execution_context_t::should_skip(parse_job_decoration_t type) const { - switch (type) { - case parse_job_decoration_and: - // AND. Skip if the last job failed. - return parser->get_last_status() != 0; - case parse_job_decoration_or: - // OR. Skip if the last job succeeded. 
- return parser->get_last_status() == 0; - default: - return false; - } -} - end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction( const ast::job_conjunction_t &jc, const block_t *associated_block) { // Test this job conjunction if it has an 'and' or 'or' decorator. diff --git a/src/parse_execution.h b/src/parse_execution.h index fe91ba902..0775fcb96 100644 --- a/src/parse_execution.h +++ b/src/parse_execution.h @@ -1,4 +1,4 @@ -// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.). +// Provides the "linkage" between an ast and actual execution structures (job_t, etc.). #ifndef FISH_PARSE_EXECUTION_H #define FISH_PARSE_EXECUTION_H @@ -79,9 +79,6 @@ class parse_execution_context_t { end_execution_reason_t expand_command(const ast::decorated_statement_t &statement, wcstring *out_cmd, wcstring_list_t *out_args) const; - /// Return whether we should skip a job with the given bool statement type. - bool should_skip(parse_job_decoration_t type) const; - /// Indicates whether a job is a simple block (one block, no redirections). bool job_is_simple_block(const ast::job_t &job) const; diff --git a/src/parse_grammar.h b/src/parse_grammar.h deleted file mode 100644 index c89fd8816..000000000 --- a/src/parse_grammar.h +++ /dev/null @@ -1,401 +0,0 @@ -// Programmatic representation of fish grammar -#ifndef FISH_PARSE_GRAMMAR_H -#define FISH_PARSE_GRAMMAR_H - -#include -#include -#include - -#include "parse_constants.h" -#include "tokenizer.h" - -struct parse_token_t; -typedef uint8_t parse_node_tag_t; - -using parse_node_tag_t = uint8_t; -struct parse_token_t; -namespace grammar { - -using production_element_t = uint8_t; - -enum { - // The maximum length of any seq production. - MAX_PRODUCTION_LENGTH = 6 -}; - -// Define primitive types. 
-template -struct primitive { - using type_tuple = std::tuple<>; - static constexpr parse_token_type_t token = Token; - static constexpr production_element_t element() { return Token; } -}; - -using tok_end = primitive; -using tok_string = primitive; -using tok_pipe = primitive; -using tok_background = primitive; -using tok_redirection = primitive; -using tok_andand = primitive; -using tok_oror = primitive; - -// Define keyword types. -template -struct keyword { - using type_tuple = std::tuple<>; - static constexpr parse_token_type_t token = parse_token_type_string; - static constexpr production_element_t element() { - // Convert a parse_keyword_t enum to a production_element_t enum. - return static_cast(Keyword) + LAST_TOKEN_OR_SYMBOL + 1; - } -}; - -// Define special types. -// Comments are not emitted as part of productions, but specially by the parser. -struct comment { - using type_tuple = std::tuple<>; - static constexpr parse_token_type_t token = parse_special_type_comment; -}; - -// Forward declare all the symbol types. -#define ELEM(T) struct T; -#include "parse_grammar_elements.inc" - -// A production is a sequence of production elements. -// +1 to hold the terminating token_type_invalid -template -using production_t = std::array; - -// This is an ugly hack to avoid ODR violations -// Given some type, return a pointer to its production. -template -const production_element_t *production_for() { - static constexpr auto prod = T::production; - return prod.data(); -} - -// Get some production element. -template -constexpr production_element_t element() { - return T::element(); -} - -// Template goo. -namespace detail { -template -struct tuple_contains; - -template -struct tuple_contains> : std::false_type {}; - -template -struct tuple_contains> : tuple_contains> {}; - -template -struct tuple_contains> : std::true_type {}; - -struct void_type { - using type = void; -}; - -// Support for checking whether the index N is valid for T::type_tuple. 
-template -static constexpr bool index_valid() { - return N < std::tuple_size::value; -} - -// Get the Nth type of T::type_tuple. -template -using tuple_element = std::tuple_element; - -// Get the Nth type of T::type_tuple, or void if N is out of bounds. -template -using tuple_element_or_void = - typename std::conditional(), tuple_element, void_type>::type::type; - -// Make a tuple by mapping the Nth item of a list of 'seq's. -template -struct tuple_nther { - // A tuple of the Nth types of tuples (or voids). - using type = std::tuple...>; -}; - -// Given a list of Options, each one a seq, check to see if any of them contain type Desired at -// index Index. -template -inline constexpr bool type_possible() { - using nths = typename tuple_nther::type; - return tuple_contains::value; -} -} // namespace detail - -// Partial specialization hack. -#define ELEM(T) \ - template <> \ - constexpr production_element_t element() { \ - return symbol_##T; \ - } -#include "parse_grammar_elements.inc" - -// Empty produces nothing. -struct empty { - using type_tuple = std::tuple<>; - static constexpr production_t<0> production = {{token_type_invalid}}; - static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, - parse_node_tag_t *) { - return production_for(); - } -}; - -// Sequence represents a list of (at least two) productions. 
-template -struct seq { - static constexpr production_t<1 + sizeof...(Ts)> production = { - {element(), element()..., token_type_invalid}}; - - static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small"); - - using type_tuple = std::tuple; - - template - static constexpr bool type_possible() { - using element_t = detail::tuple_element_or_void; - return std::is_same::value; - } - - static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, - parse_node_tag_t *) { - return production_for(); - } -}; - -template -using produces_sequence = seq; - -// Ergonomic way to create a production for a single element. -template -using single = seq; - -template -using produces_single = single; - -// Alternative represents a choice. -struct alternative {}; - -// Following are the grammar productions. -#define BODY(T) static constexpr parse_token_type_t token = symbol_##T; - -#define DEF(T) struct T : public - -#define DEF_ALT(T) struct T : public alternative -#define ALT_BODY(T, ...) \ - BODY(T) \ - using type_tuple = std::tuple<>; \ - template \ - static constexpr bool type_possible() { \ - return detail::type_possible(); \ - } \ - static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \ - parse_node_tag_t *) - -// A job_list is a list of job_conjunctions, separated by semicolons or newlines -DEF_ALT(job_list) { - using normal = seq; - using empty_line = seq; - using empty = grammar::empty; - ALT_BODY(job_list, normal, empty_line, empty); -}; - -// Job decorators are 'and' and 'or'. These apply to the whole job. -DEF_ALT(job_decorator) { - using ands = single>; - using ors = single>; - using empty = grammar::empty; - ALT_BODY(job_decorator, ands, ors, empty); -}; - -// A job_conjunction is a job followed by a continuation. 
-DEF(job_conjunction) produces_sequence{BODY(job_conjunction)}; - -DEF_ALT(job_conjunction_continuation) { - using andands = seq; - using orors = seq; - using empty = grammar::empty; - ALT_BODY(job_conjunction_continuation, andands, orors, empty); -}; - -/// The time builtin. -DEF_ALT(optional_time) { - using empty = grammar::empty; - using time = single>; - ALT_BODY(optional_time, empty, time); -}; - -// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases -// like if statements, where we require a command). To represent "non-empty", we require a -// statement, followed by a possibly empty job_continuation, and then optionally a background -// specifier '&' -DEF(job) -produces_sequence{BODY(job)}; - -DEF_ALT(job_continuation) { - using piped = - seq; - using empty = grammar::empty; - ALT_BODY(job_continuation, piped, empty); -}; - -// A list of assignments like HOME=$PWD -DEF_ALT(variable_assignments) { - using empty = grammar::empty; - using var = seq; - ALT_BODY(variable_assignments, empty, var); -}; -// A string token like VAR=value -DEF(variable_assignment) produces_single{BODY(variable_assignment)}; - -// A statement is a normal command, or an if / while / etc -DEF_ALT(statement) { - using nots = single; - using block = single; - using ifs = single; - using switchs = single; - using decorated = single; - ALT_BODY(statement, nots, block, ifs, switchs, decorated); -}; - -// A block is a conditional, loop, or begin/end -DEF(if_statement) -produces_sequence{ - BODY(if_statement)}; - -DEF(if_clause) -produces_sequence, job_conjunction, tok_end, andor_job_list, - job_list>{BODY(if_clause)}; - -DEF_ALT(else_clause) { - using empty = grammar::empty; - using else_cont = seq, else_continuation>; - ALT_BODY(else_clause, empty, else_cont); -}; - -DEF_ALT(else_continuation) { - using else_if = seq; - using else_only = seq; - ALT_BODY(else_continuation, else_if, else_only); -}; - -DEF(switch_statement) -produces_sequence, argument, 
tok_end, case_item_list, - end_command, arguments_or_redirections_list>{BODY(switch_statement)}; - -DEF_ALT(case_item_list) { - using empty = grammar::empty; - using case_items = seq; - using blank_line = seq; - ALT_BODY(case_item_list, empty, case_items, blank_line); -}; - -DEF(case_item) -produces_sequence, argument_list, tok_end, job_list>{ - BODY(case_item)}; - -DEF(block_statement) -produces_sequence{ - BODY(block_statement)}; - -DEF_ALT(block_header) { - using forh = single; - using whileh = single; - using funch = single; - using beginh = single; - ALT_BODY(block_header, forh, whileh, funch, beginh); -}; - -DEF(for_header) -produces_sequence, tok_string, keyword, - argument_list, tok_end>{BODY(for_header)}; - -DEF(while_header) -produces_sequence, job_conjunction, tok_end, andor_job_list>{ - BODY(while_header)}; - -DEF(begin_header) produces_single>{BODY(begin_header)}; - -// Functions take arguments, and require at least one (the name). No redirections allowed. -DEF(function_header) -produces_sequence, argument, argument_list, tok_end>{ - BODY(function_header)}; - -DEF_ALT(not_statement) { - using nots = - seq, variable_assignments, optional_time, statement>; - using exclams = - seq, variable_assignments, optional_time, statement>; - ALT_BODY(not_statement, nots, exclams); -}; - -// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean -// statement. 
-DEF_ALT(andor_job_list) { - using empty = grammar::empty; - using andor_job = seq; - using empty_line = seq; - ALT_BODY(andor_job_list, empty, andor_job, empty_line); -}; - -// A decorated_statement is a command with a list of arguments_or_redirections, possibly with -// "builtin" or "command" or "exec" -DEF_ALT(decorated_statement) { - using plains = single; - using cmds = seq, plain_statement>; - using builtins = seq, plain_statement>; - using execs = seq, plain_statement>; - ALT_BODY(decorated_statement, plains, cmds, builtins, execs); -}; - -DEF(plain_statement) -produces_sequence{BODY(plain_statement)}; - -DEF_ALT(argument_list) { - using empty = grammar::empty; - using arg = seq; - ALT_BODY(argument_list, empty, arg); -}; - -DEF_ALT(arguments_or_redirections_list) { - using empty = grammar::empty; - using arg = seq; - using redir = seq; - ALT_BODY(arguments_or_redirections_list, empty, arg, redir); -}; - -DEF(argument) produces_single{BODY(argument)}; -DEF(redirection) produces_sequence{BODY(redirection)}; - -DEF_ALT(optional_background) { - using empty = grammar::empty; - using background = single; - ALT_BODY(optional_background, empty, background); -}; - -DEF(end_command) produces_single>{BODY(end_command)}; - -// Note optional_newlines only allows newline-style tok_end, not semicolons. 
-DEF_ALT(optional_newlines) { - using empty = grammar::empty; - using newlines = seq; - ALT_BODY(optional_newlines, empty, newlines); -}; - -// A freestanding_argument_list is equivalent to a normal argument list, except it may contain -// TOK_END (newlines, and even semicolons, for historical reasons) -DEF_ALT(freestanding_argument_list) { - using empty = grammar::empty; - using arg = seq; - using semicolon = seq; - ALT_BODY(freestanding_argument_list, empty, arg, semicolon); -}; -} // namespace grammar -#endif diff --git a/src/parse_grammar_elements.inc b/src/parse_grammar_elements.inc deleted file mode 100644 index 621792b7a..000000000 --- a/src/parse_grammar_elements.inc +++ /dev/null @@ -1,37 +0,0 @@ -// Define ELEM before including this file. -ELEM(job_list) -ELEM(job) -ELEM(job_decorator) -ELEM(job_conjunction) -ELEM(job_conjunction_continuation) -ELEM(job_continuation) -ELEM(statement) -ELEM(if_statement) -ELEM(if_clause) -ELEM(else_clause) -ELEM(else_continuation) -ELEM(switch_statement) -ELEM(case_item_list) -ELEM(case_item) -ELEM(block_statement) -ELEM(block_header) -ELEM(for_header) -ELEM(while_header) -ELEM(begin_header) -ELEM(function_header) -ELEM(not_statement) -ELEM(andor_job_list) -ELEM(decorated_statement) -ELEM(variable_assignment) -ELEM(variable_assignments) -ELEM(plain_statement) -ELEM(argument_list) -ELEM(arguments_or_redirections_list) -ELEM(argument) -ELEM(redirection) -ELEM(optional_background) -ELEM(optional_newlines) -ELEM(optional_time) -ELEM(end_command) -ELEM(freestanding_argument_list) -#undef ELEM diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp deleted file mode 100644 index dee4a7ac9..000000000 --- a/src/parse_productions.cpp +++ /dev/null @@ -1,466 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "parse_productions.h" - -#include - -#include "common.h" -#include "flog.h" -#include "parse_constants.h" -#include "parse_grammar.h" -#include "parse_tree.h" - -using namespace parse_productions; -using 
namespace grammar; - -#define NO_PRODUCTION nullptr - -// Herein are encoded the productions for our LL2 fish grammar. -// -// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_list). The -// function accepts two tokens, representing the first and second lookahead, and returns a -// production representing the rule, or NULL on error. There is also a tag value which is returned -// by reference; the tag is a sort of node annotation. -// -// Productions are generally a static const array, and we return a pointer to the array (yes, -// really). - -#define RESOLVE(SYM) \ - const production_element_t *SYM::resolve( \ - const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) - -/// A job_list is a list of jobs, separated by semicolons or newlines. -RESOLVE(job_list) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.type) { - case parse_token_type_string: { - // Some keywords are special. - switch (token1.keyword) { - case parse_keyword_t::kw_end: - case parse_keyword_t::kw_else: - case parse_keyword_t::kw_case: { - return production_for(); // end this job list - } - default: { - return production_for(); // normal string - } - } - } - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: { - return production_for(); - } - case parse_token_type_end: { - return production_for(); - } - case parse_token_type_terminate: { - return production_for(); // no more commands, just transition to empty - } - default: { - return NO_PRODUCTION; - } - } -} - -// A job decorator is AND or OR -RESOLVE(job_decorator) { - // If it's followed by --help, it's not a decoration. 
- if (token2.is_help_argument) { - *out_tag = parse_job_decoration_none; - return production_for(); - } - - switch (token1.keyword) { - case parse_keyword_t::kw_and: { - *out_tag = parse_job_decoration_and; - return production_for(); - } - case parse_keyword_t::kw_or: { - *out_tag = parse_job_decoration_or; - return production_for(); - } - default: { - *out_tag = parse_job_decoration_none; - return production_for(); - } - } -} - -RESOLVE(job_conjunction_continuation) { - UNUSED(token2); - UNUSED(out_tag); - switch (token1.type) { - case parse_token_type_andand: - *out_tag = parse_job_decoration_and; - return production_for(); - case parse_token_type_oror: - *out_tag = parse_job_decoration_or; - return production_for(); - default: - return production_for(); - } -} - -RESOLVE(job_continuation) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.type) { - case parse_token_type_pipe: { - return production_for(); // pipe, continuation - } - default: { - return production_for(); // not a pipe, no job continuation - } - } -} - -// A statement is a normal command, or an if / while / and etc. -RESOLVE(statement) { - UNUSED(out_tag); - - // The only block-like builtin that takes any parameters is 'function' So go to decorated - // statements if the subsequent token looks like '--'. The logic here is subtle: - // - // If we are 'begin', then we expect to be invoked with no arguments. - // If we are 'function', then we are a non-block if we are invoked with -h or --help - // If we are anything else, we require an argument, so do the same thing if the subsequent token - // is a statement terminator. - if (token1.type == parse_token_type_string) { - // If we are a function, then look for help arguments. Otherwise, if the next token looks - // like an option (starts with a dash), then parse it as a decorated statement. 
- if (token1.keyword == parse_keyword_t::kw_function && token2.is_help_argument) { - return production_for(); - } else if (token1.keyword != parse_keyword_t::kw_function && token2.has_dash_prefix) { - return production_for(); - } - - // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. - // a "naked if". - bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_t::kw_begin && - token1.keyword != parse_keyword_t::kw_end); - if (naked_invocation_invokes_help && - (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { - return production_for(); - } - } - - switch (token1.type) { - case parse_token_type_string: { - switch (token1.keyword) { - case parse_keyword_t::kw_not: - case parse_keyword_t::kw_exclam: { - return production_for(); - } - case parse_keyword_t::kw_for: - case parse_keyword_t::kw_while: - case parse_keyword_t::kw_function: - case parse_keyword_t::kw_begin: { - return production_for(); - } - case parse_keyword_t::kw_if: { - return production_for(); - } - case parse_keyword_t::kw_else: { - return NO_PRODUCTION; - } - case parse_keyword_t::kw_switch: { - return production_for(); - } - case parse_keyword_t::kw_end: { - return NO_PRODUCTION; - } - // All other keywords fall through to decorated statement. 
- default: { - return production_for(); - } - } - } - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - case parse_token_type_terminate: { - return NO_PRODUCTION; - } - default: { - return NO_PRODUCTION; - } - } -} - -RESOLVE(else_clause) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.keyword) { - case parse_keyword_t::kw_else: { - return production_for(); - } - default: { - return production_for(); - } - } -} - -RESOLVE(else_continuation) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.keyword) { - case parse_keyword_t::kw_if: { - return production_for(); - } - default: { - return production_for(); - } - } -} - -RESOLVE(case_item_list) { - UNUSED(token2); - UNUSED(out_tag); - - if (token1.keyword == parse_keyword_t::kw_case) - return production_for(); - else if (token1.type == parse_token_type_end) - return production_for(); - else - return production_for(); -} - -RESOLVE(not_statement) { - UNUSED(token2); - UNUSED(out_tag); - switch (token1.keyword) { - case parse_keyword_t::kw_not: - return production_for(); - case parse_keyword_t::kw_exclam: - return production_for(); - default: - return NO_PRODUCTION; - } -} - -RESOLVE(andor_job_list) { - UNUSED(out_tag); - - if (token1.type == parse_token_type_end) { - return production_for(); - } else if (token1.keyword == parse_keyword_t::kw_and || - token1.keyword == parse_keyword_t::kw_or) { - // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and - // --help' or a naked 'and', and not part of this list. - if (token2.type == parse_token_type_string && !token2.is_help_argument) { - return production_for(); - } - } - // All other cases end the list. 
- return production_for(); -} - -RESOLVE(argument_list) { - UNUSED(token2); - UNUSED(out_tag); - switch (token1.type) { - case parse_token_type_string: { - return production_for(); - } - default: { - return production_for(); - } - } -} - -RESOLVE(freestanding_argument_list) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.type) { - case parse_token_type_string: { - return production_for(); - } - case parse_token_type_end: { - return production_for(); - } - default: { - return production_for(); - } - } -} - -RESOLVE(block_header) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.keyword) { - case parse_keyword_t::kw_for: { - return production_for(); - } - case parse_keyword_t::kw_while: { - return production_for(); - } - case parse_keyword_t::kw_function: { - return production_for(); - } - case parse_keyword_t::kw_begin: { - return production_for(); - } - default: { - return NO_PRODUCTION; - } - } -} - -RESOLVE(variable_assignments) { - UNUSED(token2); - UNUSED(out_tag); - if (token1.may_be_variable_assignment) { - assert(token1.type == parse_token_type_string); - return production_for(); - } - return production_for(); -} - -RESOLVE(decorated_statement) { - // and/or are typically parsed in job_conjunction at the beginning of a job - // However they may be reached here through e.g. true && and false. - // Refuse to parse them as a command except for --help. See #6089. - if ((token1.keyword == parse_keyword_t::kw_and || token1.keyword == parse_keyword_t::kw_or) && - !token2.is_help_argument) { - return NO_PRODUCTION; - } - - // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the - // second token is not a string, then this is a naked 'command' and we should execute it as - // undecorated. 
- if (token2.type != parse_token_type_string || token2.has_dash_prefix) { - return production_for(); - } - - switch (token1.keyword) { - case parse_keyword_t::kw_command: { - *out_tag = parse_statement_decoration_command; - return production_for(); - } - case parse_keyword_t::kw_builtin: { - *out_tag = parse_statement_decoration_builtin; - return production_for(); - } - case parse_keyword_t::kw_exec: { - *out_tag = parse_statement_decoration_exec; - return production_for(); - } - default: { - *out_tag = parse_statement_decoration_none; - return production_for(); - } - } -} - -RESOLVE(arguments_or_redirections_list) { - UNUSED(token2); - UNUSED(out_tag); - - switch (token1.type) { - case parse_token_type_string: - return production_for(); - case parse_token_type_redirection: - return production_for(); - default: - return production_for(); - } -} - -RESOLVE(optional_newlines) { - UNUSED(token2); - UNUSED(out_tag); - if (token1.is_newline) return production_for(); - return production_for(); -} - -RESOLVE(optional_background) { - UNUSED(token2); - - switch (token1.type) { - case parse_token_type_background: { - *out_tag = parse_background; - return production_for(); - } - default: { - *out_tag = parse_no_background; - return production_for(); - } - } -} - -RESOLVE(optional_time) { - if (token1.keyword == parse_keyword_t::kw_time && !token2.is_help_argument) { - *out_tag = parse_optional_time_time; - return production_for