From 45c9e3b0f176bbd5e487b95c4f20e6eec00ea7ca Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 28 Jun 2020 16:53:58 -0700
Subject: [PATCH 01/13] parsed_source_ref to always make a job_list

Removed an unnecessary param in preparation for more changes.
---
 src/parse_tree.cpp | 6 +++---
 src/parse_tree.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index 1373b1229..ee0281bbf 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -1215,10 +1215,10 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod
     return result;
 }
 
-parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
-                                 parse_token_type_t goal) {
+parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
+                                 parse_error_list_t *errors) {
     parse_node_tree_t tree;
-    if (!parse_tree_from_string(src, flags, &tree, errors, goal)) return {};
+    if (!parse_tree_from_string(src, flags, &tree, errors, symbol_job_list)) return {};
     return std::make_shared<parsed_source_t>(std::move(src), std::move(tree));
 }
 
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 61082d227..3e98104fa 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -225,8 +225,8 @@ struct parsed_source_t {
 };
 /// Return a shared pointer to parsed_source_t, or null on failure.
 using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
-parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
-                                 parse_token_type_t goal = symbol_job_list);
+parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
+                                 parse_error_list_t *errors);
 
 /// Error message for improper use of the exec builtin.
 #define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")

From 4d4455007d3928e7c81b8c43c07c973e83c86175 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 15:27:10 -0700
Subject: [PATCH 02/13] Introduce a new fish ast

This is the first commit of a series intended to replace the existing
"parse tree" machinery. It adds a new abstract syntax tree and uses a more
normal recursive descent parser.

Initially there are no users of the new ast. The following commits will
replace parse_tree -> ast for all usages.
---
 CMakeLists.txt          |    2 +-
 src/ast.cpp             | 1206 +++++++++++++++++++++++++++++++++++++++
 src/ast.h               | 1018 +++++++++++++++++++++++++++++++++
 src/ast_node_types.inc  |   60 ++
 src/fish_indent.cpp     |    7 +
 src/flog.h              |    1 +
 src/parse_constants.h   |   34 ++
 src/parse_grammar.h     |    2 +-
 src/parse_productions.h |    1 +
 src/parse_tree.cpp      |   11 +-
 src/parse_tree.h        |   38 +-
 11 files changed, 2350 insertions(+), 30 deletions(-)
 create mode 100644 src/ast.cpp
 create mode 100644 src/ast.h
 create mode 100644 src/ast_node_types.inc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e5284f092..a360bde47 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,7 +121,7 @@ set(FISH_SRCS
     src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
     src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
     src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
-    src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp
+    src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp
 )
 
 # Header files are just globbed.
diff --git a/src/ast.cpp b/src/ast.cpp
new file mode 100644
index 000000000..3f5013258
--- /dev/null
+++ b/src/ast.cpp
@@ -0,0 +1,1206 @@
+#include "config.h"  // IWYU pragma: keep
+
+#include "ast.h"
+
+#include <array>
+
+#include "common.h"
+#include "flog.h"
+#include "parse_constants.h"
+#include "parse_tree.h"
+#include "wutil.h"
+
+namespace {
+
+/// \return the tokenizer flags (tok_flags_t) which correspond to the parse tree flags \p flags.
+static tok_flags_t tokenizer_flags_from_parse_flags(parse_tree_flags_t flags) {
+    tok_flags_t tok_flags = 0;
+    // Only the three parse flags tested below are translated. Note we do not need to respect
+    // parse_flag_show_blank_lines, no clients are interested in them.
+    if (flags & parse_flag_include_comments) tok_flags |= TOK_SHOW_COMMENTS;
+    if (flags & parse_flag_accept_incomplete_tokens) tok_flags |= TOK_ACCEPT_UNFINISHED;
+    if (flags & parse_flag_continue_after_error) tok_flags |= TOK_CONTINUE_AFTER_ERROR;
+    return tok_flags;
+}
+
+/// A token stream generates a sequence of parser tokens, with lookahead bounded by kMaxLookahead.
+class token_stream_t {
+   public:
+    explicit token_stream_t(const wcstring &src, parse_tree_flags_t flags)
+        : src_(src), tok_(src_.c_str(), tokenizer_flags_from_parse_flags(flags)) {}
+
+    /// \return the token at the given index, without popping it. If the token stream is exhausted,
+    /// it will have parse_token_type_terminate. idx = 0 means the next token, idx = 1 means the
+    /// next-next token, and so forth. Tokens are pulled from the tokenizer on demand.
+    /// We must have that idx < kMaxLookahead.
+    const parse_token_t &peek(size_t idx = 0) {
+        assert(idx < kMaxLookahead && "Trying to look too far ahead");
+        while (idx >= count_) {
+            lookahead_.at(mask(start_ + count_)) = next_from_tok();
+            count_ += 1;
+        }
+        return lookahead_.at(mask(start_ + idx));
+    }
+
+    /// Pop the next token. If nothing is buffered, the token comes straight from the tokenizer.
+    parse_token_t pop() {
+        if (count_ == 0) {
+            return next_from_tok();
+        }
+        parse_token_t result = std::move(lookahead_[start_]);
+        start_ = mask(start_ + 1);
+        count_ -= 1;
+        return result;
+    }
+
+    /// Provide the original source code.
+    const wcstring &source() const { return src_; }
+
+    /// The source ranges of any comment tokens are collected here.
+    /// These are only collected if parse_flag_include_comments is set.
+    std::vector<source_range_t> comment_ranges;
+
+   private:
+    // Helper to mask our circular buffer: wraps \p idx into [0, kMaxLookahead).
+    static constexpr size_t mask(size_t idx) { return idx % kMaxLookahead; }
+
+    parse_token_t next_from_tok() {
+        for (;;) {
+            maybe_t<tok_t> tokenizer_tok{};
+            parse_token_t res = next_parse_token(&tok_, &tokenizer_tok, &storage_);
+            if (res.type == parse_special_type_comment) {
+                comment_ranges.push_back(res.range());  // remember the comment, then skip it
+                continue;
+            }
+            return res;
+        }
+    }
+
+    // The maximum number of lookahead supported.
+    static constexpr size_t kMaxLookahead = 2;
+
+    // We implement a queue with a simple circular buffer.
+    // Note that peek() returns an address, so we must not move elements which are peek'd.
+    // This prevents using vector (which may reallocate).
+    // Deque would work but is too heavyweight for just 2 items.
+    std::array<parse_token_t, kMaxLookahead> lookahead_ = {
+        {token_type_invalid, token_type_invalid}};
+
+    // Starting index in our lookahead.
+    // The "first" token is at this index.
+    size_t start_ = 0;
+
+    // Number of items in our lookahead.
+    size_t count_ = 0;
+
+    // A reference to the original source. The caller must keep it alive for our lifetime.
+    const wcstring &src_;
+
+    // The tokenizer to generate new tokens.
+    tokenizer_t tok_;
+
+    // Temporary storage.
+    wcstring storage_;
+};
+
+}  // namespace
+
+namespace ast {
+
+/// Given a node which we believe to be some sort of block statement, attempt to return a source
+/// range for the block's keyword (for, if, etc) and a user-presentable description. This is used to
+/// provide better error messages. \return an empty range and nullptr if we couldn't find it. Note
+/// at this point the parse tree is incomplete; in particular parent nodes are not set.
+static std::pair<source_range_t, const wchar_t *> find_block_open_keyword(const node_t *node) {
+    const node_t *cursor = node;
+    while (cursor != nullptr) {
+        switch (cursor->type) {
+            case type_t::block_statement:  // descend into the header, classified on the next pass
+                cursor = cursor->as<block_statement_t>()->header.contents.get();
+                break;
+            case type_t::for_header: {
+                const auto *h = cursor->as<for_header_t>();
+                return std::make_pair(h->kw_for.range, L"for loop");
+            }
+            case type_t::while_header: {
+                const auto *h = cursor->as<while_header_t>();
+                return std::make_pair(h->kw_while.range, L"while loop");
+            }
+            case type_t::function_header: {
+                const auto *h = cursor->as<function_header_t>();
+                return std::make_pair(h->kw_function.range, L"function definition");
+            }
+            case type_t::begin_header: {
+                const auto *h = cursor->as<begin_header_t>();
+                return std::make_pair(h->kw_begin.range, L"begin");
+            }
+            case type_t::if_statement: {
+                const auto *h = cursor->as<if_statement_t>();
+                return std::make_pair(h->if_clause.kw_if.range, L"if statement");
+            }
+            case type_t::switch_statement: {
+                const auto *h = cursor->as<switch_statement_t>();
+                return std::make_pair(h->kw_switch.range, L"switch statement");
+            }
+            default:  // not one of the block types we can describe
+                return std::make_pair(source_range_t{}, nullptr);
+        }
+    }
+    return std::make_pair(source_range_t{}, nullptr);  // the cursor became null
+}
+
+/// \return the decoration for this statement, or parse_statement_decoration_none if undecorated.
+parse_statement_decoration_t decorated_statement_t::decoration() const {
+    if (!opt_decoration) {
+        return parse_statement_decoration_none;
+    }
+    switch (opt_decoration->kw) {  // map the decorating keyword to its decoration value
+        case parse_keyword_t::kw_command:
+            return parse_statement_decoration_command;
+        case parse_keyword_t::kw_builtin:
+            return parse_statement_decoration_builtin;
+        case parse_keyword_t::kw_exec:
+            return parse_statement_decoration_exec;
+        default:
+            assert(0 && "Unexpected keyword in statement decoration");
+            return parse_statement_decoration_none;  // reached only when asserts are compiled out
+    }
+}
+
+/// \return a string literal name for an ast type: the stringified ELEM argument for that type.
+const wchar_t *ast_type_to_string(type_t type) {
+    switch (type) {
+#define ELEM(T)     \
+    case type_t::T: \
+        return L"" #T;
+#include "ast_node_types.inc"  // presumably invokes ELEM once per node type - confirm
+    }
+    assert(0 && "unreachable");
+    return L"(unknown)";  // fallback if no case matched and asserts are compiled out
+}
+
+wcstring node_t::describe() const {
+    wcstring res = ast_type_to_string(this->type);  // always begin with the node type's name
+    if (const auto *n = this->try_as<token_base_t>()) {
+        append_format(res, L" '%ls'", token_type_description(n->type));  // add the token type
+    } else if (const auto *n = this->try_as<keyword_base_t>()) {
+        append_format(res, L" '%ls'", keyword_description(n->kw));  // add the keyword
+    }
+    return res;
+}
+
+node_t::~node_t() = default;
+
+/// Equivalent of std::enable_if_t, which is only available from C++14.
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+struct source_range_visitor_t {  // accumulates the union of the source ranges of leaf nodes
+    template <typename Node>
+    enable_if_t<Node::Category == category_t::leaf> visit(const Node &node) {
+        if (node.unsourced) any_unsourced = true;
+        // Union with our range. Leaves with an empty range contribute nothing.
+        if (node.range.length > 0) {
+            if (total.length == 0) {
+                total = node.range;
+            } else {
+                auto end =
+                    std::max(total.start + total.length, node.range.start + node.range.length);
+                total.start = std::min(total.start, node.range.start);
+                total.length = end - total.start;
+            }
+        }
+    }
+
+    // Other node types recurse into their children.
+    template <typename Node>
+    enable_if_t<Node::Category != category_t::leaf> visit(const Node &node) {
+        node_visitor(*this).accept_children_of(node);
+    }
+
+    // Total range we have encountered. Its length stays 0 until a non-empty leaf is visited.
+    source_range_t total{0, 0};
+
+    // Whether any node was found to be unsourced.
+    bool any_unsourced{false};
+};
+
+maybe_t<source_range_t> node_t::try_source_range() const {
+    source_range_visitor_t v;
+    node_visitor(v).accept(this);
+    if (v.any_unsourced) return none();  // a single unsourced leaf means no range is reported
+    return v.total;                       // otherwise the union of the leaf ranges
+}
+
+// Helper to describe a non-empty list of keywords, e.g. "keyword 'x'" or "keywords 'x' or 'y'".
+// TODO: these need to be localized properly.
+static wcstring keywords_user_presentable_description(std::initializer_list<parse_keyword_t> kws) {
+    assert(kws.size() > 0 && "Should not be empty list");
+    if (kws.size() == 1) {
+        return format_string(L"keyword '%ls'", keyword_description(*kws.begin()));
+    }
+    size_t idx = 0;
+    wcstring res = L"keywords ";
+    for (parse_keyword_t kw : kws) {
+        const wchar_t *optor = (idx++ ? L" or " : L"");  // no separator before the first
+        append_format(res, L"%ls'%ls'", optor, keyword_description(kw));
+    }
+    return res;
+}
+
+// Helper to describe a non-empty list of token types; multiple descriptions are joined by " or ".
+// TODO: these need to be localized properly.
+static wcstring token_types_user_presentable_description(
+    std::initializer_list<parse_token_type_t> types) {
+    assert(types.size() > 0 && "Should not be empty list");
+    if (types.size() == 1) {
+        return token_type_user_presentable_description(*types.begin());
+    }
+    size_t idx = 0;
+    wcstring res;
+    for (parse_token_type_t type : types) {
+        const wchar_t *optor = (idx++ ? L" or " : L"");  // no separator before the first
+        append_format(res, L"%ls%ls", optor, token_type_user_presentable_description(type).c_str());
+    }
+    return res;
+}
+
+class ast_t::populator_t {
+    template <typename T>
+    using unique_ptr = std::unique_ptr<T>;
+
+   public:
+    // Parse \p src into \p ast as a \p top_type; errors are appended to \p out_errors if non-null.
+    populator_t(ast_t *ast, const wcstring &src, parse_tree_flags_t flags, type_t top_type,
+                parse_error_list_t *out_errors)
+        : ast_(ast),
+          flags_(flags),
+          tokens_(src, flags),
+          top_type_(top_type),
+          out_errors_(out_errors) {
+        assert((top_type == type_t::job_list || top_type == type_t::freestanding_argument_list) &&
+               "Invalid top type");
+        if (top_type == type_t::job_list) {
+            unique_ptr<job_list_t> list = allocate<job_list_t>();
+            this->populate_list(*list, true /* exhaust_stream */);
+            this->ast_->top_ = std::move(list);
+        } else {
+            unique_ptr<freestanding_argument_list_t> list =
+                allocate<freestanding_argument_list_t>();
+            this->populate_list(list->arguments, true /* exhaust_stream */);
+            this->ast_->top_ = std::move(list);
+        }
+        // Chomp trailing extras, etc. Note the job_list rules are used whatever the top type is.
+        chomp_extras(type_t::job_list);
+
+        // Acquire any comments which the token stream collected while tokenizing.
+        this->ast_->extras_.comments = std::move(tokens_.comment_ranges);
+
+        assert(this->ast_->top_ && "Should have parsed a node");
+    }
+
+    // Given a node type, allocate it and invoke its default constructor, logging the allocation.
+    // \return the resulting Node pointer. It is never null.
+    template <typename Node>
+    unique_ptr<Node> allocate() {
+        unique_ptr<Node> node = make_unique<Node>();
+        FLOGF(ast_construction, L"%*smake %ls %p", spaces(), "", ast_type_to_string(Node::AstType),
+              node.get());
+        return node;
+    }
+
+    // Given a node type, allocate it via allocate() (which default-constructs it),
+    // and then visit it as a field via visit_node_field().
+    // \return the resulting Node pointer. It is never null.
+    template <typename Node>
+    unique_ptr<Node> allocate_visit() {
+        unique_ptr<Node> node = allocate<Node>();
+        this->visit_node_field(*node);
+        return node;
+    }
+
+    /// Helper for FLOGF. \return the indent width for a '%*s' format: two per visit stack entry.
+    int spaces() const { return static_cast<int>(visit_stack_.size() * 2); }
+
+    /// The status of our parser, as computed by status().
+    enum class status_t {
+        // Parsing is going just fine, thanks for asking.
+        ok,
+
+        // We have exhausted the token stream, but the caller was OK with an incomplete parse tree.
+        // All further leaf nodes should have the unsourced flag set.
+        unsourcing,
+
+        // We encountered a parse error and are "unwinding."
+        // Do not consume any tokens until we get back to a list type which stops unwinding.
+        unwinding,
+    };
+
+    /// \return the parser's status. Unwinding takes precedence over unsourcing.
+    status_t status() {
+        if (unwinding_) {
+            return status_t::unwinding;
+        } else if ((flags_ & parse_flag_leave_unterminated) &&
+                   peek_type() == parse_token_type_terminate) {
+            return status_t::unsourcing;  // out of tokens, and the caller tolerates that
+        }
+        return status_t::ok;
+    }
+
+    /// \return whether the status is unwinding.
+    /// This is more efficient than calling status(), which may need to peek at the next token.
+    bool is_unwinding() { return unwinding_; }
+
+    /// \return whether any leaf nodes we visit should be marked as unsourced: true unless ok.
+    bool unsource_leaves() {
+        status_t s = status();
+        return s == status_t::unsourcing || s == status_t::unwinding;
+    }
+
+    /// \return whether we permit an incomplete parse tree (parse_flag_leave_unterminated is set).
+    bool allow_incomplete() const { return flags_ & parse_flag_leave_unterminated; }
+
+    /// This indicates a bug in fish code: log \p func, the printf-style message built from \p fmt,
+    /// and the source text being parsed, then abort(). This function does not return.
+    void internal_error(const char *func, const wchar_t *fmt, ...) const {
+        va_list va;
+        va_start(va, fmt);
+        wcstring msg = vformat_string(fmt, va);
+        va_end(va);
+        FLOG(debug, "Internal parse error from", func, "- this indicates a bug in fish.", msg);
+        FLOGF(debug, L"Encountered while parsing:<<<\n%ls\n>>>", tokens_.source().c_str());
+        abort();
+    }
+
+    /// \return whether a list type \p type allows arbitrary newlines in it.
+    bool list_type_chomps_newlines(type_t type) const {
+        switch (type) {
+            case type_t::argument_list:
+                // Hackish. If we are producing a freestanding argument list, then it allows
+                // newlines, for hysterical raisins.
+                return top_type_ == type_t::freestanding_argument_list;
+
+            case type_t::argument_or_redirection_list:
+                // No newlines inside arguments.
+                return false;
+
+            case type_t::variable_assignment_list:
+                // No newlines inside variable assignment lists.
+                return false;
+
+            case type_t::job_list:
+                // Like echo a \n \n echo b
+                return true;
+
+            case type_t::case_item_list:
+                // Like switch foo \n \n \n case a \n end
+                return true;
+
+            case type_t::andor_job_list:
+                // Like while true ; \n \n and true ; end
+                return true;
+
+            case type_t::elseif_clause_list:
+                // Like if true ; \n \n else if false; end
+                return true;
+
+            case type_t::job_conjunction_continuation_list:
+                // This would be like echo a && echo b \n && echo c
+                // We could conceivably support this but do not now.
+                return false;
+
+            case type_t::job_continuation_list:
+                // This would be like echo a \n | echo b
+                // We could conceivably support this but do not now.
+                return false;
+
+            default:  // any other type is a bug: internal_error() logs and aborts
+                internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type));
+                return false;
+        }
+    }
+
+    /// \return whether a list type \p type allows arbitrary semicolons in it.
+    bool list_type_chomps_semis(type_t type) const {
+        switch (type) {
+            case type_t::argument_list:
+                // Hackish. If we are producing a freestanding argument list, then it allows
+                // semicolons, for hysterical raisins.
+                // That is, this is OK: complete -c foo -a 'x ; y ; z'
+                // But this is not: foo x ; y ; z
+                return top_type_ == type_t::freestanding_argument_list;
+
+            case type_t::argument_or_redirection_list:
+            case type_t::variable_assignment_list:
+                return false;
+
+            case type_t::job_list:
+                // Like echo a ; ;  echo b
+                return true;
+
+            case type_t::case_item_list:
+                // Like switch foo ; ; ;  case a \n end
+                // This is historically allowed.
+                return true;
+
+            case type_t::andor_job_list:
+                // Like while true ; ; ;  and true ; end
+                return true;
+
+            case type_t::elseif_clause_list:
+                // Like if true ; ; ;  else if false; end. Not supported.
+                return false;
+
+            case type_t::job_conjunction_continuation_list:
+                // Like echo a ; ; && echo b. Not supported.
+                return false;
+
+            case type_t::job_continuation_list:
+                // This would be like echo a ; | echo b
+                // Not supported.
+                // We could conceivably support this but do not now.
+                return false;
+
+            default:  // any other type is a bug: internal_error() logs and aborts
+                internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type));
+                return false;
+        }
+    }
+
+    // Chomp extra newlines and semicolons, as permitted for the list type \p type.
+    void chomp_extras(type_t type) {
+        bool chomp_semis = list_type_chomps_semis(type);
+        bool chomp_newlines = list_type_chomps_newlines(type);
+        for (;;) {
+            const auto &peek = this->tokens_.peek();
+            if (chomp_newlines && peek.type == parse_token_type_end && peek.is_newline) {
+                // Just skip this newline, no need to save it.
+                this->tokens_.pop();
+            } else if (chomp_semis && peek.type == parse_token_type_end && !peek.is_newline) {
+                auto tok = this->tokens_.pop();
+                // Perhaps save this extra semi.
+                if (flags_ & parse_flag_show_extra_semis) {
+                    ast_->extras_.semis.push_back(tok.range());
+                }
+            } else {
+                break;  // stop at the first token that may not be chomped
+            }
+        }
+    }
+
+    /// \return whether a list type should recover from errors, i.e. whether we should stop unwinding
+    /// when we encounter it. Only job lists do so, and only under parse_flag_continue_after_error.
+    bool list_type_stops_unwind(type_t type) const {
+        return type == type_t::job_list && (flags_ & parse_flag_continue_after_error);
+    }
+
+    /// Report an error (\p code, message from \p fmt and \p va) at \p range, and begin unwinding.
+    void parse_error_impl(source_range_t range, parse_error_code_t code, const wchar_t *fmt,
+                          va_list va) {
+        ast_->any_error_ = true;
+
+        // Ignore additional parse errors while unwinding.
+        // These may come about e.g. from `true | and`.
+        if (unwinding_) return;
+        unwinding_ = true;
+
+        FLOGF(ast_construction, L"%*sparse error - begin unwinding", spaces(), "");
+        // TODO: can store this conditionally dependent on flags.
+        if (range.start != SOURCE_OFFSET_INVALID) {
+            ast_->extras_.errors.push_back(range);
+        }
+
+        if (out_errors_) {  // the message is only formatted when the caller wants errors
+            parse_error_t err;
+            err.text = vformat_string(fmt, va);
+            err.code = code;
+            err.source_start = range.start;
+            err.source_length = range.length;
+            out_errors_->push_back(std::move(err));
+        }
+    }
+
+    /// Report an error based on the printf-style \p fmt for the source range \p range.
+    void parse_error(source_range_t range, parse_error_code_t code, const wchar_t *fmt, ...) {
+        va_list va;
+        va_start(va, fmt);
+        parse_error_impl(range, code, fmt, va);
+        va_end(va);
+    }
+
+    /// Report an error based on the printf-style \p fmt for the source range of \p token.
+    void parse_error(const parse_token_t &token, parse_error_code_t code, const wchar_t *fmt, ...) {
+        va_list va;
+        va_start(va, fmt);
+        parse_error_impl(token.range(), code, fmt, va);
+        va_end(va);
+    }
+
+    // \return a reference to the non-comment token at lookahead index \p idx, without consuming it.
+    const parse_token_t &peek_token(size_t idx = 0) { return tokens_.peek(idx); }
+
+    // \return the type of the non-comment token at lookahead index \p idx, without consuming it.
+    parse_token_type_t peek_type(size_t idx = 0) { return peek_token(idx).type; }
+
+    // Consume the next token, whatever its type, chomping any comments.
+    // It is an error to call this unless we know there is a non-terminate token available.
+    // \return the token.
+    parse_token_t consume_any_token() {
+        parse_token_t tok = tokens_.pop();
+        assert(tok.type != parse_special_type_comment && "Should not be a comment");
+        assert(tok.type != parse_token_type_terminate &&
+               "Cannot consume terminate token, caller should check status first");
+        return tok;
+    }
+
+    // Consume the next token, expecting \p type. On a mismatch, report an error and \return {0, 0}.
+    source_range_t consume_token_type(parse_token_type_t type) {
+        assert(type != parse_token_type_terminate &&
+               "Should not attempt to consume terminate token");
+        auto tok = consume_any_token();
+        if (tok.type != type) {
+            parse_error(tok, parse_error_generic, _(L"Expected %ls, but found %ls"),
+                        token_type_user_presentable_description(type).c_str(),
+                        tok.user_presentable_description().c_str());
+            return source_range_t{0, 0};
+        }
+        return tok.range();  // the token matched: hand back its source range
+    }
+
+    // The next token could not be parsed at the top level.
+    // For example a trailing end like `begin ; end ; end`
+    // Or an unexpected redirection like `>`
+    // Consume it and add an error describing why it is unexpected.
+    void consume_excess_token_generating_error() {
+        auto tok = consume_any_token();
+
+        // In the rare case that we are parsing a freestanding argument list and not a job list,
+        // generate a generic error.
+        // TODO: this is a crummy message if we get a tokenizer error, for example:
+        //   complete -c foo -a "'abc"
+        if (this->top_type_ == type_t::freestanding_argument_list) {
+            this->parse_error(
+                tok, parse_error_generic, _(L"Expected %ls, but found %ls"),
+                token_type_user_presentable_description(parse_token_type_string).c_str(),
+                tok.user_presentable_description().c_str());
+            return;
+        }
+
+        assert(this->top_type_ == type_t::job_list);
+        switch (tok.type) {
+            case parse_token_type_string:
+                // There are three keywords which end a job list: end, else and case.
+                switch (tok.keyword) {
+                    case parse_keyword_t::kw_end:
+                        this->parse_error(tok, parse_error_unbalancing_end,
+                                          _(L"'end' outside of a block"));
+                        break;
+                    case parse_keyword_t::kw_else:
+                        this->parse_error(tok, parse_error_unbalancing_else,
+                                          _(L"'else' builtin not inside of if block"));
+                        break;
+                    case parse_keyword_t::kw_case:
+                        this->parse_error(tok, parse_error_unbalancing_case,
+                                          _(L"'case' builtin not inside of switch block"));
+                        break;
+                    default:
+                        internal_error(__FUNCTION__,
+                                       L"Token %ls should not have prevented parsing a job list",
+                                       tok.user_presentable_description().c_str());
+                        break;
+                }
+                break;
+            case parse_token_type_pipe:
+            case parse_token_type_redirection:
+            case parse_token_type_background:
+            case parse_token_type_andand:
+            case parse_token_type_oror:
+                parse_error(tok, parse_error_generic, _(L"Expected a string, but found %ls"),
+                            tok.user_presentable_description().c_str());
+                break;
+
+            case parse_special_type_tokenizer_error:  // surface the tokenizer's own error message
+                parse_error(tok, parse_error_from_tokenizer_error(tok.tok_error), L"%ls",
+                            tokenizer_get_error_message(tok.tok_error));
+                break;
+
+            case parse_token_type_end:
+                internal_error(__FUNCTION__, L"End token should never be excess");
+                break;
+            case parse_token_type_terminate:
+                internal_error(__FUNCTION__, L"Terminate token should never be excess");
+                break;
+            default:
+                internal_error(__FUNCTION__, L"Unexpected excess token type: %ls",
+                               tok.user_presentable_description().c_str());
+                break;
+        }
+    }
+
+    // Our can_parse implementations are for optional values and for lists.
+    // A true return means we should descend into the production, false means stop.
+    // Note that the argument is always nullptr and should be ignored. It is provided strictly for
+    // overloading purposes. A job conjunction needs a string token which is not end/else/case.
+    bool can_parse(job_conjunction_t *) {
+        const auto &token = peek_token();
+        if (token.type != parse_token_type_string) return false;
+        switch (token.keyword) {
+            case parse_keyword_t::kw_end:
+            case parse_keyword_t::kw_else:
+            case parse_keyword_t::kw_case:
+                // These end a job list.
+                return false;
+            case parse_keyword_t::none:
+            default:
+                return true;
+        }
+    }
+
+    bool can_parse(argument_t *) { return peek_type() == parse_token_type_string; }
+    bool can_parse(redirection_t *) { return peek_type() == parse_token_type_redirection; }
+    bool can_parse(argument_or_redirection_t *) {  // true if either alternative can be parsed
+        return can_parse((argument_t *)nullptr) || can_parse((redirection_t *)nullptr);
+    }
+
+    bool can_parse(variable_assignment_t *) {
+        // We can parse a variable_assignment if our token is a variable assignment and the next
+        // token is a string. If the next token is not a string, then we have either a bare
+        // assignment like `foo=bar` or perhaps `foo=bar | `, etc. In that case we want to allow
+        // the statement to see this assignment so it can produce an error.
+        return peek_token(0).may_be_variable_assignment && peek_type(1) == parse_token_type_string;
+    }
+
+    template <parse_token_type_t... Tok>
+    bool can_parse(token_t<Tok...> *tok) {  // NOTE(review): tok may be null; assumes static call
+        return tok->allows_token(peek_token().type);
+    }
+
+    // Note we have specific overloads for our keyword nodes, as they need custom logic.
+    bool can_parse(job_conjunction_t::decorator_t *) {
+        // This is for a job conjunction like `and stuff`
+        // But if it's `and --help` then we treat it as an ordinary command, so return false.
+        return job_conjunction_t::decorator_t::allows_keyword(peek_token(0).keyword) &&
+               !peek_token(1).is_help_argument;
+    }
+
+    bool can_parse(decorated_statement_t::decorator_t *) {
+        // Here the keyword is 'command' or 'builtin' or 'exec'.
+        // `command stuff` executes a command called stuff.
+        // `command -n` passes the -n argument to the 'command' builtin.
+        // `command` by itself is a command.
+        if (!decorated_statement_t::decorator_t::allows_keyword(peek_token(0).keyword)) {
+            return false;
+        }
+        // Is it like `command --stuff` or `command` by itself? Then it is not a decoration.
+        const auto &tok1 = peek_token(1);
+        return tok1.type == parse_token_type_string && !tok1.is_dash_prefix_string();
+    }
+
+    bool can_parse(keyword_t<parse_keyword_t::kw_time> *) {
+        // 'time' is parsed as the time keyword only if the token after it is not a
+        // dash-prefixed string.
+        using time_keyword_t = keyword_t<parse_keyword_t::kw_time>;
+        if (!time_keyword_t::allows_keyword(peek_token(0).keyword)) return false;
+        return !peek_token(1).is_dash_prefix_string();
+    }
+
+    /// A job continuation can be parsed when the peeked token is a pipe.
+    bool can_parse(job_continuation_t *) {
+        const auto peeked = peek_type();
+        return peeked == parse_token_type_pipe;
+    }
+
+    /// A job conjunction continuation is introduced by an andand or an oror token.
+    bool can_parse(job_conjunction_continuation_t *) {
+        switch (peek_type()) {
+            case parse_token_type_andand:
+            case parse_token_type_oror:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    bool can_parse(andor_job_t *) {
+        const auto kw = peek_token().keyword;
+        if (kw != parse_keyword_t::kw_and && kw != parse_keyword_t::kw_or) return false;
+
+        // Check that the argument to and/or is a string that's not help. Otherwise it's either
+        // `and --help` or a naked `and`, and not part of this list.
+        const auto &following = peek_token(1);
+        return following.type == parse_token_type_string && !following.is_help_argument;
+    }
+
+    /// An elseif clause needs the keyword `else` immediately followed by the keyword `if`.
+    bool can_parse(elseif_clause_t *) {
+        if (peek_token(0).keyword != parse_keyword_t::kw_else) return false;
+        return peek_token(1).keyword == parse_keyword_t::kw_if;
+    }
+
+    /// An else clause can be parsed when the peeked token is the keyword `else`.
+    bool can_parse(else_clause_t *) {
+        const auto kw = peek_token().keyword;
+        return kw == parse_keyword_t::kw_else;
+    }
+    /// A case item can be parsed when the peeked token is the keyword `case`.
+    bool can_parse(case_item_t *) {
+        const auto kw = peek_token().keyword;
+        return kw == parse_keyword_t::kw_case;
+    }
+
+    // Populate \p list, whose type is ListType and whose contents type is ContentsNode, with as
+    // many elements as we can parse.
+    // If \p exhaust_stream is set, then keep going until we get parse_token_type_terminate,
+    // producing an error for each token that cannot begin a ContentsNode.
+    // If we are already unwinding on entry, the list is left empty.
+    template <type_t ListType, typename ContentsNode>
+    void populate_list(list_t<ListType, ContentsNode> &list, bool exhaust_stream = false) {
+        // Do not attempt to parse a list if we are unwinding.
+        if (is_unwinding()) {
+            assert(!exhaust_stream &&
+                   "exhaust_stream should only be set at top level, and so we should not be "
+                   "unwinding");
+            // Log that this list was unwound, and leave it empty.
+            FLOGF(ast_construction, L"%*sunwinding %ls", spaces(), "",
+                  ast_type_to_string(ListType));
+            assert(list.empty() && "Should be an empty list");
+            return;
+        }
+
+        for (;;) {
+            // If we are unwinding, then either we recover or we break the loop, dependent on the
+            // loop type.
+            if (is_unwinding()) {
+                if (!list_type_stops_unwind(ListType)) {
+                    break;
+                }
+                // We are going to stop unwinding.
+                // Rather hackish. Just chomp until we get to a string or end node.
+                // The range of every chomped token is recorded in the ast's error ranges.
+                for (auto type = peek_type();
+                     type != parse_token_type_string && type != parse_token_type_terminate &&
+                     type != parse_token_type_end;
+                     type = peek_type()) {
+                    parse_token_t tok = tokens_.pop();
+                    ast_->extras_.errors.push_back(tok.range());
+                    FLOGF(ast_construction, L"%*schomping range %u-%u", spaces(), "",
+                          tok.source_start, tok.source_length);
+                }
+                FLOGF(ast_construction, L"%*sdone unwinding", spaces(), "");
+                unwinding_ = false;
+            }
+
+            // Chomp semis and newlines.
+            chomp_extras(ListType);
+
+            // Now try parsing a node.
+            if (auto node = this->try_parse<ContentsNode>()) {
+                list.contents.push_back(std::move(node));
+            } else if (exhaust_stream && peek_type() != parse_token_type_terminate) {
+                // We aren't allowed to stop. Produce an error and keep going.
+                consume_excess_token_generating_error();
+            } else {
+                // We either stop once we can't parse any more of this contents node, or we
+                // exhausted the stream as requested.
+                break;
+            }
+        }
+
+        FLOGF(ast_construction, L"%*s%ls size: %lu", spaces(), "", ast_type_to_string(ListType),
+              (unsigned long)list.count());
+    }
+
+    /// Allocate and populate a statement contents pointer.
+    /// The concrete node type is chosen by looking at the next two tokens: a not statement, a
+    /// block statement, an if statement, a switch statement, or otherwise a decorated statement.
+    /// This must never return null. On a parse error a decorated statement is still returned.
+    statement_t::contents_ptr_t allocate_populate_statement_contents() {
+        // In case we get a parse error, we still need to return something non-null. Use a decorated
+        // statement; all of its leaf nodes will end up unsourced.
+        auto got_error = [this] {
+            assert(unwinding_ && "Should have produced an error");
+            return this->allocate_visit<decorated_statement_t>();
+        };
+
+        using pkt = parse_keyword_t;
+        const auto &token1 = peek_token(0);
+        if (token1.type == parse_token_type_terminate && allow_incomplete()) {
+            // This may happen if we just have a 'time' prefix.
+            // Construct a decorated statement, which will be unsourced.
+            return this->allocate_visit<decorated_statement_t>();
+        } else if (token1.type != parse_token_type_string) {
+            // Not a string, so this cannot be a command. Note we may be unwinding already, for
+            // example in `true | and`.
+            // NOTE(review): the intent is to not produce a second error in that case;
+            // presumably parse_error() takes care of that itself - confirm.
+            parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"),
+                        token1.user_presentable_description().c_str());
+            return got_error();
+        } else if (token1.may_be_variable_assignment) {
+            // Here we have a variable assignment which we chose to not parse as a variable
+            // assignment because there was no string after it.
+            parse_error(token1, parse_error_bare_variable_assignment, L"");
+            return got_error();
+        }
+
+        // The only block-like builtin that takes any parameters is 'function'. So go to decorated
+        // statements if the subsequent token looks like '--'. The logic here is subtle:
+        //
+        // If we are 'begin', then we expect to be invoked with no arguments.
+        // If we are 'function', then we are a non-block if we are invoked with -h or --help
+        // If we are anything else, we require an argument, so do the same thing if the subsequent
+        // token is a statement terminator.
+        //
+        // Note token1 is necessarily a string at this point: every other case returned above.
+        if (token1.type == parse_token_type_string) {
+            const auto &token2 = peek_token(1);
+            // If we are a function, then look for help arguments. Otherwise, if the next token
+            // looks like an option (starts with a dash), then parse it as a decorated statement.
+            if (token1.keyword == pkt::kw_function && token2.is_help_argument) {
+                return allocate_visit<decorated_statement_t>();
+            } else if (token1.keyword != pkt::kw_function && token2.has_dash_prefix) {
+                return allocate_visit<decorated_statement_t>();
+            }
+
+            // Likewise if the next token doesn't look like an argument at all. This corresponds to
+            // e.g. a "naked if". 'begin' and 'end' are excluded from this rule.
+            bool naked_invocation_invokes_help =
+                (token1.keyword != pkt::kw_begin && token1.keyword != pkt::kw_end);
+            if (naked_invocation_invokes_help && (token2.type == parse_token_type_end ||
+                                                  token2.type == parse_token_type_terminate)) {
+                return allocate_visit<decorated_statement_t>();
+            }
+        }
+
+        // Dispatch on the keyword of the first token.
+        switch (token1.keyword) {
+            case pkt::kw_not:
+            case pkt::kw_exclam:
+                return allocate_visit<not_statement_t>();
+            case pkt::kw_for:
+            case pkt::kw_while:
+            case pkt::kw_function:
+            case pkt::kw_begin:
+                return allocate_visit<block_statement_t>();
+            case pkt::kw_if:
+                return allocate_visit<if_statement_t>();
+            case pkt::kw_switch:
+                return allocate_visit<switch_statement_t>();
+
+            case pkt::kw_end:
+                // 'end' is forbidden as a command.
+                // For example, `if end` or `while end` will produce this error.
+                // We still have to descend into the decorated statement because
+                // we can't leave our pointer as null.
+                parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"),
+                            token1.user_presentable_description().c_str());
+                return got_error();
+
+            default:
+                // Any other keyword, or no keyword at all, is a decorated statement.
+                return allocate_visit<decorated_statement_t>();
+        }
+    }
+
+    /// Allocate and populate a block statement header, chosen by the peeked keyword
+    /// (for, while, function or begin).
+    /// This must never return null.
+    block_statement_t::header_ptr_t allocate_populate_block_header() {
+        using pkt = parse_keyword_t;
+        const auto opener = peek_token().keyword;
+        if (opener == pkt::kw_for) return allocate_visit<for_header_t>();
+        if (opener == pkt::kw_while) return allocate_visit<while_header_t>();
+        if (opener == pkt::kw_function) return allocate_visit<function_header_t>();
+        if (opener == pkt::kw_begin) return allocate_visit<begin_header_t>();
+
+        // Any other keyword means we were dispatched here by mistake.
+        internal_error(__FUNCTION__, L"should not have descended into block_header");
+        DIE("Unreachable");
+    }
+
+    /// Parse an AstNode if the matching can_parse() overload accepts the upcoming tokens.
+    /// \return the allocated and visited node, or nullptr if it cannot be parsed here.
+    template <typename AstNode>
+    unique_ptr<AstNode> try_parse() {
+        // The null pointer only selects which can_parse() overload is called.
+        AstNode *const overload_tag = nullptr;
+        if (can_parse(overload_tag)) return allocate_visit<AstNode>();
+        return nullptr;
+    }
+
+    /// Populate an argument leaf by consuming a string token, or mark it unsourced if leaves
+    /// are being unsourced.
+    void visit_node_field(argument_t &arg) {
+        if (!unsource_leaves()) {
+            arg.range = consume_token_type(parse_token_type_string);
+        } else {
+            arg.unsourced = true;
+        }
+    }
+
+    /// Populate a variable assignment leaf by consuming a string token, or mark it unsourced if
+    /// leaves are being unsourced.
+    void visit_node_field(variable_assignment_t &varas) {
+        if (unsource_leaves()) {
+            varas.unsourced = true;
+        } else {
+            // We should only get here for a token that may be a variable assignment.
+            const bool looks_like_assignment = peek_token().may_be_variable_assignment;
+            if (!looks_like_assignment) {
+                internal_error(__FUNCTION__,
+                               L"Should not have created variable_assignment_t from this token");
+            }
+            varas.range = consume_token_type(parse_token_type_string);
+        }
+    }
+
+    /// Visit the fields of a job continuation, first diagnosing 'and' / 'or' in a pipeline.
+    void visit_node_field(job_continuation_t &node) {
+        // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`.
+        // The keyword is looked for on the second peeked token.
+        const auto &after_pipe = peek_token(1);
+        const bool is_and = after_pipe.keyword == parse_keyword_t::kw_and;
+        const bool is_or = after_pipe.keyword == parse_keyword_t::kw_or;
+        if (is_and || is_or) {
+            const wchar_t *cmdname = is_and ? L"and" : L"or";
+            parse_error(after_pipe, parse_error_andor_in_pipeline, EXEC_ERR_MSG, cmdname);
+        }
+        node.accept(*this);
+    }
+
+    // Visit branch nodes by just calling accept() to visit their fields.
+    // This overload only participates for nodes whose Category is category_t::branch.
+    template <typename Node>
+    enable_if_t<Node::Category == category_t::branch> visit_node_field(Node &node) {
+        // This field is a direct embedding of an AST value.
+        // Passing ourselves to accept() makes it call back into our visit methods.
+        node.accept(*this);
+    }
+
+    /// Populate a raw pointer field by allocating and visiting a node of type Node.
+    /// allocate_visit() returns an owning unique_ptr (see try_parse()), which cannot be
+    /// assigned to a raw pointer, so ownership is released into the field. Whoever holds the
+    /// field is then responsible for the node.
+    template <typename Node>
+    void visit_pointer_field(Node *&node) {
+        // This field is a pointer embedding of an ast node.
+        // Allocate and populate it.
+        node = allocate_visit<Node>().release();
+    }
+
+    // Overload for token fields.
+    // Consumes the peeked token if its type is allowed by the field; otherwise produces a parse
+    // error and leaves the field unsourced.
+    template <parse_token_type_t... TokTypes>
+    void visit_node_field(token_t<TokTypes...> &token) {
+        if (unsource_leaves()) {
+            token.unsourced = true;
+            return;
+        }
+
+        if (token.allows_token(peek_token().type)) {
+            parse_token_t consumed = consume_any_token();
+            token.type = consumed.type;
+            token.range = consumed.range();
+            return;
+        }
+
+        // Wrong token type: say what was expected and what was found instead.
+        const auto &found = peek_token();
+        parse_error(found, parse_error_generic, L"Expected %ls, but found %ls",
+                    token_types_user_presentable_description({TokTypes...}).c_str(),
+                    found.user_presentable_description().c_str());
+        token.unsourced = true;
+    }
+
+    // Overload for keyword fields.
+    // Consumes the peeked token if its keyword is allowed by the field; otherwise marks the
+    // field unsourced and produces a parse error.
+    template <parse_keyword_t... KWs>
+    void visit_node_field(keyword_t<KWs...> &keyword) {
+        if (unsource_leaves()) {
+            keyword.unsourced = true;
+            return;
+        }
+
+        if (!keyword.allows_keyword(peek_token().keyword)) {
+            keyword.unsourced = true;
+            const auto &peek = peek_token();
+
+            // Special error reporting for keyword_t<kw_end>: report the missing 'end' against
+            // the range returned by find_block_open_keyword() for the node being visited.
+            bool specially_handled = false;
+            std::array<parse_keyword_t, sizeof...(KWs)> allowed = {{KWs...}};
+            if (allowed.size() == 1 && allowed[0] == parse_keyword_t::kw_end) {
+                assert(!visit_stack_.empty() && "Visit stack should not be empty");
+                auto p = find_block_open_keyword(visit_stack_.back());
+                source_range_t kw_range = p.first;
+                const wchar_t *kw_name = p.second;
+                if (kw_name) {
+                    this->parse_error(kw_range, parse_error_generic,
+                                      L"Missing end to balance this %ls", kw_name);
+                    // Record that the tailored error was produced, so that the generic
+                    // "Expected ..., but found ..." error below is not produced as well.
+                    specially_handled = true;
+                }
+            }
+            if (!specially_handled) {
+                parse_error(peek, parse_error_generic, L"Expected %ls, but found %ls",
+                            keywords_user_presentable_description({KWs...}).c_str(),
+                            peek.user_presentable_description().c_str());
+            }
+            return;
+        }
+        parse_token_t tok = consume_any_token();
+        keyword.kw = tok.keyword;
+        keyword.range = tok.range();
+    }
+
+    // Overload for maybe_newlines.
+    // Consumes every consecutive newline token and sets the range to span all of them.
+    void visit_node_field(maybe_newlines_t &nls) {
+        if (unsource_leaves()) {
+            nls.unsourced = true;
+            return;
+        }
+        // TODO: it would be nice to have the start offset be the current position in the token
+        // stream, even if there are no newlines.
+        nls.range = {0, 0};
+        for (;;) {
+            if (!peek_token().is_newline) break;
+            const auto consumed = consume_token_type(parse_token_type_end);
+            if (nls.range.length != 0) {
+                // Extend the existing range so that it ends where this newline ends.
+                nls.range.length = consumed.start + consumed.length - nls.range.start;
+            } else {
+                // Nothing recorded yet: adopt this newline's range.
+                nls.range = consumed;
+            }
+        }
+    }
+
+    /// Populate an optional field: its contents become the parsed node, or null if try_parse()
+    /// declines to parse an AstNode here.
+    template <typename AstNode>
+    void visit_optional_field(optional_t<AstNode> &ptr) {
+        // This field is an optional node.
+        ptr.contents = this->try_parse<AstNode>();
+    }
+
+    /// Populate a list field via populate_list(), without exhausting the token stream
+    /// (exhaust_stream keeps its default of false).
+    template <type_t ListNodeType, typename ContentsNode>
+    void visit_list_field(list_t<ListNodeType, ContentsNode> &list) {
+        // This field is an embedding of an array of (pointers to) ContentsNode.
+        // Parse as many as we can.
+        populate_list(list);
+    }
+
+    // We currently only have a handful of union pointer types.
+    // Handle them directly.
+
+    /// Populate the contents of a statement. The result is asserted to be non-null.
+    void visit_union_field(statement_t::contents_ptr_t &ptr) {
+        ptr = this->allocate_populate_statement_contents();
+        assert(ptr && "Statement contents must never be null");
+    }
+
+    /// Populate the contents of an argument_or_redirection: an argument if one can be parsed,
+    /// otherwise a redirection. It is an internal error if neither can be parsed.
+    void visit_union_field(argument_or_redirection_t::contents_ptr_t &ptr) {
+        if (auto arg = try_parse<argument_t>()) {
+            ptr.contents = std::move(arg);
+        } else if (auto redir = try_parse<redirection_t>()) {
+            ptr.contents = std::move(redir);
+        } else {
+            internal_error(__FUNCTION__, L"Unable to parse argument or redirection");
+        }
+        // The message previously named statement contents, copied from the overload above.
+        assert(ptr && "Argument or redirection contents must never be null");
+    }
+
+    /// Populate the header of a block statement. The result is asserted to be non-null.
+    void visit_union_field(block_statement_t::header_ptr_t &ptr) {
+        ptr = this->allocate_populate_block_header();
+        assert(ptr && "Header pointer must never be null");
+    }
+
+    /// Called before the fields of \p node are visited: logs the node and pushes it onto the
+    /// visit stack.
+    void will_visit_fields_of(const node_t &node) {
+        FLOGF(ast_construction, L"%*swill_visit %ls %p", spaces(), "", node.describe().c_str(),
+              (const void *)&node);
+        visit_stack_.push_back(&node);
+    }
+
+    /// Called after the fields of \p node were visited: pops it from the visit stack, asserting
+    /// that it was the top entry, and logs the node.
+    void did_visit_fields_of(const node_t &node) {
+        assert(!visit_stack_.empty() && visit_stack_.back() == &node &&
+               "Node was not at the top of the visit stack");
+        visit_stack_.pop_back();
+        FLOGF(ast_construction, L"%*sdid_visit %ls %p", spaces(), "", node.describe().c_str(),
+              (const void *)&node);
+    }
+
+    // The ast which we are populating.
+    ast_t *const ast_;
+
+    // Flags controlling parsing.
+    parse_tree_flags_t flags_{};
+
+    // Stream of tokens which we consume.
+    token_stream_t tokens_;
+
+    // The type which we are attempting to parse, typically job_list but may be
+    // freestanding_argument_list.
+    const type_t top_type_;
+
+    // If set, we are unwinding due to error recovery.
+    // populate_list() clears this once a list which stops unwinding has chomped the offending
+    // tokens.
+    bool unwinding_{false};
+
+    // A stack containing the nodes whose fields we are visiting.
+    // Pushed in will_visit_fields_of() and popped in did_visit_fields_of().
+    std::vector<const node_t *> visit_stack_{};
+
+    // If non-null, populate with errors.
+    parse_error_list_t *out_errors_{};
+};
+
+// Set the parent fields of all nodes in the tree rooted at \p top.
+static void set_parents(const node_t *top) {
+    // Visitor which stores into each visited node the node whose children are being walked.
+    struct parent_setter_t {
+        // The node whose children are currently being visited, or null at the root.
+        const node_t *parent_{nullptr};
+
+        void visit(const node_t &node) {
+            // Nodes are handed to us as const, hence the const_cast to store the parent.
+            const_cast<node_t &>(node).parent = parent_;
+
+            // Descend with this node as the parent, then restore the previous parent.
+            const node_t *const enclosing = parent_;
+            parent_ = &node;
+            node_visitor(*this).accept_children_of(&node);
+            parent_ = enclosing;
+        }
+    };
+
+    parent_setter_t setter;
+    node_visitor(setter).accept(top);
+}
+
+// static
+// Construct an ast from \p src with \p top as the type to parse, then set the parent pointer
+// of every node. \p out_errors is passed through to the populator.
+// NOTE(review): the populator is only constructed here, so the parse presumably runs inside
+// its constructor - confirm against populator_t.
+ast_t ast_t::parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags,
+                            parse_error_list_t *out_errors, type_t top) {
+    ast_t ast;
+
+    // Populate our ast.
+    populator_t pop(&ast, src, parse_flags, top, out_errors);
+
+    // Set all parent nodes.
+    // It turns out to be more convenient to do this after the parse phase.
+    set_parents(ast.top());
+
+    return ast;
+}
+
+// static
+// Parse \p src with a job_list as the top type. See parse_from_top().
+ast_t ast_t::parse(const wcstring &src, parse_tree_flags_t flags, parse_error_list_t *out_errors) {
+    return parse_from_top(src, flags, out_errors, type_t::job_list);
+}
+
+// static
+// Parse \p src with a freestanding_argument_list as the top type. See parse_from_top().
+ast_t ast_t::parse_argument_list(const wcstring &src, parse_tree_flags_t flags,
+                                 parse_error_list_t *out_errors) {
+    return parse_from_top(src, flags, out_errors, type_t::freestanding_argument_list);
+}
+
+// \return the depth of \p node, i.e. the number of parent links above it (0 for a node
+// with a null parent).
+static int get_depth(const node_t *node) {
+    int depth = 0;
+    const node_t *ancestor = node->parent;
+    while (ancestor != nullptr) {
+        ++depth;
+        ancestor = ancestor->parent;
+    }
+    return depth;
+}
+
+/// \return a textual dump of this ast with one line per node, in the order produced by walk().
+/// Each line is indented according to the node's depth. \p orig is the source text, used to
+/// show the source of arguments and of string and redirection tokens.
+wcstring ast_t::dump(const wcstring &orig) const {
+    wcstring result;
+
+    // Return a string that repeats "! " \p amt times; this is the indentation for a line.
+    auto indent_prefix = [](int amt) {
+        std::string prefix;
+        prefix.reserve(amt * 2);
+        for (int i = 0; i < amt; i++) prefix.append("! ");
+        return prefix;
+    };
+
+    traversal_t tv = this->walk();
+    while (const auto *node = tv.next()) {
+        // One "! " of padding per level of depth.
+        append_format(result, L"%s", indent_prefix(get_depth(node)).c_str());
+
+        if (const auto *arg = node->try_as<argument_t>()) {
+            // Arguments show their source text when they have any.
+            append_format(result, L"argument");
+            if (auto argsrc = arg->try_source(orig)) {
+                append_format(result, L": '%ls'", argsrc->c_str());
+            }
+        } else if (const auto *kw = node->try_as<keyword_base_t>()) {
+            append_format(result, L"keyword: %ls", keyword_description(kw->kw));
+        } else if (const auto *tok = node->try_as<token_base_t>()) {
+            wcstring desc;
+            switch (tok->type) {
+                case parse_token_type_string:
+                    desc = format_string(L"string");
+                    if (auto strsource = tok->try_source(orig)) {
+                        append_format(desc, L": '%ls'", strsource->c_str());
+                    }
+                    break;
+                case parse_token_type_redirection:
+                    desc = L"redirection";
+                    if (auto strsource = tok->try_source(orig)) {
+                        append_format(desc, L": '%ls'", strsource->c_str());
+                    }
+                    break;
+                case parse_token_type_end:
+                    desc = L"<;>";
+                    break;
+                case token_type_invalid:
+                    // This may occur with errors, e.g. we expected to see a string but saw a
+                    // redirection.
+                    desc = L"<error>";
+                    break;
+                default:
+                    desc = token_type_user_presentable_description(tok->type);
+                    break;
+            }
+            append_format(result, L"%ls", desc.c_str());
+        } else {
+            // Every other node is shown via its own description.
+            append_format(result, L"%ls", node->describe().c_str());
+        }
+        append_format(result, L"\n");
+    }
+    return result;
+}
+}  // namespace ast
diff --git a/src/ast.h b/src/ast.h
new file mode 100644
index 000000000..26152c8d5
--- /dev/null
+++ b/src/ast.h
@@ -0,0 +1,1018 @@
+// Programmatic representation of fish grammar.
+
+#ifndef FISH_AST_H
+#define FISH_AST_H
+
+#include <array>
+#include <tuple>
+#include <type_traits>
+
+#include "flog.h"
+#include "parse_constants.h"
+#include "tokenizer.h"
+
+namespace ast {
+
+/**
+ * This defines the fish abstract syntax tree.
+ * The fish ast is a tree data structure. The nodes of the tree
+ * are divided into three types:
+ *
+ * - leaf nodes refer to a range of source, and have no child nodes.
+ * - branch nodes have ONLY child nodes, and no other fields.
+ * - list nodes contain a list of some other node type (branch or leaf).
+ *
+ * Most clients will be interested in visiting the nodes of an ast.
+ * See node_visitation_t below.
+ */
+
+struct node_t;
+
+// Our node categories.
+// Note these are not stored directly in a node; they are provided in the Category static constexpr
+// variable in each node.
+enum class category_t : uint8_t {
+    // Nodes which have child nodes.
+    branch,
+    // Nodes which refer to a range of source, and have no child nodes.
+    leaf,
+    // Nodes which contain a list of some other node type.
+    list,
+};
+
+// Declare our type enum.
+// For each member of our ast, this creates an enum value.
+// For example this creates `type_t::job_list`.
+// Each ELEM(T) entry in ast_node_types.inc expands to the enumerator `T,`.
+// NOTE(review): ELEM is defined again further down without an #undef here, so the .inc file
+// presumably undefines it - confirm.
+enum class type_t : uint8_t {
+#define ELEM(T) T,
+#include "ast_node_types.inc"
+};
+
+// Helper to return a string description of a type.
+const wchar_t *ast_type_to_string(type_t type);
+
+// Forward declare all AST structs.
+// Each ELEM(T) entry in ast_node_types.inc expands to `struct T_t;`, for example
+// `struct job_list_t;`.
+#define ELEM(T) struct T##_t;
+#include "ast_node_types.inc"
+
+/*
+ * A FieldVisitor is something which can visit the fields of an ast node.
+ * This is used during ast construction.
+ *
+ * To trigger field visitation, use the accept() function:
+ *    MyFieldVisitor v;
+ *    node->accept(v);
+ *
+ * Example FieldVisitor:
+ *
+ * struct MyFieldVisitor {
+ *
+ *    /// will_visit (did_visit) is called before (after) a node's fields are visited.
+ *    void will_visit_fields_of(node_t &node);
+ *    void did_visit_fields_of(node_t &node);
+ *
+ *    /// These are invoked with the concrete type of each node,
+ *    /// so they may be overloaded to distinguish node types.
+ *    /// Example:
+ *    void will_visit_fields_of(job_t &job);
+ *
+ *    /// The visitor needs to be prepared for the following five field types.
+ *    /// Naturally the visitor may overload these visit methods to carve this
+ *    /// arbitrarily finely.
+ *
+ *    /// A field may be a "direct embedding" of a node.
+ *    /// That is, an ast node may have another node as a member.
+ *    template <typename Node>
+ *    void visit_node_field(Node &node);
+ *
+ *    /// A field may be a list_t of (pointers to) some other node type.
+ *    template <type_t List, typename Node>
+ *    void visit_list_field(list_t<List, Node> &list);
+ *
+ *    /// A field may be a pointer to another node.
+ *    /// Every such pointer must be non-null after construction.
+ *    template <typename Node>
+ *    void visit_pointer_field(Node *&ptr);
+ *
+ *    /// A field may be optional, meaning it may or may not exist.
+ *    template <typename Node>
+ *    void visit_optional_field(optional_t<Node> &opt);
+ *
+ *    /// A field may be a union pointer, meaning it points to one of
+ *    /// a fixed set of node types. A union pointer is never null
+ *    /// after construction.
+ *    template <typename... Nodes>
+ *    void visit_union_field(union_ptr_t<Nodes...> &union_ptr);
+ * };
+ */
+
+// A union pointer field is a pointer to one of a fixed set of node types.
+// It is never null after construction.
+// The node is owned by the union pointer and stored type-erased as a node_t.
+template <typename... Nodes>
+struct union_ptr_t {
+    // The owned node, or null if this was default-constructed and not yet populated.
+    std::unique_ptr<node_t> contents{};
+
+    /// \return a pointer to the node contents.
+    /// It is an error (asserted) to call this when the contents are null.
+    const node_t *get() const {
+        assert(contents && "Null pointer");
+        return contents.get();
+    }
+
+    /// \return whether we have non-null contents.
+    explicit operator bool() const { return contents != nullptr; }
+
+    /// Access the contents as a node_t. Asserts that the contents are non-null.
+    const node_t *operator->() const { return get(); }
+
+    /// \return whether this union pointer can hold the given node.
+    /// This is only declared here; the definition is not in this part of the file.
+    static inline bool allows_node(const node_t &node);
+
+    union_ptr_t() = default;
+
+    /// Take ownership of \p n, which must be non-null and of a type accepted by allows_node().
+    /// Both conditions are only checked by assertion.
+    template <typename Node>
+    /* implicit */ union_ptr_t(std::unique_ptr<Node> n) : contents(std::move(n)) {
+        // TODO: this could be made statically type safe.
+        assert(contents != nullptr && allows_node(*contents) &&
+               "union_ptr constructed from invalid node type");
+    }
+};
+
+// A pointer to something, or nullptr if not present.
+// The node, if present, is owned by the optional. The accessors only expose it as const; code
+// which populates the optional assigns to `contents` directly.
+template <typename AstNode>
+struct optional_t {
+    // The owned node, or null if not present.
+    std::unique_ptr<AstNode> contents{};
+
+    /// \return whether a node is present.
+    explicit operator bool() const { return contents != nullptr; }
+
+    /// Access the node. Asserts that a node is present.
+    /// Returns a pointer to const, consistent with operator* below.
+    const AstNode *operator->() const {
+        assert(contents && "Null pointer");
+        return contents.get();
+    }
+
+    /// \return a reference to the node. Asserts that a node is present.
+    const AstNode &operator*() const {
+        assert(contents && "Null pointer");
+        return *contents;
+    }
+
+    /// \return whether a node is present. Same as the bool conversion.
+    bool has_value() const { return contents != nullptr; }
+};
+
+
+// Helpers which dispatch each field of a node to the matching visit method of a FieldVisitor.
+// These are used by the accept() method generated by the FIELDS macro.
+namespace horrible_template_goop {
+
+// void if B is true, SFINAE'd away otherwise.
+template <bool B>
+using only_if_t = typename std::enable_if<B>::type;
+
+// Fields which have a Category other than list are directly embedded nodes: call
+// visit_node_field.
+template <typename FieldVisitor, typename Field>
+only_if_t<Field::Category != category_t::list> visit_1_field(FieldVisitor &v, Field &field) {
+    v.visit_node_field(field);
+}
+
+// Fields whose Category is list: call visit_list_field.
+template <typename FieldVisitor, typename Field>
+only_if_t<Field::Category == category_t::list> visit_1_field(FieldVisitor &v, Field &field) {
+    v.visit_list_field(field);
+}
+
+// Raw pointer fields: call visit_pointer_field.
+template <typename FieldVisitor, typename Field>
+void visit_1_field(FieldVisitor &v, Field *&field) {
+    v.visit_pointer_field(field);
+}
+
+// Optional fields: call visit_optional_field.
+template <typename FieldVisitor, typename Field>
+void visit_1_field(FieldVisitor &v, optional_t<Field> &field) {
+    v.visit_optional_field(field);
+}
+
+// Union pointer fields: call visit_union_field.
+template <typename FieldVisitor, typename... Nodes>
+void visit_1_field(FieldVisitor &v, union_ptr_t<Nodes...> &field) {
+    v.visit_union_field(field);
+}
+
+// Call the field visit methods on visitor \p v passing field \p field.
+// This is the base case of the recursion below: with a single field the order is irrelevant.
+template <typename FieldVisitor, typename Field>
+void accept_field_visitor(FieldVisitor &v, bool /*reverse*/, Field &field) {
+    visit_1_field(v, field);
+}
+
+// Call visit_1_field on visitor \p v, for the field \p field and also \p rest.
+// If \p reverse is false, \p field is visited before \p rest, so fields are visited in the
+// order given. If \p reverse is true, \p rest is visited first, so the order is reversed.
+template <typename FieldVisitor, typename Field, typename... Rest>
+void accept_field_visitor(FieldVisitor &v, bool reverse, Field &field, Rest &... rest) {
+    if (!reverse) visit_1_field(v, field);
+    accept_field_visitor<FieldVisitor, Rest...>(v, reverse, rest...);
+    if (reverse) visit_1_field(v, field);
+}
+
+}  // namespace horrible_template_goop
+
+// Define the accept() method of a node, given the list of its fields.
+// accept() calls will_visit_fields_of() on the visitor, then dispatches every listed field to
+// the visitor (in reverse order if \p reversed is set), then calls did_visit_fields_of().
+#define FIELDS(...)                                                                   \
+    template <typename FieldVisitor>                                                  \
+    void accept(FieldVisitor &visitor, bool reversed = false) {                       \
+        visitor.will_visit_fields_of(*this);                                          \
+        horrible_template_goop::accept_field_visitor(visitor, reversed, __VA_ARGS__); \
+        visitor.did_visit_fields_of(*this);                                           \
+    }
+
+/// node_t is the base node of all AST nodes.
+/// It is not a template: it is possible to work concretely with this type.
+/// Nodes can be neither copied nor moved.
+struct node_t {
+    /// The type of this node.
+    const type_t type;
+
+    /// The category of this node.
+    const category_t category;
+
+    /// The parent node, or null if this is root.
+    const node_t *parent{nullptr};
+
+    constexpr explicit node_t(type_t t, category_t c) : type(t), category(c) {}
+
+    /// Disallow copying, etc.
+    node_t(const node_t &) = delete;
+    node_t(node_t &&) = delete;
+    void operator=(const node_t &) = delete;
+    void operator=(node_t &&) = delete;
+
+    /// Cast to a concrete node type, aborting on failure.
+    /// The check is an assertion that our type equals To::AstType.
+    /// Example usage:
+    ///   if (node->type == type_t::job_list) node->as<job_list_t>()->...
+    template <typename To>
+    To *as() {
+        assert(this->type == To::AstType && "Invalid type conversion");
+        return static_cast<To *>(this);
+    }
+
+    /// Const variant of as().
+    template <typename To>
+    const To *as() const {
+        assert(this->type == To::AstType && "Invalid type conversion");
+        return static_cast<const To *>(this);
+    }
+
+    /// Try casting to a concrete node type, except returns nullptr on failure.
+    /// Example usage:
+    ///     if (const auto *job_list = node->try_as<job_list_t>()) job_list->...
+    template <typename To>
+    To *try_as() {
+        if (this->type == To::AstType) return as<To>();
+        return nullptr;
+    }
+
+    /// Const variant of try_as().
+    template <typename To>
+    const To *try_as() const {
+        if (this->type == To::AstType) return as<To>();
+        return nullptr;
+    }
+
+    /// Base accept() function which trampolines to overriding implementations for each node type.
+    /// This may be used when you don't know what the type of a particular node is.
+    template <typename FieldVisitor>
+    void base_accept(FieldVisitor &v, bool reverse = false);
+
+    /// \return a helpful string description of this node.
+    wcstring describe() const;
+
+    /// \return the source range for this node, or none if unsourced.
+    /// This may return none if the parse was incomplete or had an error.
+    maybe_t<source_range_t> try_source_range() const;
+
+    /// \return the source range for this node, or an empty range {0, 0} if unsourced.
+    source_range_t source_range() const {
+        if (auto r = try_source_range()) return *r;
+        return source_range_t{0, 0};
+    }
+
+    /// \return the source code for this node, or none if unsourced.
+    /// \p orig is the string which the source range indexes into.
+    maybe_t<wcstring> try_source(const wcstring &orig) const {
+        if (auto r = try_source_range()) return orig.substr(r->start, r->length);
+        return none();
+    }
+
+    /// \return the source code for this node, or an empty string if unsourced.
+    wcstring source(const wcstring &orig) const {
+        wcstring res{};
+        if (auto s = try_source(orig)) res = s.acquire();
+        return res;
+    }
+
+    // We are a pure virtual class.
+    // Note that it is NOT necessary to declare virtual destructors for all subclasses - these will
+    // be made virtual automatically.
+    virtual ~node_t() = 0;
+};
+
+// Base class for all "branch" nodes: nodes with at least one ast child.
+// The template parameter is the node's type; it is exposed as AstType and passed to node_t
+// together with the branch category.
+template <type_t Type>
+struct branch_t : public node_t {
+    static constexpr type_t AstType = Type;
+    static constexpr category_t Category = category_t::branch;
+
+    branch_t() : node_t(Type, Category) {}
+};
+
+// Base class for all "leaf" nodes: nodes with no ast children.
+// It declares a field-less accept() method so that leaf nodes need not use the FIELDS macro.
+template <type_t Type>
+struct leaf_t : public node_t {
+    static constexpr type_t AstType = Type;
+    static constexpr category_t Category = category_t::leaf;
+
+    // Whether this node is "unsourced." This happens if for whatever reason we are unable to parse
+    // the node, either because we had a parse error and recovered, or because we accepted
+    // incomplete and the token stream was exhausted.
+    bool unsourced{false};
+
+    // The source range. Defaults to the empty range {0, 0}.
+    source_range_t range{0, 0};
+
+    // Convenience helper: \return true if this node has source, i.e. it is not unsourced.
+    bool has_source() const { return !unsourced; }
+
+    template <typename FieldVisitor>
+    void accept(FieldVisitor &visitor, bool /* reverse */ = false) {
+        visitor.will_visit_fields_of(*this);
+        visitor.did_visit_fields_of(*this);
+    }
+
+    leaf_t() : node_t(Type, Category) {}
+};
+
+// A simple fixed-size array, possibly empty.
+template <type_t ListType, typename ContentsNode>
+struct list_t : public node_t {
+    static constexpr type_t AstType = ListType;
+    static constexpr category_t Category = category_t::list;
+
+    // A list wraps a "contents pointer" which is just a unique_ptr that converts to a reference.
+    // This enables more natural iteration:
+    //    for (const argument_t &arg : argument_list) ...
+    struct contents_ptr_t {
+        std::unique_ptr<ContentsNode> ptr;
+        /* implicit */ contents_ptr_t(std::unique_ptr<ContentsNode> v) : ptr(std::move(v)) {}
+
+        const ContentsNode *get() const {
+            assert(ptr && "Null pointer");
+            return ptr.get();
+        }
+
+        /* implicit */ operator const ContentsNode &() const { return *get(); }
+    };
+    std::vector<contents_ptr_t> contents{};
+
+    /// \return a node at a given index, or nullptr if out of range.
+    const ContentsNode *at(size_t idx, bool reverse = false) const {
+        if (idx >= count()) return nullptr;
+        return contents[reverse ? count() - idx - 1 : idx].get();
+    }
+
+    /// \return our count.
+    size_t count() const { return contents.size(); }
+
+    /// \return whether we are empty.
+    bool empty() const { return contents.size() == 0; }
+
+    /// Iteration support.
+    using iterator = typename decltype(contents)::const_iterator;
+    iterator begin() const { return contents.begin(); }
+    iterator end() const { return contents.end(); }
+
+    // list types pretend their child nodes are direct embeddings.
+    // This isn't used during AST construction because we need to construct the list.
+    // It is used by node_visitation_t.
+    template <typename FieldVisitor>
+    void accept(FieldVisitor &visitor, bool reverse = false) {
+        visitor.will_visit_fields_of(*this);
+        for (size_t i = 0; i < count(); i++) visitor.visit_node_field(*this->at(i, reverse));
+        visitor.did_visit_fields_of(*this);
+    }
+
+    list_t() : node_t(ListType, Category) {}
+};
+
+// Fully define all list types, as they are very uniform.
+// This is where types like job_list_t come from.
+#define ELEM(T)
+#define ELEMLIST(ListT, ContentsT) \
+    struct ListT##_t final : public list_t<type_t::ListT, ContentsT##_t> {};
+#include "ast_node_types.inc"
+
+struct keyword_base_t : public leaf_t<type_t::keyword_base> {
+    // The keyword which was parsed. Defaults to none so it is never read uninitialized.
+    parse_keyword_t kw{parse_keyword_t::none};
+};
+
+// A keyword node is a node which contains a keyword, which must be one of the provided values.
+template <parse_keyword_t... KWs>
+struct keyword_t final : public keyword_base_t {
+    static bool allows_keyword(parse_keyword_t);
+};
+
+struct token_base_t : public leaf_t<type_t::token_base> {
+    // The token type which was parsed. Note this member hides node_t::type in token nodes.
+    parse_token_type_t type{token_type_invalid};
+};
+
+// A token node is a node which contains a token, which must be one of the provided values.
+template <parse_token_type_t... Toks>
+struct token_t final : public token_base_t {
+    /// \return whether a token type is allowed in this token_t, i.e. is a member of our Toks list.
+    static bool allows_token(parse_token_type_t);
+};
+
+// Zero or more newlines.
+struct maybe_newlines_t final : public leaf_t<type_t::maybe_newlines> {};
+
+// A single newline or semicolon, terminating statements.
+// Note this is not a separate type, it is just a convenience typedef.
+using semi_nl_t = token_t<parse_token_type_end>;
+
+// Convenience typedef for string nodes.
+using string_t = token_t<parse_token_type_string>;
+
+// An argument is just a node whose source range determines its contents.
+// This is a separate type because it is sometimes useful to find all arguments.
+struct argument_t final : public leaf_t<type_t::argument> {};
+
+// A redirection has an operator like > or 2>, and a target like /dev/null or &1.
+// Note that pipes are not redirections.
+struct redirection_t final : public branch_t<type_t::redirection> {
+    token_t<parse_token_type_redirection> oper;
+    string_t target;
+
+    FIELDS(oper, target)
+};
+
+// A variable_assignment_t contains a source range like FOO=bar.
+struct variable_assignment_t final : public leaf_t<type_t::variable_assignment> {};
+
+// An argument or redirection holds either an argument or redirection.
+struct argument_or_redirection_t final : public branch_t<type_t::argument_or_redirection> {
+    using contents_ptr_t = union_ptr_t<argument_t, redirection_t>;
+    contents_ptr_t contents{};
+
+    /// \return whether this represents an argument.
+    bool is_argument() const { return contents->type == type_t::argument; }
+
+    /// \return whether this represents a redirection.
+    bool is_redirection() const { return contents->type == type_t::redirection; }
+
+    /// \return this as an argument, assuming it wraps one.
+    const argument_t &argument() const {
+        assert(is_argument() && "Is not an argument");
+        return *this->contents.contents->as<argument_t>();
+    }
+
+    /// \return this as a redirection, assuming it wraps one.
+    const redirection_t &redirection() const {
+        assert(is_redirection() && "Is not a redirection");
+        return *this->contents.contents->as<redirection_t>();
+    }
+
+    FIELDS(contents)
+};
+
+// A statement is a normal command, or an if / while / etc
+struct statement_t final : public branch_t<type_t::statement> {
+    using contents_ptr_t = union_ptr_t<not_statement_t, block_statement_t, if_statement_t,
+                                       switch_statement_t, decorated_statement_t>;
+    contents_ptr_t contents{};
+
+    FIELDS(contents)
+};
+
+// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
+// like if statements, where we require a command).
+struct job_t final : public branch_t<type_t::job> {
+    // Maybe the time keyword.
+    optional_t<keyword_t<parse_keyword_t::kw_time>> time;
+
+    // A (possibly empty) list of variable assignments.
+    variable_assignment_list_t variables;
+
+    // The statement.
+    statement_t statement;
+
+    // Piped remainder.
+    job_continuation_list_t continuation;
+
+    // Maybe backgrounded.
+    optional_t<token_t<parse_token_type_background>> bg;
+
+    FIELDS(time, variables, statement, continuation, bg)
+};
+
+// A job_conjunction is a job followed by zero or more && or || continuations.
+struct job_conjunction_t final : public branch_t<type_t::job_conjunction> {
+    // The job conjunction decorator.
+    using decorator_t = keyword_t<parse_keyword_t::kw_and, parse_keyword_t::kw_or>;
+    optional_t<decorator_t> decorator{};
+
+    // The job itself.
+    job_t job;
+
+    // The rest of the job conjunction, with && or ||s.
+    job_conjunction_continuation_list_t continuations;
+
+    // A terminating semicolon or newline.
+    // This is marked optional because it may not be present, for example the command `echo foo` may
+    // not have a terminating newline. It will only fail to be present if we ran out of tokens.
+    optional_t<semi_nl_t> semi_nl;
+
+    FIELDS(decorator, job, continuations, semi_nl)
+};
+
+struct for_header_t final : public branch_t<type_t::for_header> {
+    // 'for'
+    keyword_t<parse_keyword_t::kw_for> kw_for;
+
+    // var_name
+    string_t var_name;
+
+    // 'in'
+    keyword_t<parse_keyword_t::kw_in> kw_in;
+
+    // list of arguments
+    argument_list_t args;
+
+    // newline or semicolon
+    semi_nl_t semi_nl;
+
+    FIELDS(kw_for, var_name, kw_in, args, semi_nl)
+};
+
+struct while_header_t final : public branch_t<type_t::while_header> {
+    // 'while'
+    keyword_t<parse_keyword_t::kw_while> kw_while;
+
+    job_conjunction_t condition{};
+    andor_job_list_t andor_tail{};
+
+    FIELDS(kw_while, condition, andor_tail)
+};
+
+struct function_header_t final : public branch_t<type_t::function_header> {
+    // functions require at least one argument.
+    keyword_t<parse_keyword_t::kw_function> kw_function;
+    argument_t first_arg;
+    argument_list_t args;
+    semi_nl_t semi_nl;
+
+    FIELDS(kw_function, first_arg, args, semi_nl)
+};
+
+struct begin_header_t final : public branch_t<type_t::begin_header> {
+    keyword_t<parse_keyword_t::kw_begin> kw_begin;
+
+    // Note that 'begin' does NOT require a semi or nl afterwards.
+    // This is valid: begin echo hi; end
+    optional_t<semi_nl_t> semi_nl;
+
+    FIELDS(kw_begin, semi_nl)
+};
+
+struct block_statement_t final : public branch_t<type_t::block_statement> {
+    // A header like for, while, etc.
+    using header_ptr_t =
+        union_ptr_t<for_header_t, while_header_t, function_header_t, begin_header_t>;
+    header_ptr_t header;
+
+    // List of jobs in this block.
+    job_list_t jobs;
+
+    // The 'end' node.
+    keyword_t<parse_keyword_t::kw_end> end;
+
+    // Arguments and redirections associated with the block.
+    argument_or_redirection_list_t args_or_redirs;
+
+    FIELDS(header, jobs, end, args_or_redirs)
+};
+
+// Represents an 'if', either as the first part of an if statement or after an 'else'.
+struct if_clause_t final : public branch_t<type_t::if_clause> {
+    // The 'if' keyword.
+    keyword_t<parse_keyword_t::kw_if> kw_if;
+
+    // The 'if' condition.
+    job_conjunction_t condition{};
+
+    // 'and/or' tail.
+    andor_job_list_t andor_tail{};
+
+    // The body to execute if the condition is true.
+    job_list_t body;
+
+    FIELDS(kw_if, condition, andor_tail, body)
+};
+
+struct elseif_clause_t final : public branch_t<type_t::elseif_clause> {
+    // The 'else' keyword.
+    keyword_t<parse_keyword_t::kw_else> kw_else;
+
+    // The 'if' clause following it.
+    if_clause_t if_clause;
+
+    FIELDS(kw_else, if_clause)
+};
+
+struct else_clause_t final : public branch_t<type_t::else_clause> {
+    // else ; body
+    keyword_t<parse_keyword_t::kw_else> kw_else;
+    semi_nl_t semi_nl;
+    job_list_t body;
+
+    FIELDS(kw_else, semi_nl, body)
+};
+
+struct if_statement_t final : public branch_t<type_t::if_statement> {
+    // if part
+    if_clause_t if_clause;
+
+    // else if list
+    elseif_clause_list_t elseif_clauses;
+
+    // else part
+    optional_t<else_clause_t> else_clause;
+
+    // literal end
+    keyword_t<parse_keyword_t::kw_end> end;
+
+    // block args / redirs
+    argument_or_redirection_list_t args_or_redirs;
+
+    FIELDS(if_clause, elseif_clauses, else_clause, end, args_or_redirs)
+};
+
+struct case_item_t final : public branch_t<type_t::case_item> {
+    // case <arguments> ; body
+    keyword_t<parse_keyword_t::kw_case> kw_case;
+    argument_list_t arguments;
+    semi_nl_t semi_nl;
+    job_list_t body;
+    FIELDS(kw_case, arguments, semi_nl, body)
+};
+
+struct switch_statement_t final : public branch_t<type_t::switch_statement> {
+    // switch <argument> ; body ; end args_redirs
+    keyword_t<parse_keyword_t::kw_switch> kw_switch;
+    argument_t argument;
+    semi_nl_t semi_nl;
+    case_item_list_t cases;
+    keyword_t<parse_keyword_t::kw_end> end;
+    argument_or_redirection_list_t args_or_redirs;
+
+    FIELDS(kw_switch, argument, semi_nl, cases, end, args_or_redirs)
+};
+
+// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
+// "builtin" or "command" or "exec"
+struct decorated_statement_t final : public branch_t<type_t::decorated_statement> {
+    // An optional decoration (command, builtin, exec, etc).
+    using pk = parse_keyword_t;
+    using decorator_t = keyword_t<pk::kw_command, pk::kw_builtin, pk::kw_exec>;
+    optional_t<decorator_t> opt_decoration;
+
+    // Command to run.
+    string_t command;
+
+    // Args and redirs
+    argument_or_redirection_list_t args_or_redirs;
+
+    // Helper to return the decoration.
+    parse_statement_decoration_t decoration() const;
+
+    FIELDS(opt_decoration, command, args_or_redirs)
+};
+
+// A not statement like `not true` or `! true`
+struct not_statement_t final : public branch_t<type_t::not_statement> {
+    // Keyword, either not or exclam.
+    keyword_t<parse_keyword_t::kw_not, parse_keyword_t::kw_exclam> kw;
+
+    variable_assignment_list_t variables;
+    optional_t<keyword_t<parse_keyword_t::kw_time>> time{};
+    statement_t contents{};
+
+    FIELDS(kw, variables, time, contents)
+};
+
+struct job_continuation_t final : public branch_t<type_t::job_continuation> {
+    token_t<parse_token_type_pipe> pipe;
+    maybe_newlines_t newlines;
+    variable_assignment_list_t variables;
+    statement_t statement;
+
+    FIELDS(pipe, newlines, variables, statement)
+};
+
+struct job_conjunction_continuation_t final
+    : public branch_t<type_t::job_conjunction_continuation> {
+    // The && or || token.
+    token_t<parse_token_type_andand, parse_token_type_oror> conjunction;
+
+    // The job itself.
+    job_t job;
+
+    FIELDS(conjunction, job)
+};
+
+// An andor_job just wraps a job, but requires that the job have an 'and' or 'or' job_decorator.
+// Note this is only used for andor_job_list; jobs that are not part of an andor_job_list are not
+// instances of this.
+struct andor_job_t final : public branch_t<type_t::andor_job> {
+    job_conjunction_t job;
+
+    FIELDS(job)
+};
+
+// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
+// TOK_END (newlines, and even semicolons, for historical reasons).
+// In practice the tok_ends are ignored by fish code so we do not bother to store them.
+struct freestanding_argument_list_t final : public branch_t<type_t::freestanding_argument_list> {
+    argument_list_t arguments;
+    FIELDS(arguments)
+};
+
+template <typename FieldVisitor>
+void node_t::base_accept(FieldVisitor &v, bool reverse) {
+    switch (this->type) {
+#define ELEM(T)                                \
+    case type_t::T:                            \
+        this->as<T##_t>()->accept(v, reverse); \
+        break;
+
+#include "ast_node_types.inc"
+    }
+}
+
+// static
+template <parse_token_type_t... Toks>
+bool token_t<Toks...>::allows_token(parse_token_type_t type) {
+    for (parse_token_type_t t : {Toks...}) {
+        if (type == t) return true;
+    }
+    return false;
+}
+
+// static
+template <parse_keyword_t... KWs>
+bool keyword_t<KWs...>::allows_keyword(parse_keyword_t kw) {
+    for (parse_keyword_t k : {KWs...}) {
+        if (k == kw) return true;
+    }
+    return false;
+}
+
+// static
+template <typename... Nodes>
+bool union_ptr_t<Nodes...>::allows_node(const node_t &node) {
+    for (type_t t : {Nodes::AstType...}) {
+        if (t == node.type) return true;
+    }
+    return false;
+}
+
+/**
+ * A node visitor is like a field visitor, but adapted to only visit actual nodes, as const
+ * references. It calls the visit() function of its visitor with a const reference to each node
+ * found under a given node.
+ *
+ * Example:
+ * struct MyNodeVisitor {
+ *    template <typename Node>
+ *    void visit(const Node &n) {...}
+ * };
+ */
+template <typename NodeVisitor>
+class node_visitation_t {
+   public:
+    explicit node_visitation_t(NodeVisitor &v, bool reverse = false) : v_(v), reverse_(reverse) {}
+
+    // Visit the (direct) child nodes of a given node.
+    template <typename Node>
+    void accept_children_of(const Node &n) {
+        // We play fast and loose with const to avoid having to duplicate our FIELDS macros.
+        const_cast<Node &>(n).accept(*this, reverse_);
+    }
+
+    // Visit the (direct) child nodes of a given node.
+    void accept_children_of(const node_t *n) {
+        const_cast<node_t *>(n)->base_accept(*this, reverse_);
+    }
+
+    // Invoke visit() on our visitor for a given node, resolving that node's type.
+    void accept(const node_t *n) {
+        assert(n && "Node should not be null");
+        switch (n->type) {
+#define ELEM(T)                      \
+    case type_t::T:                  \
+        v_.visit(*(n->as<T##_t>())); \
+        break;
+#include "ast_node_types.inc"
+        }
+    }
+
+    // Here are our field visit implementations, which adapt field visiting to node visiting.
+
+    // Direct embeddings.
+    template <typename Node>
+    void visit_node_field(const Node &node) {
+        v_.visit(node);
+    }
+
+    // Pointer embeddings.
+    template <typename Node>
+    void visit_pointer_field(const Node *ptr) {
+        v_.visit(*ptr);
+    }
+
+    // List embeddings.
+    template <typename List>
+    void visit_list_field(const List &list) {
+        v_.visit(list);
+    }
+
+    // Optional pointers get visited if not null.
+    template <typename Node>
+    void visit_optional_field(optional_t<Node> &node) {
+        if (node.contents) v_.visit(*node.contents);
+    }
+
+    // Define our custom implementations of non-node fields.
+    // Union pointers just dispatch to the generic one.
+    template <typename... Types>
+    void visit_union_field(union_ptr_t<Types...> &ptr) {
+        assert(ptr && "Should not have null ptr");
+        this->accept(ptr.contents.get());
+    }
+
+    void will_visit_fields_of(node_t &) {}
+    void did_visit_fields_of(node_t &) {}
+
+    node_visitation_t(node_visitation_t &&) = default;
+
+    // We cannot be copied.
+    node_visitation_t(const node_visitation_t &) = delete;
+    void operator=(const node_visitation_t &) = delete;
+    void operator=(node_visitation_t &&) = delete;
+
+   private:
+    // Our adapted visitor.
+    NodeVisitor &v_;
+
+    // Whether to iterate in reverse order.
+    const bool reverse_;
+};
+
+// Type-deducing helper.
+template <typename NodeVisitor>
+node_visitation_t<NodeVisitor> node_visitor(NodeVisitor &nv, bool reverse = false) {
+    return node_visitation_t<NodeVisitor>(nv, reverse);
+}
+
+// A way to visit nodes iteratively.
+// This is pre-order. Each node is visited before its children.
+// Example:
+//    traversal_t tv(start);
+//    while (const node_t *node = tv.next()) {...}
+class traversal_t {
+   public:
+    // Construct starting with a node
+    traversal_t(const node_t *n) {
+        assert(n && "Should not have null node");
+        push(n);
+    }
+
+    // \return the next node, or nullptr if exhausted.
+    const node_t *next() {
+        if (stack_.empty()) return nullptr;
+        const node_t *node = stack_.back();
+        stack_.pop_back();
+
+        // We want to visit in reverse order so the first child ends up on top of the stack.
+        node_visitor(*this, true /* reverse */).accept_children_of(node);
+        return node;
+    }
+
+   private:
+    // Callback for node_visitation_t.
+    void visit(const node_t &node) { push(&node); }
+
+    // Construct an empty visitor, used for iterator support.
+    traversal_t() = default;
+
+    // \return whether we are finished visiting.
+    bool finished() const { return stack_.empty(); }
+
+    // Append a node.
+    void push(const node_t *n) {
+        assert(n && "Should not push null node");
+        stack_.push_back(n);
+    }
+
+    // Stack of nodes.
+    std::vector<const node_t *> stack_{};
+
+    friend class ast_t;
+    friend class node_visitation_t<traversal_t>;
+};
+
+/// The ast type itself.
+class ast_t {
+   public:
+    using source_range_list_t = std::vector<source_range_t>;
+
+    /// Construct an ast by parsing \p src as a job list.
+    /// \p flags adjusts how parsing is performed. \p out_errors, if not null, is presumably
+    /// where parse errors are reported (confirm against the implementation).
+    static ast_t parse(const wcstring &src, parse_tree_flags_t flags = parse_flag_none,
+                       parse_error_list_t *out_errors = nullptr);
+
+    /// Like parse(), but constructs a freestanding_argument_list.
+    static ast_t parse_argument_list(const wcstring &src,
+                                     parse_tree_flags_t flags = parse_flag_none,
+                                     parse_error_list_t *out_errors = nullptr);
+
+    /// \return a traversal, allowing iteration over the nodes.
+    traversal_t walk() const { return traversal_t{top()}; }
+
+    /// \return the top node, whose type is determined by the parse function that was used.
+    const node_t *top() const { return top_.get(); }
+
+    /// \return whether any errors were encountered during parsing.
+    bool errored() const { return any_error_; }
+
+    /// \return a textual representation of the tree.
+    /// Pass the original source as \p orig.
+    wcstring dump(const wcstring &orig) const;
+
+    /// Extra source ranges.
+    /// These are only generated if the corresponding flags are set.
+    struct extras_t {
+        /// Set of comments, sorted by offset.
+        source_range_list_t comments;
+
+        /// Set of semicolons, sorted by offset.
+        source_range_list_t semis;
+
+        /// Set of error ranges, sorted by offset.
+        source_range_list_t errors;
+    };
+
+    /// Access the set of extraneous source ranges.
+    const extras_t &extras() const { return extras_; }
+
+    /// Iterator support: a pre-order walk over the nodes, built on traversal_t.
+    class iterator {
+       public:
+        using iterator_category = std::input_iterator_tag;
+        using difference_type = void;
+        using value_type = node_t;
+        using pointer = const node_t *;
+        using reference = const node_t &;
+
+        bool operator==(const iterator &rhs) const { return current_ == rhs.current_; }
+        bool operator!=(const iterator &rhs) const { return !(*this == rhs); }
+
+        iterator &operator++() {
+            current_ = v_.next();
+            return *this;
+        }
+
+        const node_t &operator*() const { return *current_; }
+
+       private:
+        explicit iterator(const node_t *start) : v_(start), current_(v_.next()) {}
+        iterator() = default;
+
+        traversal_t v_{};
+        const node_t *current_{};
+        friend ast_t;
+    };
+
+    iterator begin() const { return iterator{top()}; }
+    iterator end() const { return iterator{}; }
+
+    ast_t(ast_t &&) = default;
+    ast_t &operator=(ast_t &&) = default;
+    ast_t(const ast_t &) = delete;
+    void operator=(const ast_t &) = delete;
+
+   private:
+    ast_t() = default;
+
+    // Shared parsing code that takes the top type.
+    static ast_t parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags,
+                                parse_error_list_t *out_errors, type_t top);
+
+    // The top node.
+    // Its type depends on what was requested to parse.
+    std::unique_ptr<node_t> top_{};
+
+    /// Whether any errors were encountered during parsing.
+    bool any_error_{false};
+
+    /// Extra fields.
+    extras_t extras_{};
+
+    class populator_t;
+    friend populator_t;
+};
+
+}  // namespace ast
+#endif // FISH_AST_H
diff --git a/src/ast_node_types.inc b/src/ast_node_types.inc
new file mode 100644
index 000000000..b0ac3ea98
--- /dev/null
+++ b/src/ast_node_types.inc
@@ -0,0 +1,60 @@
+// Define ELEM and optionally ELEMLIST before including this file.
+// ELEM is for ordinary nodes.
+// ELEMLIST(x, y) marks list nodes and the type they contain.
+#ifndef ELEMLIST
+#define ELEMLIST(x, y) ELEM(x)
+#endif
+
+ELEM(keyword_base)
+ELEM(token_base)
+ELEM(maybe_newlines)
+
+ELEM(argument)
+ELEMLIST(argument_list, argument)
+
+ELEM(redirection)
+ELEM(argument_or_redirection)
+ELEMLIST(argument_or_redirection_list, argument_or_redirection)
+
+ELEM(variable_assignment)
+ELEMLIST(variable_assignment_list, variable_assignment)
+
+ELEM(job)
+ELEM(job_conjunction)
+// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
+ELEMLIST(job_list, job_conjunction)
+ELEM(job_conjunction_continuation)
+ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
+
+ELEM(job_continuation)
+ELEMLIST(job_continuation_list, job_continuation)
+
+ELEM(andor_job)
+ELEMLIST(andor_job_list, andor_job)
+
+ELEM(statement)
+
+ELEM(not_statement)
+
+ELEM(block_statement)
+ELEM(for_header)
+ELEM(while_header)
+ELEM(function_header)
+ELEM(begin_header)
+
+ELEM(if_statement)
+ELEM(if_clause)
+ELEM(elseif_clause)
+ELEMLIST(elseif_clause_list, elseif_clause)
+ELEM(else_clause)
+
+ELEM(switch_statement)
+ELEM(case_item)
+ELEMLIST(case_item_list, case_item)
+
+ELEM(decorated_statement)
+
+ELEM(freestanding_argument_list)
+
+#undef ELEM
+#undef ELEMLIST
diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp
index 3b6ba0e6a..17197f84c 100644
--- a/src/fish_indent.cpp
+++ b/src/fish_indent.cpp
@@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include <tuple>
 #include <vector>
 
+#include "ast.h"
 #include "color.h"
 #include "common.h"
 #include "env.h"
@@ -404,6 +405,12 @@ static wcstring prettify(const wcstring &src, bool do_indent) {
     if (dump_parse_tree) {
         const wcstring dump = parse_dump_tree(parse_tree, src);
         std::fwprintf(stderr, L"%ls\n", dump.c_str());
+
+        auto ast =
+            ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
+                                       parse_flag_show_extra_semis);
+        wcstring ast_dump = ast.dump(src);
+        std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
     }
 
     // We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
diff --git a/src/flog.h b/src/flog.h
index 7512ea307..dcf5fd3cf 100644
--- a/src/flog.h
+++ b/src/flog.h
@@ -67,6 +67,7 @@ class category_list_t {
     category_t parse_productions{L"parse-productions", L"Resolving tokens"};
     category_t parse_productions_chatty{L"parse-productions-chatty",
                                         L"Resolving tokens (chatty messages)"};
+    category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
 
     category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};
 
diff --git a/src/parse_constants.h b/src/parse_constants.h
index 6e654a7f6..e41992c50 100644
--- a/src/parse_constants.h
+++ b/src/parse_constants.h
@@ -13,6 +13,17 @@
         exit_without_destructors(-1);  \
     } while (0)
 
+// A range of source code, given as a start offset plus a length; end() is start + length.
+struct source_range_t {
+    uint32_t start;
+    uint32_t length;
+
+    uint32_t end() const {
+        assert(start + length >= start && "Overflow");
+        return start + length;
+    }
+};
+
 // IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
 enum parse_token_type_t : uint8_t {
     token_type_invalid = 1,
@@ -193,6 +204,26 @@ enum parse_error_code_t {
     parse_error_andor_in_pipeline,         // "and" or "or" after a pipe
 };
 
+enum {
+    parse_flag_none = 0,
+
+    /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
+    /// disconnected trees. This is intended to be used by syntax highlighting.
+    parse_flag_continue_after_error = 1 << 0,
+    /// Include comment tokens.
+    parse_flag_include_comments = 1 << 1,
+    /// Indicate that the tokenizer should accept incomplete tokens.
+    parse_flag_accept_incomplete_tokens = 1 << 2,
+    /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
+    /// tree where some nodes may have no productions.
+    parse_flag_leave_unterminated = 1 << 3,
+    /// Indicate that the parser should generate job_list entries for blank lines.
+    parse_flag_show_blank_lines = 1 << 4,
+    /// Indicate that extra semis should be generated.
+    parse_flag_show_extra_semis = 1 << 5,
+};
+typedef unsigned int parse_tree_flags_t;
+
 enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 };
 typedef unsigned int parser_test_error_bits_t;
 
@@ -214,6 +245,9 @@ struct parse_error_t {
 };
 typedef std::vector<parse_error_t> parse_error_list_t;
 
+wcstring token_type_user_presentable_description(parse_token_type_t type,
+                                                 parse_keyword_t keyword = parse_keyword_t::none);
+
 // Special source_start value that means unknown.
 #define SOURCE_LOCATION_UNKNOWN (static_cast<size_t>(-1))
 
diff --git a/src/parse_grammar.h b/src/parse_grammar.h
index d5e9e6710..c89fd8816 100644
--- a/src/parse_grammar.h
+++ b/src/parse_grammar.h
@@ -255,7 +255,7 @@ DEF_ALT(variable_assignments) {
 // A string token like VAR=value
 DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};
 
-// A statement is a normal command, or an if / while / and etc
+// A statement is a normal command, or an if / while / etc
 DEF_ALT(statement) {
     using nots = single<not_statement>;
     using block = single<block_statement>;
diff --git a/src/parse_productions.h b/src/parse_productions.h
index 3a6e4b257..02a9592f1 100644
--- a/src/parse_productions.h
+++ b/src/parse_productions.h
@@ -4,6 +4,7 @@
 
 #include <sys/types.h>
 
+#include "ast.h"
 #include "parse_constants.h"
 
 struct parse_token_t;
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index ee0281bbf..b3eadcd8d 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -30,7 +30,7 @@ static bool production_is_empty(const production_element_t *production) {
     return *production == token_type_invalid;
 }
 
-static parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
+parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
     switch (err) {
         case tokenizer_error_t::none:
             return parse_error_none;
@@ -168,8 +168,7 @@ const wchar_t *keyword_description(parse_keyword_t type) {
     return L"unknown_keyword";
 }
 
-static wcstring token_type_user_presentable_description(
-    parse_token_type_t type, parse_keyword_t keyword = parse_keyword_t::none) {
+wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) {
     if (keyword != parse_keyword_t::none) {
         return format_string(L"keyword '%ls'", keyword_description(keyword));
     }
@@ -1078,8 +1077,7 @@ static inline bool is_help_argument(const wcstring &txt) {
 }
 
 /// Return a new parse token, advancing the tokenizer.
-static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token,
-                                             wcstring *storage) {
+parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage) {
     *out_token = tok->next();
     if (!out_token->has_value()) {
         return kTerminalToken;
@@ -1098,7 +1096,8 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *o
     result.is_help_argument = result.has_dash_prefix && is_help_argument(text);
     result.is_newline = (result.type == parse_token_type_end && text == L"\n");
     result.preceding_escaped_nl = token.preceding_escaped_nl;
-    result.may_be_variable_assignment = bool(variable_assignment_equals_pos(text));
+    result.may_be_variable_assignment = variable_assignment_equals_pos(text).has_value();
+    result.tok_error = token.error;
 
     // These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
     // uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 3e98104fa..34525db6a 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -6,6 +6,7 @@
 #include <stdint.h>
 #include <sys/types.h>
 
+#include <deque>
 #include <memory>
 #include <vector>
 
@@ -25,11 +26,6 @@ typedef uint32_t source_offset_t;
 
 constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
 
-struct source_range_t {
-    uint32_t start;
-    uint32_t length;
-};
-
 /// A struct representing the token type that we use internally.
 struct parse_token_t {
     enum parse_token_type_t type;  // The type of the token as represented by the parser
@@ -41,38 +37,36 @@ struct parse_token_t {
     bool is_newline{false};            // Hackish: if TOK_END, whether the source is a newline.
     bool preceding_escaped_nl{false};  // Whether there was an escaped newline preceding this token.
     bool may_be_variable_assignment{false};  // Hackish: whether this token is a string like FOO=bar
+    tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
     source_offset_t source_start{SOURCE_OFFSET_INVALID};
     source_offset_t source_length{0};
 
+    /// \return the source range.
+    source_range_t range() const {
+        return source_range_t{source_start, source_length};
+    }
+
+    /// \return whether we are a string with the dash prefix set.
+    bool is_dash_prefix_string() const {
+        return type == parse_token_type_string && has_dash_prefix;
+    }
+
     wcstring describe() const;
     wcstring user_presentable_description() const;
 
     constexpr parse_token_t(parse_token_type_t type) : type(type) {}
 };
 
-enum {
-    parse_flag_none = 0,
-
-    /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
-    /// disconnected trees. This is intended to be used by syntax highlighting.
-    parse_flag_continue_after_error = 1 << 0,
-    /// Include comment tokens.
-    parse_flag_include_comments = 1 << 1,
-    /// Indicate that the tokenizer should accept incomplete tokens */
-    parse_flag_accept_incomplete_tokens = 1 << 2,
-    /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
-    /// tree where some nodes may have no productions.
-    parse_flag_leave_unterminated = 1 << 3,
-    /// Indicate that the parser should generate job_list entries for blank lines.
-    parse_flag_show_blank_lines = 1 << 4
-};
-typedef unsigned int parse_tree_flags_t;
+/// Return a new parse token, advancing the tokenizer.
+parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
 
 wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
 
 const wchar_t *token_type_description(parse_token_type_t type);
 const wchar_t *keyword_description(parse_keyword_t type);
 
+parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
+
 // Node flags.
 enum {
     /// Flag indicating that the node has associated comment nodes.

From 8d9725c301ad6f52887bd11c15adba6c801bb294 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 15:27:15 -0700
Subject: [PATCH 03/13] Adopt the new AST in highlighting

This switches syntax highlighting from parsing with parse_tree to the new
ast.
---
 src/fish_tests.cpp |   6 +
 src/highlight.cpp  | 706 +++++++++++++++++++++------------------------
 2 files changed, 342 insertions(+), 370 deletions(-)

diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index e0f52fe47..9ad95fd97 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4862,6 +4862,12 @@ static void test_highlighting() {
         {L")", highlight_role_t::error},
     });
 
+    highlight_tests.push_back({
+        {L"echo", highlight_role_t::command},
+        {L"stuff", highlight_role_t::param},
+        {L"# comment", highlight_role_t::comment},
+    });
+
     auto &vars = parser_t::principal_parser().vars();
     // Verify variables and wildcards in commands using /bin/cat.
     vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});
diff --git a/src/highlight.cpp b/src/highlight.cpp
index 3b45f934d..656f0122c 100644
--- a/src/highlight.cpp
+++ b/src/highlight.cpp
@@ -16,6 +16,7 @@
 #include <unordered_set>
 #include <utility>
 
+#include "ast.h"
 #include "builtin.h"
 #include "color.h"
 #include "common.h"
@@ -31,14 +32,11 @@
 #include "parse_util.h"
 #include "parser.h"
 #include "path.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "wcstringutil.h"
 #include "wildcard.h"
 #include "wutil.h"  // IWYU pragma: keep
 
-namespace g = grammar;
-
 #define CURSOR_POSITION_INVALID static_cast<size_t>(-1)
 
 static const wchar_t *get_highlight_var_name(highlight_role_t role) {
@@ -338,12 +336,11 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d
 
 // Given a plain statement node in a parse tree, get the command and return it, expanded
 // appropriately for commands. If we succeed, return true.
-static bool plain_statement_get_expanded_command(const wcstring &src,
-                                                 tnode_t<g::plain_statement> stmt,
-                                                 const operation_context_t &ctx,
-                                                 wcstring *out_cmd) {
+static bool statement_get_expanded_command(const wcstring &src,
+                                           const ast::decorated_statement_t &stmt,
+                                           const operation_context_t &ctx, wcstring *out_cmd) {
     // Get the command. Try expanding it. If we cannot, it's an error.
-    maybe_t<wcstring> cmd = command_for_plain_statement(stmt, src);
+    maybe_t<wcstring> cmd = stmt.command.source(src);
     if (!cmd) return false;
     expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr);
     return err == expand_result_t::ok;
@@ -384,6 +381,9 @@ rgb_color_t highlight_get_color(const highlight_spec_t &highlight, bool is_backg
     return result;
 }
 
+static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration,
+                             const wcstring &working_directory, const environment_t &vars);
+
 static bool has_expand_reserved(const wcstring &str) {
     bool result = false;
     for (auto wc : str) {
@@ -399,27 +399,22 @@ static bool has_expand_reserved(const wcstring &str) {
 // command (as a string), if any. This is used to validate autosuggestions.
 static bool autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx,
                                       wcstring *out_expanded_command, wcstring *out_arg) {
-    // Parse the buffer.
-    parse_node_tree_t parse_tree;
-    parse_tree_from_string(buff,
-                           parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
-                           &parse_tree, nullptr);
+    auto ast = ast::ast_t::parse(
+        buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens);
 
     // Find the first statement.
-    tnode_t<g::plain_statement> first_statement{};
-    for (const auto &node : parse_tree) {
-        if (node.type == symbol_plain_statement) {
-            first_statement = tnode_t<g::plain_statement>(&parse_tree, &node);
-            break;
-        }
+    const ast::decorated_statement_t *first_statement = nullptr;
+    if (const ast::job_conjunction_t *jc = ast.top()->as<ast::job_list_t>()->at(0)) {
+        first_statement = jc->job.statement.contents->try_as<ast::decorated_statement_t>();
     }
 
     if (first_statement &&
-        plain_statement_get_expanded_command(buff, first_statement, ctx, out_expanded_command)) {
-        // Find the first argument.
-        auto args_and_redirs = first_statement.child<1>();
-        if (auto arg = args_and_redirs.next_in_list<grammar::argument>()) {
-            *out_arg = arg.get_source(buff);
+        statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) {
+        // Check if the first argument or redirection is, in fact, an argument.
+        if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) {
+            if (arg_or_redir && arg_or_redir->is_argument()) {
+                *out_arg = arg_or_redir->argument().source(buff);
+            }
         }
         return true;
     }
@@ -775,31 +770,56 @@ class highlighter_t {
     const bool io_ok;
     // Working directory.
     const wcstring working_directory;
+    // The ast we produced.
+    ast::ast_t ast;
     // The resulting colors.
     using color_array_t = std::vector<highlight_spec_t>;
     color_array_t color_array;
-    // The parse tree of the buff.
-    parse_node_tree_t parse_tree;
+
+    // Flags we use for AST parsing.
+    static constexpr parse_tree_flags_t ast_flags =
+        parse_flag_continue_after_error | parse_flag_include_comments |
+        parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
+        parse_flag_show_extra_semis;
+
     // Color a command.
-    void color_command(tnode_t<g::tok_string> node);
-    // Color an argument.
-    void color_argument(tnode_t<g::tok_string> node);
+    void color_command(const ast::string_t &node);
+    // Color a node as if it were an argument.
+    void color_as_argument(const ast::node_t &node);
     // Color a redirection.
-    void color_redirection(tnode_t<g::redirection> node);
-    // Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect
-    // invalid directories.
-    void color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd = false);
-    // Color the redirections of the given node.
-    void color_redirections(tnode_t<g::arguments_or_redirections_list> list);
+    void color_redirection(const ast::redirection_t &node);
     // Color all the children of the command with the given type.
-    void color_children(const parse_node_t &parent, parse_token_type_t type,
-                        highlight_spec_t color);
+    void color_children(const ast::node_t &parent, ast::type_t type, highlight_spec_t color);
     // Colors the source range of a node with a given color.
-    void color_node(const parse_node_t &node, highlight_spec_t color);
+    void color_node(const ast::node_t &node, highlight_spec_t color);
+    // Colors a range with a given color.
+    void color_range(source_range_t range, highlight_spec_t color);
     // return whether a plain statement is 'cd'.
-    bool is_cd(tnode_t<g::plain_statement> stmt) const;
+    bool is_cd(const ast::decorated_statement_t &stmt) const;
+
+    /// \return a substring of our buffer.
+    wcstring get_source(source_range_t r) const;
 
    public:
+    // Visit the children of a node.
+    void visit_children(const ast::node_t &node) {
+        ast::node_visitor(*this).accept_children_of(&node);
+    }
+
+    // AST visitor implementations.
+    void visit(const ast::keyword_base_t &kw);
+    void visit(const ast::token_base_t &tok);
+    void visit(const ast::redirection_t &redir);
+    void visit(const ast::variable_assignment_t &varas);
+    void visit(const ast::semi_nl_t &semi_nl);
+    void visit(const ast::decorated_statement_t &stmt);
+
+    // Visit an argument, perhaps knowing that our command is cd.
+    void visit(const ast::argument_t &arg, bool cmd_is_cd = false);
+
+    // Default implementation is to just visit children.
+    void visit(const ast::node_t &node) { visit_children(node); }
+
     // Constructor
     highlighter_t(const wcstring &str, size_t pos, const operation_context_t &ctx, wcstring wd,
                   bool can_do_io)
@@ -808,52 +828,44 @@ class highlighter_t {
           ctx(ctx),
           io_ok(can_do_io),
           working_directory(std::move(wd)),
-          color_array(str.size()) {
-        // Parse the tree.
-        parse_tree_from_string(buff,
-                               parse_flag_continue_after_error | parse_flag_include_comments |
-                                   parse_flag_accept_incomplete_tokens,
-                               &this->parse_tree, nullptr);
-    }
+          ast(ast::ast_t::parse(buff, ast_flags)) {}
 
     // Perform highlighting, returning an array of colors.
     color_array_t highlight();
 };
 
-void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) {
-    // Can only color nodes with valid source ranges.
-    if (!node.has_source() || node.source_length == 0) return;
-
-    // Fill the color array with our color in the corresponding range.
-    size_t source_end = node.source_start + node.source_length;
-    assert(source_end >= node.source_start);
-    assert(source_end <= color_array.size());
-
-    std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end,
-              color);
+wcstring highlighter_t::get_source(source_range_t r) const {
+    assert(r.start + r.length >= r.start && "Overflow");
+    assert(r.start + r.length <= this->buff.size() && "Out of range");
+    return this->buff.substr(r.start, r.length);
 }
 
-void highlighter_t::color_command(tnode_t<g::tok_string> node) {
-    auto source_range = node.source_range();
-    if (!source_range) return;
+void highlighter_t::color_node(const ast::node_t &node, highlight_spec_t color) {
+    color_range(node.source_range(), color);
+}
 
-    const wcstring cmd_str = node.get_source(this->buff);
+void highlighter_t::color_range(source_range_t range, highlight_spec_t color) {
+    assert(range.start + range.length <= this->color_array.size() && "Range out of bounds");
+    std::fill_n(this->color_array.begin() + range.start, range.length, color);
+}
+
+void highlighter_t::color_command(const ast::string_t &node) {
+    source_range_t source_range = node.source_range();
+    const wcstring cmd_str = get_source(source_range);
 
     // Get an iterator to the colors associated with the argument.
-    const size_t arg_start = source_range->start;
+    const size_t arg_start = source_range.start;
     const color_array_t::iterator colors = color_array.begin() + arg_start;
     color_string_internal(cmd_str, highlight_role_t::command, colors);
 }
 
 // node does not necessarily have type symbol_argument here.
-void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
+void highlighter_t::color_as_argument(const ast::node_t &node) {
     auto source_range = node.source_range();
-    if (!source_range) return;
-
-    const wcstring arg_str = node.get_source(this->buff);
+    const wcstring arg_str = get_source(source_range);
 
     // Get an iterator to the colors associated with the argument.
-    const size_t arg_start = source_range->start;
+    const size_t arg_start = source_range.start;
     const color_array_t::iterator arg_colors = color_array.begin() + arg_start;
 
     // Color this argument without concern for command substitutions.
@@ -905,15 +917,13 @@ void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
 
 /// Indicates whether the source range of the given node forms a valid path in the given
 /// working_directory.
-static bool node_is_potential_path(const wcstring &src, const parse_node_t &node,
-                                   const operation_context_t &ctx,
-                                   const wcstring &working_directory) {
-    if (!node.has_source()) return false;
-
+static bool range_is_potential_path(const wcstring &src, const source_range_t &range,
+                                    const operation_context_t &ctx,
+                                    const wcstring &working_directory) {
     // Get the node source, unescape it, and then pass it to is_potential_path along with the
     // working directory (as a one element list).
     bool result = false;
-    wcstring token(src, node.source_start, node.source_length);
+    wcstring token = src.substr(range.start, range.length);
     if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) {
         // Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY.
         // Put it back.
@@ -925,172 +935,257 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node
     return result;
 }
 
-bool highlighter_t::is_cd(tnode_t<g::plain_statement> stmt) const {
-    bool cmd_is_cd = false;
-    if (this->io_ok && stmt.has_source()) {
-        wcstring cmd_str;
-        if (plain_statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
-            cmd_is_cd = (cmd_str == L"cd");
-        }
+bool highlighter_t::is_cd(const ast::decorated_statement_t &stmt) const {
+    wcstring cmd_str;
+    if (this->io_ok && statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
+        return cmd_str == L"cd";
     }
-    return cmd_is_cd;
+    return false;
 }
 
-// Color all of the arguments of the given node list, which should be argument_list or
-// argument_or_redirection_list.
-void highlighter_t::color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd) {
-    // Find all the arguments of this list.
-    for (tnode_t<g::argument> arg : args) {
-        this->color_argument(arg.child<0>());
+void highlighter_t::visit(const ast::keyword_base_t &kw) {
+    highlight_role_t role = highlight_role_t::normal;
+    switch (kw.kw) {
+        case parse_keyword_t::kw_begin:
+        case parse_keyword_t::kw_builtin:
+        case parse_keyword_t::kw_case:
+        case parse_keyword_t::kw_command:
+        case parse_keyword_t::kw_else:
+        case parse_keyword_t::kw_end:
+        case parse_keyword_t::kw_exec:
+        case parse_keyword_t::kw_for:
+        case parse_keyword_t::kw_function:
+        case parse_keyword_t::kw_if:
+        case parse_keyword_t::kw_in:
+        case parse_keyword_t::kw_switch:
+        case parse_keyword_t::kw_while:
+            role = highlight_role_t::command;
+            break;
 
-        if (cmd_is_cd) {
-            // Mark this as an error if it's not 'help' and not a valid cd path.
-            wcstring param = arg.get_source(this->buff);
-            if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
-                bool is_help = string_prefixes_string(param, L"--help") ||
-                               string_prefixes_string(param, L"-h");
-                if (!is_help && this->io_ok &&
-                    !is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
-                    this->color_node(arg, highlight_role_t::error);
-                }
+        case parse_keyword_t::kw_and:
+        case parse_keyword_t::kw_or:
+        case parse_keyword_t::kw_not:
+        case parse_keyword_t::kw_exclam:
+        case parse_keyword_t::kw_time:
+            role = highlight_role_t::operat;
+            break;
+
+        case parse_keyword_t::none:
+            break;
+    }
+    color_node(kw, role);
+}
+
+void highlighter_t::visit(const ast::token_base_t &tok) {
+    maybe_t<highlight_role_t> role = highlight_role_t::normal;
+    switch (tok.type) {
+        case parse_token_type_end:
+        case parse_token_type_pipe:
+        case parse_token_type_background:
+            role = highlight_role_t::statement_terminator;
+            break;
+
+        case parse_token_type_andand:
+        case parse_token_type_oror:
+            role = highlight_role_t::operat;
+            break;
+
+        case parse_token_type_string:
+            // Assume all strings are params. This handles e.g. the variables in a for header or
+            // function header. Other strings (like arguments to commands) need more complex
+            // handling, which occurs in their respective overrides of visit().
+            role = highlight_role_t::param;
+
+        default:
+            break;
+    }
+    if (role) color_node(tok, *role);
+}
+
+void highlighter_t::visit(const ast::semi_nl_t &semi_nl) {
+    color_node(semi_nl, highlight_role_t::statement_terminator);
+}
+
+void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd) {
+    color_as_argument(arg);
+    if (cmd_is_cd && io_ok) {
+        // Mark this as an error if it's not 'help' and not a valid cd path.
+        wcstring param = arg.source(this->buff);
+        if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
+            bool is_help =
+                string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h");
+            if (!is_help && this->io_ok &&
+                !is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
+                this->color_node(arg, highlight_role_t::error);
             }
         }
     }
 }
 
-void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node) {
-    if (!redirection_node.has_source()) return;
+void highlighter_t::visit(const ast::variable_assignment_t &varas) {
+    color_as_argument(varas);
+    // TODO: Color the '=' in the variable assignment as an operator, for fun.
+    //    if (auto where = variable_assignment_equals_pos(varas.source(this->buff))) {
+    //        this->color_array.at(*where) = highlight_role_t::operat;
+    //    }
+}
 
-    tnode_t<g::tok_redirection> redir_prim = redirection_node.child<0>();  // like 2>
-    tnode_t<g::tok_string> redir_target = redirection_node.child<1>();     // like &1 or file path
+void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
+    // Color any decoration.
+    if (stmt.opt_decoration) this->visit(*stmt.opt_decoration);
 
-    if (redir_prim) {
-        wcstring target;
-        const maybe_t<pipe_or_redir_t> redirect =
-            redirection_for_node(redirection_node, this->buff, &target);
+    // Color the command's source code.
+    // If we get no source back, there's nothing to color.
+    maybe_t<wcstring> cmd = stmt.command.try_source(this->buff);
+    if (!cmd.has_value()) return;
 
-        // We may get a missing redirection type if the redirection is invalid.
-        auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection
-                                                     : highlight_role_t::error;
-        this->color_node(redir_prim, hl);
+    wcstring expanded_cmd;
+    bool is_valid_cmd = false;
+    if (!this->io_ok) {
+        // We cannot check if the command is invalid, so just assume it's valid.
+        is_valid_cmd = true;
+    } else if (variable_assignment_equals_pos(*cmd)) {
+        is_valid_cmd = true;
+    } else {
+        // Check to see if the command is valid.
+        // Try expanding it. If we cannot, it's an error.
+        bool expanded = statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
+        if (expanded && !has_expand_reserved(expanded_cmd)) {
+            is_valid_cmd =
+                command_is_valid(expanded_cmd, stmt.decoration(), working_directory, ctx.vars);
+        }
+    }
 
-        // Check if the argument contains a command substitution. If so, highlight it as a param
-        // even though it's a command redirection, and don't try to do any other validation.
-        if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
-            this->color_argument(redir_target);
+    // Color our statement.
+    if (is_valid_cmd) {
+        this->color_command(stmt.command);
+    } else {
+        this->color_node(stmt.command, highlight_role_t::error);
+    }
+
+    // Color arguments and redirections.
+    // However, if our command is 'cd', we have special logic for how arguments are colored.
+    bool is_cd = (expanded_cmd == L"cd");
+    for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) {
+        if (v.is_argument()) {
+            this->visit(v.argument(), is_cd);
         } else {
-            // No command substitution, so we can highlight the target file or fd. For example,
-            // disallow redirections into a non-existent directory.
-            bool target_is_valid = true;
+            this->visit(v.redirection());
+        }
+    }
+}
 
-            if (!redirect || !redirect->is_valid()) {
-                // not a valid redirection
-                target_is_valid = false;
-            } else if (!this->io_ok) {
-                // I/O is disallowed, so we don't have much hope of catching anything but gross
-                // errors. Assume it's valid.
-                target_is_valid = true;
-            } else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
-                // Could not be expanded.
-                target_is_valid = false;
-            } else {
-                // Ok, we successfully expanded our target. Now verify that it works with this
-                // redirection. We will probably need it as a path (but not in the case of fd
-                // redirections). Note that the target is now unescaped.
-                const wcstring target_path =
-                    path_apply_working_directory(target, this->working_directory);
-                switch (redirect->mode) {
-                    case redirection_mode_t::fd: {
-                        if (target == L"-") {
-                            target_is_valid = true;
-                        } else {
-                            int fd = fish_wcstoi(target.c_str());
-                            target_is_valid = !errno && fd >= 0;
-                        }
-                        break;
+void highlighter_t::visit(const ast::redirection_t &redir) {
+    maybe_t<pipe_or_redir_t> oper =
+        pipe_or_redir_t::from_string(redir.oper.source(this->buff));  // like 2>
+    wcstring target = redir.target.source(this->buff);                 // like &1 or file path
+
+    assert(oper.has_value() &&
+           "Should have successfully parsed a pipe_or_redir_t since it was in our ast");
+
+    // Color the > part.
+    // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1)
+    // If so, color the whole thing invalid and stop.
+    if (!oper->is_valid()) {
+        this->color_node(redir, highlight_role_t::error);
+        return;
+    }
+
+    // Color the operator part like 2>.
+    this->color_node(redir.oper, highlight_role_t::redirection);
+
+    // Color the target part.
+    // Check if the argument contains a command substitution. If so, highlight it as a param
+    // even though it's a command redirection, and don't try to do any other validation.
+    if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
+        this->color_as_argument(redir.target);
+    } else {
+        // No command substitution, so we can highlight the target file or fd. For example,
+        // disallow redirections into a non-existent directory.
+        bool target_is_valid = true;
+        if (!this->io_ok) {
+            // I/O is disallowed, so we don't have much hope of catching anything but gross
+            // errors. Assume it's valid.
+            target_is_valid = true;
+        } else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
+            // Could not be expanded.
+            target_is_valid = false;
+        } else {
+            // Ok, we successfully expanded our target. Now verify that it works with this
+            // redirection. We will probably need it as a path (but not in the case of fd
+            // redirections). Note that the target is now unescaped.
+            const wcstring target_path =
+                path_apply_working_directory(target, this->working_directory);
+            switch (oper->mode) {
+                case redirection_mode_t::fd: {
+                    if (target == L"-") {
+                        target_is_valid = true;
+                    } else {
+                        int fd = fish_wcstoi(target.c_str());
+                        target_is_valid = !errno && fd >= 0;
                     }
-                    case redirection_mode_t::input: {
-                        // Input redirections must have a readable non-directory.
-                        struct stat buf = {};
-                        target_is_valid = !waccess(target_path, R_OK) &&
-                                          !wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
-                        break;
+                    break;
+                }
+                case redirection_mode_t::input: {
+                    // Input redirections must have a readable non-directory.
+                    struct stat buf = {};
+                    target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) &&
+                                      !S_ISDIR(buf.st_mode);
+                    break;
+                }
+                case redirection_mode_t::overwrite:
+                case redirection_mode_t::append:
+                case redirection_mode_t::noclob: {
+                    // Test whether the file exists, and whether it's writable (possibly after
+                    // creating it). access() returns failure if the file does not exist.
+                    bool file_exists = false, file_is_writable = false;
+                    int err = 0;
+
+                    struct stat buf = {};
+                    if (wstat(target_path, &buf) < 0) {
+                        err = errno;
                     }
-                    case redirection_mode_t::overwrite:
-                    case redirection_mode_t::append:
-                    case redirection_mode_t::noclob: {
-                        // Test whether the file exists, and whether it's writable (possibly after
-                        // creating it). access() returns failure if the file does not exist.
-                        bool file_exists = false, file_is_writable = false;
-                        int err = 0;
 
-                        struct stat buf = {};
-                        if (wstat(target_path, &buf) < 0) {
-                            err = errno;
-                        }
+                    if (string_suffixes_string(L"/", target)) {
+                        // Redirections to things that are directories are definitely not
+                        // allowed.
+                        file_exists = false;
+                        file_is_writable = false;
+                    } else if (err == 0) {
+                        // No err. We can write to it if it's not a directory and we have
+                        // permission.
+                        file_exists = true;
+                        file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
+                    } else if (err == ENOENT) {
+                        // File does not exist. Check if its parent directory is writable.
+                        wcstring parent = wdirname(target_path);
 
-                        if (string_suffixes_string(L"/", target)) {
-                            // Redirections to things that are directories is definitely not
-                            // allowed.
-                            file_exists = false;
-                            file_is_writable = false;
-                        } else if (err == 0) {
-                            // No err. We can write to it if it's not a directory and we have
-                            // permission.
-                            file_exists = true;
-                            file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
-                        } else if (err == ENOENT) {
-                            // File does not exist. Check if its parent directory is writable.
-                            wcstring parent = wdirname(target_path);
+                        // Ensure that the parent ends with the path separator. This will ensure
+                        // that we get an error if the parent directory is not really a
+                        // directory.
+                        if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
 
-                            // Ensure that the parent ends with the path separator. This will ensure
-                            // that we get an error if the parent directory is not really a
-                            // directory.
-                            if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
-
-                            // Now the file is considered writable if the parent directory is
-                            // writable.
-                            file_exists = false;
-                            file_is_writable = (0 == waccess(parent, W_OK));
-                        } else {
-                            // Other errors we treat as not writable. This includes things like
-                            // ENOTDIR.
-                            file_exists = false;
-                            file_is_writable = false;
-                        }
-
-                        // NOCLOB means that we must not overwrite files that exist.
-                        target_is_valid =
-                            file_is_writable &&
-                            !(file_exists && redirect->mode == redirection_mode_t::noclob);
-                        break;
+                        // Now the file is considered writable if the parent directory is
+                        // writable.
+                        file_exists = false;
+                        file_is_writable = (0 == waccess(parent, W_OK));
+                    } else {
+                        // Other errors we treat as not writable. This includes things like
+                        // ENOTDIR.
+                        file_exists = false;
+                        file_is_writable = false;
                     }
+
+                    // NOCLOB means that we must not overwrite files that exist.
+                    target_is_valid =
+                        file_is_writable &&
+                        !(file_exists && oper->mode == redirection_mode_t::noclob);
+                    break;
                 }
             }
-
-            if (redir_target) {
-                auto hl = target_is_valid ? highlight_role_t::redirection : highlight_role_t::error;
-                this->color_node(redir_target, hl);
-            }
-        }
-    }
-}
-
-/// Color all of the redirections of the given command.
-void highlighter_t::color_redirections(tnode_t<g::arguments_or_redirections_list> list) {
-    for (const auto &node : list.descendants<g::redirection>()) {
-        this->color_redirection(node);
-    }
-}
-
-/// Color all the children of the command with the given type.
-void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type,
-                                   highlight_spec_t color) {
-    for (node_offset_t idx = 0; idx < parent.child_count; idx++) {
-        const parse_node_t *child = this->parse_tree.get_child(parent, idx);
-        if (child != nullptr && child->type == type) {
-            this->color_node(*child, color);
         }
+        this->color_node(redir.target,
+                         target_is_valid ? highlight_role_t::redirection : highlight_role_t::error);
     }
 }
 
@@ -1145,171 +1240,42 @@ highlighter_t::color_array_t highlighter_t::highlight() {
         ASSERT_IS_BACKGROUND_THREAD();
     }
 
-    const size_t length = buff.size();
-    assert(this->buff.size() == this->color_array.size());
-    if (length == 0) return color_array;
-
-    // Start out at zero.
+    this->color_array.resize(this->buff.size());
     std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{});
 
-    // Walk the node tree.
-    for (const parse_node_t &node : parse_tree) {
-        if (ctx.check_cancel()) return std::move(color_array);
-        switch (node.type) {
-            // Color direct string descendants, e.g. 'for' and 'in'.
-            case symbol_while_header:
-            case symbol_begin_header:
-            case symbol_function_header:
-            case symbol_if_clause:
-            case symbol_else_clause:
-            case symbol_case_item:
-            case symbol_decorated_statement:
-            case symbol_if_statement: {
-                this->color_children(node, parse_token_type_string, highlight_role_t::command);
-                break;
-            }
-            case symbol_switch_statement: {
-                tnode_t<g::switch_statement> switchn(&parse_tree, &node);
-                auto literal_switch = switchn.child<0>();
-                auto switch_arg = switchn.child<1>();
-                this->color_node(literal_switch, highlight_role_t::command);
-                this->color_node(switch_arg, highlight_role_t::param);
-                break;
-            }
-            case symbol_for_header: {
-                tnode_t<g::for_header> fhead(&parse_tree, &node);
-                // Color the 'for' and 'in' as commands.
-                auto literal_for = fhead.child<0>();
-                auto literal_in = fhead.child<2>();
-                this->color_node(literal_for, highlight_role_t::command);
-                this->color_node(literal_in, highlight_role_t::command);
+    this->visit_children(*ast.top());
+    if (ctx.check_cancel()) return std::move(color_array);
 
-                // Color the variable name as a parameter.
-                this->color_argument(fhead.child<1>());
-                break;
-            }
-
-            case parse_token_type_andand:
-            case parse_token_type_oror:
-                this->color_node(node, highlight_role_t::operat);
-                break;
-
-            case symbol_not_statement:
-                this->color_children(node, parse_token_type_string, highlight_role_t::operat);
-                break;
-
-            case symbol_job_decorator:
-                this->color_node(node, highlight_role_t::operat);
-                break;
-
-            case symbol_variable_assignment: {
-                tnode_t<g::variable_assignment> variable_assignment = {&parse_tree, &node};
-                this->color_argument(variable_assignment.child<0>());
-                break;
-            }
-
-            case parse_token_type_pipe:
-            case parse_token_type_background:
-            case parse_token_type_end:
-            case symbol_optional_background: {
-                this->color_node(node, highlight_role_t::statement_terminator);
-                break;
-            }
-            case symbol_optional_time: {
-                this->color_node(node, highlight_role_t::operat);
-                break;
-            }
-            case symbol_plain_statement: {
-                tnode_t<g::plain_statement> stmt(&parse_tree, &node);
-                // Get the decoration from the parent.
-                enum parse_statement_decoration_t decoration = get_decoration(stmt);
-
-                // Color the command.
-                tnode_t<g::tok_string> cmd_node = stmt.child<0>();
-                maybe_t<wcstring> cmd = cmd_node.get_source(buff);
-                if (!cmd) {
-                    break;  // not much as we can do without a node that has source text
-                }
-
-                bool is_valid_cmd = false;
-                if (!this->io_ok) {
-                    // We cannot check if the command is invalid, so just assume it's valid.
-                    is_valid_cmd = true;
-                } else if (variable_assignment_equals_pos(*cmd)) {
-                    is_valid_cmd = true;
-                } else {
-                    wcstring expanded_cmd;
-                    // Check to see if the command is valid.
-                    // Try expanding it. If we cannot, it's an error.
-                    bool expanded =
-                        plain_statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
-                    if (expanded && !has_expand_reserved(expanded_cmd)) {
-                        is_valid_cmd =
-                            command_is_valid(expanded_cmd, decoration, working_directory, ctx.vars);
-                    }
-                }
-                if (!is_valid_cmd) {
-                    this->color_node(*cmd_node, highlight_role_t::error);
-                } else {
-                    this->color_command(cmd_node);
-                }
-                break;
-            }
-            // Only work on root lists, so that we don't re-color child lists.
-            case symbol_arguments_or_redirections_list: {
-                tnode_t<g::arguments_or_redirections_list> list(&parse_tree, &node);
-                if (argument_list_is_root(list)) {
-                    bool cmd_is_cd = is_cd(list.try_get_parent<g::plain_statement>());
-                    this->color_arguments(list.descendants<g::argument>(), cmd_is_cd);
-                    this->color_redirections(list);
-                }
-                break;
-            }
-            case symbol_argument_list: {
-                tnode_t<g::argument_list> list(&parse_tree, &node);
-                if (argument_list_is_root(list)) {
-                    this->color_arguments(list.descendants<g::argument>());
-                }
-                break;
-            }
-            case symbol_end_command: {
-                this->color_node(node, highlight_role_t::command);
-                break;
-            }
-            case parse_special_type_parse_error:
-            case parse_special_type_tokenizer_error: {
-                this->color_node(node, highlight_role_t::error);
-                break;
-            }
-            case parse_special_type_comment: {
-                this->color_node(node, highlight_role_t::comment);
-                break;
-            }
-            default: {
-                break;
-            }
-        }
+    // Color every comment.
+    const auto &extras = ast.extras();
+    for (const source_range_t &r : extras.comments) {
+        this->color_range(r, highlight_role_t::comment);
     }
 
-    if (!this->io_ok || this->cursor_pos > this->buff.size()) {
-        return std::move(color_array);
+    // Color every extra semi.
+    for (const source_range_t &r : extras.semis) {
+        this->color_range(r, highlight_role_t::statement_terminator);
     }
 
-    // If the cursor is over an argument, and that argument is a valid path, underline it.
-    for (const auto &node : parse_tree) {
-        // Must be an argument with source.
-        if (node.type != symbol_argument || !node.has_source()) continue;
+    // Color every error range.
+    for (const source_range_t &r : extras.errors) {
+        this->color_range(r, highlight_role_t::error);
+    }
 
-        if (ctx.check_cancel()) return std::move(color_array);
-
-        // Underline every valid path.
-        if (node_is_potential_path(buff, node, ctx, working_directory)) {
-            // It is, underline it.
-            for (size_t i = node.source_start; i < node.source_start + node.source_length; i++) {
+    // Underline every valid path.
+    if (io_ok) {
+        for (const ast::node_t &node : ast) {
+            const ast::argument_t *arg = node.try_as<ast::argument_t>();
+            if (!arg || arg->unsourced) continue;
+            if (ctx.check_cancel()) break;
+            if (range_is_potential_path(buff, arg->range, ctx, working_directory)) {
                 // Don't color highlight_role_t::error because it looks dorky. For example,
                 // trying to cd into a non-directory would show an underline and also red.
-                if (this->color_array.at(i).foreground != highlight_role_t::error) {
-                    this->color_array.at(i).valid_path = true;
+                for (size_t i = arg->range.start, end = arg->range.start + arg->range.length;
+                     i < end; i++) {
+                    if (this->color_array.at(i).foreground != highlight_role_t::error) {
+                        this->color_array.at(i).valid_path = true;
+                    }
                 }
             }
         }

From 0d4f9c6220854a74afe6ecbfda2d79c53a293aca Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 17:28:31 -0700
Subject: [PATCH 04/13] Adopt the new AST in abbreviation expansion

This switches abbreviation expansion from parsing with parse_tree to the
new ast.
---
 src/reader.cpp | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/reader.cpp b/src/reader.cpp
index 6ec992276..063e3b379 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -44,6 +44,7 @@
 #include <set>
 #include <stack>
 
+#include "ast.h"
 #include "color.h"
 #include "common.h"
 #include "complete.h"
@@ -935,33 +936,29 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
     const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
 
     // Parse this subcmd.
-    parse_node_tree_t parse_tree;
-    parse_tree_from_string(subcmd,
-                           parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
-                           &parse_tree, nullptr);
+    using namespace ast;
+    auto ast =
+        ast_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens |
+                                 parse_flag_leave_unterminated);
 
     // Look for plain statements where the cursor is at the end of the command.
-    using namespace grammar;
-    tnode_t<tok_string> matching_cmd_node;
-    for (const parse_node_t &node : parse_tree) {
-        // Only interested in plain statements with source.
-        if (node.type != symbol_plain_statement || !node.has_source()) continue;
+    const ast::string_t *matching_cmd_node = nullptr;
+    for (const node_t &n : ast) {
+        const decorated_statement_t *stmt = n.try_as<decorated_statement_t>();
+        if (!stmt) continue;
 
-        // Get the command node. Skip it if we can't or it has no source.
-        tnode_t<plain_statement> statement(&parse_tree, &node);
-        tnode_t<tok_string> cmd_node = statement.child<0>();
+        // Skip if we have a decoration.
+        if (stmt->opt_decoration) continue;
 
-        // Skip decorated statements.
-        if (get_decoration(statement) != parse_statement_decoration_none) continue;
-
-        auto msource = cmd_node.source_range();
+        // Get the command's source range; skip the statement if its command has no source.
+        auto msource = stmt->command.try_source_range();
         if (!msource) continue;
 
         // Now see if its source range contains our cursor, including at the end.
         if (subcmd_cursor_pos >= msource->start &&
             subcmd_cursor_pos <= msource->start + msource->length) {
             // Success!
-            matching_cmd_node = cmd_node;
+            matching_cmd_node = &stmt->command;
             break;
         }
     }
@@ -969,11 +966,12 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
     // Now if we found a command node, expand it.
     maybe_t<edit_t> result{};
     if (matching_cmd_node) {
-        const wcstring token = matching_cmd_node.get_source(subcmd);
+        assert(!matching_cmd_node->unsourced && "Should not be unsourced");
+        const wcstring token = matching_cmd_node->source(subcmd);
         if (auto abbreviation = expand_abbreviation(token, vars)) {
             // There was an abbreviation! Replace the token in the full command. Maintain the
             // relative position of the cursor.
-            source_range_t r = *matching_cmd_node.source_range();
+            source_range_t r = matching_cmd_node->source_range();
             result = edit_t(subcmd_offset + r.start, r.length, std::move(*abbreviation));
         }
     }

From 46c4ec8d68527584b861e9627617e2ecafc6126b Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 17:32:31 -0700
Subject: [PATCH 05/13] Adopt the new AST in completion argument lists

This switches completion argument list expansion from parsing with
parse_tree to the new ast.
---
 src/parser.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/parser.cpp b/src/parser.cpp
index 3fbc65200..c48ec3868 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -11,6 +11,7 @@
 #include <memory>
 #include <utility>
 
+#include "ast.h"
 #include "common.h"
 #include "env.h"
 #include "event.h"
@@ -328,19 +329,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src,
                                                  expand_flags_t eflags,
                                                  const operation_context_t &ctx) {
     // Parse the string as an argument list.
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, nullptr /* errors */,
-                                symbol_freestanding_argument_list)) {
+    auto ast = ast::ast_t::parse_argument_list(arg_list_src);
+    if (ast.errored()) {
         // Failed to parse. Here we expect to have reported any errors in test_args.
         return {};
     }
 
     // Get the root argument list and extract arguments from it.
     completion_list_t result;
-    assert(!tree.empty());
-    tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
-    while (auto arg = arg_list.next_in_list<grammar::argument>()) {
-        const wcstring arg_src = arg.get_source(arg_list_src);
+    const ast::freestanding_argument_list_t *list =
+        ast.top()->as<ast::freestanding_argument_list_t>();
+    for (const ast::argument_t &arg : list->arguments) {
+        wcstring arg_src = arg.source(arg_list_src);
         if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) {
             break;  // failed to expand a string
         }

From 358d7072a2ccad80fcaa4d2d9d10425d8b3ff49f Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 17:43:54 -0700
Subject: [PATCH 06/13] Adopt the new AST in bash history import

This switches bash history importing from parsing with parse_tree to the
new ast.
---
 src/history.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/history.cpp b/src/history.cpp
index 8a8f070cc..0e2aa1385 100644
--- a/src/history.cpp
+++ b/src/history.cpp
@@ -29,6 +29,7 @@
 #include <type_traits>
 #include <unordered_set>
 
+#include "ast.h"
 #include "common.h"
 #include "env.h"
 #include "fallback.h"  // IWYU pragma: keep
@@ -1096,8 +1097,7 @@ void history_impl_t::populate_from_config_path() {
 static bool should_import_bash_history_line(const wcstring &line) {
     if (line.empty()) return false;
 
-    parse_node_tree_t parse_tree;
-    if (!parse_tree_from_string(line, parse_flag_none, &parse_tree, nullptr)) return false;
+    if (ast::ast_t::parse(line).errored()) return false;
 
     // In doing this test do not allow incomplete strings. Hence the "false" argument.
     parse_error_list_t errors;

From 7bea5ffa2eb0223d79f65a6a7ce4025e1c000463 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sat, 20 Jun 2020 18:22:11 -0700
Subject: [PATCH 07/13] Adopt the new AST in parse_util_compute_indents

This switches parse_util_compute_indents from parsing with parse_tree to
the new ast.

It also reworks the parse_util_compute_indents tests, because
parse_util_compute_indents will be the backing for fish_indent.
---
 src/fish_tests.cpp | 154 +++++++++++++++--------
 src/parse_util.cpp | 299 +++++++++++++++++++--------------------------
 2 files changed, 230 insertions(+), 223 deletions(-)

diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 9ad95fd97..1b1292299 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -1268,75 +1268,121 @@ static void test_cancellation() {
     parser.clear_cancel();
 }
 
+namespace indent_tests {
+// One piece of an indent test: either literal source text or a new expected indent level.
+struct segment_t {
+    // The expected indent to switch to; only meaningful when text is null.
+    int indent{0};
+    const char *text{nullptr};  // source text to append, or null for an indent-change segment
+
+    /* implicit */ segment_t(int indent) : indent(indent) {}  // indent-change segment
+    /* implicit */ segment_t(const char *text) : text(text) {}  // text segment
+};
+
+using test_t = std::vector<segment_t>;  // a single test: its segments in source order
+using test_list_t = std::vector<test_t>;
+
+// Append one test to *v, converting each argument (an int or a C string) into a segment_t.
+template <typename... Types>
+void add_test(test_list_t *v, const Types &... types) {
+    segment_t segments[] = {types...};  // relies on segment_t's implicit constructors
+    v->emplace_back(std::begin(segments), std::end(segments));
+}
+}  // namespace indent_tests
+
 static void test_indents() {
     say(L"Testing indents");
+    using namespace indent_tests;
 
-    // Here are the components of our source and the indents we expect those to be.
-    struct indent_component_t {
-        const wchar_t *txt;
-        int indent;
-    };
+    test_list_t tests;
+    add_test(&tests,              //
+             0, "if", 1, " foo",  //
+             0, "\nend");
 
-    const indent_component_t components1[] = {{L"if foo", 0}, {L"end", 0}, {NULL, -1}};
+    add_test(&tests,              //
+             0, "if", 1, " foo",  //
+             1, "\nfoo",          //
+             0, "\nend");
 
-    const indent_component_t components2[] = {{L"if foo", 0},
-                                              {L"", 1},  // trailing newline!
-                                              {NULL, -1}};
+    add_test(&tests,                //
+             0, "if", 1, " foo",    //
+             1, "\nif", 2, " bar",  //
+             1, "\nend",            //
+             0, "\nend");
 
-    const indent_component_t components3[] = {{L"if foo", 0},
-                                              {L"foo", 1},
-                                              {L"end", 0},  // trailing newline!
-                                              {NULL, -1}};
+    add_test(&tests,                //
+             0, "if", 1, " foo",    //
+             1, "\nif", 2, " bar",  //
+             1, "\n",  // FIXME: this should be 2 but parse_util_compute_indents has a bug
+             1, "\nend\n");
 
-    const indent_component_t components4[] = {{L"if foo", 0}, {L"if bar", 1}, {L"end", 1},
-                                              {L"end", 0},    {L"", 0},       {NULL, -1}};
+    add_test(&tests,                //
+             0, "if", 1, " foo",    //
+             1, "\nif", 2, " bar",  //
+             2, "\n");
 
-    const indent_component_t components5[] = {{L"if foo", 0}, {L"if bar", 1}, {L"", 2}, {NULL, -1}};
+    add_test(&tests,      //
+             0, "begin",  //
+             1, "\nfoo",  //
+             1, "\n");
 
-    const indent_component_t components6[] = {{L"begin", 0}, {L"foo", 1}, {L"", 1}, {NULL, -1}};
+    add_test(&tests,      //
+             0, "begin",  //
+             1, "\n;",    //
+             0, "end",    //
+             0, "\nfoo", 0, "\n");
 
-    const indent_component_t components7[] = {{L"begin", 0}, {L";", 1}, {L"end", 0},
-                                              {L"foo", 0},   {L"", 0},  {NULL, -1}};
+    add_test(&tests,      //
+             0, "begin",  //
+             1, "\n;",    //
+             0, "end",    //
+             0, "\nfoo", 0, "\n");
 
-    const indent_component_t components8[] = {{L"if foo", 0}, {L"if bar", 1}, {L"baz", 2},
-                                              {L"end", 1},    {L"", 1},       {NULL, -1}};
+    add_test(&tests,                //
+             0, "if", 1, " foo",    //
+             1, "\nif", 2, " bar",  //
+             2, "\nbaz",            //
+             1, "\nend", 1, "\n");
 
-    const indent_component_t components9[] = {{L"switch foo", 0}, {L"", 1}, {NULL, -1}};
+    add_test(&tests,           //
+             0, "switch foo",  //
+             1, "\n"           //
+    );
 
-    const indent_component_t components10[] = {
-        {L"switch foo", 0}, {L"case bar", 1}, {L"case baz", 1}, {L"quux", 2}, {L"", 2}, {NULL, -1}};
+    add_test(&tests,           //
+             0, "switch foo",  //
+             1, "\ncase bar",  //
+             1, "\ncase baz",  //
+             2, "\nquux",      //
+             2, "\nquux"       //
+    );
 
-    const indent_component_t components11[] = {{L"switch foo", 0},
-                                               {L"cas", 1},  // parse error indentation handling
-                                               {NULL, -1}};
+    add_test(&tests,           //
+             0, "switch foo",  //
+             1, "\ncas"        // parse error indentation handling
+    );
 
-    const indent_component_t components12[] = {{L"while false", 0},
-                                               {L"# comment", 1},   // comment indentation handling
-                                               {L"command", 1},     // comment indentation handling
-                                               {L"# comment2", 1},  // comment indentation handling
-                                               {NULL, -1}};
+    add_test(&tests,                   //
+             0, "while", 1, " false",  //
+             1, "\n# comment",         // comment indentation handling
+             1, "\ncommand",           //
+             1, "\n# comment 2"        //
+    );
 
-    const indent_component_t *tests[] = {components1, components2,  components3,  components4,
-                                         components5, components6,  components7,  components8,
-                                         components9, components10, components11, components12};
-    for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) {
-        const indent_component_t *components = tests[which];
-        // Count how many we have.
-        size_t component_count = 0;
-        while (components[component_count].txt != NULL) {
-            component_count++;
-        }
-
-        // Generate the expected indents.
+    int test_idx = 0;
+    for (const test_t &test : tests) {
+        // Construct the input text and expected indents.
         wcstring text;
         std::vector<int> expected_indents;
-        for (size_t i = 0; i < component_count; i++) {
-            if (i > 0) {
-                text.push_back(L'\n');
-                expected_indents.push_back(components[i].indent);
+        int current_indent = 0;
+        for (const segment_t &segment : test) {
+            if (!segment.text) {
+                current_indent = segment.indent;
+            } else {
+                wcstring tmp = str2wcstring(segment.text);
+                text.append(tmp);
+                expected_indents.insert(expected_indents.end(), tmp.size(), current_indent);
             }
-            text.append(components[i].txt);
-            expected_indents.resize(text.size(), components[i].indent);
         }
         do_test(expected_indents.size() == text.size());
 
@@ -1350,11 +1396,13 @@ static void test_indents() {
         do_test(expected_indents.size() == indents.size());
         for (size_t i = 0; i < text.size(); i++) {
             if (expected_indents.at(i) != indents.at(i)) {
-                err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i,
-                    which + 1, expected_indents.at(i), indents.at(i), text.c_str());
-                break;  // don't keep showing errors for the rest of the line
+                err(L"Wrong indent at index %lu (char 0x%02x) in test #%d (expected %d, actual "
+                    L"%d):\n%ls\n",
+                    i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str());
+                break;  // don't keep showing errors for the rest of the test
             }
         }
+        test_idx++;
     }
 }
 
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index f9789f733..77f1c0c63 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <type_traits>
 
+#include "ast.h"
 #include "builtin.h"
 #include "common.h"
 #include "expand.h"
@@ -565,121 +566,16 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
     return result;
 }
 
-/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of
-/// indents the same size as the original source string. Set the indent correspdonding to the node's
-/// source range, if appropriate.
-///
-/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>'
-/// then we have an if node with an empty job list (without source) but we want the last line to be
-/// indented anyways.
-///
-/// switch statements also indent.
-///
-/// max_visited_node_idx is the largest index we visited.
-static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx,
-                                      int node_indent, parse_token_type_t parent_type,
-                                      std::vector<int> *indents, int *trailing_indent,
-                                      node_offset_t *max_visited_node_idx) {
-    // Guard against incomplete trees.
-    if (node_idx > tree.size()) return;
-
-    // Update max_visited_node_idx.
-    if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx;
-
-    // We could implement this by utilizing the fish grammar. But there's an easy trick instead:
-    // almost everything that wraps a job list should be indented by 1. So just find all of the job
-    // lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other
-    // exception is job_list itself: a job_list is a job and a job_list, and we want that child list
-    // to be indented the same as the parent. So just find all job_lists whose parent is not a
-    // job_list, and increment their indent by 1. We also want to treat andor_job_list like
-    // job_lists.
-    const parse_node_t &node = tree.at(node_idx);
-    const parse_token_type_t node_type = node.type;
-
-    // Increment the indent if we are either a root job_list, or root case_item_list.
-    const bool is_root_job_list = node_type != parent_type && (node_type == symbol_job_list ||
-                                                               node_type == symbol_andor_job_list);
-    const bool is_root_case_item_list =
-        node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
-    if (is_root_job_list || is_root_case_item_list) {
-        node_indent += 1;
-    }
-
-    // If we have source, store the trailing indent unconditionally. If we do not have source, store
-    // the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job
-    // lists from affecting the trailing indent. For example, code like this:
-    //
-    //   if foo
-    //
-    // will be parsed as this:
-    //
-    //   job_list
-    //     job
-    //        if_statement
-    //          job [if]
-    //          job_list [empty]
-    //     job_list [empty]
-    //
-    // There's two "terminal" job lists, and we want the innermost one.
-    //
-    // Note we are relying on the fact that nodes are in the same order as the source, i.e. an
-    // in-order traversal of the node tree also traverses the source from beginning to end.
-    if (node.has_source() || node_indent > *trailing_indent) {
-        *trailing_indent = node_indent;
-    }
-
-    // Store the indent into the indent array.
-    if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) {
-        if (node.has_source()) {
-            // A normal non-empty node. Store the indent unconditionally.
-            indents->at(node.source_start) = node_indent;
-        } else {
-            // An empty node. We have a source offset but no source length. This can come about when
-            // a node is legitimately empty:
-            //
-            //   while true; end
-            //
-            // The job_list inside the while loop is empty. It still has a source offset (at the end
-            // of the while statement) but no source extent. We still need to capture that indent,
-            // because there may be comments inside:
-            //
-            //      while true
-            //         # loop forever
-            //      end
-            //
-            // The 'loop forever' comment must be indented, by virtue of storing the indent.
-            //
-            // Now consider what happens if we remove the end:
-            //
-            //     while true
-            //       # loop forever
-            //
-            // Now both the job_list and end_command are unmaterialized. However, we want the indent
-            // to be of the job_list and not the end_command.  Therefore, we only store the indent
-            // if it's bigger.
-            if (node_indent > indents->at(node.source_start)) {
-                indents->at(node.source_start) = node_indent;
-            }
-        }
-    }
-
-    // Recursive to all our children.
-    for (node_offset_t idx = 0; idx < node.child_count; idx++) {
-        // Note we pass our type to our child, which becomes its parent node type.
-        compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents,
-                                  trailing_indent, max_visited_node_idx);
-    }
-}
-
 std::vector<int> parse_util_compute_indents(const wcstring &src) {
     // Make a vector the same size as the input string, which contains the indents. Initialize them
-    // to -1.
+    // to 0.
+    static wcstring ssss;
+    ssss = src;
     const size_t src_size = src.size();
-    std::vector<int> indents(src_size, -1);
+    std::vector<int> indents(src_size, 0);
 
     // Simple trick: if our source does not contain a newline, then all indents are 0.
     if (src.find('\n') == wcstring::npos) {
-        std::fill(indents.begin(), indents.end(), 0);
         return indents;
     }
 
@@ -687,78 +583,141 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
     // the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
     // foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
     // were a case item list.
-    parse_node_tree_t tree;
-    parse_tree_from_string(src,
-                           parse_flag_continue_after_error | parse_flag_include_comments |
-                               parse_flag_accept_incomplete_tokens,
-                           &tree, nullptr /* errors */);
+    using namespace ast;
+    auto ast =
+        ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
+                              parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
 
-    // Start indenting at the first node. If we have a parse error, we'll have to start indenting
-    // from the top again.
-    node_offset_t start_node_idx = 0;
-    int last_trailing_indent = 0;
+    // Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
+    // visiting its children.
+    struct indent_visitor_t {
+        explicit indent_visitor_t(std::vector<int> &indents) : indents(indents) {}
 
-    while (start_node_idx < tree.size()) {
-        // The indent that we'll get for the last line.
-        int trailing_indent = 0;
+        void visit(const node_t &node) {
+            int inc = 0;
+            int dec = 0;
+            switch (node.type) {
+                case type_t::job_list:
+                case type_t::andor_job_list:
+                    // Job lists are never unwound.
+                    inc = 1;
+                    dec = 1;
+                    break;
 
-        // Biggest offset we visited.
-        node_offset_t max_visited_node_idx = 0;
+                // Increment indents for conditions in headers (#1665).
+                case type_t::job_conjunction:
+                    if (node.parent->type == type_t::while_header ||
+                        node.parent->type == type_t::if_clause) {
+                        inc = 1;
+                        dec = 1;
+                    }
+                    break;
 
-        // Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which
-        // will prevent the really-root job list from indenting.
-        compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list,
-                                  &indents, &trailing_indent, &max_visited_node_idx);
+                // Increment indents for piped remainders.
+                case type_t::job_continuation_list:
+                    if (node.as<job_continuation_list_t>()->count() > 0) {
+                        inc = 1;
+                        dec = 1;
+                    }
+                    break;
 
-        // We may have more to indent. The trailing indent becomes our current indent. Start at the
-        // node after the last we visited.
-        last_trailing_indent = trailing_indent;
-        start_node_idx = max_visited_node_idx + 1;
-    }
+                case type_t::case_item_list:
+                    // Here's a hack. Consider:
+                    // switch abc
+                    //    cas
+                    //
+                    // fish will see that 'cas' is not valid inside a switch statement because it is
+                    // not "case". It will then unwind back to the top level job list, producing a
+                    // parse tree like:
+                    //
+                    //   job_list
+                    //      switch_job
+                    //         <err>
+                    //      normal_job
+                    //         cas
+                    //
+                    // And so we will think that the 'cas' job is at the same level as the switch.
+                    // To address this, if we see that the switch statement was not closed, do not
+                    // decrement the indent afterwards.
+                    inc = 1;
+                    dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1;
+                    break;
 
-    // Handle comments. Each comment node has a parent (which is whatever the top of the symbol
-    // stack was when the comment was encountered). So the source range of the comment has the same
-    // indent as its parent.
-    const size_t tree_size = tree.size();
-    for (node_offset_t i = 0; i < tree_size; i++) {
-        const parse_node_t &node = tree.at(i);
-        if (node.type == parse_special_type_comment && node.has_source() &&
-            node.parent < tree_size) {
-            const parse_node_t &parent = tree.at(node.parent);
-            if (parent.source_start != SOURCE_OFFSET_INVALID) {
-                indents.at(node.source_start) = indents.at(parent.source_start);
+                default:
+                    break;
             }
-        }
-    }
+            indent += inc;
 
-    // Now apply the indents. The indents array has -1 for places where the indent does not change,
-    // so start at each value and extend it along the run of -1s.
-    int last_indent = 0;
-    for (size_t i = 0; i < src_size; i++) {
-        int this_indent = indents.at(i);
-        if (this_indent < 0) {
-            indents.at(i) = last_indent;
+            // If we increased the indentation, apply it to the remainder of the string, even if the
+            // list is empty. For example (where _ represents the cursor):
+            //
+            //    if foo
+            //       _
+            //
+            // we want to indent the newline.
+            if (inc) {
+                std::fill(indents.begin() + last_leaf_end, indents.end(), indent);
+                last_indent = indent;
+            }
+
+            // If this is a leaf node, apply the current indentation.
+            if (node.category == category_t::leaf) {
+                auto range = node.source_range();
+                if (range.length > 0) {
+                    // Fill to the end.
+                    // Later nodes will come along and overwrite these.
+                    std::fill(indents.begin() + range.start, indents.end(), indent);
+                    last_leaf_end = range.start + range.length;
+                    last_indent = indent;
+                }
+            }
+
+
+            node_visitor(*this).accept_children_of(&node);
+            indent -= dec;
+        }
+
+        // The one-past-the-last index of the most recently encountered leaf node.
+        // We use this to populate the indents even if there are no tokens in the range.
+        size_t last_leaf_end{0};
+
+        // The last indent which we assigned.
+        int last_indent{-1};
+
+        // List of indents, which we populate.
+        std::vector<int> &indents;
+
+        // Initialize our starting indent to -1, as our top-level node is a job list which
+        // will immediately increment it.
+        int indent{-1};
+    };
+
+    indent_visitor_t iv(indents);
+    node_visitor(iv).accept(ast.top());
+
+    // All newlines now get the *next* indent.
+    // For example, in this code:
+    //    if true
+    //       stuff
+    // the newline "belongs" to the if statement as it ends its job.
+    // But when rendered, it visually belongs to the job list.
+
+    // FIXME: if there's a middle newline, we will indent it wrongly.
+    // For example:
+    //    if true
+    //
+    //    end
+    // Here the middle newline should be indented by 1.
+
+    size_t idx = src_size;
+    int next_indent = iv.last_indent;
+    while (idx--) {
+        if (src.at(idx) == L'\n') {
+            indents.at(idx) = next_indent;
         } else {
-            // New indent level.
-            last_indent = this_indent;
-            // Make all whitespace before a token have the new level. This avoid using the wrong
-            // indentation level if a new line starts with whitespace.
-            size_t prev_char_idx = i;
-            while (prev_char_idx--) {
-                if (!std::wcschr(L" \n\t\r", src.at(prev_char_idx))) break;
-                indents.at(prev_char_idx) = last_indent;
-            }
+            next_indent = indents.at(idx);
         }
     }
-
-    // Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly
-    // indented even if it is empty.
-    size_t suffix_idx = src_size;
-    while (suffix_idx--) {
-        if (!std::wcschr(L" \n\t\r", src.at(suffix_idx))) break;
-        indents.at(suffix_idx) = last_trailing_indent;
-    }
-
     return indents;
 }
 

From 202fdfa54a35be3a253fd16c79ccb1b087a135b6 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Wed, 1 Jul 2020 21:06:58 -0700
Subject: [PATCH 08/13] Adopt the new AST in parse_util_detect_errors

This switches parse_util_detect_errors from parsing with parse_tree to
the new ast.
---
 src/fish_tests.cpp |  20 ++--
 src/parse_tree.h   |   1 +
 src/parse_util.cpp | 282 ++++++++++++++++++++++++---------------------
 src/parse_util.h   |   9 +-
 4 files changed, 172 insertions(+), 140 deletions(-)

diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 1b1292299..08e14bfce 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -40,6 +40,7 @@
 #include <utility>
 #include <vector>
 
+#include "ast.h"
 #include "autoload.h"
 #include "builtin.h"
 #include "color.h"
@@ -978,15 +979,18 @@ static void test_debounce_timeout() {
 }
 
 static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL, symbol_argument_list)) {
+    using namespace ast;
+    auto ast = ast_t::parse_argument_list(src, parse_flag_none);
+    if (ast.errored()) {
         return PARSER_TEST_ERROR;
     }
-
-    assert(!tree.empty());  //!OCLINT(multiple unary operator)
-    tnode_t<grammar::argument_list> arg_list{&tree, &tree.at(0)};
-    auto first_arg = arg_list.next_in_list<grammar::argument>();
-    return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src));
+    const ast::argument_t *first_arg =
+        ast.top()->as<freestanding_argument_list_t>()->arguments.at(0);
+    if (!first_arg) {
+        err(L"Failed to parse an argument");
+        return 0;
+    }
+    return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src));
 }
 
 /// Test the parser.
@@ -1084,7 +1088,7 @@ static void test_parser() {
     }
 
     if (parse_util_detect_errors(L"echo (\nfoo\n  bar") != PARSER_TEST_INCOMPLETE) {
-        err(L"unterminated multiline subhsell not reported properly");
+        err(L"unterminated multiline subshell not reported properly");
     }
 
     if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 34525db6a..8f8d54f74 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -42,6 +42,7 @@ struct parse_token_t {
     source_offset_t source_length{0};
 
     /// \return the source range.
+    /// Note the start may be invalid.
     source_range_t range() const {
         return source_range_t{source_start, source_length};
     }
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index 77f1c0c63..8a74c905c 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -749,17 +749,13 @@ bool parse_util_argument_is_help(const wchar_t *s) {
     return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0;
 }
 
-/// Check if the first argument under the given node is --help.
-static bool first_argument_is_help(tnode_t<grammar::plain_statement> statement,
-                                   const wcstring &src) {
-    bool is_help = false;
-    auto arg_nodes = get_argument_nodes(statement.child<1>());
-    if (!arg_nodes.empty()) {
-        // Check the first argument only.
-        wcstring first_arg_src = arg_nodes.front().get_source(src);
-        is_help = parse_util_argument_is_help(first_arg_src.c_str());
+/// \return a pointer to the first argument node in an argument_or_redirection_list_t, or nullptr
+/// if the list contains no arguments (i.e. it is empty or holds only redirections).
+const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
+    for (const ast::argument_or_redirection_t &v : list) {
+        if (v.is_argument()) return &v.argument();
     }
-    return is_help;
+    return nullptr;
 }
 
 /// Given a wide character immediately after a dollar sign, return the appropriate error message.
@@ -915,11 +911,13 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse
 /// Test if this argument contains any errors. Detected errors include syntax errors in command
 /// substitutions, improperly escaped characters and improper use of the variable expansion
 /// operator.
-parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::argument> node,
+parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
                                                               const wcstring &arg_src,
                                                               parse_error_list_t *out_errors) {
-    assert(node.has_source() && "argument has no source");
-    auto source_start = node.source_range()->start;
+    maybe_t<source_range_t> source_range = arg.try_source_range();
+    if (!source_range.has_value()) return 0;
+
+    size_t source_start = source_range->start;
     int err = 0;
     wchar_t *paran_begin, *paran_end;
     int do_loop = 1;
@@ -1013,10 +1011,10 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::a
 }
 
 /// Given that the job given by node should be backgrounded, return true if we detect any errors.
-static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
+static bool detect_errors_in_backgrounded_job(const ast::job_t &job,
                                               parse_error_list_t *parse_errors) {
-    namespace g = grammar;
-    auto source_range = job.source_range();
+    using namespace ast;
+    auto source_range = job.try_source_range();
     if (!source_range) return false;
 
     bool errored = false;
@@ -1025,54 +1023,77 @@ static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
     // foo & ; or bar
     // if foo & ; end
     // while foo & ; end
-    auto job_conj = job.try_get_parent<g::job_conjunction>();
-    if (job_conj.try_get_parent<g::if_clause>()) {
-        errored = append_syntax_error(parse_errors, source_range->start,
-                                      BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
-    } else if (job_conj.try_get_parent<g::while_header>()) {
-        errored = append_syntax_error(parse_errors, source_range->start,
-                                      BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
-    } else if (auto jlist = job_conj.try_get_parent<g::job_list>()) {
-        // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
-        // Fetch the job list and then advance it by one.
-        auto first_jconj = jlist.next_in_list<g::job_conjunction>();
-        assert(first_jconj == job.try_get_parent<g::job_conjunction>() &&
-               "Expected first job to be the node we found");
-        (void)first_jconj;
+    const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>();
+    if (!job_conj) return false;
 
-        // Try getting the next job's decorator.
-        if (auto next_job_dec = jlist.next_in_list<g::job_decorator>()) {
-            // The next job is indeed a boolean statement.
-            parse_job_decoration_t bool_type = bool_statement_type(next_job_dec);
-            if (bool_type == parse_job_decoration_and) {
-                errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
-                                              BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and");
-            } else if (bool_type == parse_job_decoration_or) {
-                errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
-                                              BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or");
+    if (job_conj->parent->try_as<if_clause_t>()) {
+        errored = append_syntax_error(parse_errors, source_range->start,
+                                      BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
+    } else if (job_conj->parent->try_as<while_header_t>()) {
+        errored = append_syntax_error(parse_errors, source_range->start,
+                                      BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
+    } else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) {
+        // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
+        // Find the index of ourselves in the job list.
+        size_t index;
+        for (index = 0; index < jlist->count(); index++) {
+            if (jlist->at(index) == job_conj) break;
+        }
+        assert(index < jlist->count() && "Should have found the job in the list");
+
+        // Try getting the next job and check its decorator.
+        if (const job_conjunction_t *next = jlist->at(index + 1)) {
+            if (const keyword_base_t *deco = next->decorator.contents.get()) {
+                assert(
+                    (deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) &&
+                    "Unexpected decorator keyword");
+                const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or");
+                errored = append_syntax_error(parse_errors, deco->source_range().start,
+                                              BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
             }
         }
     }
     return errored;
 }
 
-static bool detect_errors_in_plain_statement(const wcstring &buff_src,
-                                             const parse_node_tree_t &node_tree,
-                                             tnode_t<grammar::plain_statement> pst,
-                                             parse_error_list_t *parse_errors) {
-    using namespace grammar;
+static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
+                                                 const ast::decorated_statement_t &dst,
+                                                 parse_error_list_t *parse_errors) {
+    using namespace ast;
     bool errored = false;
-    auto source_start = pst.source_range()->start;
+    auto source_start = dst.source_range().start;
+    const parse_statement_decoration_t decoration = dst.decoration();
 
-    // In a few places below, we want to know if we are in a pipeline.
-    tnode_t<statement> st = pst.try_get_parent<decorated_statement>().try_get_parent<statement>();
-    pipeline_position_t pipe_pos = get_pipeline_position(st);
-    bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
+    // Determine if the first argument is help.
+    bool first_arg_is_help = false;
+    if (const auto *arg = get_first_arg(dst.args_or_redirs)) {
+        wcstring arg_src = arg->source(buff_src);
+        first_arg_is_help = parse_util_argument_is_help(arg_src.c_str());
+    }
 
-    // We need to know the decoration.
-    const enum parse_statement_decoration_t decoration = get_decoration(pst);
+    // Get the statement we are part of.
+    const statement_t *st = dst.parent->as<statement_t>();
+
+    // Walk up to the job.
+    const ast::job_t *job = nullptr;
+    for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) {
+        assert(cursor && "Reached root without finding a job");
+        job = cursor->try_as<ast::job_t>();
+    }
+    assert(job && "Should have found the job");
+
+    // Check our pipeline position.
+    pipeline_position_t pipe_pos;
+    if (job->continuation.empty()) {
+        pipe_pos = pipeline_position_t::none;
+    } else if (&job->statement == st) {
+        pipe_pos = pipeline_position_t::first;
+    } else {
+        pipe_pos = pipeline_position_t::subsequent;
+    }
 
     // Check that we don't try to pipe through exec.
+    bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
     if (is_in_pipeline && decoration == parse_statement_decoration_exec) {
         errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, L"exec");
     }
@@ -1083,14 +1104,14 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
     if (pipe_pos == pipeline_position_t::subsequent) {
         // check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
         // commands.
-        wcstring command = pst.child<0>().get_source(buff_src);
+        wcstring command = dst.command.source(buff_src);
         if (command == L"and" || command == L"or") {
             errored =
                 append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, command.c_str());
         }
     }
 
-    if (maybe_t<wcstring> unexp_command = command_for_plain_statement(pst, buff_src)) {
+    if (maybe_t<wcstring> unexp_command = dst.command.try_source(buff_src)) {
         wcstring command;
         // Check that we can expand the command.
         if (expand_to_command_and_args(*unexp_command, operation_context_t::empty(), &command,
@@ -1107,40 +1128,40 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
 
         // Check that we don't return from outside a function. But we allow it if it's
         // 'return --help'.
-        if (!errored && command == L"return") {
+        if (!errored && command == L"return" && !first_arg_is_help) {
+            // See if we are in a function.
             bool found_function = false;
-            for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
-                 ancestor = node_tree.get_parent(*ancestor)) {
-                auto fh = tnode_t<block_statement>::try_create(&node_tree, ancestor)
-                              .child<0>()
-                              .try_get_child<function_header, 0>();
-                if (fh) {
-                    found_function = true;
-                    break;
+            for (const node_t *cursor = &dst; cursor != nullptr; cursor = cursor->parent) {
+                if (const auto *bs = cursor->try_as<block_statement_t>()) {
+                    if (bs->header->type == type_t::function_header) {
+                        found_function = true;
+                        break;
+                    }
                 }
             }
-            if (!found_function && !first_argument_is_help(pst, buff_src)) {
+
+            if (!found_function) {
                 errored = append_syntax_error(parse_errors, source_start, INVALID_RETURN_ERR_MSG);
             }
         }
 
         // Check that we don't break or continue from outside a loop.
-        if (!errored && (command == L"break" || command == L"continue")) {
+        if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) {
             // Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
             // stop the search; we can't break an outer loop from inside a function.
             // This is a little funny because we can't tell if it's a 'for' or 'while'
             // loop from the ancestor alone; we need the header. That is, we hit a
             // block_statement, and have to check its header.
             bool found_loop = false;
-            for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
-                 ancestor = node_tree.get_parent(*ancestor)) {
-                tnode_t<block_header> bh =
-                    tnode_t<block_statement>::try_create(&node_tree, ancestor).child<0>();
-                if (bh.try_get_child<while_header, 0>() || bh.try_get_child<for_header, 0>()) {
+            for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) {
+                const auto *block = ancestor->try_as<block_statement_t>();
+                if (!block) continue;
+                if (block->header->type == type_t::for_header ||
+                    block->header->type == type_t::while_header) {
                     // This is a loop header, so we can break or continue.
                     found_loop = true;
                     break;
-                } else if (bh.try_get_child<function_header, 0>()) {
+                } else if (block->header->type == type_t::function_header) {
                     // This is a function header, so we cannot break or
                     // continue. We stop our search here.
                     found_loop = false;
@@ -1148,7 +1169,7 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
                 }
             }
 
-            if (!found_loop && !first_argument_is_help(pst, buff_src)) {
+            if (!found_loop) {
                 errored = append_syntax_error(
                     parse_errors, source_start,
                     (command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG));
@@ -1167,12 +1188,22 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
     return errored;
 }
 
+/// Verify that the list trailing a block's 'end', like `begin; end > /dev/null`, contains only
+/// redirections (#986): an argument there is reported as END_ARG_ERR_MSG at its source start.
+static bool detect_errors_in_block_redirection_list(
+    const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) {
+    if (const auto *first_arg = get_first_arg(args_or_redirs)) {
+        return append_syntax_error(out_errors, first_arg->source_range().start,
+                                   END_ARG_ERR_MSG);
+    }
+    return false;
+}
+
 parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                   parse_error_list_t *out_errors,
                                                   bool allow_incomplete,
                                                   parsed_source_ref_t *out_pstree) {
     namespace g = grammar;
-    parse_node_tree_t node_tree;
     parse_error_list_t parse_errors;
 
     parser_test_error_bits_t res = 0;
@@ -1192,12 +1223,15 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
     // allow_incomplete is set.
     bool has_unclosed_quote_or_subshell = false;
 
-    // Parse the input string into a parse tree. Some errors are detected here.
-    bool parsed = parse_tree_from_string(
-        buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
-        &parse_errors);
+    const parse_tree_flags_t parse_flags =
+        allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;
 
+    // Parse the input string into an ast. Some errors are detected here.
+    using namespace ast;
+    auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors);
     if (allow_incomplete) {
+        // Issue #1238: If the only error was unterminated quote, then consider this to have parsed
+        // successfully.
         size_t idx = parse_errors.size();
         while (idx--) {
             if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote ||
@@ -1209,19 +1243,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
         }
     }
 
-    // Issue #1238: If the only error was unterminated quote, then consider this to have parsed
-    // successfully. A better fix would be to have parse_tree_from_string return this information
-    // directly (but it would be a shame to munge up its nice bool return).
-    if (parse_errors.empty() && has_unclosed_quote_or_subshell) {
-        parsed = true;
-    }
-
-    if (!parsed) {
-        errored = true;
-    }
-
     // has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
     assert(!has_unclosed_quote_or_subshell || allow_incomplete);
+    if (has_unclosed_quote_or_subshell) {
+        // We do not bother to validate the rest of the tree in this case.
+        return PARSER_TEST_INCOMPLETE;
+    }
+
+    errored = !parse_errors.empty();
 
     // Expand all commands.
     // Verify 'or' and 'and' not used inside pipelines.
@@ -1230,21 +1259,17 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
     // Verify no variable expansions.
 
     if (!errored) {
-        for (const parse_node_t &node : node_tree) {
-            if (node.type == symbol_end_command && !node.has_source()) {
-                // An 'end' without source is an unclosed block.
-                has_unclosed_block = true;
-            } else if (node.type == symbol_statement && !node.has_source()) {
-                // Check for a statement without source in a pipeline, i.e. unterminated pipeline.
-                auto pipe_pos = get_pipeline_position({&node_tree, &node});
-                if (pipe_pos != pipeline_position_t::none) {
+        for (const node_t &node : ast) {
+            if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
+                // Somewhat clumsy way of checking for a statement without source in a pipeline.
+                // See if our pipe has source but our statement does not.
+                if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
                     has_unclosed_pipe = true;
                 }
-            } else if (node.type == symbol_argument) {
-                tnode_t<g::argument> arg{&node_tree, &node};
-                const wcstring arg_src = node.get_source(buff_src);
-                res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors);
-            } else if (node.type == symbol_job) {
+            } else if (const argument_t *arg = node.try_as<argument_t>()) {
+                wcstring arg_src = arg->source(buff_src);
+                res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors);
+            } else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
                 // Disallow background in the following cases:
                 //
                 // foo & ; and bar
@@ -1252,25 +1277,27 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                 // if foo & ; end
                 // while foo & ; end
                 // If it's not a background job, nothing to do.
-                auto job = tnode_t<g::job>{&node_tree, &node};
-                if (job_node_is_background(job)) {
-                    errored |= detect_errors_in_backgrounded_job(job, &parse_errors);
+                if (job->bg) {
+                    errored |= detect_errors_in_backgrounded_job(*job, &parse_errors);
                 }
-            } else if (node.type == symbol_arguments_or_redirections_list) {
-                // verify no arguments to the end command of if, switch, begin (#986).
-                auto list = tnode_t<g::arguments_or_redirections_list>{&node_tree, &node};
-                if (list.try_get_parent<g::if_statement>() ||
-                    list.try_get_parent<g::switch_statement>() ||
-                    list.try_get_parent<g::block_statement>()) {
-                    if (auto arg = list.next_in_list<g::argument>()) {
-                        errored = append_syntax_error(&parse_errors, arg.source_range()->start,
-                                                      END_ARG_ERR_MSG);
-                    }
-                }
-            } else if (node.type == symbol_plain_statement) {
-                tnode_t<grammar::plain_statement> pst{&node_tree, &node};
+            } else if (const ast::decorated_statement_t *stmt =
+                           node.try_as<decorated_statement_t>()) {
+                errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &parse_errors);
+            } else if (const auto *block = node.try_as<block_statement_t>()) {
+                // An 'end' without source is an unclosed block.
+                if (block->end.unsourced) has_unclosed_block = true;
                 errored |=
-                    detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors);
+                    detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors);
+            } else if (const auto *ifs = node.try_as<if_statement_t>()) {
+                // An 'end' without source is an unclosed if statement.
+                if (ifs->end.unsourced) has_unclosed_block = true;
+                errored |=
+                    detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors);
+            } else if (const auto *switchs = node.try_as<switch_statement_t>()) {
+                // An 'end' without source is an unclosed switch statement.
+                if (switchs->end.unsourced) has_unclosed_block = true;
+                errored |=
+                    detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors);
             }
         }
     }
@@ -1285,7 +1312,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
     }
 
     if (out_pstree != nullptr) {
-        *out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(node_tree));
+        // TODO: legacy
+        *out_pstree = parse_source(buff_src, parse_flags, nullptr);
     }
 
     return res;
@@ -1300,25 +1328,21 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
                                                  false /* don't skip caret */);
     };
 
-    // Parse the string as an argument list.
+    // Parse the string as a freestanding argument list.
+    using namespace ast;
     parse_error_list_t errors;
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors,
-                                symbol_freestanding_argument_list)) {
-        // Failed to parse.
+    auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
+    if (!errors.empty()) {
         return get_error_text(errors);
     }
 
     // Get the root argument list and extract arguments from it.
     // Test each of these.
-    assert(!tree.empty() && "Should have parsed a tree");
-    tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
-    while (auto arg = arg_list.next_in_list<grammar::argument>()) {
-        const wcstring arg_src = arg.get_source(arg_list_src);
+    for (const argument_t &arg : ast.top()->as<freestanding_argument_list_t>()->arguments) {
+        const wcstring arg_src = arg.source(arg_list_src);
         if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
             return get_error_text(errors);
         }
     }
-
     return none();
 }
diff --git a/src/parse_util.h b/src/parse_util.h
index d7857b742..fd348ab9b 100644
--- a/src/parse_util.h
+++ b/src/parse_util.h
@@ -10,6 +10,10 @@
 #include "parse_tree.h"
 #include "tokenizer.h"
 
+namespace ast {
+struct argument_t;
+}
+
 /// Find the beginning and end of the first subshell in the specified string.
 ///
 /// \param in the string to search for subshells
@@ -141,10 +145,9 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
 /// Test if this argument contains any errors. Detected errors include syntax errors in command
 /// substitutions, improperly escaped characters and improper use of the variable expansion
 /// operator. This does NOT currently detect unterminated quotes.
-class parse_node_t;
+
 parser_test_error_bits_t parse_util_detect_errors_in_argument(
-    tnode_t<grammar::argument> node, const wcstring &arg_src,
-    parse_error_list_t *out_errors = nullptr);
+    const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors = nullptr);
 
 /// Given a string containing a variable expansion error, append an appropriate error to the errors
 /// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos

From 6b24edccf620f91119997f1451de8fd78924da67 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 21 Jun 2020 16:45:26 -0700
Subject: [PATCH 09/13] Adopt the new AST in add_pending_with_file_detection

This switches add_pending_with_file_detection from parsing with parse_tree
to the new ast.
---
 src/history.cpp | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/history.cpp b/src/history.cpp
index 0e2aa1385..d069a8278 100644
--- a/src/history.cpp
+++ b/src/history.cpp
@@ -1274,38 +1274,33 @@ void history_t::add_pending_with_file_detection(const wcstring &str,
 
     // Find all arguments that look like they could be file paths.
     bool needs_sync_write = false;
-    parse_node_tree_t tree;
-    parse_tree_from_string(str, parse_flag_none, &tree, nullptr);
+    using namespace ast;
+    auto ast = ast_t::parse(str);
 
     path_list_t potential_paths;
-    for (const parse_node_t &node : tree) {
-        if (!node.has_source()) {
-            continue;
-        }
-
-        if (node.type == symbol_argument) {
-            wcstring potential_path = node.get_source(str);
+    for (const node_t &node : ast) {
+        if (const argument_t *arg = node.try_as<argument_t>()) {
+            wcstring potential_path = arg->source(str);
             bool unescaped = unescape_string_in_place(&potential_path, UNESCAPE_DEFAULT);
             if (unescaped && string_could_be_path(potential_path)) {
                 potential_paths.push_back(potential_path);
             }
-        } else if (node.type == symbol_plain_statement) {
+        } else if (const decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
             // Hack hack hack - if the command is likely to trigger an exit, then don't do
             // background file detection, because we won't be able to write it to our history file
             // before we exit.
             // Also skip it for 'echo'. This is because echo doesn't take file paths, but also
             // because the history file test wants to find the commands in the history file
             // immediately after running them, so it can't tolerate the asynchronous file detection.
-            if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) {
+            if (stmt->decoration() == parse_statement_decoration_exec) {
                 needs_sync_write = true;
             }
 
-            if (maybe_t<wcstring> command = command_for_plain_statement({&tree, &node}, str)) {
-                unescape_string_in_place(&*command, UNESCAPE_DEFAULT);
-                if (*command == L"exit" || *command == L"reboot" || *command == L"restart" ||
-                    *command == L"echo") {
-                    needs_sync_write = true;
-                }
+            wcstring command = stmt->command.source(str);
+            unescape_string_in_place(&command, UNESCAPE_DEFAULT);
+            if (command == L"exit" || command == L"reboot" || command == L"restart" ||
+                command == L"echo") {
+                needs_sync_write = true;
             }
         }
     }

From 886603b2ca6aa49fd49b42b51eae057cc50aae32 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 28 Jun 2020 14:48:16 -0700
Subject: [PATCH 10/13] Adopt the new AST in fish_indent

This switches fish_indent from parsing with parse_tree
to the new ast.

This is the most difficult transition because the new ast retains less
lexical information than the old parse tree. The strategy is:

1. Use parse_util_compute_indents to compute indenting for each token.

2. Compute the "gap text" between the text of significant tokens. This
contains whitespace, comments, etc.

3. "Fix up" the gap text while leaving the significant tokens alone.
---
 src/fish_indent.cpp      | 738 +++++++++++++++++++++++++++------------
 tests/checks/indent.fish |  13 +-
 2 files changed, 519 insertions(+), 232 deletions(-)

diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp
index 17197f84c..491f99e49 100644
--- a/src/fish_indent.cpp
+++ b/src/fish_indent.cpp
@@ -44,6 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include "operation_context.h"
 #include "output.h"
 #include "parse_constants.h"
+#include "parse_util.h"
 #include "print_help.h"
 #include "tnode.h"
 #include "wutil.h"  // IWYU pragma: keep
@@ -52,8 +53,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 // See discussion at https://github.com/fish-shell/fish-shell/pull/6790
 #define SPACES_PER_INDENT 4
 
-// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc.
-using indent_t = unsigned int;
 static bool dump_parse_tree = false;
 static int ret = 0;
 
@@ -84,232 +83,539 @@ static wcstring read_file(FILE *f) {
     return result;
 }
 
-struct prettifier_t {
+namespace {
+/// From C++14.
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+/// \return the number of escaping backslashes before a character.
+/// \p idx may be "one past the end."
+size_t count_preceding_backslashes(const wcstring &text, size_t idx) {
+    assert(idx <= text.size() && "Out of bounds");
+    size_t backslashes = 0;
+    while (backslashes < idx && text.at(idx - backslashes - 1) == L'\\') {
+        backslashes++;
+    }
+    return backslashes;
+}
+
+/// \return whether a character at a given index is escaped.
+/// A character is escaped if it has an odd number of backslashes.
+bool char_is_escaped(const wcstring &text, size_t idx) {
+    return count_preceding_backslashes(text, idx) % 2 == 1;
+}
+
+using namespace ast;
+struct pretty_printer_t {
+    // Note: this got somewhat more complicated after introducing the new AST, because that AST no
+    // longer encodes detailed lexical information (e.g. every newline). This feels more complex
+    // than necessary and would probably benefit from a more layered approach where we identify
+    // certain runs, weight line breaks, have a cost model, etc.
+    pretty_printer_t(const wcstring &src, bool do_indent)
+        : source(src),
+          indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
+          ast(ast_t::parse(src, parse_flags())),
+          do_indent(do_indent),
+          gaps(compute_gaps()),
+          preferred_semi_locations(compute_preferred_semi_locations()) {
+        assert(indents.size() == source.size() && "indents and source should be same length");
+    }
+
     // Original source.
     const wcstring &source;
 
+    // The indents of our string.
+    // This has the same length as 'source' and describes the indentation level.
+    const std::vector<int> indents;
+
+    // The parsed ast.
+    const ast_t ast;
+
     // The prettifier output.
     wcstring output;
 
+    // The indent of the source range which we are currently emitting.
+    int current_indent{0};
+
     // Whether to indent, or just insert spaces.
     const bool do_indent;
 
-    // Whether we are at the beginning of a new line.
-    bool has_new_line = true;
+    // Whether the next gap text should hide the first newline.
+    bool gap_text_mask_newline{false};
 
-    // Whether the last token was a semicolon.
-    bool last_was_semicolon = false;
+    // The "gaps": a sorted set of ranges between tokens.
+    // These contain whitespace, comments, semicolons, and other lexical elements which are not
+    // present in the ast.
+    const std::vector<source_range_t> gaps;
 
-    // Whether we need to append a continuation new line before continuing.
-    bool needs_continuation_newline = false;
+    // The sorted set of source offsets of semi_nl_t which should be set as semis, not newlines.
+    // This is computed ahead of time for convenience.
+    const std::vector<uint32_t> preferred_semi_locations;
 
-    // Additional indentation due to line continuation (escaped newline)
-    uint32_t line_continuation_indent = 0;
+    // Flags we support.
+    using gap_flags_t = uint32_t;
+    enum {
+        default_flags = 0,
 
-    prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {}
+        // Whether to allow line splitting via escaped newlines.
+        // For example, in argument lists:
+        //
+        //   echo a \
+        //   b
+        //
+        // If this is not set, then split-lines will be joined.
+        allow_escaped_newlines = 1 << 0,
 
-    void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent,
-                       parse_token_type_t parent_type);
+        // Whether to skip emitting a space before this token.
+        // This is used when emitting redirection targets:
+        //    echo a >file
+        // No space is inserted between '>' and 'file'.
+        skip_space = 1 << 1,
+    };
 
-    void maybe_prepend_escaped_newline(const parse_node_t &node) {
-        if (node.has_preceding_escaped_newline()) {
-            output.append(L" \\");
-            append_newline(true);
+    // \return gap text flags for the gap text that comes *before* a given node type.
+    static gap_flags_t gap_text_flags_before_node(const node_t &node) {
+        gap_flags_t result = default_flags;
+        switch (node.type) {
+            // Allow escaped newlines in argument and redirection lists.
+            case type_t::argument:
+            case type_t::redirection:
+                result |= allow_escaped_newlines;
+                break;
+
+            case type_t::token_base:
+                // Allow escaped newlines before && and ||, and also pipes.
+                switch (node.as<token_base_t>()->type) {
+                    case parse_token_type_andand:
+                    case parse_token_type_oror:
+                    case parse_token_type_pipe:
+                        result |= allow_escaped_newlines;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+
+            default:
+                break;
+        }
+        return result;
+    }
+
+    // \return whether we are at the start of a new line.
+    bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
+
+    // \return whether the output emitted so far ends with an unescaped space.
+    // Escaped newlines at the very end of the output are skipped over before checking.
+    bool has_preceding_space() const {
+        long idx = static_cast<long>(output.size()) - 1;
+        // Skip escaped newlines in the output (idx always indexes 'output', never 'source').
+        // This is historical. Example:
+        //
+        // cmd1 \
+        // | cmd2
+        //
+        // we want the pipe to "see" the space after cmd1.
+        // TODO: this is too tricky, we should factor this better.
+        while (idx >= 0 && output.at(idx) == L'\n') {
+            size_t backslashes = count_preceding_backslashes(output, idx);
+            if (backslashes % 2 == 0) {
+                // Not escaped.
+                return false;
+            }
+            idx -= (1 + backslashes);
+        }
+        return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
+    }
+
+    // Entry point. Prettify our source code and return it.
+    wcstring prettify() {
+        output = wcstring{};
+        node_visitor(*this).accept(ast.top());
+
+        // Trailing gap text.
+        emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
+
+        // Replace all trailing newlines with just a single one.
+        while (!output.empty() && at_line_start()) {
+            output.pop_back();
+        }
+        emit_newline();
+
+        wcstring result = std::move(output);
+        return result;
+    }
+
+    // \return a substring of source.
+    wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
+
+    // Return the gap ranges from our ast.
+    std::vector<source_range_t> compute_gaps() const {
+        auto range_compare = [](source_range_t r1, source_range_t r2) {
+            if (r1.start != r2.start) return r1.start < r2.start;
+            return r1.length < r2.length;
+        };
+        // Collect the token ranges into a list.
+        std::vector<source_range_t> tok_ranges;
+        for (const node_t &node : ast) {
+            if (node.category == category_t::leaf) {
+                auto r = node.source_range();
+                if (r.length > 0) tok_ranges.push_back(r);
+            }
+        }
+        // Place a zero length range at end to aid in our inverting.
+        tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
+
+        // Our tokens should be sorted.
+        assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
+
+        // For each range, add a gap range between the previous range and this range.
+        std::vector<source_range_t> gaps;
+        uint32_t prev_end = 0;
+        for (source_range_t tok_range : tok_ranges) {
+            assert(tok_range.start >= prev_end &&
+                   "Token range should not overlap or be out of order");
+            if (tok_range.start >= prev_end) {
+                gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
+            }
+            prev_end = tok_range.start + tok_range.length;
+        }
+        return gaps;
+    }
+
+    // Return sorted list of semi-preferring semi_nl nodes.
+    std::vector<uint32_t> compute_preferred_semi_locations() const {
+        std::vector<uint32_t> result;
+        auto mark_as_semi = [&result](const optional_t<semi_nl_t> &n) {
+            if (n && n->has_source()) result.push_back(n->range.start);
+        };
+
+        // andor_job_lists get semis if they are short enough.
+        for (const auto &node : ast) {
+            // See if we have a condition and an andor_job_list.
+            const optional_t<semi_nl_t> *condition = nullptr;
+            const andor_job_list_t *andors = nullptr;
+            if (const auto *ifc = node.try_as<if_clause_t>()) {
+                condition = &ifc->condition.semi_nl;
+                andors = &ifc->andor_tail;
+            } else if (const auto *wc = node.try_as<while_header_t>()) {
+                condition = &wc->condition.semi_nl;
+                andors = &wc->andor_tail;
+            }
+
+            // This describes the heuristic of when to place and_or job lists on separate lines.
+            // That is, do we want:
+            //    if true; and false
+            //  or do we want:
+            //    if true
+            //        and false
+            // Lists with two or fewer get semis.
+            // Note the effective count is then three, because this list does not include the main
+            // condition.
+            if (andors && andors->count() > 0 && andors->count() <= 2) {
+                if (condition) mark_as_semi(*condition);
+                // Mark all but last of the andor list.
+                for (uint32_t i = 0; i + 1 < andors->count(); i++) {
+                    mark_as_semi(andors->at(i)->job.semi_nl);
+                }
+            }
+        }
+
+        // `x ; and y` gets semis if it has them already, and they are on the same line.
+        for (const auto &node : ast) {
+            if (const auto *job_list = node.try_as<job_list_t>()) {
+                const semi_nl_t *prev_job_semi_nl = nullptr;
+                for (const job_conjunction_t &job : *job_list) {
+                    // Set up prev_job_semi_nl for the next iteration to make control flow easier.
+                    const semi_nl_t *prev = prev_job_semi_nl;
+                    prev_job_semi_nl = job.semi_nl.contents.get();
+
+                    // Is this an 'and' or 'or' job?
+                    if (!job.decorator) continue;
+
+                    // Now see if we want to mark 'prev' as allowing a semi.
+                    // Did we have a previous semi_nl which was a semicolon?
+                    if (!prev || substr(prev->range) != L";") continue;
+
+                    // Is there a newline between them?
+                    assert(prev->range.start <= job.decorator->range.start &&
+                           "Ranges out of order");
+                    auto start = source.begin() + prev->range.start;
+                    auto end = source.begin() + job.decorator->range.end();
+                    if (std::find(start, end, L'\n') == end) {
+                        // We're going to allow the previous semi_nl to be a semi.
+                        result.push_back(prev->range.start);
+                    }
+                }
+            }
+        }
+        std::sort(result.begin(), result.end());
+        return result;
+    }
+
+    // Emit a space or indent as necessary, depending on the previous output.
+    void emit_space_or_indent(gap_flags_t flags = default_flags) {
+        if (at_line_start()) {
+            output.append(SPACES_PER_INDENT * current_indent, L' ');
+        } else if (!(flags & skip_space) && !has_preceding_space()) {
+            output.append(1, L' ');
         }
     }
 
-    void append_newline(bool is_continuation = false) {
-        output.push_back('\n');
-        has_new_line = true;
-        needs_continuation_newline = false;
-        line_continuation_indent = is_continuation ? 1 : 0;
+    // Emit "gap text:" newlines and comments from the original source.
+    // Gap text may be a few things:
+    //
+    // 1. Just a space is common. We will trim the spaces to be empty.
+    //
+    // 2. It may be a comment. Here the gap text is the comment, followed by the newline:
+    //
+    //    echo abc # arg
+    //    echo def
+    //
+    // 3. It may also be an escaped newline:
+    // Here the gap text is a space, backslash, newline, space.
+    //
+    //     echo \
+    //       hi
+    //
+    // 4. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
+    //
+    //   begin | stuff
+    //
+    //  We do not handle errors here - instead our caller does.
+    void emit_gap_text(const wcstring &gap_text, gap_flags_t flags) {
+        // Common case: if we are only spaces, do nothing.
+        if (gap_text.find_first_not_of(L' ') == wcstring::npos) return;
+
+        // Look to see if there is an escaped newline.
+        // Emit it if either we allow it, or it comes before the first comment.
+        // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
+        // text - we already know it has no semantic significance.
+        size_t escaped_nl = gap_text.find(L"\\\n");
+        if (escaped_nl != wcstring::npos) {
+            size_t comment_idx = gap_text.find(L'#');
+            if ((flags & allow_escaped_newlines) ||
+                (comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
+                // Emit a space before the escaped newline.
+                if (!at_line_start() && !has_preceding_space()) {
+                    output.append(L" ");
+                }
+                output.append(L"\\\n");
+            }
+        }
+
+        // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
+        // always emit one.
+        bool needs_nl = false;
+
+        tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
+        while (maybe_t<tok_t> tok = tokenizer.next()) {
+            wcstring tok_text = tokenizer.text_of(*tok);
+
+            if (needs_nl) {
+                emit_newline();
+                needs_nl = false;
+                if (tok_text == L"\n") continue;
+            } else if (gap_text_mask_newline) {
+                // We only respect mask_newline the first time through the loop.
+                gap_text_mask_newline = false;
+                if (tok_text == L"\n") continue;
+            }
+
+            if (tok->type == token_type_t::comment) {
+                emit_space_or_indent();
+                output.append(tok_text);
+                needs_nl = true;
+            } else if (tok->type == token_type_t::end) {
+                // This may be either a newline or semicolon.
+                // Semicolons found here are not part of the ast and can simply be removed.
+                // Newlines are preserved unless mask_newline is set.
+                if (tok_text == L"\n") {
+                    emit_newline();
+                }
+            } else {
+                fprintf(stderr,
+                        "Gap text should only have comments and newlines - instead found token "
+                        "type %d with text: %ls\n",
+                        (int)tok->type, tok_text.c_str());
+                DIE("Gap text should only have comments and newlines");
+            }
+        }
+        if (needs_nl) emit_newline();
     }
 
-    // Append whitespace as necessary. If we have a newline, append the appropriate indent.
-    // Otherwise, append a space.
-    void append_whitespace(indent_t node_indent) {
-        if (needs_continuation_newline) {
-            append_newline(true);
+    /// \return the gap text ending at a given index into the string, or empty if none.
+    source_range_t gap_text_to(uint32_t end) const {
+        auto where = std::lower_bound(
+            gaps.begin(), gaps.end(), end,
+            [](source_range_t r, uint32_t end) { return r.start + r.length < end; });
+        if (where == gaps.end() || where->start + where->length != end) {
+            // Not found.
+            return source_range_t{0, 0};
+        } else {
+            return *where;
         }
-        if (!has_new_line) {
-            output.push_back(L' ');
-        } else if (do_indent) {
-            output.append((node_indent + line_continuation_indent) * SPACES_PER_INDENT, L' ');
+    }
+
+    /// \return whether a range \p r overlaps an error range from our ast.
+    bool range_contained_error(source_range_t r) const {
+        const auto &errs = ast.extras().errors;
+        auto range_is_before = [](source_range_t x, source_range_t y) {
+            return x.start + x.length <= y.start;
+        };
+        assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
+               "Error ranges should be sorted");
+        return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
+    }
+
+    // Emit the gap text before a source range.
+    void emit_gap_text_before(source_range_t r, gap_flags_t flags) {
+        assert(r.start <= source.size() && "source out of bounds");
+        uint32_t start = r.start;
+        if (start < indents.size()) current_indent = indents.at(start);
+
+        // Find the gap text which ends at start.
+        source_range_t range = gap_text_to(start);
+        if (range.length > 0) {
+            // If this range contained an error, append the gap text without modification.
+            // For example in: echo foo "
+            // We don't want to mess with the quote.
+            if (range_contained_error(range)) {
+                output.append(substr(range));
+            } else {
+                emit_gap_text(substr(range), flags);
+            }
         }
+        // Always clear gap_text_mask_newline after emitting even empty gap text.
+        gap_text_mask_newline = false;
+    }
+
+    /// Given a string \p input, remove unnecessary quotes, etc.
+    wcstring clean_text(const wcstring &input) {
+        // Unescape the string - this leaves special markers around if there are any
+        // expansions or anything. We specifically tell it to not compute backslash-escapes
+        // like \U or \x, because we want to leave them intact.
+        wcstring unescaped = input;
+        unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
+
+        // Remove INTERNAL_SEPARATOR because that's a quote.
+        auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
+        unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
+
+        // If no non-"good" char is left, use the unescaped version.
+        // This can be extended to other characters, but giving the precise list is tough,
+        // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
+        // people feel more at ease.
+        auto goodchars = [](wchar_t ch) {
+            return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
+        };
+        if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
+            !unescaped.empty()) {
+            return unescaped;
+        } else {
+            return input;
+        }
+    }
+
+    // Emit a range of original text. This indents as needed, and also inserts preceding gap text.
+    // If \p flags contains allow_escaped_newlines, then permit escaped newlines; otherwise
+    // collapse such lines.
+    void emit_text(source_range_t r, gap_flags_t flags) {
+        emit_gap_text_before(r, flags);
+        current_indent = indents.at(r.start);
+        if (r.length > 0) {
+            emit_space_or_indent(flags);
+            output.append(clean_text(substr(r)));
+        }
+    }
+
+    template <type_t Type>
+    void emit_node_text(const leaf_t<Type> &node) {
+        emit_text(node.range, gap_text_flags_before_node(node));
+    }
+
+    // Emit one newline.
+    void emit_newline() { output.push_back(L'\n'); }
+
+    // Emit a semicolon.
+    void emit_semi() { output.push_back(L';'); }
+
+    // For branch and list nodes, default is to visit their children.
+    template <typename Node>
+    enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
+        node_visitor(*this).accept_children_of(node);
+    }
+
+    template <typename Node>
+    enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
+        node_visitor(*this).accept_children_of(node);
+    }
+
+    // Leaf nodes we just visit their text.
+    void visit(const keyword_base_t &node) { emit_node_text(node); }
+    void visit(const token_base_t &node) { emit_node_text(node); }
+    void visit(const argument_t &node) { emit_node_text(node); }
+    void visit(const variable_assignment_t &node) { emit_node_text(node); }
+
+    void visit(const semi_nl_t &node) {
+        // These are semicolons or newlines which are part of the ast. That means it includes e.g.
+        // ones terminating a job or 'if' header, but not random semis in job lists. We respect
+        // preferred_semi_locations to decide whether or not these should stay as newlines or
+        // become semicolons.
+
+        // Check if we should prefer a semicolon.
+        bool prefer_semi = node.range.length > 0 &&
+                           std::binary_search(preferred_semi_locations.begin(),
+                                              preferred_semi_locations.end(), node.range.start);
+        emit_gap_text_before(node.range, gap_text_flags_before_node(node));
+
+        // Don't emit anything if the gap text put us on a newline (because it had a comment).
+        if (!at_line_start()) {
+            prefer_semi ? emit_semi() : emit_newline();
+
+            // If it was a semi but we emitted a newline, swallow a subsequent newline.
+            if (!prefer_semi && substr(node.range) == L";") {
+                gap_text_mask_newline = true;
+            }
+        }
+    }
+
+    void visit(const redirection_t &node) {
+        // No space between a redirection operator and its target (#2899).
+        emit_text(node.oper.range, default_flags);
+        emit_text(node.target.range, skip_space);
+    }
+
+    void visit(const maybe_newlines_t &node) {
+        // Our newlines may have comments embedded in them, example:
+        //    cmd |
+        //    # something
+        //    cmd2
+        // Treat it as gap text.
+        if (node.range.length > 0) {
+            auto flags = gap_text_flags_before_node(node);
+            current_indent = indents.at(node.range.start);
+            emit_gap_text_before(node.range, flags);
+            wcstring text = source.substr(node.range.start, node.range.length);
+            emit_gap_text(text, flags);
+        }
+    }
+
+    void visit(const begin_header_t &node) {
+        // 'begin' does not require a newline after it, but we insert one.
+        node_visitor(*this).accept_children_of(node);
+        if (!at_line_start()) {
+            emit_newline();
+        }
+    }
+
+    // The flags we use to parse.
+    static parse_tree_flags_t parse_flags() {
+        return parse_flag_continue_after_error | parse_flag_include_comments |
+               parse_flag_leave_unterminated | parse_flag_show_blank_lines;
     }
 };
-
-// Dump a parse tree node in a form helpful to someone debugging the behavior of this program.
-static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) {
-    wchar_t nextc = L' ';
-    wchar_t prevc = L' ';
-    wcstring source_txt;
-    if (node.source_start != SOURCE_OFFSET_INVALID && node.source_length != SOURCE_OFFSET_INVALID) {
-        int nextc_idx = node.source_start + node.source_length;
-        if (static_cast<size_t>(nextc_idx) < source.size()) {
-            nextc = source[node.source_start + node.source_length];
-        }
-        if (node.source_start > 0) prevc = source[node.source_start - 1];
-        source_txt = source.substr(node.source_start, node.source_length);
-    }
-    wchar_t prevc_str[4] = {prevc, 0, 0, 0};
-    wchar_t nextc_str[4] = {nextc, 0, 0, 0};
-    if (prevc < L' ') {
-        prevc_str[0] = L'\\';
-        prevc_str[1] = L'c';
-        prevc_str[2] = prevc + '@';
-    }
-    if (nextc < L' ') {
-        nextc_str[0] = L'\\';
-        nextc_str[1] = L'c';
-        nextc_str[2] = nextc + '@';
-    }
-    std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n",
-                  node.source_start, node.source_length, node_indent,
-                  keyword_description(node.keyword), token_type_description(node.type), prevc_str,
-                  source_txt.c_str(), nextc_str);
-}
-
-void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx,
-                                 indent_t node_indent, parse_token_type_t parent_type) {
-    // Use an explicit stack to avoid stack overflow.
-    struct pending_node_t {
-        node_offset_t index;
-        indent_t indent;
-        parse_token_type_t parent_type;
-    };
-    std::stack<pending_node_t> pending_node_stack;
-
-    pending_node_stack.push({node_idx, node_indent, parent_type});
-    while (!pending_node_stack.empty()) {
-        pending_node_t args = pending_node_stack.top();
-        pending_node_stack.pop();
-        auto node_idx = args.index;
-        auto node_indent = args.indent;
-        auto parent_type = args.parent_type;
-
-        const parse_node_t &node = tree.at(node_idx);
-        const parse_token_type_t node_type = node.type;
-        const parse_token_type_t prev_node_type =
-            node_idx > 0 ? tree.at(node_idx - 1).type : token_type_invalid;
-
-        // Increment the indent if we are either a root job_list, or root case_item_list, or in an
-        // if or while header (#1665).
-        const bool is_root_job_list =
-            node_type == symbol_job_list && parent_type != symbol_job_list;
-        const bool is_root_case_list =
-            node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
-        const bool is_if_while_header =
-            (node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) &&
-            (parent_type == symbol_if_clause || parent_type == symbol_while_header);
-
-        if (is_root_job_list || is_root_case_list || is_if_while_header) {
-            node_indent += 1;
-        }
-
-        if (dump_parse_tree) dump_node(node_indent, node, source);
-
-        // Prepend any escaped newline, but only for certain cases.
-        // We allow it to split arguments (including at the end - this is like trailing commas in
-        // lists, makes for better diffs), to separate pipelines (but it has to be *before* the
-        // pipe, so the pipe symbol is the first thing on the new line after the indent) and to
-        // separate &&/|| job lists (`and` and `or` are handled separately below, as they *allow*
-        // semicolons)
-        // TODO: Handle
-        //     foo | \
-        //         bar
-        // so it just removes the escape - pipes don't need it. This was changed in some fish
-        // version, figure out which it was and if it is worth supporting.
-        if (prev_node_type == symbol_arguments_or_redirections_list ||
-            prev_node_type == symbol_argument_list || node_type == parse_token_type_andand ||
-            node_type == parse_token_type_pipe || node_type == parse_token_type_end) {
-            maybe_prepend_escaped_newline(node);
-        }
-
-        // handle comments, which come before the text
-        if (node.has_comments()) {
-            auto comment_nodes = tree.comment_nodes_for_node(node);
-            for (const auto &comment : comment_nodes) {
-                maybe_prepend_escaped_newline(*comment.node());
-                append_whitespace(node_indent);
-                auto source_range = comment.source_range();
-                output.append(source, source_range->start, source_range->length);
-                needs_continuation_newline = true;
-            }
-        }
-
-        if (node_type == parse_token_type_end) {
-            // For historical reasons, semicolon also get "TOK_END".
-            // We need to distinguish between them, because otherwise `a;;;;` gets extra lines
-            // instead of the semicolons. Semicolons are just ignored, unless they are followed by a
-            // command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a
-            // newline.
-            last_was_semicolon = false;
-            if (node.get_source(source) == L"\n") {
-                append_newline();
-            } else if (!has_new_line) {
-                // The semicolon is only useful if we haven't just had a newline.
-                last_was_semicolon = true;
-            }
-        } else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) ||
-                   node_type == parse_special_type_parse_error) {
-            if (last_was_semicolon) {
-                // We keep the semicolon for `; and` and `; or`,
-                // others we turn into newlines.
-                if (node.keyword != parse_keyword_t::kw_and &&
-                    node.keyword != parse_keyword_t::kw_or) {
-                    append_newline();
-                } else {
-                    output.push_back(L';');
-                }
-                last_was_semicolon = false;
-            }
-
-            if (node.has_source()) {
-                // Some type representing a particular token.
-                if (prev_node_type != parse_token_type_redirection) {
-                    append_whitespace(node_indent);
-                }
-                wcstring unescaped{source, node.source_start, node.source_length};
-                // Unescape the string - this leaves special markers around if there are any
-                // expansions or anything. We specifically tell it to not compute backslash-escapes
-                // like \U or \x, because we want to leave them intact.
-                unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
-
-                // Remove INTERNAL_SEPARATOR because that's a quote.
-                auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
-                unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote),
-                                unescaped.end());
-
-                // If no non-"good" char is left, use the unescaped version.
-                // This can be extended to other characters, but giving the precise list is tough,
-                // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
-                // people feel more at ease.
-                auto goodchars = [](wchar_t ch) {
-                    return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
-                };
-                if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) ==
-                        unescaped.end() &&
-                    !unescaped.empty()) {
-                    output.append(unescaped);
-                } else {
-                    output.append(source, node.source_start, node.source_length);
-                }
-                has_new_line = false;
-            }
-        }
-
-        // Put all children in stack in reversed order
-        // This way they will be processed in correct order.
-        for (node_offset_t idx = node.child_count; idx > 0; idx--) {
-            // Note: We pass our type to our child, which becomes its parent node type.
-            // Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe
-            // because we won't execute this call in that case since node.child_count should be
-            // zero.
-            pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type});
-        }
-    }
-}
+}  // namespace
 
 static const char *highlight_role_to_string(highlight_role_t role) {
 #define TEST_ROLE(x)          \
@@ -395,17 +701,7 @@ static std::string make_pygments_csv(const wcstring &src) {
 
 // Entry point for prettification.
 static wcstring prettify(const wcstring &src, bool do_indent) {
-    parse_node_tree_t parse_tree;
-    int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments |
-                       parse_flag_leave_unterminated | parse_flag_show_blank_lines);
-    if (!parse_tree_from_string(src, parse_flags, &parse_tree, nullptr)) {
-        return src;  // we return the original string on failure
-    }
-
     if (dump_parse_tree) {
-        const wcstring dump = parse_dump_tree(parse_tree, src);
-        std::fwprintf(stderr, L"%ls\n", dump.c_str());
-
         auto ast =
             ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
                                        parse_flag_show_extra_semis);
@@ -413,17 +709,9 @@ static wcstring prettify(const wcstring &src, bool do_indent) {
         std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
     }
 
-    // We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
-    // that have no parent, and all parse errors.
-    prettifier_t prettifier{src, do_indent};
-    for (node_offset_t i = 0; i < parse_tree.size(); i++) {
-        const parse_node_t &node = parse_tree.at(i);
-        if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) {
-            // A root node.
-            prettifier.prettify_node(parse_tree, i, 0, symbol_job_list);
-        }
-    }
-    return std::move(prettifier.output);
+    pretty_printer_t printer{src, do_indent};
+    wcstring output = printer.prettify();
+    return output;
 }
 
 /// Given a string and list of colors of the same size, return the string with HTML span elements
diff --git a/tests/checks/indent.fish b/tests/checks/indent.fish
index e3a41f93f..f6d1ff98d 100644
--- a/tests/checks/indent.fish
+++ b/tests/checks/indent.fish
@@ -49,7 +49,7 @@ end' | $fish_indent
 #CHECK: c
 #CHECK: echo thing
 #CHECK: end
-    
+
 echo 'echo foo |
 echo banana' | $fish_indent
 #CHECK: echo foo |
@@ -57,12 +57,11 @@ echo banana' | $fish_indent
 
 echo 'echo foo \\
 ;' | $fish_indent
-#CHECK: echo foo \
-#CHECK: 
+#CHECK: echo foo
 
 echo 'echo foo \\
 ' | $fish_indent
-#CHECK: echo foo \
+#CHECK: echo foo
 
 echo -n '
 begin
@@ -201,9 +200,9 @@ end; echo alpha "
 #CHECK: begin
 #CHECK: {{    }}echo hi
 #CHECK: else
+#CHECK:
 #CHECK: {{^}}echo bye
-#CHECK: end
-#CHECK: echo alpha "
+#CHECK: end; echo alpha "
 
 # issue 1665
 echo -n '
@@ -285,7 +284,7 @@ echo bye
 #CHECK: 
 #CHECK: echo hi |
 #CHECK: 
-#CHECK: echo bye
+#CHECK: {{    }}echo bye
 
 echo 'a;;;;;;' | $fish_indent
 #CHECK: a

From 6c6088f45c65a70e1639eba1b564f6050b866343 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Thu, 2 Jul 2020 00:12:28 -0700
Subject: [PATCH 11/13] Adopt the new AST in fish_tests

This switches fish_tests from parse_tree to the new AST.
---
 src/fish_tests.cpp | 99 +++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 45 deletions(-)

diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 08e14bfce..e61046d5a 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4350,12 +4350,12 @@ static void test_new_parser_correctness() {
         {L"true || false; and true", true},
         {L"true || ||", false},
         {L"|| true", false},
-        {L"true || \n\n false", true},
+        {L"true || \n\n false", false},
     };
 
     for (const auto &test : parser_tests) {
-        parse_node_tree_t parse_tree;
-        bool success = parse_tree_from_string(test.src, parse_flag_none, &parse_tree, NULL);
+        auto ast = ast::ast_t::parse(test.src);
+        bool success = !ast.errored();
         if (success && !test.ok) {
             err(L"\"%ls\" should NOT have parsed, but did", test.src);
         } else if (!success && test.ok) {
@@ -4384,7 +4384,7 @@ static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_co
 }
 
 static void test_new_parser_fuzzing() {
-    say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
+    say(L"Fuzzing parser");
     const wcstring fuzzes[] = {
         L"if",      L"else", L"for", L"in",  L"while", L"begin", L"function",
         L"switch",  L"case", L"end", L"and", L"or",    L"not",   L"command",
@@ -4395,7 +4395,6 @@ static void test_new_parser_fuzzing() {
     wcstring src;
     src.reserve(128);
 
-    parse_node_tree_t node_tree;
     parse_error_list_t errors;
 
     double start = timef();
@@ -4409,7 +4408,7 @@ static void test_new_parser_fuzzing() {
         unsigned long permutation = 0;
         while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++,
                                       &src)) {
-            parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
+            ast::ast_t::parse(src, parse_flag_continue_after_error);
         }
         if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation);
     }
@@ -4421,33 +4420,41 @@ static void test_new_parser_fuzzing() {
 // true if successful.
 static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args,
                              enum parse_statement_decoration_t *out_deco) {
+    using namespace ast;
     out_cmd->clear();
     out_joined_args->clear();
     *out_deco = parse_statement_decoration_none;
 
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
-        return false;
-    }
+    auto ast = ast_t::parse(src);
+    if (ast.errored()) return false;
 
     // Get the statement. Should only have one.
-    tnode_t<grammar::job_list> job_list{&tree, &tree.at(0)};
-    auto stmts = job_list.descendants<grammar::plain_statement>();
-    if (stmts.size() != 1) {
-        say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str());
-        return false;
+    const decorated_statement_t *statement = nullptr;
+    for (const auto &n : ast) {
+        if (const auto *tmp = n.try_as<decorated_statement_t>()) {
+            if (statement) {
+                say(L"More than one decorated statement found in '%ls'", src.c_str());
+                return false;
+            }
+            statement = tmp;
+        }
     }
-    tnode_t<grammar::plain_statement> stmt = stmts.at(0);
+    // A source with no decorated statement must fail here rather than dereference null below.
+    if (!statement) {
+        say(L"No decorated statement found in '%ls'", src.c_str());
+        return false;
+    }
 
     // Return its decoration and command.
-    *out_deco = get_decoration(stmt);
-    *out_cmd = *command_for_plain_statement(stmt, src);
+    *out_deco = statement->decoration();
+    *out_cmd = statement->command.source(src);
 
     // Return arguments separated by spaces.
     bool first = true;
-    for (auto arg_node : stmt.descendants<grammar::argument>()) {
+    for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) {
+        if (!arg.is_argument()) continue;
         if (!first) out_joined_args->push_back(L' ');
-        out_joined_args->append(arg_node.get_source(src));
+        out_joined_args->append(arg.source(src));
         first = false;
     }
 
@@ -4456,19 +4458,22 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
 
 // Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is
 // not (issue #1240).
-template <typename Type>
+template <ast::type_t Type>
 static void check_function_help(const wchar_t *src) {
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
+    using namespace ast;
+    auto ast = ast_t::parse(src);
+    if (ast.errored()) {
         err(L"Failed to parse '%ls'", src);
     }
 
-    tnode_t<grammar::job_list> node{&tree, &tree.at(0)};
-    auto node_list = node.descendants<Type>();
-    if (node_list.size() == 0) {
-        err(L"Failed to find node of type '%ls'", token_type_description(Type::token));
-    } else if (node_list.size() > 1) {
-        err(L"Found too many nodes of type '%ls'", token_type_description(Type::token));
+    int count = 0;
+    for (const node_t &node : ast) {
+        count += (node.type == Type);
+    }
+    if (count == 0) {
+        err(L"Failed to find node of type '%ls'", ast_type_to_string(Type));
+    } else if (count > 1) {
+        err(L"Found too many nodes of type '%ls'", ast_type_to_string(Type));
     }
 }
 
@@ -4515,30 +4520,32 @@ static void test_new_parser_ll2() {
                 test.src.c_str(), (int)test.deco, (int)deco, (long)__LINE__);
     }
 
-    check_function_help<grammar::plain_statement>(L"function -h");
-    check_function_help<grammar::plain_statement>(L"function --help");
-    check_function_help<grammar::function_header>(L"function --foo; end");
-    check_function_help<grammar::function_header>(L"function foo; end");
+    check_function_help<ast::type_t::decorated_statement>(L"function -h");
+    check_function_help<ast::type_t::decorated_statement>(L"function --help");
+    check_function_help<ast::type_t::function_header>(L"function --foo; end");
+    check_function_help<ast::type_t::function_header>(L"function foo; end");
 }
 
 static void test_new_parser_ad_hoc() {
+    using namespace ast;
     // Very ad-hoc tests for issues encountered.
     say(L"Testing new parser ad hoc tests");
 
     // Ensure that 'case' terminates a job list.
     const wcstring src = L"switch foo ; case bar; case baz; end";
-    parse_node_tree_t parse_tree;
-    bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
-    if (!success) {
+    auto ast = ast_t::parse(src);
+    if (ast.errored()) {
         err(L"Parsing failed");
     }
 
-    // Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd
+    // Expect two case_items, one for each case. The bug was that we'd
     // try to run a command 'case'.
-    tnode_t<grammar::job_list> root{&parse_tree, &parse_tree.at(0)};
-    auto node_list = root.descendants<grammar::case_item_list>();
-    if (node_list.size() != 3) {
-        err(L"Expected 3 case item nodes, found %lu", node_list.size());
+    int count = 0;
+    for (const auto &n : ast) {
+        count += (n.type == type_t::case_item);
+    }
+    if (count != 2) {
+        err(L"Expected 2 case item nodes, found %d", count);
     }
 }
 
@@ -4559,7 +4566,7 @@ static void test_new_parser_errors() {
         {L"if true ; end ; else", parse_error_unbalancing_else},
 
         {L"case", parse_error_unbalancing_case},
-        {L"if true ; case ; end", parse_error_unbalancing_case},
+        {L"if true ; case ; end", parse_error_generic},
     };
 
     for (const auto &test : tests) {
@@ -4567,15 +4574,17 @@ static void test_new_parser_errors() {
         parse_error_code_t expected_code = test.code;
 
         parse_error_list_t errors;
-        parse_node_tree_t parse_tree;
-        bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
-        if (success) {
+        auto ast = ast::ast_t::parse(src, parse_flag_none, &errors);
+        if (!ast.errored()) {
             err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
         }
 
         if (errors.size() != 1) {
             err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors",
                 src.c_str(), errors.size());
+            for (const auto &error : errors) {
+                std::fwprintf(stderr, L"%ls\n", error.describe(src, false).c_str());
+            }
         } else if (errors.at(0).code != expected_code) {
             err(L"Source '%ls' was expected to produce error code %lu, but instead produced error "
                 L"code %lu",

From 3534c07584fec4fc7d69842ce4dc32ca8c407f16 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Fri, 3 Jul 2020 11:16:51 -0700
Subject: [PATCH 12/13] Adopt the new AST in parse_execution

parse_execution is what turns a parsed tree into jobs, etc. Switch it from
parse_tree to the new AST.
---
 src/builtin_function.cpp |   5 +-
 src/builtin_function.h   |   7 +-
 src/exec.cpp             |   6 +-
 src/fish_tests.cpp       |   2 +
 src/function.cpp         |  17 +-
 src/function.h           |   6 +-
 src/parse_execution.cpp  | 756 ++++++++++++++++++++++-----------------
 src/parse_execution.h    |  89 ++---
 src/parse_tree.cpp       |  14 +-
 src/parse_tree.h         |  15 +-
 src/parse_util.cpp       |   4 +-
 src/parser.cpp           |  16 +-
 src/parser.h             |   4 +-
 src/proc.h               |   8 +-
 14 files changed, 530 insertions(+), 419 deletions(-)

diff --git a/src/builtin_function.cpp b/src/builtin_function.cpp
index 28b40706d..5300b3e07 100644
--- a/src/builtin_function.cpp
+++ b/src/builtin_function.cpp
@@ -200,8 +200,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
 /// Define a function. Calls into `function.cpp` to perform the heavy lifting of defining a
 /// function.
 int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
-                     const parsed_source_ref_t &source,
-                     tnode_t<grammar::block_statement> func_node) {
+                     const parsed_source_ref_t &source, const ast::block_statement_t &func_node) {
     assert(source && "Missing source in builtin_function");
     // The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with
     // that property. This is needed because this builtin has a different signature than the other
@@ -252,7 +251,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
     props->shadow_scope = opts.shadow_scope;
     props->named_arguments = std::move(opts.named_arguments);
     props->parsed_source = source;
-    props->func_node = func_node;
+    props->func_node = &func_node;
 
     // Populate inherit_vars.
     for (const wcstring &name : opts.inherit_vars) {
diff --git a/src/builtin_function.h b/src/builtin_function.h
index 9499a9a9f..4da1a378c 100644
--- a/src/builtin_function.h
+++ b/src/builtin_function.h
@@ -8,7 +8,10 @@
 class parser_t;
 struct io_streams_t;
 
+namespace ast {
+struct block_statement_t;
+}
+
 int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
-                     const parsed_source_ref_t &source,
-                     tnode_t<grammar::block_statement> func_node);
+                     const parsed_source_ref_t &source, const ast::block_statement_t &func_node);
 #endif
diff --git a/src/exec.cpp b/src/exec.cpp
index bca3f90cd..45ce0cae1 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -623,10 +623,10 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
 
     if (p->type == process_type_t::block_node) {
         const parsed_source_ref_t &source = p->block_node_source;
-        tnode_t<grammar::statement> node = p->internal_block_node;
+        const ast::statement_t *node = p->internal_block_node;
         assert(source && node && "Process is missing node info");
         return [=](parser_t &parser) {
-            return parser.eval_node(source, node, io_chain, job_group).status;
+            return parser.eval_node(source, *node, io_chain, job_group).status;
         };
     } else {
         assert(p->type == process_type_t::function);
@@ -638,7 +638,7 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
         auto argv = move_to_sharedptr(p->get_argv_array().to_list());
         return [=](parser_t &parser) {
             // Pull out the job list from the function.
-            tnode_t<grammar::job_list> body = props->func_node.child<1>();
+            const ast::job_list_t &body = props->func_node->jobs;
             const block_t *fb = function_prepare_environment(parser, *argv, *props);
             auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group);
             function_restore_environment(parser, fb);
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index e61046d5a..5aa171a8b 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4567,6 +4567,8 @@ static void test_new_parser_errors() {
 
         {L"case", parse_error_unbalancing_case},
         {L"if true ; case ; end", parse_error_generic},
+
+        {L"true | and", parse_error_andor_in_pipeline},
     };
 
     for (const auto &test : tests) {
diff --git a/src/function.cpp b/src/function.cpp
index e841d55fc..7ec28275e 100644
--- a/src/function.cpp
+++ b/src/function.cpp
@@ -224,17 +224,14 @@ bool function_get_definition(const wcstring &name, wcstring &out_definition) {
     const function_info_t *func = funcset->get_info(name);
     if (!func || !func->props) return false;
     // We want to preserve comments that the AST attaches to the header (#5285).
-    // Take everything from the end of the header to the end of the body.
+    // Take everything from the end of the header to the 'end' keyword.
     const auto &props = func->props;
-    namespace g = grammar;
-    tnode_t<g::block_header> header = props->func_node.child<0>();
-    tnode_t<g::job_list> jobs = props->func_node.child<1>();
-    auto header_src = header.source_range();
-    auto jobs_src = jobs.source_range();
-    if (header_src && jobs_src) {
+    auto header_src = props->func_node->header->try_source_range();
+    auto end_kw_src = props->func_node->end.try_source_range();
+    if (header_src && end_kw_src) {
         uint32_t body_start = header_src->start + header_src->length;
-        uint32_t body_end = jobs_src->start + jobs_src->length;
-        assert(body_start <= jobs_src->start && "job list must come after header");
+        uint32_t body_end = end_kw_src->start;
+        assert(body_start <= body_end && "end keyword should come after header");
         out_definition = wcstring(props->parsed_source->src, body_start, body_end - body_start);
     }
     return true;
@@ -313,7 +310,7 @@ int function_get_definition_lineno(const wcstring &name) {
     // return one plus the number of newlines at offsets less than the start of our function's
     // statement (which includes the header).
     // TODO: merge with line_offset_of_character_at_offset?
-    auto source_range = func->props->func_node.source_range();
+    auto source_range = func->props->func_node->try_source_range();
     assert(source_range && "Function has no source range");
     uint32_t func_start = source_range->start;
     const wcstring &source = func->props->parsed_source->src;
diff --git a/src/function.h b/src/function.h
index 3f612efab..2de5f3081 100644
--- a/src/function.h
+++ b/src/function.h
@@ -15,6 +15,10 @@
 
 class parser_t;
 
+namespace ast {
+struct block_statement_t;
+}
+
 /// A function's constant properties. These do not change once initialized.
 struct function_properties_t {
     /// Parsed source containing the function.
@@ -23,7 +27,7 @@ struct function_properties_t {
     /// Node containing the function statement, pointing into parsed_source.
     /// We store block_statement, not job_list, so that comments attached to the header are
     /// preserved.
-    tnode_t<grammar::block_statement> func_node;
+    const ast::block_statement_t *func_node;
 
     /// List of all named arguments for this function.
     wcstring_list_t named_arguments;
diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp
index ef3b77f6b..22f4669e4 100644
--- a/src/parse_execution.cpp
+++ b/src/parse_execution.cpp
@@ -25,6 +25,7 @@
 #include <type_traits>
 #include <vector>
 
+#include "ast.h"
 #include "builtin.h"
 #include "builtin_function.h"
 #include "common.h"
@@ -44,40 +45,74 @@
 #include "proc.h"
 #include "reader.h"
 #include "timer.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "trace.h"
 #include "util.h"
 #include "wildcard.h"
 #include "wutil.h"
 
-namespace g = grammar;
-
 /// These are the specific statement types that support redirections.
-static constexpr bool type_is_redirectable_block(parse_token_type_t type) {
-    return type == symbol_block_statement || type == symbol_if_statement ||
-           type == symbol_switch_statement;
+static constexpr bool type_is_redirectable_block(ast::type_t type) {
+    using t = ast::type_t;
+    return type == t::block_statement || type == t::if_statement || type == t::switch_statement;
 }
 
-static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) {
+static bool specific_statement_type_is_redirectable_block(const ast::node_t &node) {
     return type_is_redirectable_block(node.type);
 }
 
 /// Get the name of a redirectable block, for profiling purposes.
-static wcstring profiling_cmd_name_for_redirectable_block(const parse_node_t &node,
-                                                          const parse_node_tree_t &tree,
-                                                          const wcstring &src) {
+static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &node,
+                                                          const parsed_source_t &pstree) {
+    using namespace ast;
     assert(specific_statement_type_is_redirectable_block(node));
-    assert(node.has_source());
+
+    auto source_range = node.try_source_range();
+    assert(source_range.has_value() && "No source range for block");
+
+    size_t src_end = 0;
+    switch (node.type) {
+        case type_t::block_statement: {
+            const node_t *block_header = node.as<block_statement_t>()->header.get();
+            switch (block_header->type) {
+                case type_t::for_header:
+                    src_end = block_header->as<for_header_t>()->semi_nl.source_range().start;
+                    break;
+
+                case type_t::while_header:
+                    src_end = block_header->as<while_header_t>()->condition.source_range().end();
+                    break;
+
+                case type_t::function_header:
+                    src_end = block_header->as<function_header_t>()->semi_nl.source_range().start;
+                    break;
+
+                case type_t::begin_header:
+                    src_end = block_header->as<begin_header_t>()->kw_begin.source_range().end();
+                    break;
+
+                default:
+                    DIE("Unexpected block header type");
+            }
+        } break;
+
+        case type_t::if_statement:
+            src_end = node.as<if_statement_t>()->if_clause.condition.source_range().end();
+            break;
+
+        case type_t::switch_statement:
+            src_end = node.as<switch_statement_t>()->semi_nl.source_range().start;
+            break;
+
+        default:
+            DIE("Not a redirectable block type");
+            break;
+    }
+
+    assert(src_end >= source_range->start && "Invalid source end");
 
     // Get the source for the block, and cut it at the next statement terminator.
-    const size_t src_start = node.source_start;
-
-    auto term = tree.find_child<g::end_command>(node);
-    assert(term.has_source() && term.source_range()->start >= src_start);
-    size_t src_len = term.source_range()->start - src_start;
-
-    wcstring result = wcstring(src, src_start, src_len);
+    wcstring result = pstree.src.substr(source_range->start, src_end - source_range->start);
     result.append(L"...");
     return result;
 }
@@ -98,12 +133,13 @@ parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree,
 
 // Utilities
 
-wcstring parse_execution_context_t::get_source(const parse_node_t &node) const {
-    return node.get_source(pstree->src);
+wcstring parse_execution_context_t::get_source(const ast::node_t &node) const {
+    return node.source(pstree->src);
 }
 
-tnode_t<g::plain_statement> parse_execution_context_t::infinite_recursive_statement_in_job_list(
-    tnode_t<g::job_list> job_list, wcstring *out_func_name) const {
+const ast::decorated_statement_t *
+parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::job_list_t &jobs,
+                                                                    wcstring *out_func_name) const {
     // This is a bit fragile. It is a test to see if we are inside of function call, but not inside
     // a block in that function call. If, in the future, the rules for what block scopes are pushed
     // on function invocation changes, then this check will break.
@@ -111,60 +147,67 @@ tnode_t<g::plain_statement> parse_execution_context_t::infinite_recursive_statem
     bool is_within_function_call =
         (current && parent && current->type() == block_type_t::top && parent->is_function_call());
     if (!is_within_function_call) {
-        return {};
+        return nullptr;
     }
 
     // Get the function name of the immediate block.
     const wcstring &forbidden_function_name = parent->function_name;
 
     // Get the first job in the job list.
-    tnode_t<g::job> first_job = job_list.try_get_child<g::job_conjunction, 1>().child<0>();
-    if (!first_job) {
-        return {};
-    }
+    const ast::job_conjunction_t *jc = jobs.at(0);
+    if (!jc) return nullptr;
+    const ast::job_t *job = &jc->job;
 
-    // Here's the statement node we find that's infinite recursive.
-    tnode_t<grammar::plain_statement> infinite_recursive_statement;
+    // Helper returning the statement if it infinitely recurses into this function, else null.
+    auto statement_recurses =
+        [&](const ast::statement_t &stat) -> const ast::decorated_statement_t * {
+        // Ignore non-decorated statements like `if`, etc.
+        const ast::decorated_statement_t *dc =
+            stat.contents.contents->try_as<ast::decorated_statement_t>();
+        if (!dc) return nullptr;
 
-    // Ignore the jobs variable assigment and "time" prefixes.
-    tnode_t<g::statement> statement = first_job.child<2>();
-    tnode_t<g::job_continuation> continuation = first_job.child<3>();
-    const null_environment_t nullenv{};
-    while (statement) {
-        // Get the list of plain statements.
         // Ignore statements with decorations like 'builtin' or 'command', since those
         // are not infinite recursion. In particular that is what enables 'wrapper functions'.
-        tnode_t<g::plain_statement> plain_statement =
-            statement.try_get_child<g::decorated_statement, 0>()
-                .try_get_child<g::plain_statement, 0>();
-        if (plain_statement) {
-            maybe_t<wcstring> cmd = command_for_plain_statement(plain_statement, pstree->src);
-            if (cmd &&
-                expand_one(*cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) &&
-                cmd == forbidden_function_name) {
-                // This is it.
-                infinite_recursive_statement = plain_statement;
-                if (out_func_name != nullptr) {
-                    *out_func_name = forbidden_function_name;
-                }
+        if (dc->decoration() != parse_statement_decoration_none) return nullptr;
+
+        // Check the command.
+        wcstring cmd = dc->command.source(pstree->src);
+        bool forbidden =
+            !cmd.empty() &&
+            expand_one(cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) &&
+            cmd == forbidden_function_name;
+        return forbidden ? dc : nullptr;
+    };
+
+    const ast::decorated_statement_t *infinite_recursive_statement = nullptr;
+
+    // Check main statement.
+    infinite_recursive_statement = statement_recurses(jc->job.statement);
+
+    // Check piped remainder.
+    if (!infinite_recursive_statement) {
+        for (const ast::job_continuation_t &c : job->continuation) {
+            if (const auto *s = statement_recurses(c.statement)) {
+                infinite_recursive_statement = s;
                 break;
             }
         }
-        statement = continuation.next_in_list<g::statement>();
     }
 
+    if (infinite_recursive_statement && out_func_name) {
+        *out_func_name = forbidden_function_name;
+    }
+    // May be null, if no statement in the first job recurses.
     return infinite_recursive_statement;
 }
 
 process_type_t parse_execution_context_t::process_type_for_command(
-    tnode_t<grammar::plain_statement> statement, const wcstring &cmd) const {
+    const ast::decorated_statement_t &statement, const wcstring &cmd) const {
     enum process_type_t process_type = process_type_t::external;
 
     // Determine the process type, which depends on the statement decoration (command, builtin,
     // etc).
-    enum parse_statement_decoration_t decoration = get_decoration(statement);
-
-    switch (decoration) {
+    switch (statement.decoration()) {
         case parse_statement_decoration_exec:
             process_type = process_type_t::exec;
             break;
@@ -209,31 +252,33 @@ maybe_t<end_execution_reason_t> parse_execution_context_t::check_end_execution()
 }
 
 /// Return whether the job contains a single statement, of block type, with no redirections.
-bool parse_execution_context_t::job_is_simple_block(tnode_t<g::job> job_node) const {
-    tnode_t<g::statement> statement = job_node.child<2>();
-
+bool parse_execution_context_t::job_is_simple_block(const ast::job_t &job) const {
+    using namespace ast;
     // Must be no pipes.
-    if (job_node.child<3>().try_get_child<g::tok_pipe, 0>()) {
+    if (!job.continuation.empty()) {
         return false;
     }
 
-    // Helper to check if an argument or redirection list has no redirections.
-    auto is_empty = [](tnode_t<g::arguments_or_redirections_list> lst) -> bool {
-        return !lst.next_in_list<g::redirection>();
+    // Helper to check if an argument_or_redirection_list_t has no redirections.
+    auto no_redirs = [](const argument_or_redirection_list_t &list) -> bool {
+        for (const argument_or_redirection_t &val : list) {
+            if (val.is_redirection()) return false;
+        }
+        return true;
     };
 
     // Check if we're a block statement with redirections. We do it this obnoxious way to preserve
     // type safety (in case we add more specific statement types).
-    const parse_node_t &specific_statement = statement.get_child_node<0>();
-    switch (specific_statement.type) {
-        case symbol_block_statement:
-            return is_empty(statement.require_get_child<g::block_statement, 0>().child<3>());
-        case symbol_switch_statement:
-            return is_empty(statement.require_get_child<g::switch_statement, 0>().child<5>());
-        case symbol_if_statement:
-            return is_empty(statement.require_get_child<g::if_statement, 0>().child<3>());
-        case symbol_not_statement:
-        case symbol_decorated_statement:
+    const node_t &ss = *job.statement.contents.contents;
+    switch (ss.type) {
+        case type_t::block_statement:
+            return no_redirs(ss.as<block_statement_t>()->args_or_redirs);
+        case type_t::switch_statement:
+            return no_redirs(ss.as<switch_statement_t>()->args_or_redirs);
+        case type_t::if_statement:
+            return no_redirs(ss.as<if_statement_t>()->args_or_redirs);
+        case type_t::not_statement:
+        case type_t::decorated_statement:
             // not block statements
             return false;
         default:
@@ -243,14 +288,19 @@ bool parse_execution_context_t::job_is_simple_block(tnode_t<g::job> job_node) co
 }
 
 end_execution_reason_t parse_execution_context_t::run_if_statement(
-    tnode_t<g::if_statement> statement, const block_t *associated_block) {
+    const ast::if_statement_t &statement, const block_t *associated_block) {
+    using namespace ast;
+    using job_list_t = ast::job_list_t;
     end_execution_reason_t result = end_execution_reason_t::ok;
 
     // We have a sequence of if clauses, with a final else, resulting in a single job list that we
     // execute.
-    tnode_t<g::job_list> job_list_to_execute;
-    tnode_t<g::if_clause> if_clause = statement.child<0>();
-    tnode_t<g::else_clause> else_clause = statement.child<1>();
+    const job_list_t *job_list_to_execute = nullptr;
+    const if_clause_t *if_clause = &statement.if_clause;
+
+    // Index of the *next* elseif_clause to test.
+    const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses;
+    size_t next_elseif_idx = 0;
 
     // We start with the 'if'.
     trace_if_enabled(*parser, L"if");
@@ -262,59 +312,54 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
         }
 
         // An if condition has a job and a "tail" of andor jobs, e.g. "foo ; and bar; or baz".
-        tnode_t<g::job_conjunction> condition_head = if_clause.child<1>();
-        tnode_t<g::andor_job_list> condition_boolean_tail = if_clause.child<3>();
-
         // Check the condition and the tail. We treat end_execution_reason_t::error here as failure,
         // in accordance with historic behavior.
-        end_execution_reason_t cond_ret = run_job_conjunction(condition_head, associated_block);
+        end_execution_reason_t cond_ret =
+            run_job_conjunction(if_clause->condition, associated_block);
         if (cond_ret == end_execution_reason_t::ok) {
-            cond_ret = run_job_list(condition_boolean_tail, associated_block);
+            cond_ret = run_job_list(if_clause->andor_tail, associated_block);
         }
         const bool take_branch =
             (cond_ret == end_execution_reason_t::ok) && parser->get_last_status() == EXIT_SUCCESS;
 
         if (take_branch) {
             // Condition succeeded.
-            job_list_to_execute = if_clause.child<4>();
+            job_list_to_execute = &if_clause->body;
             break;
         }
-        auto else_cont = else_clause.try_get_child<g::else_continuation, 1>();
-        if (!else_cont) {
-            // 'if' condition failed, no else clause, return 0, we're done.
-            parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK));
-            break;
+
+        // See if we have an elseif.
+        const auto *elseif_clause = elseif_clauses.at(next_elseif_idx++);
+        if (elseif_clause) {
+            trace_if_enabled(*parser, L"else if");
+            if_clause = &elseif_clause->if_clause;
         } else {
-            // We have an 'else continuation' (either else-if or else).
-            if (auto maybe_if_clause = else_cont.try_get_child<g::if_clause, 0>()) {
-                // it's an 'else if', go to the next one.
-                if_clause = maybe_if_clause;
-                else_clause = else_cont.try_get_child<g::else_clause, 1>();
-                assert(else_clause && "Expected to have an else clause");
-                trace_if_enabled(*parser, L"else if");
-            } else {
-                // It's the final 'else', we're done.
-                job_list_to_execute = else_cont.try_get_child<g::job_list, 1>();
-                assert(job_list_to_execute && "Should have a job list");
-                trace_if_enabled(*parser, L"else");
-                break;
-            }
+            break;
         }
     }
 
-    // Execute any job list we got.
-    if (job_list_to_execute) {
+    if (!job_list_to_execute) {
+        // None of our if or elseif conditions succeeded.
+        // Fall back to the else body, if there is one.
+        if (statement.else_clause) {
+            trace_if_enabled(*parser, L"else");
+            job_list_to_execute = &statement.else_clause->body;
+        }
+    }
+
+    if (!job_list_to_execute) {
+        // All conditions failed and there is no else clause, so there is nothing to run.
+        // No job list means no successful conditions, so return 0 (issue #1443).
+        parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK));
+    } else {
+        // Execute the job list we got.
         block_t *ib = parser->push_block(block_t::if_block());
-        run_job_list(job_list_to_execute, ib);
+        run_job_list(*job_list_to_execute, ib);
         if (auto ret = check_end_execution()) {
             result = *ret;
         }
         parser->pop_block(ib);
-    } else {
-        // No job list means no successful conditions, so return 0 (issue #1443).
-        parser->set_last_statuses(statuses_t::just(STATUS_CMD_OK));
     }
-
     trace_if_enabled(*parser, L"end if");
 
     // It's possible there's a last-minute cancellation (issue #1297).
@@ -327,7 +372,7 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
 }
 
 end_execution_reason_t parse_execution_context_t::run_begin_statement(
-    tnode_t<g::job_list> contents) {
+    const ast::job_list_t &contents) {
     // Basic begin/end block. Push a scope block, run jobs, pop it
     trace_if_enabled(*parser, L"begin");
     block_t *sb = parser->push_block(block_t::scope_block(block_type_t::begin));
@@ -339,10 +384,12 @@ end_execution_reason_t parse_execution_context_t::run_begin_statement(
 
 // Define a function.
 end_execution_reason_t parse_execution_context_t::run_function_statement(
-    tnode_t<grammar::block_statement> statement, tnode_t<grammar::function_header> header) {
+    const ast::block_statement_t &statement, const ast::function_header_t &header) {
+    using namespace ast;
     // Get arguments.
     wcstring_list_t arguments;
-    argument_node_list_t arg_nodes = header.descendants<g::argument>();
+    ast_args_list_t arg_nodes = get_argument_nodes(header.args);
+    arg_nodes.insert(arg_nodes.begin(), &header.first_arg);
     end_execution_reason_t result =
         this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob);
 
@@ -362,48 +409,46 @@ end_execution_reason_t parse_execution_context_t::run_function_statement(
 }
 
 end_execution_reason_t parse_execution_context_t::run_block_statement(
-    tnode_t<g::block_statement> statement, const block_t *associated_block) {
-    tnode_t<g::block_header> bheader = statement.child<0>();
-    tnode_t<g::job_list> contents = statement.child<1>();
-
+    const ast::block_statement_t &statement, const block_t *associated_block) {
+    const ast::node_t &bh = *statement.header.contents;
+    const ast::job_list_t &contents = statement.jobs;
     end_execution_reason_t ret = end_execution_reason_t::ok;
-    if (auto header = bheader.try_get_child<g::for_header, 0>()) {
-        ret = run_for_statement(header, contents);
-    } else if (auto header = bheader.try_get_child<g::while_header, 0>()) {
-        ret = run_while_statement(header, contents, associated_block);
-    } else if (auto header = bheader.try_get_child<g::function_header, 0>()) {
-        ret = run_function_statement(statement, header);
-    } else if (auto header = bheader.try_get_child<g::begin_header, 0>()) {
+    if (const auto *fh = bh.try_as<ast::for_header_t>()) {
+        ret = run_for_statement(*fh, contents);
+    } else if (const auto *wh = bh.try_as<ast::while_header_t>()) {
+        ret = run_while_statement(*wh, contents, associated_block);
+    } else if (const auto *fh = bh.try_as<ast::function_header_t>()) {
+        ret = run_function_statement(statement, *fh);
+    } else if (bh.try_as<ast::begin_header_t>()) {
         ret = run_begin_statement(contents);
     } else {
-        FLOGF(error, L"Unexpected block header: %ls\n", bheader.node()->describe().c_str());
+        FLOGF(error, L"Unexpected block header: %ls\n", bh.describe().c_str());
         PARSER_DIE();
     }
     return ret;
 }
 
 end_execution_reason_t parse_execution_context_t::run_for_statement(
-    tnode_t<grammar::for_header> header, tnode_t<grammar::job_list> block_contents) {
+    const ast::for_header_t &header, const ast::job_list_t &block_contents) {
     // Get the variable name: `for var_name in ...`. We expand the variable name. It better result
     // in just one.
-    tnode_t<g::tok_string> var_name_node = header.child<1>();
-    wcstring for_var_name = get_source(var_name_node);
+    wcstring for_var_name = header.var_name.source(get_source());
     if (!expand_one(for_var_name, expand_flags_t{}, ctx)) {
-        return report_error(STATUS_EXPAND_ERROR, var_name_node,
+        return report_error(STATUS_EXPAND_ERROR, header.var_name,
                             FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str());
     }
 
     // Get the contents to iterate over.
     wcstring_list_t arguments;
-    end_execution_reason_t ret = this->expand_arguments_from_nodes(
-        get_argument_nodes(header.child<3>()), &arguments, nullglob);
+    ast_args_list_t arg_nodes = get_argument_nodes(header.args);
+    end_execution_reason_t ret = this->expand_arguments_from_nodes(arg_nodes, &arguments, nullglob);
     if (ret != end_execution_reason_t::ok) {
         return ret;
     }
 
     auto var = parser->vars().get(for_var_name, ENV_DEFAULT);
     if (var && var->read_only()) {
-        return report_error(STATUS_INVALID_ARGS, var_name_node,
+        return report_error(STATUS_INVALID_ARGS, header.var_name,
                             L"You cannot use read-only variable '%ls' in a for loop",
                             for_var_name.c_str());
     }
@@ -416,7 +461,7 @@ end_execution_reason_t parse_execution_context_t::run_for_statement(
     assert(retval == ENV_OK);
 
     if (!valid_var_name(for_var_name)) {
-        return report_error(STATUS_INVALID_ARGS, var_name_node, BUILTIN_ERR_VARNAME, L"for",
+        return report_error(STATUS_INVALID_ARGS, header.var_name, BUILTIN_ERR_VARNAME, L"for",
                             for_var_name.c_str());
     }
 
@@ -454,17 +499,16 @@ end_execution_reason_t parse_execution_context_t::run_for_statement(
 }
 
 end_execution_reason_t parse_execution_context_t::run_switch_statement(
-    tnode_t<grammar::switch_statement> statement) {
+    const ast::switch_statement_t &statement) {
     // Get the switch variable.
-    tnode_t<grammar::argument> switch_value_n = statement.child<1>();
-    const wcstring switch_value = get_source(switch_value_n);
+    const wcstring switch_value = get_source(statement.argument);
 
     // Expand it. We need to offset any errors by the position of the string.
     completion_list_t switch_values_expanded;
     parse_error_list_t errors;
     auto expand_ret = expand_string(switch_value, &switch_values_expanded,
                                     expand_flag::no_descriptions, ctx, &errors);
-    parse_error_offset_source_start(&errors, switch_value_n.source_range()->start);
+    parse_error_offset_source_start(&errors, statement.argument.range.start);
 
     switch (expand_ret.result) {
         case expand_result_t::error:
@@ -474,12 +518,12 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
             return end_execution_reason_t::cancelled;
 
         case expand_result_t::wildcard_no_match:
-            return report_error(STATUS_UNMATCHED_WILDCARD, switch_value_n, WILDCARD_ERR_MSG,
-                                get_source(switch_value_n).c_str());
+            return report_error(STATUS_UNMATCHED_WILDCARD, statement.argument, WILDCARD_ERR_MSG,
+                                get_source(statement.argument).c_str());
 
         case expand_result_t::ok:
             if (switch_values_expanded.size() > 1) {
-                return report_error(STATUS_INVALID_ARGS, switch_value_n,
+                return report_error(STATUS_INVALID_ARGS, statement.argument,
                                     _(L"switch: Expected at most one argument, got %lu\n"),
                                     switch_values_expanded.size());
             }
@@ -497,9 +541,8 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
     block_t *sb = parser->push_block(block_t::switch_block());
 
     // Expand case statements.
-    tnode_t<g::case_item_list> case_item_list = statement.child<3>();
-    tnode_t<g::case_item> matching_case_item{};
-    while (auto case_item = case_item_list.next_in_list<g::case_item>()) {
+    const ast::case_item_t *matching_case_item = nullptr;
+    for (const ast::case_item_t &case_item : statement.cases) {
         if (auto ret = check_end_execution()) {
             result = *ret;
             break;
@@ -508,7 +551,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
         // Expand arguments. A case item list may have a wildcard that fails to expand to
         // anything. We also report case errors, but don't stop execution; i.e. a case item that
         // contains an unexpandable process will report and then fail to match.
-        auto arg_nodes = get_argument_nodes(case_item.child<1>());
+        ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments);
         wcstring_list_t case_args;
         end_execution_reason_t case_result =
             this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob);
@@ -520,7 +563,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
 
                 // If this matched, we're done.
                 if (match) {
-                    matching_case_item = case_item;
+                    matching_case_item = &case_item;
                     break;
                 }
             }
@@ -531,8 +574,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
     if (matching_case_item) {
         // Success, evaluate the job list.
         assert(result == end_execution_reason_t::ok && "Expected success");
-        auto job_list = matching_case_item.child<3>();
-        result = this->run_job_list(job_list, sb);
+        result = this->run_job_list(matching_case_item->body, sb);
     }
 
     parser->pop_block(sb);
@@ -540,7 +582,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
 }
 
 end_execution_reason_t parse_execution_context_t::run_while_statement(
-    tnode_t<grammar::while_header> header, tnode_t<grammar::job_list> contents,
+    const ast::while_header_t &header, const ast::job_list_t &contents,
     const block_t *associated_block) {
     end_execution_reason_t ret = end_execution_reason_t::ok;
 
@@ -555,10 +597,6 @@ end_execution_reason_t parse_execution_context_t::run_while_statement(
     // affordance for the first condition.
     bool first_cond_check = true;
 
-    // The conditions of the while loop.
-    tnode_t<g::job_conjunction> condition_head = header.child<1>();
-    tnode_t<g::andor_job_list> condition_boolean_tail = header.child<3>();
-
     trace_if_enabled(*parser, L"while");
 
     // Run while the condition is true.
@@ -571,9 +609,9 @@ end_execution_reason_t parse_execution_context_t::run_while_statement(
 
         // Check the condition.
         end_execution_reason_t cond_ret =
-            this->run_job_conjunction(condition_head, associated_block);
+            this->run_job_conjunction(header.condition, associated_block);
         if (cond_ret == end_execution_reason_t::ok) {
-            cond_ret = run_job_list(condition_boolean_tail, associated_block);
+            cond_ret = run_job_list(header.andor_tail, associated_block);
         }
 
         // If the loop condition failed to execute, then exit the loop without modifying the exit
@@ -623,13 +661,15 @@ end_execution_reason_t parse_execution_context_t::run_while_statement(
 }
 
 // Reports an error. Always returns end_execution_reason_t::error.
-end_execution_reason_t parse_execution_context_t::report_error(int status, const parse_node_t &node,
+end_execution_reason_t parse_execution_context_t::report_error(int status, const ast::node_t &node,
                                                                const wchar_t *fmt, ...) const {
+    auto r = node.source_range();
+
     // Create an error.
     parse_error_list_t error_list = parse_error_list_t(1);
     parse_error_t *error = &error_list.at(0);
-    error->source_start = node.source_start;
-    error->source_length = node.source_length;
+    error->source_start = r.start;
+    error->source_length = r.length;
     error->code = parse_error_syntax;  // hackish
 
     va_list va;
@@ -662,9 +702,27 @@ end_execution_reason_t parse_execution_context_t::report_errors(
     return end_execution_reason_t::error;
 }
 
+// static
+parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes(
+    const ast::argument_list_t &args) {
+    ast_args_list_t result;
+    for (const ast::argument_t &arg : args) result.push_back(&arg);
+    return result;
+}
+
+// static
+parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes(
+    const ast::argument_or_redirection_list_t &args) {
+    ast_args_list_t result;
+    for (const ast::argument_or_redirection_t &v : args) {
+        if (v.is_argument()) result.push_back(&v.argument());
+    }
+    return result;
+}
+
 /// Handle the case of command not found.
 end_execution_reason_t parse_execution_context_t::handle_command_not_found(
-    const wcstring &cmd_str, tnode_t<g::plain_statement> statement, int err_code) {
+    const wcstring &cmd_str, const ast::decorated_statement_t &statement, int err_code) {
     // We couldn't find the specified command. This is a non-fatal error. We want to set the exit
     // status to 127, which is the standard number used by other shells like bash and zsh.
 
@@ -677,7 +735,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found(
         // error messages.
         wcstring_list_t event_args;
         {
-            auto args = get_argument_nodes(statement.child<1>());
+            ast_args_list_t args = get_argument_nodes(statement.args_or_redirs);
             end_execution_reason_t arg_result =
                 this->expand_arguments_from_nodes(args, &event_args, failglob);
 
@@ -696,7 +754,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found(
 }
 
 end_execution_reason_t parse_execution_context_t::expand_command(
-    tnode_t<grammar::plain_statement> statement, wcstring *out_cmd,
+    const ast::decorated_statement_t &statement, wcstring *out_cmd,
     wcstring_list_t *out_args) const {
     // Here we're expanding a command, for example $HOME/bin/stuff or $randomthing. The first
     // completion becomes the command itself, everything after becomes arguments. Command
@@ -704,8 +762,8 @@ end_execution_reason_t parse_execution_context_t::expand_command(
     parse_error_list_t errors;
 
     // Get the unexpanded command string. We expect to always get it here.
-    wcstring unexp_cmd = *command_for_plain_statement(statement, pstree->src);
-    size_t pos_of_command_token = statement.child<0>().source_range()->start;
+    wcstring unexp_cmd = get_source(statement.command);
+    size_t pos_of_command_token = statement.command.range.start;
 
     // Expand the string to produce completions, and report errors.
     expand_result_t expand_err =
@@ -715,7 +773,7 @@ end_execution_reason_t parse_execution_context_t::expand_command(
         // excluding prefixes such as " " or "if ".
         // This means that the error positions are relative to the beginning
         // of the token; we need to make them relative to the original source.
-        for (auto &error : errors) error.source_start += pos_of_command_token;
+        parse_error_offset_source_start(&errors, pos_of_command_token);
         return report_errors(STATUS_ILLEGAL_CMD, errors);
     } else if (expand_err == expand_result_t::wildcard_no_match) {
         return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG,
@@ -734,7 +792,7 @@ end_execution_reason_t parse_execution_context_t::expand_command(
 
 /// Creates a 'normal' (non-block) process.
 end_execution_reason_t parse_execution_context_t::populate_plain_process(
-    job_t *job, process_t *proc, tnode_t<grammar::plain_statement> statement) {
+    job_t *job, process_t *proc, const ast::decorated_statement_t &statement) {
     assert(job != nullptr);
     assert(proc != nullptr);
 
@@ -765,11 +823,9 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
         const int no_cmd_err_code = errno;
 
         // If the specified command does not exist, and is undecorated, try using an implicit cd.
-        if (!has_command && get_decoration(statement) == parse_statement_decoration_none) {
+        if (!has_command && statement.decoration() == parse_statement_decoration_none) {
             // Implicit cd requires an empty argument and redirection list.
-            tnode_t<g::arguments_or_redirections_list> args = statement.child<1>();
-            if (args_from_cmd_expansion.empty() && !args.try_get_child<g::argument, 0>() &&
-                !args.try_get_child<g::redirection, 0>()) {
+            if (statement.args_or_redirs.empty()) {
                 // Ok, no arguments or redirections; check to see if the command is a directory.
                 use_implicit_cd =
                     path_as_implicit_cd(cmd, parser->vars().get_pwd_slash(), parser->vars())
@@ -804,7 +860,8 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
         cmd_args.push_back(cmd);
         cmd_args.insert(cmd_args.end(), args_from_cmd_expansion.begin(),
                         args_from_cmd_expansion.end());
-        argument_node_list_t arg_nodes = statement.descendants<g::argument>();
+
+        ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs);
         end_execution_reason_t arg_result =
             this->expand_arguments_from_nodes(arg_nodes, &cmd_args, glob_behavior);
         if (arg_result != end_execution_reason_t::ok) {
@@ -812,7 +869,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
         }
 
         // The set of IO redirections that we construct for the process.
-        auto reason = this->determine_redirections(statement.child<1>(), &redirections);
+        auto reason = this->determine_redirections(statement.args_or_redirs, &redirections);
         if (reason != end_execution_reason_t::ok) {
             return reason;
         }
@@ -832,23 +889,23 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
 // Determine the list of arguments, expanding stuff. Reports any errors caused by expansion. If we
 // have a wildcard that could not be expanded, report the error and continue.
 end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
-    const argument_node_list_t &argument_nodes, wcstring_list_t *out_arguments,
+    const ast_args_list_t &argument_nodes, wcstring_list_t *out_arguments,
     globspec_t glob_behavior) {
     // Get all argument nodes underneath the statement. We guess we'll have that many arguments (but
     // may have more or fewer, if there are wildcards involved).
     out_arguments->reserve(out_arguments->size() + argument_nodes.size());
     completion_list_t arg_expanded;
-    for (const auto &arg_node : argument_nodes) {
+    for (const ast::argument_t *arg_node : argument_nodes) {
         // Expect all arguments to have source.
-        assert(arg_node.has_source());
-        const wcstring arg_str = arg_node.get_source(pstree->src);
+        assert(arg_node->has_source());
+        const wcstring arg_str = get_source(*arg_node);
 
         // Expand this string.
         parse_error_list_t errors;
         arg_expanded.clear();
         auto expand_ret =
             expand_string(arg_str, &arg_expanded, expand_flag::no_descriptions, ctx, &errors);
-        parse_error_offset_source_start(&errors, arg_node.source_range()->start);
+        parse_error_offset_source_start(&errors, arg_node->range.start);
         switch (expand_ret.result) {
             case expand_result_t::error: {
                 return this->report_errors(expand_ret.status, errors);
@@ -862,8 +919,8 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
                     // For no_exec, ignore the error - this might work at runtime.
                     if (no_exec()) return end_execution_reason_t::ok;
                     // Report the unmatched wildcard error and stop processing.
-                    return report_error(STATUS_UNMATCHED_WILDCARD, arg_node, WILDCARD_ERR_MSG,
-                                        get_source(arg_node).c_str());
+                    return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node, WILDCARD_ERR_MSG,
+                                        get_source(*arg_node).c_str());
                 }
                 break;
             }
@@ -892,41 +949,42 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
 }
 
 end_execution_reason_t parse_execution_context_t::determine_redirections(
-    tnode_t<g::arguments_or_redirections_list> node, redirection_spec_list_t *out_redirections) {
+    const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections) {
     // Get all redirection nodes underneath the statement.
-    while (auto redirect_node = node.next_in_list<g::redirection>()) {
-        wcstring target;  // file path or target fd
-        auto redirect = redirection_for_node(redirect_node, pstree->src, &target);
+    for (const ast::argument_or_redirection_t &arg_or_redir : list) {
+        if (!arg_or_redir.is_redirection()) continue;
+        const ast::redirection_t &redir_node = arg_or_redir.redirection();
 
-        if (!redirect || !redirect->is_valid()) {
+        maybe_t<pipe_or_redir_t> oper = pipe_or_redir_t::from_string(get_source(redir_node.oper));
+        if (!oper || !oper->is_valid()) {
             // TODO: figure out if this can ever happen. If so, improve this error message.
-            return report_error(STATUS_INVALID_ARGS, redirect_node, _(L"Invalid redirection: %ls"),
-                                redirect_node.get_source(pstree->src).c_str());
+            return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection: %ls"),
+                                get_source(redir_node).c_str());
         }
 
         // PCA: I can't justify this skip_variables flag. It was like this when I got here.
+        wcstring target = get_source(redir_node.target);
         bool target_expanded =
             expand_one(target, no_exec() ? expand_flag::skip_variables : expand_flags_t{}, ctx);
         if (!target_expanded || target.empty()) {
             // TODO: Improve this error message.
-            return report_error(STATUS_INVALID_ARGS, redirect_node,
+            return report_error(STATUS_INVALID_ARGS, redir_node,
                                 _(L"Invalid redirection target: %ls"), target.c_str());
         }
 
         // Make a redirection spec from the redirect token.
-        assert(redirect && redirect->is_valid() && "expected to have a valid redirection");
-
-        redirection_spec_t spec{redirect->fd, redirect->mode, std::move(target)};
+        assert(oper && oper->is_valid() && "expected to have a valid redirection");
+        redirection_spec_t spec{oper->fd, oper->mode, std::move(target)};
 
         // Validate this spec.
         if (spec.mode == redirection_mode_t::fd && !spec.is_close() && !spec.get_target_as_fd()) {
             const wchar_t *fmt =
                 _(L"Requested redirection to '%ls', which is not a valid file descriptor");
-            return report_error(STATUS_INVALID_ARGS, redirect_node, fmt, spec.target.c_str());
+            return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec.target.c_str());
         }
         out_redirections->push_back(std::move(spec));
 
-        if (redirect->stderr_merge) {
+        if (oper->stderr_merge) {
             // This was a redirect like &> which also modifies stderr.
             // Also redirect stderr to stdout.
             out_redirections->push_back(get_stderr_merge());
@@ -936,57 +994,70 @@ end_execution_reason_t parse_execution_context_t::determine_redirections(
 }
 
 end_execution_reason_t parse_execution_context_t::populate_not_process(
-    job_t *job, process_t *proc, tnode_t<g::not_statement> not_statement) {
+    job_t *job, process_t *proc, const ast::not_statement_t &not_statement) {
     auto &flags = job->mut_flags();
     flags.negate = !flags.negate;
-    auto optional_time = not_statement.require_get_child<g::optional_time, 2>();
-    if (optional_time.tag() == parse_optional_time_time) {
+    if (not_statement.time) {
         flags.has_time_prefix = true;
         if (!job->mut_flags().foreground) {
             return this->report_error(STATUS_INVALID_ARGS, not_statement, ERROR_TIME_BACKGROUND);
         }
     }
-    return this->populate_job_process(
-        job, proc, not_statement.require_get_child<g::statement, 3>(),
-        not_statement.require_get_child<g::variable_assignments, 1>());
+    return this->populate_job_process(job, proc, not_statement.contents, not_statement.variables);
 }
 
 template <typename Type>
 end_execution_reason_t parse_execution_context_t::populate_block_process(
-    job_t *job, process_t *proc, tnode_t<g::statement> statement,
-    tnode_t<Type> specific_statement) {
+    job_t *job, process_t *proc, const ast::statement_t &statement,
+    const Type &specific_statement) {
+    using namespace ast;
     // We handle block statements by creating process_type_t::block_node, that will bounce back to
     // us when it's time to execute them.
     UNUSED(job);
-    static_assert(Type::token == symbol_block_statement || Type::token == symbol_if_statement ||
-                      Type::token == symbol_switch_statement,
+    static_assert(Type::AstType == type_t::block_statement ||
+                      Type::AstType == type_t::if_statement ||
+                      Type::AstType == type_t::switch_statement,
                   "Invalid block process");
-    assert(statement && "statement missing");
-    assert(specific_statement && "specific_statement missing");
 
-    // The set of IO redirections that we construct for the process.
-    // TODO: fix this ugly find_child.
-    auto arguments = specific_statement.template find_child<g::arguments_or_redirections_list>();
+    // Get the argument or redirections list.
+    // TODO: args_or_redirs should be available without resolving the statement type.
+    const argument_or_redirection_list_t *args_or_redirs = nullptr;
+
+    // Upcast to permit dropping the 'template' keyword.
+    const node_t &ss = specific_statement;
+    switch (Type::AstType) {
+        case type_t::block_statement:
+            args_or_redirs = &ss.as<block_statement_t>()->args_or_redirs;
+            break;
+        case type_t::if_statement:
+            args_or_redirs = &ss.as<if_statement_t>()->args_or_redirs;
+            break;
+        case type_t::switch_statement:
+            args_or_redirs = &ss.as<switch_statement_t>()->args_or_redirs;
+            break;
+        default:
+            DIE("Unexpected block node type");
+    }
+    assert(args_or_redirs && "Should have args_or_redirs");
+
     redirection_spec_list_t redirections;
-    auto reason = this->determine_redirections(arguments, &redirections);
+    auto reason = this->determine_redirections(*args_or_redirs, &redirections);
     if (reason == end_execution_reason_t::ok) {
         proc->type = process_type_t::block_node;
         proc->block_node_source = pstree;
-        proc->internal_block_node = statement;
+        proc->internal_block_node = &statement;
         proc->set_redirection_specs(std::move(redirections));
     }
     return reason;
 }
 
 end_execution_reason_t parse_execution_context_t::apply_variable_assignments(
-    process_t *proc, tnode_t<grammar::variable_assignments> variable_assignments,
+    process_t *proc, const ast::variable_assignment_list_t &variable_assignment_list,
     const block_t **block) {
-    variable_assignment_node_list_t assignment_list =
-        get_variable_assignment_nodes(variable_assignments);
-    if (assignment_list.empty()) return end_execution_reason_t::ok;
+    if (variable_assignment_list.empty()) return end_execution_reason_t::ok;
     *block = parser->push_block(block_t::variable_assignment_block());
-    for (const auto &variable_assignment : assignment_list) {
-        const wcstring &source = variable_assignment.get_source(pstree->src);
+    for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) {
+        const wcstring &source = get_source(variable_assignment);
         auto equals_pos = variable_assignment_equals_pos(source);
         assert(equals_pos);
         const wcstring variable_name = source.substr(0, *equals_pos);
@@ -996,8 +1067,7 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments(
         // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function
         auto expand_ret = expand_string(expression, &expression_expanded,
                                         expand_flag::no_descriptions, ctx, &errors);
-        parse_error_offset_source_start(
-            &errors, variable_assignment.source_range()->start + *equals_pos + 1);
+        parse_error_offset_source_start(&errors, variable_assignment.range.start + *equals_pos + 1);
         switch (expand_ret.result) {
             case expand_result_t::error:
                 return this->report_errors(expand_ret.status, errors);
@@ -1024,10 +1094,11 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments(
 }
 
 end_execution_reason_t parse_execution_context_t::populate_job_process(
-    job_t *job, process_t *proc, tnode_t<grammar::statement> statement,
-    tnode_t<grammar::variable_assignments> variable_assignments) {
+    job_t *job, process_t *proc, const ast::statement_t &statement,
+    const ast::variable_assignment_list_t &variable_assignments) {
+    using namespace ast;
     // Get the "specific statement" which is boolean / block / if / switch / decorated.
-    const parse_node_t &specific_statement = statement.get_child_node<0>();
+    const node_t &specific_statement = *statement.contents.contents;
 
     const block_t *block = nullptr;
     end_execution_reason_t result =
@@ -1038,27 +1109,26 @@ end_execution_reason_t parse_execution_context_t::populate_job_process(
     if (result != end_execution_reason_t::ok) return result;
 
     switch (specific_statement.type) {
-        case symbol_not_statement: {
-            result = this->populate_not_process(job, proc, {&tree(), &specific_statement});
+        case type_t::not_statement: {
+            result =
+                this->populate_not_process(job, proc, *specific_statement.as<not_statement_t>());
             break;
         }
-        case symbol_block_statement:
-            result = this->populate_block_process(
-                job, proc, statement, tnode_t<g::block_statement>(&tree(), &specific_statement));
+        case type_t::block_statement:
+            result = this->populate_block_process(job, proc, statement,
+                                                  *specific_statement.as<block_statement_t>());
             break;
-        case symbol_if_statement:
-            result = this->populate_block_process(
-                job, proc, statement, tnode_t<g::if_statement>(&tree(), &specific_statement));
+        case type_t::if_statement:
+            result = this->populate_block_process(job, proc, statement,
+                                                  *specific_statement.as<if_statement_t>());
             break;
-        case symbol_switch_statement:
-            result = this->populate_block_process(
-                job, proc, statement, tnode_t<g::switch_statement>(&tree(), &specific_statement));
+        case type_t::switch_statement:
+            result = this->populate_block_process(job, proc, statement,
+                                                  *specific_statement.as<switch_statement_t>());
             break;
-        case symbol_decorated_statement: {
-            // Get the plain statement. It will pull out the decoration itself.
-            tnode_t<g::decorated_statement> dec_stat{&tree(), &specific_statement};
-            auto plain_statement = dec_stat.find_child<g::plain_statement>();
-            result = this->populate_plain_process(job, proc, plain_statement);
+        case type_t::decorated_statement: {
+            result = this->populate_plain_process(job, proc,
+                                                  *specific_statement.as<decorated_statement_t>());
             break;
         }
         default: {
@@ -1073,47 +1143,36 @@ end_execution_reason_t parse_execution_context_t::populate_job_process(
 }
 
 end_execution_reason_t parse_execution_context_t::populate_job_from_job_node(
-    job_t *j, tnode_t<grammar::job> job_node, const block_t *associated_block) {
+    job_t *j, const ast::job_t &job_node, const block_t *associated_block) {
     UNUSED(associated_block);
 
     // Tell the job what its command is.
     j->set_command(get_source(job_node));
 
-    // We are going to construct process_t structures for every statement in the job. Get the first
-    // statement.
-    tnode_t<g::optional_time> optional_time = job_node.child<0>();
-    tnode_t<g::variable_assignments> variable_assignments = job_node.child<1>();
-    tnode_t<g::statement> statement = job_node.child<2>();
-
+    // We are going to construct process_t structures for every statement in the job.
     // Create processes. Each one may fail.
     process_list_t processes;
     processes.emplace_back(new process_t());
-    if (optional_time.tag() == parse_optional_time_time) {
+    if (job_node.time) {
         j->mut_flags().has_time_prefix = true;
-        if (job_node_is_background(job_node)) {
+        if (job_node.bg) {
             return this->report_error(STATUS_INVALID_ARGS, job_node, ERROR_TIME_BACKGROUND);
         }
     }
-    end_execution_reason_t result =
-        this->populate_job_process(j, processes.back().get(), statement, variable_assignments);
+    end_execution_reason_t result = this->populate_job_process(
+        j, processes.back().get(), job_node.statement, job_node.variables);
 
-    // Construct process_ts for job continuations (pipelines), by walking the list until we hit the
-    // terminal (empty) job continuation.
-    tnode_t<g::job_continuation> job_cont = job_node.child<3>();
-    assert(job_cont);
-    while (auto pipe = job_cont.try_get_child<g::tok_pipe, 0>()) {
+    // Construct process_ts for job continuations (pipelines).
+    for (const ast::job_continuation_t &jc : job_node.continuation) {
         if (result != end_execution_reason_t::ok) {
             break;
         }
-        auto variable_assignments = job_cont.require_get_child<g::variable_assignments, 2>();
-        auto statement = job_cont.require_get_child<g::statement, 3>();
-
         // Handle the pipe, whose fd may not be the obvious stdout.
-        auto parsed_pipe = pipe_or_redir_t::from_string(get_source(pipe));
+        auto parsed_pipe = pipe_or_redir_t::from_string(get_source(jc.pipe));
         assert(parsed_pipe.has_value() && parsed_pipe->is_pipe && "Failed to parse valid pipe");
         if (!parsed_pipe->is_valid()) {
-            result = report_error(STATUS_INVALID_ARGS, pipe, ILLEGAL_FD_ERR_MSG,
-                                  get_source(pipe).c_str());
+            result = report_error(STATUS_INVALID_ARGS, jc.pipe, ILLEGAL_FD_ERR_MSG,
+                                  get_source(jc.pipe).c_str());
             break;
         }
         processes.back()->pipe_write_fd = parsed_pipe->fd;
@@ -1127,12 +1186,7 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node(
 
         // Store the new process (and maybe with an error).
         processes.emplace_back(new process_t());
-        result =
-            this->populate_job_process(j, processes.back().get(), statement, variable_assignments);
-
-        // Get the next continuation.
-        job_cont = job_cont.require_get_child<g::job_continuation, 4>();
-        assert(job_cont);
+        result = this->populate_job_process(j, processes.back().get(), jc.statement, jc.variables);
     }
 
     // Inform our processes of who is first and last
@@ -1158,7 +1212,7 @@ static bool remove_job(parser_t &parser, job_t *job) {
     return false;
 }
 
-end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_node,
+end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_t &job_node,
                                                             const block_t *associated_block) {
     if (auto ret = check_end_execution()) {
         return *ret;
@@ -1180,7 +1234,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_
     scoped_push<int> saved_eval_level(&parser->eval_level, parser->eval_level + 1);
 
     // Save the node index.
-    scoped_push<tnode_t<grammar::job>> saved_node(&executing_job_node, job_node);
+    scoped_push<const ast::job_t *> saved_node(&executing_job_node, &job_node);
 
     // Profiling support.
     long long start_time = 0, parse_time = 0, exec_time = 0;
@@ -1194,34 +1248,33 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_
     // However, if there are no redirections, then we can just jump into the block directly, which
     // is significantly faster.
     if (job_is_simple_block(job_node)) {
-        tnode_t<g::optional_time> optional_time = job_node.child<0>();
+        bool do_time = job_node.time.has_value();
         // If no-exec has been given, there is nothing to time.
-        cleanup_t timer = push_timer(optional_time.tag() == parse_optional_time_time && !no_exec());
-        tnode_t<g::variable_assignments> variable_assignments = job_node.child<1>();
+        cleanup_t timer = push_timer(do_time && !no_exec());
         const block_t *block = nullptr;
         end_execution_reason_t result =
-            this->apply_variable_assignments(nullptr, variable_assignments, &block);
+            this->apply_variable_assignments(nullptr, job_node.variables, &block);
         cleanup_t scope([&]() {
             if (block) parser->pop_block(block);
         });
 
-        tnode_t<g::statement> statement = job_node.child<2>();
-        const parse_node_t &specific_statement = statement.get_child_node<0>();
-        assert(specific_statement_type_is_redirectable_block(specific_statement));
+        const ast::node_t *specific_statement = job_node.statement.contents.get();
+        assert(specific_statement_type_is_redirectable_block(*specific_statement));
         if (result == end_execution_reason_t::ok) {
-            switch (specific_statement.type) {
-                case symbol_block_statement: {
-                    result =
-                        this->run_block_statement({&tree(), &specific_statement}, associated_block);
+            switch (specific_statement->type) {
+                case ast::type_t::block_statement: {
+                    result = this->run_block_statement(
+                        *specific_statement->as<ast::block_statement_t>(), associated_block);
                     break;
                 }
-                case symbol_if_statement: {
-                    result =
-                        this->run_if_statement({&tree(), &specific_statement}, associated_block);
+                case ast::type_t::if_statement: {
+                    result = this->run_if_statement(*specific_statement->as<ast::if_statement_t>(),
+                                                    associated_block);
                     break;
                 }
-                case symbol_switch_statement: {
-                    result = this->run_switch_statement({&tree(), &specific_statement});
+                case ast::type_t::switch_statement: {
+                    result = this->run_switch_statement(
+                        *specific_statement->as<ast::switch_statement_t>());
                     break;
                 }
                 default: {
@@ -1240,8 +1293,8 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_
             profile_item->level = parser->eval_level;
             profile_item->parse = 0;
             profile_item->exec = static_cast<int>(exec_time - start_time);
-            profile_item->cmd = profiling_cmd_name_for_redirectable_block(
-                specific_statement, this->tree(), this->pstree->src);
+            profile_item->cmd =
+                profiling_cmd_name_for_redirectable_block(*specific_statement, *this->pstree);
             profile_item->skipped = false;
         }
 
@@ -1258,7 +1311,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_
 
     job_t::properties_t props{};
     props.wants_terminal = wants_job_control && !ld.is_event;
-    props.initial_background = job_node_is_background(job_node);
+    props.initial_background = job_node.bg.has_value();
     props.skip_notification =
         ld.is_subshell || ld.is_block || ld.is_event || !parser->is_interactive();
     props.from_event_handler = ld.is_event;
@@ -1329,29 +1382,36 @@ end_execution_reason_t parse_execution_context_t::run_1_job(tnode_t<g::job> job_
 }
 
 end_execution_reason_t parse_execution_context_t::run_job_conjunction(
-    tnode_t<grammar::job_conjunction> job_expr, const block_t *associated_block) {
-    end_execution_reason_t result = end_execution_reason_t::ok;
-    tnode_t<g::job_conjunction> cursor = job_expr;
-    // continuation is the parent of the cursor
-    tnode_t<g::job_conjunction_continuation> continuation;
-    while (cursor) {
-        if (auto reason = check_end_execution()) {
-            result = *reason;
-            break;
+    const ast::job_conjunction_t &job_expr, const block_t *associated_block) {
+    if (auto reason = check_end_execution()) {
+        return *reason;
+    }
+    end_execution_reason_t result = run_1_job(job_expr.job, associated_block);
+
+    for (const ast::job_conjunction_continuation_t &jc : job_expr.continuations) {
+        if (result != end_execution_reason_t::ok) {
+            return result;
         }
+        if (auto reason = check_end_execution()) {
+            return *reason;
+        }
+        // Check the conjunction type.
         bool skip = false;
-        if (continuation) {
-            // Check the conjunction type.
-            parse_job_decoration_t conj = bool_statement_type(continuation);
-            assert((conj == parse_job_decoration_and || conj == parse_job_decoration_or) &&
-                   "Unexpected conjunction");
-            skip = should_skip(conj);
+        switch (jc.conjunction.type) {
+            case parse_token_type_andand:
+                // AND. Skip if the last job failed.
+                skip = parser->get_last_status() != 0;
+                break;
+            case parse_token_type_oror:
+                // OR. Skip if the last job succeeded.
+                skip = parser->get_last_status() == 0;
+                break;
+            default:
+                DIE("Unexpected job conjunction type");
         }
         if (!skip) {
-            result = run_1_job(cursor.child<0>(), associated_block);
+            result = run_1_job(jc.job, associated_block);
         }
-        continuation = cursor.child<1>();
-        cursor = continuation.try_get_child<g::job_conjunction, 2>();
     }
     return result;
 }
@@ -1369,66 +1429,86 @@ bool parse_execution_context_t::should_skip(parse_job_decoration_t type) const {
     }
 }
 
-template <typename Type>
-end_execution_reason_t parse_execution_context_t::run_job_list(tnode_t<Type> job_list,
-                                                               const block_t *associated_block) {
-    // We handle both job_list and andor_job_list uniformly.
-    static_assert(Type::token == symbol_job_list || Type::token == symbol_andor_job_list,
-                  "Not a job list");
-
-    end_execution_reason_t result = end_execution_reason_t::ok;
-    while (auto job_conj = job_list.template next_in_list<g::job_conjunction>()) {
-        if (auto reason = check_end_execution()) {
-            result = *reason;
-            break;
-        }
-
-        // Maybe skip the job if it has a leading and/or.
-        // Skipping is treated as success.
-        if (should_skip(get_decorator(job_conj))) {
-            result = end_execution_reason_t::ok;
-        } else {
-            result = this->run_job_conjunction(job_conj, associated_block);
+end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction(
+    const ast::job_conjunction_t &jc, const block_t *associated_block) {
+    // Test this job conjunction if it has an 'and' or 'or' decorator.
+    // If it passes, then run it.
+    if (auto reason = check_end_execution()) {
+        return *reason;
+    }
+    // Maybe skip the job if it has a leading and/or.
+    bool skip = false;
+    if (jc.decorator.has_value()) {
+        switch (jc.decorator->kw) {
+            case parse_keyword_t::kw_and:
+                // AND. Skip if the last job failed.
+                skip = parser->get_last_status() != 0;
+                break;
+            case parse_keyword_t::kw_or:
+                // OR. Skip if the last job succeeded.
+                skip = parser->get_last_status() == 0;
+                break;
+            default:
+                DIE("Unexpected keyword");
         }
     }
+    // Skipping is treated as success.
+    if (skip) {
+        return end_execution_reason_t::ok;
+    } else {
+        return this->run_job_conjunction(jc, associated_block);
+    }
+}
 
+end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_list_t &job_list_node,
+                                                               const block_t *associated_block) {
+    auto result = end_execution_reason_t::ok;
+    for (const ast::job_conjunction_t &jc : job_list_node) {
+        result = test_and_run_1_job_conjunction(jc, associated_block);
+    }
     // Returns the result of the last job executed or skipped.
     return result;
 }
 
-end_execution_reason_t parse_execution_context_t::eval_node(tnode_t<g::statement> statement,
+end_execution_reason_t parse_execution_context_t::run_job_list(
+    const ast::andor_job_list_t &job_list_node, const block_t *associated_block) {
+    auto result = end_execution_reason_t::ok;
+    for (const ast::andor_job_t &aoj : job_list_node) {
+        result = test_and_run_1_job_conjunction(aoj.job, associated_block);
+    }
+    // Returns the result of the last job executed or skipped.
+    return result;
+}
+
+end_execution_reason_t parse_execution_context_t::eval_node(const ast::statement_t &statement,
                                                             const block_t *associated_block) {
-    assert(statement && "Empty node in eval_node");
-    assert(statement.matches_node_tree(tree()) && "statement has unexpected tree");
+    // Note we only expect block-style statements here. No not statements.
     enum end_execution_reason_t status = end_execution_reason_t::ok;
-    if (auto block = statement.try_get_child<g::block_statement, 0>()) {
-        status = this->run_block_statement(block, associated_block);
-    } else if (auto ifstat = statement.try_get_child<g::if_statement, 0>()) {
-        status = this->run_if_statement(ifstat, associated_block);
-    } else if (auto switchstat = statement.try_get_child<g::switch_statement, 0>()) {
-        status = this->run_switch_statement(switchstat);
+    const ast::node_t *contents = statement.contents.get();
+    if (const auto *block = contents->try_as<ast::block_statement_t>()) {
+        status = this->run_block_statement(*block, associated_block);
+    } else if (const auto *ifstat = contents->try_as<ast::if_statement_t>()) {
+        status = this->run_if_statement(*ifstat, associated_block);
+    } else if (const auto *switchstat = contents->try_as<ast::switch_statement_t>()) {
+        status = this->run_switch_statement(*switchstat);
     } else {
-        FLOGF(error, L"Unexpected node %ls found in %s", statement.node()->describe().c_str(),
+        FLOGF(error, L"Unexpected node %ls found in %s", statement.describe().c_str(),
               __FUNCTION__);
         abort();
     }
     return status;
 }
 
-end_execution_reason_t parse_execution_context_t::eval_node(tnode_t<g::job_list> job_list,
+end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_t &job_list,
                                                             const block_t *associated_block) {
-    // Apply this block IO for the duration of this function.
-    assert(job_list && "Empty node in eval_node");
-    assert(job_list.matches_node_tree(tree()) && "job_list has unexpected tree");
     assert(associated_block && "Null block");
 
     // Check for infinite recursion: a function which immediately calls itself..
     wcstring func_name;
-    auto infinite_recursive_node =
-        this->infinite_recursive_statement_in_job_list(job_list, &func_name);
-    if (infinite_recursive_node) {
+    if (const auto *infinite_recursive_node =
+            this->infinite_recursive_statement_in_job_list(job_list, &func_name)) {
         // We have an infinite recursion.
-        return this->report_error(STATUS_CMD_ERROR, infinite_recursive_node,
+        return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node,
                                   INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str());
     }
 
@@ -1439,14 +1519,14 @@ end_execution_reason_t parse_execution_context_t::eval_node(tnode_t<g::job_list>
     return this->run_job_list(job_list, associated_block);
 }
 
-int parse_execution_context_t::line_offset_of_node(tnode_t<g::job> node) {
+int parse_execution_context_t::line_offset_of_node(const ast::job_t *node) {
     // If we're not executing anything, return -1.
     if (!node) {
         return -1;
     }
 
     // If for some reason we're executing a node without source, return -1.
-    auto range = node.source_range();
+    auto range = node->try_source_range();
     if (!range) {
         return -1;
     }
@@ -1501,7 +1581,7 @@ int parse_execution_context_t::get_current_line_number() {
 int parse_execution_context_t::get_current_source_offset() const {
     int result = -1;
     if (executing_job_node) {
-        if (auto range = executing_job_node.source_range()) {
+        if (auto range = executing_job_node->try_source_range()) {
             result = static_cast<int>(range->start);
         }
     }
diff --git a/src/parse_execution.h b/src/parse_execution.h
index 8ac778a0f..fe91ba902 100644
--- a/src/parse_execution.h
+++ b/src/parse_execution.h
@@ -4,6 +4,7 @@
 
 #include <stddef.h>
 
+#include "ast.h"
 #include "common.h"
 #include "io.h"
 #include "parse_constants.h"
@@ -38,7 +39,7 @@ class parse_execution_context_t {
     const operation_context_t &ctx;
 
     // The currently executing job node, used to indicate the line number.
-    tnode_t<grammar::job> executing_job_node{};
+    const ast::job_t *executing_job_node{};
 
     // Cached line number information.
     size_t cached_lineno_offset = 0;
@@ -59,88 +60,94 @@ class parse_execution_context_t {
 
     // Report an error, setting $status to \p status. Always returns
     // 'end_execution_reason_t::error'.
-    end_execution_reason_t report_error(int status, const parse_node_t &node, const wchar_t *fmt,
+    end_execution_reason_t report_error(int status, const ast::node_t &node, const wchar_t *fmt,
                                         ...) const;
     end_execution_reason_t report_errors(int status, const parse_error_list_t &error_list) const;
 
     /// Command not found support.
     end_execution_reason_t handle_command_not_found(const wcstring &cmd,
-                                                    tnode_t<grammar::plain_statement> statement,
+                                                    const ast::decorated_statement_t &statement,
                                                     int err_code);
 
     // Utilities
-    wcstring get_source(const parse_node_t &node) const;
-    tnode_t<grammar::plain_statement> infinite_recursive_statement_in_job_list(
-        tnode_t<grammar::job_list> job_list, wcstring *out_func_name) const;
+    wcstring get_source(const ast::node_t &node) const;
+    const ast::decorated_statement_t *infinite_recursive_statement_in_job_list(
+        const ast::job_list_t &job_list, wcstring *out_func_name) const;
 
     // Expand a command which may contain variables, producing an expand command and possibly
     // arguments. Prints an error message on error.
-    end_execution_reason_t expand_command(tnode_t<grammar::plain_statement> statement,
+    end_execution_reason_t expand_command(const ast::decorated_statement_t &statement,
                                           wcstring *out_cmd, wcstring_list_t *out_args) const;
 
     /// Return whether we should skip a job with the given bool statement type.
     bool should_skip(parse_job_decoration_t type) const;
 
     /// Indicates whether a job is a simple block (one block, no redirections).
-    bool job_is_simple_block(tnode_t<grammar::job> job) const;
+    bool job_is_simple_block(const ast::job_t &job) const;
 
-    enum process_type_t process_type_for_command(tnode_t<grammar::plain_statement> statement,
+    enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement,
                                                  const wcstring &cmd) const;
     end_execution_reason_t apply_variable_assignments(
-        process_t *proc, tnode_t<grammar::variable_assignments> variable_assignments,
+        process_t *proc, const ast::variable_assignment_list_t &variable_assignments,
         const block_t **block);
 
     // These create process_t structures from statements.
     end_execution_reason_t populate_job_process(
-        job_t *job, process_t *proc, tnode_t<grammar::statement> statement,
-        tnode_t<grammar::variable_assignments> variable_assignments);
+        job_t *job, process_t *proc, const ast::statement_t &statement,
+        const ast::variable_assignment_list_t &variable_assignments_list_t);
     end_execution_reason_t populate_not_process(job_t *job, process_t *proc,
-                                                tnode_t<grammar::not_statement> not_statement);
+                                                const ast::not_statement_t &not_statement);
     end_execution_reason_t populate_plain_process(job_t *job, process_t *proc,
-                                                  tnode_t<grammar::plain_statement> statement);
+                                                  const ast::decorated_statement_t &statement);
 
     template <typename Type>
     end_execution_reason_t populate_block_process(job_t *job, process_t *proc,
-                                                  tnode_t<grammar::statement> statement,
-                                                  tnode_t<Type> specific_statement);
+                                                  const ast::statement_t &statement,
+                                                  const Type &specific_statement);
 
     // These encapsulate the actual logic of various (block) statements.
-    end_execution_reason_t run_block_statement(tnode_t<grammar::block_statement> statement,
+    end_execution_reason_t run_block_statement(const ast::block_statement_t &statement,
                                                const block_t *associated_block);
-    end_execution_reason_t run_for_statement(tnode_t<grammar::for_header> header,
-                                             tnode_t<grammar::job_list> contents);
-    end_execution_reason_t run_if_statement(tnode_t<grammar::if_statement> statement,
+    end_execution_reason_t run_for_statement(const ast::for_header_t &header,
+                                             const ast::job_list_t &contents);
+    end_execution_reason_t run_if_statement(const ast::if_statement_t &statement,
                                             const block_t *associated_block);
-    end_execution_reason_t run_switch_statement(tnode_t<grammar::switch_statement> statement);
-    end_execution_reason_t run_while_statement(tnode_t<grammar::while_header> header,
-                                               tnode_t<grammar::job_list> contents,
+    end_execution_reason_t run_switch_statement(const ast::switch_statement_t &statement);
+    end_execution_reason_t run_while_statement(const ast::while_header_t &header,
+                                               const ast::job_list_t &contents,
                                                const block_t *associated_block);
-    end_execution_reason_t run_function_statement(tnode_t<grammar::block_statement> statement,
-                                                  tnode_t<grammar::function_header> header);
-    end_execution_reason_t run_begin_statement(tnode_t<grammar::job_list> contents);
+    end_execution_reason_t run_function_statement(const ast::block_statement_t &statement,
+                                                  const ast::function_header_t &header);
+    end_execution_reason_t run_begin_statement(const ast::job_list_t &contents);
 
     enum globspec_t { failglob, nullglob };
-    using argument_node_list_t = std::vector<tnode_t<grammar::argument>>;
-    end_execution_reason_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes,
+    using ast_args_list_t = std::vector<const ast::argument_t *>;
+
+    static ast_args_list_t get_argument_nodes(const ast::argument_list_t &args);
+    static ast_args_list_t get_argument_nodes(const ast::argument_or_redirection_list_t &args);
+
+    end_execution_reason_t expand_arguments_from_nodes(const ast_args_list_t &argument_nodes,
                                                        wcstring_list_t *out_arguments,
                                                        globspec_t glob_behavior);
 
     // Determines the list of redirections for a node.
-    end_execution_reason_t determine_redirections(
-        tnode_t<grammar::arguments_or_redirections_list> node,
-        redirection_spec_list_t *out_redirections);
+    end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list,
+                                                  redirection_spec_list_t *out_redirections);
 
-    end_execution_reason_t run_1_job(tnode_t<grammar::job> job, const block_t *associated_block);
-    end_execution_reason_t run_job_conjunction(tnode_t<grammar::job_conjunction> job_expr,
+    end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block);
+    end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc,
+                                                          const block_t *associated_block);
+    end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr,
                                                const block_t *associated_block);
-    template <typename Type>
-    end_execution_reason_t run_job_list(tnode_t<Type> job_list_node,
+    end_execution_reason_t run_job_list(const ast::job_list_t &job_list_node,
                                         const block_t *associated_block);
-    end_execution_reason_t populate_job_from_job_node(job_t *j, tnode_t<grammar::job> job_node,
+    end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node,
+                                        const block_t *associated_block);
+    end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node,
                                                       const block_t *associated_block);
 
     // Returns the line number of the node. Not const since it touches cached_lineno_offset.
-    int line_offset_of_node(tnode_t<grammar::job> node);
+    int line_offset_of_node(const ast::job_t *node);
     int line_offset_of_character_at_offset(size_t offset);
 
    public:
@@ -159,14 +166,14 @@ class parse_execution_context_t {
     /// Returns the source string.
     const wcstring &get_source() const { return pstree->src; }
 
-    /// Return the parse tree.
-    const parse_node_tree_t &tree() const { return pstree->tree; }
+    /// Return the parsed ast.
+    const ast::ast_t &ast() const { return *pstree->ast; }
 
     /// Start executing at the given node. Returns 0 if there was no error, 1 if there was an
     /// error.
-    end_execution_reason_t eval_node(tnode_t<grammar::statement> statement,
+    end_execution_reason_t eval_node(const ast::statement_t &statement,
                                      const block_t *associated_block);
-    end_execution_reason_t eval_node(tnode_t<grammar::job_list> job_list,
+    end_execution_reason_t eval_node(const ast::job_list_t &job_list,
                                      const block_t *associated_block);
 };
 
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index b3eadcd8d..8337800b7 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -1214,11 +1214,19 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod
     return result;
 }
 
+parsed_source_t::parsed_source_t(wcstring s, ast::ast_t &&ast)
+    : src(std::move(s)), ast(make_unique<ast::ast_t>(std::move(ast))) {}
+
+parsed_source_t::~parsed_source_t() = default;
+
 parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
                                  parse_error_list_t *errors) {
-    parse_node_tree_t tree;
-    if (!parse_tree_from_string(src, flags, &tree, errors, symbol_job_list)) return {};
-    return std::make_shared<parsed_source_t>(std::move(src), std::move(tree));
+    using namespace ast;
+    ast_t ast = ast_t::parse(src, flags, errors);
+    if (ast.errored() && !(flags & parse_flag_continue_after_error)) {
+        return nullptr;
+    }
+    return std::make_shared<parsed_source_t>(std::move(src), std::move(ast));
 }
 
 const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent,
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 8f8d54f74..5e10d9e1a 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -206,19 +206,26 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
                             parse_node_tree_t *output, parse_error_list_t *errors,
                             parse_token_type_t goal = symbol_job_list);
 
+namespace ast {
+class ast_t;
+}
+
 /// A type wrapping up a parse tree and the original source behind it.
 struct parsed_source_t {
     wcstring src;
-    parse_node_tree_t tree;
+    std::unique_ptr<ast::ast_t> ast;
 
-    parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
+    parsed_source_t(wcstring s, ast::ast_t &&ast);
+    ~parsed_source_t();
 
     parsed_source_t(const parsed_source_t &) = delete;
     void operator=(const parsed_source_t &) = delete;
-    parsed_source_t(parsed_source_t &&) = default;
-    parsed_source_t &operator=(parsed_source_t &&) = default;
+    parsed_source_t(parsed_source_t &&) = delete;
+    parsed_source_t &operator=(parsed_source_t &&) = delete;
 };
+
 /// Return a shared pointer to parsed_source_t, or null on failure.
+/// If parse_flag_continue_after_error is not set, this will return null on any error.
 using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
 parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
                                  parse_error_list_t *errors);
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index 8a74c905c..4ee438946 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -1311,9 +1311,9 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
         *out_errors = std::move(parse_errors);
     }
 
+    // Hand the parsed source (wrapping the ast) back to our caller if requested.
     if (out_pstree != nullptr) {
-        // TODO: legacy
-        *out_pstree = parse_source(buff_src, parse_flags, nullptr);
+        *out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(ast));
     }
 
     return res;
diff --git a/src/parser.cpp b/src/parser.cpp
index c48ec3868..12f670548 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -656,10 +656,10 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
 eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
                           const job_group_ref_t &job_group, enum block_type_t block_type) {
     assert(block_type == block_type_t::top || block_type == block_type_t::subst);
-    if (!ps->tree.empty()) {
-        // Execute the first node.
-        tnode_t<grammar::job_list> start{&ps->tree, &ps->tree.front()};
-        return this->eval_node(ps, start, io, job_group, block_type);
+    const auto *job_list = ps->ast->top()->as<ast::job_list_t>();
+    if (!job_list->empty()) {
+        // Execute the top job list.
+        return this->eval_node(ps, *job_list, io, job_group, block_type);
     } else {
         auto status = proc_status_t::from_exit_code(get_last_status());
         bool break_expand = false;
@@ -669,11 +669,11 @@ eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
 }
 
 template <typename T>
-eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
+eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node,
                                const io_chain_t &block_io, const job_group_ref_t &job_group,
                                block_type_t block_type) {
     static_assert(
-        std::is_same<T, grammar::statement>::value || std::is_same<T, grammar::job_list>::value,
+        std::is_same<T, ast::statement_t>::value || std::is_same<T, ast::job_list_t>::value,
         "Unexpected node type");
     // Handle cancellation requests. If our block stack is currently empty, then we already did
     // successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is
@@ -725,9 +725,9 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
 }
 
 // Explicit instantiations. TODO: use overloads instead?
-template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::statement>,
+template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::statement_t &,
                                         const io_chain_t &, const job_group_ref_t &, block_type_t);
-template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::job_list>,
+template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::job_list_t &,
                                         const io_chain_t &, const job_group_ref_t &, block_type_t);
 
 void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors,
diff --git a/src/parser.h b/src/parser.h
index be8b001aa..4987c706d 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -300,9 +300,9 @@ class parser_t : public std::enable_shared_from_this<parser_t> {
                     block_type_t block_type = block_type_t::top);
 
     /// Evaluates a node.
-    /// The node type must be grammar::statement or grammar::job_list.
+    /// The node type must be ast::statement_t or ast::job_list_t.
     template <typename T>
-    eval_res_t eval_node(const parsed_source_ref_t &ps, tnode_t<T> node, const io_chain_t &block_io,
+    eval_res_t eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io,
                          const job_group_ref_t &job_group,
                          block_type_t block_type = block_type_t::top);
 
diff --git a/src/proc.h b/src/proc.h
index eaae95e1c..c17dc216a 100644
--- a/src/proc.h
+++ b/src/proc.h
@@ -44,6 +44,10 @@ enum class job_control_t {
     none,
 };
 
+namespace ast {
+struct statement_t;
+}
+
 /// A proc_status_t is a value type that encapsulates logic around exited vs stopped vs signaled,
 /// etc.
 class proc_status_t {
@@ -261,10 +265,10 @@ class process_t {
     /// Type of process.
     process_type_t type{process_type_t::external};
 
-    /// For internal block processes only, the node offset of the statement.
+    /// For internal block processes only, the node of the statement.
     /// This is always either block, ifs, or switchs, never boolean or decorated.
     parsed_source_ref_t block_node_source{};
-    tnode_t<grammar::statement> internal_block_node{};
+    const ast::statement_t *internal_block_node{};
 
     struct concrete_assignment {
         wcstring variable_name;

From 0c22f67bdee555c3a1f5a18bb546ab2de4085c86 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Thu, 2 Jul 2020 14:51:45 -0700
Subject: [PATCH 13/13] Remove the old parser bits

Now that everything has been migrated to the new AST, remove as many of
the parse_tree bits as possible.
---
 CMakeLists.txt                 |   4 +-
 src/complete.cpp               |   1 -
 src/fish_indent.cpp            |   1 -
 src/fish_tests.cpp             |   1 -
 src/flog.h                     |   3 -
 src/function.h                 |   1 -
 src/history.cpp                |   1 -
 src/parse_constants.h          |  72 +--
 src/parse_execution.cpp        |  22 +-
 src/parse_execution.h          |   5 +-
 src/parse_grammar.h            | 401 --------------
 src/parse_grammar_elements.inc |  37 --
 src/parse_productions.cpp      | 466 ----------------
 src/parse_productions.h        |  50 --
 src/parse_tree.cpp             | 956 +--------------------------------
 src/parse_tree.h               | 132 -----
 src/parse_util.cpp             |   2 -
 src/parse_util.h               |   1 -
 src/parser.cpp                 |   1 -
 src/proc.h                     |   1 -
 src/reader.cpp                 |   1 -
 src/tnode.cpp                  | 152 ------
 src/tnode.h                    | 278 ----------
 23 files changed, 14 insertions(+), 2575 deletions(-)
 delete mode 100644 src/parse_grammar.h
 delete mode 100644 src/parse_grammar_elements.inc
 delete mode 100644 src/parse_productions.cpp
 delete mode 100644 src/parse_productions.h
 delete mode 100644 src/tnode.cpp
 delete mode 100644 src/tnode.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a360bde47..1d0430429 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -114,10 +114,10 @@ set(FISH_SRCS
     src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
     src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
     src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
-    src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
+    src/parse_execution.cpp src/parse_tree.cpp
     src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
     src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
-    src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
+    src/signal.cpp src/tinyexpr.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
     src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
     src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
     src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
diff --git a/src/complete.cpp b/src/complete.cpp
index 8f694512f..ba245581a 100644
--- a/src/complete.cpp
+++ b/src/complete.cpp
@@ -45,7 +45,6 @@
 #include "path.h"
 #include "proc.h"
 #include "reader.h"
-#include "tnode.h"
 #include "util.h"
 #include "wcstringutil.h"
 #include "wildcard.h"
diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp
index 491f99e49..e52625152 100644
--- a/src/fish_indent.cpp
+++ b/src/fish_indent.cpp
@@ -46,7 +46,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 #include "parse_constants.h"
 #include "parse_util.h"
 #include "print_help.h"
-#include "tnode.h"
 #include "wutil.h"  // IWYU pragma: keep
 
 // The number of spaces per indent isn't supposed to be configurable.
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 5aa171a8b..d4f094910 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -76,7 +76,6 @@
 #include "signal.h"
 #include "termsize.h"
 #include "timer.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "topic_monitor.h"
 #include "utf8.h"
diff --git a/src/flog.h b/src/flog.h
index dcf5fd3cf..eed0c0771 100644
--- a/src/flog.h
+++ b/src/flog.h
@@ -64,9 +64,6 @@ class category_list_t {
     category_t exec_fork{L"exec-fork", L"Calls to fork()"};
 
     category_t output_invalid{L"output-invalid", L"Trying to print invalid output"};
-    category_t parse_productions{L"parse-productions", L"Resolving tokens"};
-    category_t parse_productions_chatty{L"parse-productions-chatty",
-                                        L"Resolving tokens (chatty messages)"};
     category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
 
     category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};
diff --git a/src/function.h b/src/function.h
index 2de5f3081..ba7b2e05a 100644
--- a/src/function.h
+++ b/src/function.h
@@ -11,7 +11,6 @@
 #include "env.h"
 #include "event.h"
 #include "parse_tree.h"
-#include "tnode.h"
 
 class parser_t;
 
diff --git a/src/history.cpp b/src/history.cpp
index d069a8278..39fd62965 100644
--- a/src/history.cpp
+++ b/src/history.cpp
@@ -45,7 +45,6 @@
 #include "parser.h"
 #include "path.h"
 #include "reader.h"
-#include "tnode.h"
 #include "wcstringutil.h"
 #include "wildcard.h"  // IWYU pragma: keep
 #include "wutil.h"     // IWYU pragma: keep
diff --git a/src/parse_constants.h b/src/parse_constants.h
index e41992c50..454067f9d 100644
--- a/src/parse_constants.h
+++ b/src/parse_constants.h
@@ -6,7 +6,6 @@
 
 #include "common.h"
 
-#define PARSE_ASSERT(a) assert(a)
 #define PARSER_DIE()                   \
     do {                               \
         FLOG(error, L"Parser dying!"); \
@@ -27,44 +26,7 @@ struct source_range_t {
 // IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
 enum parse_token_type_t : uint8_t {
     token_type_invalid = 1,
-    // Non-terminal tokens
-    symbol_job_list,
-    symbol_job_conjunction,
-    symbol_job_conjunction_continuation,
-    symbol_job_decorator,
-    symbol_job,
-    symbol_job_continuation,
-    symbol_statement,
-    symbol_block_statement,
-    symbol_block_header,
-    symbol_for_header,
-    symbol_while_header,
-    symbol_begin_header,
-    symbol_function_header,
-    symbol_if_statement,
-    symbol_if_clause,
-    symbol_else_clause,
-    symbol_else_continuation,
-    symbol_switch_statement,
-    symbol_case_item_list,
-    symbol_case_item,
-    symbol_not_statement,
-    symbol_decorated_statement,
-    symbol_plain_statement,
-    symbol_variable_assignment,
-    symbol_variable_assignments,
-    symbol_arguments_or_redirections_list,
-    symbol_andor_job_list,
-    symbol_argument_list,
-    // Freestanding argument lists are parsed from the argument list supplied to 'complete -a'.
-    // They are not generated by parse trees rooted in symbol_job_list.
-    symbol_freestanding_argument_list,
-    symbol_argument,
-    symbol_redirection,
-    symbol_optional_background,
-    symbol_optional_newlines,
-    symbol_optional_time,
-    symbol_end_command,
+
     // Terminal types.
     parse_token_type_string,
     parse_token_type_pipe,
@@ -79,13 +41,6 @@ enum parse_token_type_t : uint8_t {
     parse_special_type_parse_error,
     parse_special_type_tokenizer_error,
     parse_special_type_comment,
-
-    LAST_TOKEN_TYPE = parse_special_type_comment,
-    FIRST_TERMINAL_TYPE = parse_token_type_string,
-    LAST_TERMINAL_TYPE = parse_token_type_terminate,
-    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
-    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
-    LAST_PARSE_TOKEN_TYPE = parse_token_type_end
 };
 
 const enum_map<parse_token_type_t> token_enum_map[] = {
@@ -100,9 +55,6 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
     {parse_token_type_andand, L"parse_token_type_andand"},
     {parse_token_type_oror, L"parse_token_type_oror"},
     {parse_token_type_terminate, L"parse_token_type_terminate"},
-// Define all symbols
-#define ELEM(sym) {symbol_##sym, L"symbol_" #sym},
-#include "parse_grammar_elements.inc"
     {token_type_invalid, L"token_type_invalid"},
     {token_type_invalid, nullptr}};
 #define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map)
@@ -158,7 +110,7 @@ const enum_map<parse_keyword_t> keyword_enum_map[] = {{parse_keyword_t::kw_excla
 
 // Node tag values.
 
-// Statement decorations, stored in node tag.
+// Statement decorations.
 enum parse_statement_decoration_t {
     parse_statement_decoration_none,
     parse_statement_decoration_command,
@@ -166,19 +118,6 @@ enum parse_statement_decoration_t {
     parse_statement_decoration_exec,
 };
 
-// Job decorations, stored in node tag.
-enum parse_job_decoration_t {
-    parse_job_decoration_none,
-    parse_job_decoration_and,
-    parse_job_decoration_or,
-};
-
-// Whether a statement is backgrounded.
-enum parse_optional_background_t { parse_no_background, parse_background };
-
-// Whether a job is prefixed with "time".
-enum parse_optional_time_t { parse_optional_time_no_time, parse_optional_time_time };
-
 // Parse error code list.
 enum parse_error_code_t {
     parse_error_none,
@@ -255,6 +194,13 @@ wcstring token_type_user_presentable_description(parse_token_type_t type,
 /// errors in a substring of a larger source buffer.
 void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt);
 
+// The position of a command within a pipeline.
+enum class pipeline_position_t {
+    none,       // not part of a pipeline
+    first,      // first command in a pipeline
+    subsequent  // second or further command in a pipeline
+};
+
 /// Maximum number of function calls.
 #define FISH_MAX_STACK_DEPTH 128
 
diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp
index 22f4669e4..71b1a36a3 100644
--- a/src/parse_execution.cpp
+++ b/src/parse_execution.cpp
@@ -1,11 +1,4 @@
-// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.)
-//
-// A note on error handling: fish has two kind of errors, fatal parse errors non-fatal runtime
-// errors. A fatal error prevents execution of the entire file, while a non-fatal error skips that
-// job.
-//
-// Non-fatal errors are printed as soon as they are encountered; otherwise you would have to wait
-// for the execution to finish to see them.
+// Provides the "linkage" between an ast and actual execution structures (job_t, etc.)
 #include "config.h"  // IWYU pragma: keep
 
 #include "parse_execution.h"
@@ -1416,19 +1409,6 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction(
     return result;
 }
 
-bool parse_execution_context_t::should_skip(parse_job_decoration_t type) const {
-    switch (type) {
-        case parse_job_decoration_and:
-            // AND. Skip if the last job failed.
-            return parser->get_last_status() != 0;
-        case parse_job_decoration_or:
-            // OR. Skip if the last job succeeded.
-            return parser->get_last_status() == 0;
-        default:
-            return false;
-    }
-}
-
 end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction(
     const ast::job_conjunction_t &jc, const block_t *associated_block) {
     // Test this job conjunction if it has an 'and' or 'or' decorator.
diff --git a/src/parse_execution.h b/src/parse_execution.h
index fe91ba902..0775fcb96 100644
--- a/src/parse_execution.h
+++ b/src/parse_execution.h
@@ -1,4 +1,4 @@
-// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.).
+// Provides the "linkage" between an ast and actual execution structures (job_t, etc.).
 #ifndef FISH_PARSE_EXECUTION_H
 #define FISH_PARSE_EXECUTION_H
 
@@ -79,9 +79,6 @@ class parse_execution_context_t {
     end_execution_reason_t expand_command(const ast::decorated_statement_t &statement,
                                           wcstring *out_cmd, wcstring_list_t *out_args) const;
 
-    /// Return whether we should skip a job with the given bool statement type.
-    bool should_skip(parse_job_decoration_t type) const;
-
     /// Indicates whether a job is a simple block (one block, no redirections).
     bool job_is_simple_block(const ast::job_t &job) const;
 
diff --git a/src/parse_grammar.h b/src/parse_grammar.h
deleted file mode 100644
index c89fd8816..000000000
--- a/src/parse_grammar.h
+++ /dev/null
@@ -1,401 +0,0 @@
-// Programmatic representation of fish grammar
-#ifndef FISH_PARSE_GRAMMAR_H
-#define FISH_PARSE_GRAMMAR_H
-
-#include <array>
-#include <tuple>
-#include <type_traits>
-
-#include "parse_constants.h"
-#include "tokenizer.h"
-
-struct parse_token_t;
-typedef uint8_t parse_node_tag_t;
-
-using parse_node_tag_t = uint8_t;
-struct parse_token_t;
-namespace grammar {
-
-using production_element_t = uint8_t;
-
-enum {
-    // The maximum length of any seq production.
-    MAX_PRODUCTION_LENGTH = 6
-};
-
-// Define primitive types.
-template <enum parse_token_type_t Token>
-struct primitive {
-    using type_tuple = std::tuple<>;
-    static constexpr parse_token_type_t token = Token;
-    static constexpr production_element_t element() { return Token; }
-};
-
-using tok_end = primitive<parse_token_type_end>;
-using tok_string = primitive<parse_token_type_string>;
-using tok_pipe = primitive<parse_token_type_pipe>;
-using tok_background = primitive<parse_token_type_background>;
-using tok_redirection = primitive<parse_token_type_redirection>;
-using tok_andand = primitive<parse_token_type_andand>;
-using tok_oror = primitive<parse_token_type_oror>;
-
-// Define keyword types.
-template <parse_keyword_t Keyword>
-struct keyword {
-    using type_tuple = std::tuple<>;
-    static constexpr parse_token_type_t token = parse_token_type_string;
-    static constexpr production_element_t element() {
-        // Convert a parse_keyword_t enum to a production_element_t enum.
-        return static_cast<uint32_t>(Keyword) + LAST_TOKEN_OR_SYMBOL + 1;
-    }
-};
-
-// Define special types.
-// Comments are not emitted as part of productions, but specially by the parser.
-struct comment {
-    using type_tuple = std::tuple<>;
-    static constexpr parse_token_type_t token = parse_special_type_comment;
-};
-
-// Forward declare all the symbol types.
-#define ELEM(T) struct T;
-#include "parse_grammar_elements.inc"
-
-// A production is a sequence of production elements.
-// +1 to hold the terminating token_type_invalid
-template <size_t Count>
-using production_t = std::array<const production_element_t, Count + 1>;
-
-// This is an ugly hack to avoid ODR violations
-// Given some type, return a pointer to its production.
-template <typename T>
-const production_element_t *production_for() {
-    static constexpr auto prod = T::production;
-    return prod.data();
-}
-
-// Get some production element.
-template <typename T>
-constexpr production_element_t element() {
-    return T::element();
-}
-
-// Template goo.
-namespace detail {
-template <typename T, typename Tuple>
-struct tuple_contains;
-
-template <typename T>
-struct tuple_contains<T, std::tuple<>> : std::false_type {};
-
-template <typename T, typename U, typename... Ts>
-struct tuple_contains<T, std::tuple<U, Ts...>> : tuple_contains<T, std::tuple<Ts...>> {};
-
-template <typename T, typename... Ts>
-struct tuple_contains<T, std::tuple<T, Ts...>> : std::true_type {};
-
-struct void_type {
-    using type = void;
-};
-
-// Support for checking whether the index N is valid for T::type_tuple.
-template <size_t N, typename T>
-static constexpr bool index_valid() {
-    return N < std::tuple_size<typename T::type_tuple>::value;
-}
-
-// Get the Nth type of T::type_tuple.
-template <size_t N, typename T>
-using tuple_element = std::tuple_element<N, typename T::type_tuple>;
-
-// Get the Nth type of T::type_tuple, or void if N is out of bounds.
-template <size_t N, typename T>
-using tuple_element_or_void =
-    typename std::conditional<index_valid<N, T>(), tuple_element<N, T>, void_type>::type::type;
-
-// Make a tuple by mapping the Nth item of a list of 'seq's.
-template <size_t N, typename... Ts>
-struct tuple_nther {
-    // A tuple of the Nth types of tuples (or voids).
-    using type = std::tuple<tuple_element_or_void<N, Ts>...>;
-};
-
-// Given a list of Options, each one a seq, check to see if any of them contain type Desired at
-// index Index.
-template <typename Desired, size_t Index, typename... Options>
-inline constexpr bool type_possible() {
-    using nths = typename tuple_nther<Index, Options...>::type;
-    return tuple_contains<Desired, nths>::value;
-}
-}  // namespace detail
-
-// Partial specialization hack.
-#define ELEM(T)                                   \
-    template <>                                   \
-    constexpr production_element_t element<T>() { \
-        return symbol_##T;                        \
-    }
-#include "parse_grammar_elements.inc"
-
-// Empty produces nothing.
-struct empty {
-    using type_tuple = std::tuple<>;
-    static constexpr production_t<0> production = {{token_type_invalid}};
-    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
-                                               parse_node_tag_t *) {
-        return production_for<empty>();
-    }
-};
-
-// Sequence represents a list of (at least two) productions.
-template <class T0, class... Ts>
-struct seq {
-    static constexpr production_t<1 + sizeof...(Ts)> production = {
-        {element<T0>(), element<Ts>()..., token_type_invalid}};
-
-    static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small");
-
-    using type_tuple = std::tuple<T0, Ts...>;
-
-    template <typename Desired, size_t Index>
-    static constexpr bool type_possible() {
-        using element_t = detail::tuple_element_or_void<Index, seq>;
-        return std::is_same<Desired, element_t>::value;
-    }
-
-    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
-                                               parse_node_tag_t *) {
-        return production_for<seq>();
-    }
-};
-
-template <class... Args>
-using produces_sequence = seq<Args...>;
-
-// Ergonomic way to create a production for a single element.
-template <class T>
-using single = seq<T>;
-
-template <class T>
-using produces_single = single<T>;
-
-// Alternative represents a choice.
-struct alternative {};
-
-// Following are the grammar productions.
-#define BODY(T) static constexpr parse_token_type_t token = symbol_##T;
-
-#define DEF(T) struct T : public
-
-#define DEF_ALT(T) struct T : public alternative
-#define ALT_BODY(T, ...)                                                                     \
-    BODY(T)                                                                                  \
-    using type_tuple = std::tuple<>;                                                         \
-    template <typename Desired, size_t Index>                                                \
-    static constexpr bool type_possible() {                                                  \
-        return detail::type_possible<Desired, Index, __VA_ARGS__>();                         \
-    }                                                                                        \
-    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \
-                                               parse_node_tag_t *)
-
-// A job_list is a list of job_conjunctions, separated by semicolons or newlines
-DEF_ALT(job_list) {
-    using normal = seq<job_decorator, job_conjunction, job_list>;
-    using empty_line = seq<tok_end, job_list>;
-    using empty = grammar::empty;
-    ALT_BODY(job_list, normal, empty_line, empty);
-};
-
-// Job decorators are 'and' and 'or'. These apply to the whole job.
-DEF_ALT(job_decorator) {
-    using ands = single<keyword<parse_keyword_t::kw_and>>;
-    using ors = single<keyword<parse_keyword_t::kw_or>>;
-    using empty = grammar::empty;
-    ALT_BODY(job_decorator, ands, ors, empty);
-};
-
-// A job_conjunction is a job followed by a continuation.
-DEF(job_conjunction) produces_sequence<job, job_conjunction_continuation>{BODY(job_conjunction)};
-
-DEF_ALT(job_conjunction_continuation) {
-    using andands = seq<tok_andand, optional_newlines, job_conjunction>;
-    using orors = seq<tok_oror, optional_newlines, job_conjunction>;
-    using empty = grammar::empty;
-    ALT_BODY(job_conjunction_continuation, andands, orors, empty);
-};
-
-/// The time builtin.
-DEF_ALT(optional_time) {
-    using empty = grammar::empty;
-    using time = single<keyword<parse_keyword_t::kw_time>>;
-    ALT_BODY(optional_time, empty, time);
-};
-
-// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
-// like if statements, where we require a command). To represent "non-empty", we require a
-// statement, followed by a possibly empty job_continuation, and then optionally a background
-// specifier '&'
-DEF(job)
-produces_sequence<optional_time, variable_assignments, statement, job_continuation,
-                  optional_background>{BODY(job)};
-
-DEF_ALT(job_continuation) {
-    using piped =
-        seq<tok_pipe, optional_newlines, variable_assignments, statement, job_continuation>;
-    using empty = grammar::empty;
-    ALT_BODY(job_continuation, piped, empty);
-};
-
-// A list of assignments like HOME=$PWD
-DEF_ALT(variable_assignments) {
-    using empty = grammar::empty;
-    using var = seq<variable_assignment, variable_assignments>;
-    ALT_BODY(variable_assignments, empty, var);
-};
-// A string token like VAR=value
-DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};
-
-// A statement is a normal command, or an if / while / etc
-DEF_ALT(statement) {
-    using nots = single<not_statement>;
-    using block = single<block_statement>;
-    using ifs = single<if_statement>;
-    using switchs = single<switch_statement>;
-    using decorated = single<decorated_statement>;
-    ALT_BODY(statement, nots, block, ifs, switchs, decorated);
-};
-
-// A block is a conditional, loop, or begin/end
-DEF(if_statement)
-produces_sequence<if_clause, else_clause, end_command, arguments_or_redirections_list>{
-    BODY(if_statement)};
-
-DEF(if_clause)
-produces_sequence<keyword<parse_keyword_t::kw_if>, job_conjunction, tok_end, andor_job_list,
-                  job_list>{BODY(if_clause)};
-
-DEF_ALT(else_clause) {
-    using empty = grammar::empty;
-    using else_cont = seq<keyword<parse_keyword_t::kw_else>, else_continuation>;
-    ALT_BODY(else_clause, empty, else_cont);
-};
-
-DEF_ALT(else_continuation) {
-    using else_if = seq<if_clause, else_clause>;
-    using else_only = seq<tok_end, job_list>;
-    ALT_BODY(else_continuation, else_if, else_only);
-};
-
-DEF(switch_statement)
-produces_sequence<keyword<parse_keyword_t::kw_switch>, argument, tok_end, case_item_list,
-                  end_command, arguments_or_redirections_list>{BODY(switch_statement)};
-
-DEF_ALT(case_item_list) {
-    using empty = grammar::empty;
-    using case_items = seq<case_item, case_item_list>;
-    using blank_line = seq<tok_end, case_item_list>;
-    ALT_BODY(case_item_list, empty, case_items, blank_line);
-};
-
-DEF(case_item)
-produces_sequence<keyword<parse_keyword_t::kw_case>, argument_list, tok_end, job_list>{
-    BODY(case_item)};
-
-DEF(block_statement)
-produces_sequence<block_header, job_list, end_command, arguments_or_redirections_list>{
-    BODY(block_statement)};
-
-DEF_ALT(block_header) {
-    using forh = single<for_header>;
-    using whileh = single<while_header>;
-    using funch = single<function_header>;
-    using beginh = single<begin_header>;
-    ALT_BODY(block_header, forh, whileh, funch, beginh);
-};
-
-DEF(for_header)
-produces_sequence<keyword<parse_keyword_t::kw_for>, tok_string, keyword<parse_keyword_t::kw_in>,
-                  argument_list, tok_end>{BODY(for_header)};
-
-DEF(while_header)
-produces_sequence<keyword<parse_keyword_t::kw_while>, job_conjunction, tok_end, andor_job_list>{
-    BODY(while_header)};
-
-DEF(begin_header) produces_single<keyword<parse_keyword_t::kw_begin>>{BODY(begin_header)};
-
-// Functions take arguments, and require at least one (the name). No redirections allowed.
-DEF(function_header)
-produces_sequence<keyword<parse_keyword_t::kw_function>, argument, argument_list, tok_end>{
-    BODY(function_header)};
-
-DEF_ALT(not_statement) {
-    using nots =
-        seq<keyword<parse_keyword_t::kw_not>, variable_assignments, optional_time, statement>;
-    using exclams =
-        seq<keyword<parse_keyword_t::kw_exclam>, variable_assignments, optional_time, statement>;
-    ALT_BODY(not_statement, nots, exclams);
-};
-
-// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean
-// statement.
-DEF_ALT(andor_job_list) {
-    using empty = grammar::empty;
-    using andor_job = seq<job_decorator, job_conjunction, andor_job_list>;
-    using empty_line = seq<tok_end, andor_job_list>;
-    ALT_BODY(andor_job_list, empty, andor_job, empty_line);
-};
-
-// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
-// "builtin" or "command" or "exec"
-DEF_ALT(decorated_statement) {
-    using plains = single<plain_statement>;
-    using cmds = seq<keyword<parse_keyword_t::kw_command>, plain_statement>;
-    using builtins = seq<keyword<parse_keyword_t::kw_builtin>, plain_statement>;
-    using execs = seq<keyword<parse_keyword_t::kw_exec>, plain_statement>;
-    ALT_BODY(decorated_statement, plains, cmds, builtins, execs);
-};
-
-DEF(plain_statement)
-produces_sequence<tok_string, arguments_or_redirections_list>{BODY(plain_statement)};
-
-DEF_ALT(argument_list) {
-    using empty = grammar::empty;
-    using arg = seq<argument, argument_list>;
-    ALT_BODY(argument_list, empty, arg);
-};
-
-DEF_ALT(arguments_or_redirections_list) {
-    using empty = grammar::empty;
-    using arg = seq<argument, arguments_or_redirections_list>;
-    using redir = seq<redirection, arguments_or_redirections_list>;
-    ALT_BODY(arguments_or_redirections_list, empty, arg, redir);
-};
-
-DEF(argument) produces_single<tok_string>{BODY(argument)};
-DEF(redirection) produces_sequence<tok_redirection, tok_string>{BODY(redirection)};
-
-DEF_ALT(optional_background) {
-    using empty = grammar::empty;
-    using background = single<tok_background>;
-    ALT_BODY(optional_background, empty, background);
-};
-
-DEF(end_command) produces_single<keyword<parse_keyword_t::kw_end>>{BODY(end_command)};
-
-// Note optional_newlines only allows newline-style tok_end, not semicolons.
-DEF_ALT(optional_newlines) {
-    using empty = grammar::empty;
-    using newlines = seq<tok_end, optional_newlines>;
-    ALT_BODY(optional_newlines, empty, newlines);
-};
-
-// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
-// TOK_END (newlines, and even semicolons, for historical reasons)
-DEF_ALT(freestanding_argument_list) {
-    using empty = grammar::empty;
-    using arg = seq<argument, freestanding_argument_list>;
-    using semicolon = seq<tok_end, freestanding_argument_list>;
-    ALT_BODY(freestanding_argument_list, empty, arg, semicolon);
-};
-}  // namespace grammar
-#endif
diff --git a/src/parse_grammar_elements.inc b/src/parse_grammar_elements.inc
deleted file mode 100644
index 621792b7a..000000000
--- a/src/parse_grammar_elements.inc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Define ELEM before including this file.
-ELEM(job_list)
-ELEM(job)
-ELEM(job_decorator)
-ELEM(job_conjunction)
-ELEM(job_conjunction_continuation)
-ELEM(job_continuation)
-ELEM(statement)
-ELEM(if_statement)
-ELEM(if_clause)
-ELEM(else_clause)
-ELEM(else_continuation)
-ELEM(switch_statement)
-ELEM(case_item_list)
-ELEM(case_item)
-ELEM(block_statement)
-ELEM(block_header)
-ELEM(for_header)
-ELEM(while_header)
-ELEM(begin_header)
-ELEM(function_header)
-ELEM(not_statement)
-ELEM(andor_job_list)
-ELEM(decorated_statement)
-ELEM(variable_assignment)
-ELEM(variable_assignments)
-ELEM(plain_statement)
-ELEM(argument_list)
-ELEM(arguments_or_redirections_list)
-ELEM(argument)
-ELEM(redirection)
-ELEM(optional_background)
-ELEM(optional_newlines)
-ELEM(optional_time)
-ELEM(end_command)
-ELEM(freestanding_argument_list)
-#undef ELEM
diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp
deleted file mode 100644
index dee4a7ac9..000000000
--- a/src/parse_productions.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-#include "config.h"  // IWYU pragma: keep
-
-#include "parse_productions.h"
-
-#include <stdio.h>
-
-#include "common.h"
-#include "flog.h"
-#include "parse_constants.h"
-#include "parse_grammar.h"
-#include "parse_tree.h"
-
-using namespace parse_productions;
-using namespace grammar;
-
-#define NO_PRODUCTION nullptr
-
-// Herein are encoded the productions for our LL2 fish grammar.
-//
-// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_list). The
-// function accepts two tokens, representing the first and second lookahead, and returns a
-// production representing the rule, or NULL on error. There is also a tag value which is returned
-// by reference; the tag is a sort of node annotation.
-//
-// Productions are generally a static const array, and we return a pointer to the array (yes,
-// really).
-
-#define RESOLVE(SYM)                          \
-    const production_element_t *SYM::resolve( \
-        const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
-
-/// A job_list is a list of jobs, separated by semicolons or newlines.
-RESOLVE(job_list) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.type) {
-        case parse_token_type_string: {
-            // Some keywords are special.
-            switch (token1.keyword) {
-                case parse_keyword_t::kw_end:
-                case parse_keyword_t::kw_else:
-                case parse_keyword_t::kw_case: {
-                    return production_for<empty>();  // end this job list
-                }
-                default: {
-                    return production_for<normal>();  // normal string
-                }
-            }
-        }
-        case parse_token_type_pipe:
-        case parse_token_type_redirection:
-        case parse_token_type_background: {
-            return production_for<normal>();
-        }
-        case parse_token_type_end: {
-            return production_for<empty_line>();
-        }
-        case parse_token_type_terminate: {
-            return production_for<empty>();  // no more commands, just transition to empty
-        }
-        default: {
-            return NO_PRODUCTION;
-        }
-    }
-}
-
-// A job decorator is AND or OR
-RESOLVE(job_decorator) {
-    // If it's followed by --help, it's not a decoration.
-    if (token2.is_help_argument) {
-        *out_tag = parse_job_decoration_none;
-        return production_for<empty>();
-    }
-
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_and: {
-            *out_tag = parse_job_decoration_and;
-            return production_for<ands>();
-        }
-        case parse_keyword_t::kw_or: {
-            *out_tag = parse_job_decoration_or;
-            return production_for<ors>();
-        }
-        default: {
-            *out_tag = parse_job_decoration_none;
-            return production_for<empty>();
-        }
-    }
-}
-
-RESOLVE(job_conjunction_continuation) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-    switch (token1.type) {
-        case parse_token_type_andand:
-            *out_tag = parse_job_decoration_and;
-            return production_for<andands>();
-        case parse_token_type_oror:
-            *out_tag = parse_job_decoration_or;
-            return production_for<orors>();
-        default:
-            return production_for<empty>();
-    }
-}
-
-RESOLVE(job_continuation) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.type) {
-        case parse_token_type_pipe: {
-            return production_for<piped>();  // pipe, continuation
-        }
-        default: {
-            return production_for<empty>();  // not a pipe, no job continuation
-        }
-    }
-}
-
-// A statement is a normal command, or an if / while / and etc.
-RESOLVE(statement) {
-    UNUSED(out_tag);
-
-    // The only block-like builtin that takes any parameters is 'function' So go to decorated
-    // statements if the subsequent token looks like '--'. The logic here is subtle:
-    //
-    // If we are 'begin', then we expect to be invoked with no arguments.
-    // If we are 'function', then we are a non-block if we are invoked with -h or --help
-    // If we are anything else, we require an argument, so do the same thing if the subsequent token
-    // is a statement terminator.
-    if (token1.type == parse_token_type_string) {
-        // If we are a function, then look for help arguments. Otherwise, if the next token looks
-        // like an option (starts with a dash), then parse it as a decorated statement.
-        if (token1.keyword == parse_keyword_t::kw_function && token2.is_help_argument) {
-            return production_for<decorated>();
-        } else if (token1.keyword != parse_keyword_t::kw_function && token2.has_dash_prefix) {
-            return production_for<decorated>();
-        }
-
-        // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
-        // a "naked if".
-        bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_t::kw_begin &&
-                                              token1.keyword != parse_keyword_t::kw_end);
-        if (naked_invocation_invokes_help &&
-            (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) {
-            return production_for<decorated>();
-        }
-    }
-
-    switch (token1.type) {
-        case parse_token_type_string: {
-            switch (token1.keyword) {
-                case parse_keyword_t::kw_not:
-                case parse_keyword_t::kw_exclam: {
-                    return production_for<nots>();
-                }
-                case parse_keyword_t::kw_for:
-                case parse_keyword_t::kw_while:
-                case parse_keyword_t::kw_function:
-                case parse_keyword_t::kw_begin: {
-                    return production_for<block>();
-                }
-                case parse_keyword_t::kw_if: {
-                    return production_for<ifs>();
-                }
-                case parse_keyword_t::kw_else: {
-                    return NO_PRODUCTION;
-                }
-                case parse_keyword_t::kw_switch: {
-                    return production_for<switchs>();
-                }
-                case parse_keyword_t::kw_end: {
-                    return NO_PRODUCTION;
-                }
-                // All other keywords fall through to decorated statement.
-                default: {
-                    return production_for<decorated>();
-                }
-            }
-        }
-        case parse_token_type_pipe:
-        case parse_token_type_redirection:
-        case parse_token_type_background:
-        case parse_token_type_terminate: {
-            return NO_PRODUCTION;
-        }
-        default: {
-            return NO_PRODUCTION;
-        }
-    }
-}
-
-RESOLVE(else_clause) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_else: {
-            return production_for<else_cont>();
-        }
-        default: {
-            return production_for<empty>();
-        }
-    }
-}
-
-RESOLVE(else_continuation) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_if: {
-            return production_for<else_if>();
-        }
-        default: {
-            return production_for<else_only>();
-        }
-    }
-}
-
-RESOLVE(case_item_list) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    if (token1.keyword == parse_keyword_t::kw_case)
-        return production_for<case_items>();
-    else if (token1.type == parse_token_type_end)
-        return production_for<blank_line>();
-    else
-        return production_for<empty>();
-}
-
-RESOLVE(not_statement) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_not:
-            return production_for<nots>();
-        case parse_keyword_t::kw_exclam:
-            return production_for<exclams>();
-        default:
-            return NO_PRODUCTION;
-    }
-}
-
-RESOLVE(andor_job_list) {
-    UNUSED(out_tag);
-
-    if (token1.type == parse_token_type_end) {
-        return production_for<empty_line>();
-    } else if (token1.keyword == parse_keyword_t::kw_and ||
-               token1.keyword == parse_keyword_t::kw_or) {
-        // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and
-        // --help' or a naked 'and', and not part of this list.
-        if (token2.type == parse_token_type_string && !token2.is_help_argument) {
-            return production_for<andor_job>();
-        }
-    }
-    // All other cases end the list.
-    return production_for<empty>();
-}
-
-RESOLVE(argument_list) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-    switch (token1.type) {
-        case parse_token_type_string: {
-            return production_for<arg>();
-        }
-        default: {
-            return production_for<empty>();
-        }
-    }
-}
-
-RESOLVE(freestanding_argument_list) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.type) {
-        case parse_token_type_string: {
-            return production_for<arg>();
-        }
-        case parse_token_type_end: {
-            return production_for<semicolon>();
-        }
-        default: {
-            return production_for<empty>();
-        }
-    }
-}
-
-RESOLVE(block_header) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_for: {
-            return production_for<forh>();
-        }
-        case parse_keyword_t::kw_while: {
-            return production_for<whileh>();
-        }
-        case parse_keyword_t::kw_function: {
-            return production_for<funch>();
-        }
-        case parse_keyword_t::kw_begin: {
-            return production_for<beginh>();
-        }
-        default: {
-            return NO_PRODUCTION;
-        }
-    }
-}
-
-RESOLVE(variable_assignments) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-    if (token1.may_be_variable_assignment) {
-        assert(token1.type == parse_token_type_string);
-        return production_for<var>();
-    }
-    return production_for<empty>();
-}
-
-RESOLVE(decorated_statement) {
-    // and/or are typically parsed in job_conjunction at the beginning of a job
-    // However they may be reached here through e.g. true && and false.
-    // Refuse to parse them as a command except for --help. See #6089.
-    if ((token1.keyword == parse_keyword_t::kw_and || token1.keyword == parse_keyword_t::kw_or) &&
-        !token2.is_help_argument) {
-        return NO_PRODUCTION;
-    }
-
-    // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the
-    // second token is not a string, then this is a naked 'command' and we should execute it as
-    // undecorated.
-    if (token2.type != parse_token_type_string || token2.has_dash_prefix) {
-        return production_for<plains>();
-    }
-
-    switch (token1.keyword) {
-        case parse_keyword_t::kw_command: {
-            *out_tag = parse_statement_decoration_command;
-            return production_for<cmds>();
-        }
-        case parse_keyword_t::kw_builtin: {
-            *out_tag = parse_statement_decoration_builtin;
-            return production_for<builtins>();
-        }
-        case parse_keyword_t::kw_exec: {
-            *out_tag = parse_statement_decoration_exec;
-            return production_for<execs>();
-        }
-        default: {
-            *out_tag = parse_statement_decoration_none;
-            return production_for<plains>();
-        }
-    }
-}
-
-RESOLVE(arguments_or_redirections_list) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-
-    switch (token1.type) {
-        case parse_token_type_string:
-            return production_for<arg>();
-        case parse_token_type_redirection:
-            return production_for<redir>();
-        default:
-            return production_for<empty>();
-    }
-}
-
-RESOLVE(optional_newlines) {
-    UNUSED(token2);
-    UNUSED(out_tag);
-    if (token1.is_newline) return production_for<newlines>();
-    return production_for<empty>();
-}
-
-RESOLVE(optional_background) {
-    UNUSED(token2);
-
-    switch (token1.type) {
-        case parse_token_type_background: {
-            *out_tag = parse_background;
-            return production_for<background>();
-        }
-        default: {
-            *out_tag = parse_no_background;
-            return production_for<empty>();
-        }
-    }
-}
-
-RESOLVE(optional_time) {
-    if (token1.keyword == parse_keyword_t::kw_time && !token2.is_help_argument) {
-        *out_tag = parse_optional_time_time;
-        return production_for<time>();
-    }
-    *out_tag = parse_optional_time_no_time;
-    return production_for<empty>();
-}
-
-const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
-                                                                    const parse_token_t &input1,
-                                                                    const parse_token_t &input2,
-                                                                    parse_node_tag_t *out_tag) {
-    // this is **extremely** chatty
-    FLOGF(parse_productions_chatty, L"Resolving production for %ls with input token <%ls>",
-          token_type_description(node_type), input1.describe().c_str());
-
-    // Fetch the function to resolve the list of productions.
-    const production_element_t *(*resolver)(const parse_token_t &input1,  //!OCLINT(unused param)
-                                            const parse_token_t &input2,  //!OCLINT(unused param)
-                                            parse_node_tag_t *out_tag) =  //!OCLINT(unused param)
-        nullptr;
-    switch (node_type) {
-// Handle all of our grammar elements
-#define ELEM(SYM)                \
-    case (symbol_##SYM):         \
-        resolver = SYM::resolve; \
-        break;
-#include "parse_grammar_elements.inc"
-
-        // Everything else is an error.
-        case parse_token_type_string:
-        case parse_token_type_pipe:
-        case parse_token_type_redirection:
-        case parse_token_type_background:
-        case parse_token_type_andand:
-        case parse_token_type_oror:
-        case parse_token_type_end:
-        case parse_token_type_terminate: {
-            FLOGF(error, L"Terminal token type %ls passed to %s", token_type_description(node_type),
-                  __FUNCTION__);
-            PARSER_DIE();
-            break;
-        }
-        case parse_special_type_parse_error:
-        case parse_special_type_tokenizer_error:
-        case parse_special_type_comment: {
-            FLOGF(error, L"Special type %ls passed to %s\n", token_type_description(node_type),
-                  __FUNCTION__);
-            PARSER_DIE();
-            break;
-        }
-        case token_type_invalid: {
-            FLOGF(error, L"token_type_invalid passed to %s", __FUNCTION__);
-            PARSER_DIE();
-            break;
-        }
-    }
-    PARSE_ASSERT(resolver != nullptr);
-
-    const production_element_t *result = resolver(input1, input2, out_tag);
-    if (result == nullptr) {
-        FLOGF(parse_productions, L"Node type '%ls' has no production for input '%ls' (in %s)",
-              token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
-    }
-
-    return result;
-}
diff --git a/src/parse_productions.h b/src/parse_productions.h
deleted file mode 100644
index 02a9592f1..000000000
--- a/src/parse_productions.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Programmatic representation of fish code.
-#ifndef FISH_PARSE_TREE_CONSTRUCTION_H
-#define FISH_PARSE_TREE_CONSTRUCTION_H
-
-#include <sys/types.h>
-
-#include "ast.h"
-#include "parse_constants.h"
-
-struct parse_token_t;
-
-namespace parse_productions {
-
-// A production is an array of unsigned char. Symbols are encoded directly as their symbol value.
-// Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together
-// keywords and symbols.
-typedef uint8_t production_element_t;
-
-/// Resolve the type from a production element.
-inline parse_token_type_t production_element_type(production_element_t elem) {
-    if (elem > LAST_TOKEN_OR_SYMBOL) {
-        return parse_token_type_string;
-    } else {
-        return static_cast<parse_token_type_t>(elem);
-    }
-}
-
-/// Resolve the keyword from a production element.
-inline parse_keyword_t production_element_keyword(production_element_t elem) {
-    if (elem > LAST_TOKEN_OR_SYMBOL) {
-        // First keyword is LAST_TOKEN_OR_SYMBOL + 1.
-        return static_cast<parse_keyword_t>(elem - LAST_TOKEN_OR_SYMBOL - 1);
-    } else {
-        return parse_keyword_t::none;
-    }
-}
-
-/// Check if an element is valid.
-inline bool production_element_is_valid(production_element_t elem) {
-    return elem != token_type_invalid;
-}
-
-/// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not
-/// be invalid; the second token may be invalid if there's no more tokens. We may also set flags.
-const production_element_t *production_for_token(parse_token_type_t node_type,
-                                                 const parse_token_t &input1,
-                                                 const parse_token_t &input2, uint8_t *out_tag);
-}  // namespace parse_productions
-
-#endif
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index 8337800b7..28080b292 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -13,23 +13,16 @@
 #include <type_traits>
 #include <vector>
 
+#include "ast.h"
 #include "common.h"
 #include "fallback.h"
 #include "flog.h"
 #include "parse_constants.h"
-#include "parse_productions.h"
 #include "parse_tree.h"
 #include "proc.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
 
-using namespace parse_productions;
-
-static bool production_is_empty(const production_element_t *production) {
-    return *production == token_type_invalid;
-}
-
 parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
     switch (err) {
         case tokenizer_error_t::none:
@@ -174,15 +167,6 @@ wcstring token_type_user_presentable_description(parse_token_type_t type, parse_
     }
 
     switch (type) {
-        // Hackish. We only support the following types.
-        case symbol_decorated_statement:
-        case symbol_statement:
-            return L"a command";
-        case symbol_argument:
-            return L"an argument";
-        case symbol_job:
-        case symbol_job_list:
-            return L"a job";
         case parse_token_type_string:
             return L"a string";
         case parse_token_type_pipe:
@@ -205,38 +189,6 @@ wcstring token_type_user_presentable_description(parse_token_type_t type, parse_
     }
 }
 
-static wcstring block_type_user_presentable_description(parse_token_type_t type) {
-    switch (type) {
-        case symbol_for_header: {
-            return L"for loop";
-        }
-        case symbol_while_header: {
-            return L"while loop";
-        }
-        case symbol_function_header: {
-            return L"function definition";
-        }
-        case symbol_begin_header: {
-            return L"begin";
-        }
-        case symbol_if_statement: {
-            return L"if statement";
-        }
-        case symbol_switch_statement: {
-            return L"switch statement";
-        }
-        default: {
-            return token_type_description(type);
-        }
-    }
-}
-
-/// Returns a string description of the given parse node.
-wcstring parse_node_t::describe() const {
-    wcstring result = token_type_description(this->type);
-    return result;
-}
-
 /// Returns a string description of the given parse token.
 wcstring parse_token_t::describe() const {
     wcstring result = token_type_description(type);
@@ -280,743 +232,6 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(
     return token_type_invalid;
 }
 
-/// Helper function for parse_dump_tree().
-static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src,
-                                node_offset_t node_idx, size_t indent, wcstring *result,
-                                size_t *line, node_offset_t *inout_first_node_not_dumped) {
-    assert(node_idx < nodes.size());
-
-    // Update first_node_not_dumped. This takes a bit of explanation. While it's true that a parse
-    // tree may be a "forest",  its individual trees are "compact," meaning they are not
-    // interleaved. Thus we keep track of the largest node index as we descend a tree. One past the
-    // largest is the start of the next tree.
-    if (*inout_first_node_not_dumped <= node_idx) {
-        *inout_first_node_not_dumped = node_idx + 1;
-    }
-
-    const parse_node_t &node = nodes.at(node_idx);
-
-    const size_t spacesPerIndent = 2;
-
-    // Unindent statement lists by 1 to flatten them.
-    if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) {
-        if (indent > 0) indent -= 1;
-    }
-
-    append_format(*result, L"%2lu - %2lu  ", *line, node_idx);
-    result->append(indent * spacesPerIndent, L' ');
-    result->append(node.describe());
-    if (node.child_count > 0) {
-        append_format(*result, L" <%lu children>", node.child_count);
-    }
-    if (node.has_comments()) {
-        append_format(*result, L" <has_comments>");
-    }
-    if (node.has_preceding_escaped_newline()) {
-        append_format(*result, L" <preceding_esc_nl>");
-    }
-
-    if (node.has_source() && node.type == parse_token_type_string) {
-        result->append(L": \"");
-        result->append(src, node.source_start, node.source_length);
-        result->append(L"\"");
-    }
-
-    if (node.type != parse_token_type_string) {
-        if (node.has_source()) {
-            append_format(*result, L"  [%ld, %ld]", static_cast<long>(node.source_start),
-                          static_cast<long>(node.source_length));
-        } else {
-            append_format(*result, L"  [%ld, no src]", static_cast<long>(node.source_start));
-        }
-    }
-
-    result->push_back(L'\n');
-    ++*line;
-    for (node_offset_t child_idx = node.child_start;
-         child_idx < node.child_start + node.child_count; child_idx++) {
-        dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line,
-                            inout_first_node_not_dumped);
-    }
-}
-
-/// Gives a debugging textual description of a parse tree. Note that this supports "parse forests"
-/// too. That is, our tree may not really be a tree, but instead a collection of trees.
-wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) {
-    if (nodes.empty()) return L"(empty!)";
-
-    node_offset_t first_node_not_dumped = 0;
-    size_t line = 0;
-    wcstring result;
-    while (first_node_not_dumped < nodes.size()) {
-        if (first_node_not_dumped > 0) {
-            result.append(L"---New Tree---\n");
-        }
-        dump_tree_recursive(nodes, src, first_node_not_dumped, 0, &result, &line,
-                            &first_node_not_dumped);
-    }
-    return result;
-}
-
-/// Struct representing elements of the symbol stack, used in the internal state of the LL parser.
-struct parse_stack_element_t {
-    enum parse_token_type_t type;
-    enum parse_keyword_t keyword;
-    node_offset_t node_idx;
-
-    explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx)
-        : type(t), keyword(parse_keyword_t::none), node_idx(idx) {}
-
-    explicit parse_stack_element_t(production_element_t e, node_offset_t idx)
-        : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) {}
-
-    wcstring describe() const {
-        wcstring result = token_type_description(type);
-        if (keyword != parse_keyword_t::none) {
-            append_format(result, L" <%ls>", keyword_description(keyword));
-        }
-        return result;
-    }
-
-    /// Returns a name that we can show to the user, e.g. "a command".
-    wcstring user_presentable_description() const {
-        return token_type_user_presentable_description(type, keyword);
-    }
-};
-
-/// The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL
-/// parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are
-/// difficult to "pause", unlike table-driven parsers.
-class parse_ll_t {
-    // Traditional symbol stack of the LL parser.
-    std::vector<parse_stack_element_t> symbol_stack;
-    // Parser output. This is a parse tree, but stored in an array.
-    parse_node_tree_t nodes;
-    // Whether we ran into a fatal error, including parse errors or tokenizer errors.
-    bool fatal_errored;
-    // Whether we should collect error messages or not.
-    bool should_generate_error_messages;
-    // List of errors we have encountered.
-    parse_error_list_t errors;
-    // The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but
-    // terminal types are just matched against input tokens.
-    bool top_node_handle_terminal_types(const parse_token_t &token);
-
-    void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token);
-    void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *fmt, ...);
-    void parse_error_at_location(size_t source_start, size_t source_length, size_t error_location,
-                                 parse_error_code_t code, const wchar_t *fmt, ...);
-    void parse_error_failed_production(const struct parse_stack_element_t &elem,
-                                       parse_token_t token);
-    void parse_error_unbalancing_token(parse_token_t token);
-
-    // Reports an error for an unclosed block, e.g. 'begin;'. Returns true on success, false on
-    // failure (e.g. it is not an unclosed block).
-    bool report_error_for_unclosed_block();
-
-    // void dump_stack(void) const;
-
-    /// Get the node corresponding to the top element of the stack.
-    parse_node_t &node_for_top_symbol() {
-        PARSE_ASSERT(!symbol_stack.empty());  //!OCLINT(multiple unary operator)
-        const parse_stack_element_t &top_symbol = symbol_stack.back();
-        PARSE_ASSERT(top_symbol.node_idx != NODE_OFFSET_INVALID);
-        PARSE_ASSERT(top_symbol.node_idx < nodes.size());
-        return nodes.at(top_symbol.node_idx);
-    }
-
-    /// Pop from the top of the symbol stack, then push the given production, updating node counts.
-    /// Note that production_element_t has type "pointer to array" so some care is required.
-    inline void symbol_stack_pop_push_production(const production_element_t *production) {
-        bool logit = false;
-        if (logit) {
-            int count = 0;
-            std::fwprintf(stderr, L"Applying production:\n");
-            for (int i = 0;; i++) {
-                production_element_t elem = production[i];
-                if (!production_element_is_valid(elem)) break;  // all done, bail out
-                parse_token_type_t type = production_element_type(elem);
-                parse_keyword_t keyword = production_element_keyword(elem);
-                std::fwprintf(stderr, L"\t%ls <%ls>\n", token_type_description(type),
-                              keyword_description(keyword));
-                count++;
-            }
-            if (!count) std::fwprintf(stderr, L"\t<empty>\n");
-        }
-
-        // Get the parent index. But we can't get the parent parse node yet, since it may be made
-        // invalid by adding children.
-        const node_offset_t parent_node_idx = symbol_stack.back().node_idx;
-
-        // Add the children. Confusingly, we want our nodes to be in forwards order (last token
-        // last, so dumps look nice), but the symbols should be reverse order (last token first, so
-        // it's lowest on the stack)
-        const size_t child_start_big = nodes.size();
-        assert(child_start_big < NODE_OFFSET_INVALID);
-        auto child_start = static_cast<node_offset_t>(child_start_big);
-
-        // To avoid constructing multiple nodes, we make a single one that we modify.
-        parse_node_t representative_child(token_type_invalid);
-        representative_child.parent = parent_node_idx;
-
-        node_offset_t child_count = 0;
-        for (int i = 0;; i++) {
-            production_element_t elem = production[i];
-            if (!production_element_is_valid(elem)) break;  // all done, bail out
-            // Append the parse node.
-            representative_child.type = production_element_type(elem);
-            nodes.push_back(representative_child);
-            child_count++;
-        }
-
-        // Update the parent.
-        parse_node_t &parent_node = nodes.at(parent_node_idx);
-
-        // Should have no children yet.
-        PARSE_ASSERT(parent_node.child_count == 0);
-
-        // Tell the node about its children.
-        parent_node.child_start = child_start;
-        parent_node.child_count = child_count;
-
-        // Replace the top of the stack with new stack elements corresponding to our new nodes. Note
-        // that these go in reverse order.
-        symbol_stack.pop_back();
-        symbol_stack.reserve(symbol_stack.size() + child_count);
-        node_offset_t idx = child_count;
-        while (idx--) {
-            production_element_t elem = production[idx];
-            PARSE_ASSERT(production_element_is_valid(elem));
-            symbol_stack.emplace_back(elem, child_start + idx);
-        }
-    }
-
-   public:
-    // Constructor
-    explicit parse_ll_t(enum parse_token_type_t goal)
-        : fatal_errored(false), should_generate_error_messages(true) {
-        this->symbol_stack.reserve(16);
-        this->nodes.reserve(64);
-        this->reset_symbols_and_nodes(goal);
-    }
-
-    // Input
-    void accept_tokens(parse_token_t token1, parse_token_t token2);
-
-    /// Report tokenizer errors.
-    void report_tokenizer_error(const tok_t &tok);
-
-    /// Indicate if we hit a fatal error.
-    bool has_fatal_error() const { return this->fatal_errored; }
-
-    /// Indicate whether we want to generate error messages.
-    void set_should_generate_error_messages(bool flag) {
-        this->should_generate_error_messages = flag;
-    }
-
-    /// Clear the parse symbol stack (but not the node tree). Add a node of the given type as the
-    /// goal node. This is called from the constructor.
-    void reset_symbols(enum parse_token_type_t goal);
-
-    /// Clear the parse symbol stack and the node tree. Add a node of the given type as the goal
-    /// node. This is called from the constructor.
-    void reset_symbols_and_nodes(enum parse_token_type_t goal);
-
-    /// Once parsing is complete, determine the ranges of intermediate nodes.
-    void determine_node_ranges();
-
-    /// Acquire output after parsing. This transfers directly from within self.
-    void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
-};
-
-#if 0
-void parse_ll_t::dump_stack(void) const {
-    // Walk backwards from the top, looking for parents.
-    wcstring_list_t stack_lines;
-    if (symbol_stack.empty()) {
-        stack_lines.push_back(L"(empty)");
-    } else {
-        node_offset_t child = symbol_stack.back().node_idx;
-        node_offset_t cursor = child;
-        stack_lines.push_back(nodes.at(cursor).describe());
-        while (cursor--) {
-            const parse_node_t &node = nodes.at(cursor);
-            if (node.child_start <= child && node.child_start + node.child_count > child) {
-                stack_lines.push_back(node.describe());
-                child = cursor;
-            }
-        }
-    }
-
-    std::fwprintf(stderr, L"Stack dump (%zu elements):\n", symbol_stack.size());
-    for (size_t idx = 0; idx < stack_lines.size(); idx++) {
-        std::fwprintf(stderr, L"    %ls\n", stack_lines.at(idx).c_str());
-    }
-}
-#endif
-
-// Give each node a source range equal to the union of the ranges of its children. Terminal nodes
-// already have source ranges (and no children). Since children always appear after their parents,
-// we can implement this very simply by walking backwards. We then do a second pass to give empty
-// nodes an empty source range (but with a valid offset). We do this by walking forward. If a child
-// of a node has an invalid source range, we set it equal to the end of the source range of its
-// previous child.
-void parse_ll_t::determine_node_ranges() {
-    size_t idx = nodes.size();
-    while (idx--) {
-        parse_node_t *parent = &nodes[idx];
-
-        // Skip nodes that already have a source range. These are terminal nodes.
-        if (parent->source_start != SOURCE_OFFSET_INVALID) continue;
-
-        // Ok, this node needs a source range. Get all of its children, and then set its range.
-        source_offset_t min_start = SOURCE_OFFSET_INVALID,
-                        max_end = 0;  // note SOURCE_OFFSET_INVALID is huge
-        for (node_offset_t i = 0; i < parent->child_count; i++) {
-            const parse_node_t &child = nodes.at(parent->child_offset(i));
-            if (child.has_source()) {
-                min_start = std::min(min_start, child.source_start);
-                max_end = std::max(max_end, child.source_start + child.source_length);
-            }
-        }
-
-        if (min_start != SOURCE_OFFSET_INVALID) {
-            assert(max_end >= min_start);
-            parent->source_start = min_start;
-            parent->source_length = max_end - min_start;
-        }
-    }
-
-    // Forward pass.
-    size_t size = nodes.size();
-    for (idx = 0; idx < size; idx++) {
-        // Since we populate the source range based on the sibling node, it's simpler to walk over
-        // the children of each node. We keep a running "child_source_cursor" which is meant to be
-        // the end of the child's source range. It's initially set to the beginning of the parent'
-        // source range.
-        parse_node_t *parent = &nodes[idx];
-        // If the parent doesn't have a valid source range, then none of its children will either;
-        // skip it entirely.
-        if (parent->source_start == SOURCE_OFFSET_INVALID) {
-            continue;
-        }
-        source_offset_t child_source_cursor = parent->source_start;
-        for (size_t child_idx = 0; child_idx < parent->child_count; child_idx++) {
-            parse_node_t *child = &nodes[parent->child_start + child_idx];
-            if (child->source_start == SOURCE_OFFSET_INVALID) {
-                child->source_start = child_source_cursor;
-            }
-            child_source_cursor = child->source_start + child->source_length;
-        }
-    }
-}
-
-void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors) {
-    if (output != nullptr) {
-        *output = std::move(this->nodes);
-    }
-    if (errors != nullptr) {
-        *errors = std::move(this->errors);
-    }
-}
-
-void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *fmt,
-                             ...) {
-    this->fatal_errored = true;
-    if (this->should_generate_error_messages) {
-        // this->dump_stack();
-        parse_error_t err;
-
-        va_list va;
-        va_start(va, fmt);
-        err.text = vformat_string(fmt, va);
-        err.code = code;
-        va_end(va);
-
-        err.source_start = token.source_start;
-        err.source_length = token.source_length;
-        this->errors.push_back(err);
-    }
-}
-
-void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_length,
-                                         size_t error_location, parse_error_code_t code,
-                                         const wchar_t *fmt, ...) {
-    (void)error_location;
-    this->fatal_errored = true;
-    if (this->should_generate_error_messages) {
-        // this->dump_stack();
-        parse_error_t err;
-
-        va_list va;
-        va_start(va, fmt);
-        err.text = vformat_string(fmt, va);
-        err.code = code;
-        va_end(va);
-
-        err.source_start = source_start;
-        err.source_length = source_length;
-        this->errors.push_back(std::move(err));
-    }
-}
-
-// Unbalancing token. This includes 'else' or 'case' or 'end' outside of the appropriate block
-// This essentially duplicates some logic from resolving the production for symbol_statement_list -
-// yuck.
-void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) {
-    this->fatal_errored = true;
-    if (this->should_generate_error_messages) {
-        switch (token.keyword) {
-            case parse_keyword_t::kw_end: {
-                this->parse_error(token, parse_error_unbalancing_end, L"'end' outside of a block");
-                break;
-            }
-            case parse_keyword_t::kw_else: {
-                this->parse_error(token, parse_error_unbalancing_else,
-                                  L"'else' builtin not inside of if block");
-                break;
-            }
-            case parse_keyword_t::kw_case: {
-                this->parse_error(token, parse_error_unbalancing_case,
-                                  L"'case' builtin not inside of switch block");
-                break;
-            }
-            default: {
-                // At the moment, this case should only be hit if you parse a
-                // freestanding_argument_list. For example, 'complete -c foo -a 'one & three'.
-                // Hackish error message for that case.
-                if (!symbol_stack.empty() &&
-                    symbol_stack.back().type == symbol_freestanding_argument_list) {
-                    this->parse_error(
-                        token, parse_error_generic, L"Expected %ls, but found %ls",
-                        token_type_user_presentable_description(symbol_argument).c_str(),
-                        token.user_presentable_description().c_str());
-                } else {
-                    this->parse_error(token, parse_error_generic, L"Did not expect %ls",
-                                      token.user_presentable_description().c_str());
-                }
-                break;
-            }
-        }
-    }
-}
-
-/// This is a 'generic' parse error when we can't match the top of the stack element.
-void parse_ll_t::parse_error_failed_production(const struct parse_stack_element_t &stack_elem,
-                                               parse_token_t token) {
-    fatal_errored = true;
-    if (this->should_generate_error_messages) {
-        const wcstring expected = stack_elem.user_presentable_description();
-        this->parse_error_unexpected_token(expected.c_str(), token);
-    }
-}
-
-void parse_ll_t::report_tokenizer_error(const tok_t &tok) {
-    parse_error_code_t parse_error_code = parse_error_from_tokenizer_error(tok.error);
-    this->parse_error_at_location(tok.offset, tok.length,
-                                  tok.offset + tok.error_offset_within_token, parse_error_code,
-                                  L"%ls", tokenizer_get_error_message(tok.error));
-}
-
-void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
-    fatal_errored = true;
-    if (this->should_generate_error_messages) {
-        this->parse_error(token, parse_error_generic, L"Expected %ls, but instead found %ls",
-                          expected, token.user_presentable_description().c_str());
-    }
-}
-
-void parse_ll_t::reset_symbols(enum parse_token_type_t goal) {
-    // Add a new goal node, and then reset our symbol list to point at it.
-    auto where = static_cast<node_offset_t>(nodes.size());
-    nodes.push_back(parse_node_t(goal));
-
-    symbol_stack.clear();
-    symbol_stack.emplace_back(goal, where);  // goal token
-    this->fatal_errored = false;
-}
-
-/// Reset both symbols and nodes.
-void parse_ll_t::reset_symbols_and_nodes(enum parse_token_type_t goal) {
-    nodes.clear();
-    this->reset_symbols(goal);
-}
-
-static bool type_is_terminal_type(parse_token_type_t type) {
-    switch (type) {
-        case parse_token_type_string:
-        case parse_token_type_pipe:
-        case parse_token_type_redirection:
-        case parse_token_type_background:
-        case parse_token_type_end:
-        case parse_token_type_andand:
-        case parse_token_type_oror:
-        case parse_token_type_terminate: {
-            return true;
-        }
-        default: {
-            return false;
-        }
-    }
-}
-
-bool parse_ll_t::report_error_for_unclosed_block() {
-    bool reported_error = false;
-    // Unclosed block, for example, 'while true ; '. We want to show the block node that opened it.
-    const parse_node_t &top_node = this->node_for_top_symbol();
-
-    // Hacktastic. We want to point at the source location of the block, but our block doesn't have
-    // a source range yet - only the terminal tokens do. So get the block statement corresponding to
-    // this end command. In general this block may be of a variety of types: if_statement,
-    // switch_statement, etc., each with different node structures. But keep descending the first
-    // child and eventually you hit a keyword: begin, if, etc. That's the keyword we care about.
-    const parse_node_t *end_command = this->nodes.get_parent(top_node, symbol_end_command);
-    const parse_node_t *block_node = end_command ? this->nodes.get_parent(*end_command) : nullptr;
-
-    if (block_node && block_node->type == symbol_block_statement) {
-        // Get the header.
-        block_node = this->nodes.get_child(*block_node, 0, symbol_block_header);
-        block_node = this->nodes.get_child(*block_node, 0);  // specific statement
-    }
-    if (block_node == nullptr) {
-        return reported_error;
-    }
-
-    // block_node is now an if_statement, switch_statement, for_header, while_header,
-    // function_header, or begin_header.
-    //
-    // Hackish: descend down the first node until we reach the bottom. This will be a keyword
-    // node like SWITCH, which will have the source range. Ordinarily the source range would be
-    // known by the parent node too, but we haven't completed parsing yet, so we haven't yet
-    // propagated source ranges.
-    const parse_node_t *cursor = block_node;
-    while (cursor->child_count > 0) {
-        cursor = this->nodes.get_child(*cursor, 0);
-        assert(cursor != nullptr);
-    }
-    if (cursor->source_start != NODE_OFFSET_INVALID) {
-        const wcstring node_desc = block_type_user_presentable_description(block_node->type);
-        this->parse_error_at_location(cursor->source_start, 0, cursor->source_start,
-                                      parse_error_generic, L"Missing end to balance this %ls",
-                                      node_desc.c_str());
-        reported_error = true;
-    }
-    return reported_error;
-}
-
-bool parse_ll_t::top_node_handle_terminal_types(const parse_token_t &token) {
-    PARSE_ASSERT(!symbol_stack.empty());  //!OCLINT(multiple unary operator)
-    PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
-    const auto &stack_top = symbol_stack.back();
-
-    if (!type_is_terminal_type(stack_top.type)) {
-        return false;  // was not handled
-    }
-
-    // The top of the stack is terminal. We are going to handle this (because we can't produce
-    // from a terminal type).
-
-    // Now see if we actually matched
-    bool matched = false;
-    if (stack_top.type == token.type) {
-        if (stack_top.type == parse_token_type_string) {
-            // We matched if the keywords match, or no keyword was required.
-            matched =
-                (stack_top.keyword == parse_keyword_t::none || stack_top.keyword == token.keyword);
-        } else {
-            // For other types, we only require that the types match.
-            matched = true;
-        }
-    }
-
-    if (matched) {
-        // Success. Tell the node that it matched this token, and what its source range is in
-        // the parse phase, we only set source ranges for terminal types. We propagate ranges to
-        // parent nodes afterwards.
-        parse_node_t &node = node_for_top_symbol();
-        node.keyword = token.keyword;
-        node.source_start = token.source_start;
-        node.source_length = token.source_length;
-        if (token.preceding_escaped_nl) node.flags |= parse_node_flag_preceding_escaped_nl;
-    } else {
-        // Failure
-        if (stack_top.type == parse_token_type_string && token.type == parse_token_type_string) {
-            // Keyword failure. We should unify this with the 'matched' computation above.
-            assert(stack_top.keyword != parse_keyword_t::none &&
-                   stack_top.keyword != token.keyword);
-
-            // Check to see which keyword we got which was considered wrong.
-            switch (token.keyword) {
-                // Some keywords are only valid in certain contexts. If this cascaded all the
-                // way down through the outermost job_list, it was not in a valid context.
-                case parse_keyword_t::kw_case:
-                case parse_keyword_t::kw_end:
-                case parse_keyword_t::kw_else: {
-                    this->parse_error_unbalancing_token(token);
-                    break;
-                }
-                case parse_keyword_t::none: {
-                    // This is a random other string (not a keyword).
-                    const wcstring expected = keyword_description(stack_top.keyword);
-                    this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'",
-                                      expected.c_str());
-                    break;
-                }
-                default: {
-                    // Got a real keyword we can report.
-                    const wcstring actual = (token.keyword == parse_keyword_t::none
-                                                 ? token.describe()
-                                                 : keyword_description(token.keyword));
-                    const wcstring expected = keyword_description(stack_top.keyword);
-                    this->parse_error(token, parse_error_generic,
-                                      L"Expected keyword '%ls', instead got keyword '%ls'",
-                                      expected.c_str(), actual.c_str());
-                    break;
-                }
-            }
-        } else if (stack_top.keyword == parse_keyword_t::kw_end &&
-                   token.type == parse_token_type_terminate &&
-                   this->report_error_for_unclosed_block()) {
-            // handled by report_error_for_unclosed_block
-        } else {
-            const wcstring expected = stack_top.user_presentable_description();
-            this->parse_error_unexpected_token(expected.c_str(), token);
-        }
-    }
-
-    // We handled the token, so pop the symbol stack.
-    symbol_stack.pop_back();
-    return true;
-}
-
-void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) {
-    PARSE_ASSERT(token1.type >= FIRST_PARSE_TOKEN_TYPE);
-
-    // Handle special types specially. Note that these are the only types that can be pushed if the
-    // symbol stack is empty.
-    if (token1.type == parse_special_type_parse_error ||
-        token1.type == parse_special_type_tokenizer_error ||
-        token1.type == parse_special_type_comment) {
-        // We set the special node's parent to the top of the stack. This means that we have an
-        // asymmetric relationship: the special node has a parent (which is the node we were trying
-        // to generate when we encountered the special node), but the parent node does not have the
-        // special node as a child. This means for example that parents don't have to worry about
-        // tracking any comment nodes, but we can still recover the parent from the comment.
-        parse_node_t special_node(token1.type);
-        special_node.parent = symbol_stack.back().node_idx;
-        special_node.source_start = token1.source_start;
-        special_node.source_length = token1.source_length;
-        if (token1.preceding_escaped_nl) special_node.flags |= parse_node_flag_preceding_escaped_nl;
-        nodes.push_back(special_node);
-
-        // Mark special flags.
-        if (token1.type == parse_special_type_comment) {
-            this->node_for_top_symbol().flags |= parse_node_flag_has_comments;
-        }
-
-        // Tokenizer errors are fatal.
-        if (token1.type == parse_special_type_tokenizer_error) this->fatal_errored = true;
-        return;
-    }
-
-    // It's not a special type.
-    while (!this->fatal_errored) {
-        PARSE_ASSERT(!symbol_stack.empty());  //!OCLINT(multiple unary operator)
-
-        if (top_node_handle_terminal_types(token1)) {
-            break;
-        }
-
-        // top_node_match_token may indicate an error if our stack is empty.
-        if (this->fatal_errored) break;
-
-        // Get the production for the top of the stack.
-        parse_stack_element_t &stack_elem = symbol_stack.back();
-        parse_node_t &node = nodes.at(stack_elem.node_idx);
-        parse_node_tag_t tag = 0;
-        const production_element_t *production =
-            production_for_token(stack_elem.type, token1, token2, &tag);
-        node.tag = tag;
-        if (production == nullptr) {
-            tnode_t<grammar::variable_assignments> variable_assignments;
-            if (const parse_node_t *parent = nodes.get_parent(node)) {
-                if (parent->type == symbol_statement &&
-                    (token1.keyword == parse_keyword_t::kw_and ||
-                     token1.keyword == parse_keyword_t::kw_or)) {
-                    if (const parse_node_t *grandparent = nodes.get_parent(*parent)) {
-                        if (grandparent->type == symbol_job_continuation) {
-                            parse_error(token1, parse_error_andor_in_pipeline, L" "
-                                /* won't be printed but must be non-empty, see
-                                    describe_with_prefix TODO clean that up */);
-                            continue;
-                        }
-                    }
-                }
-                switch (parent->type) {
-                    default:
-                        break;
-                    case symbol_job:
-                        variable_assignments =
-                            tnode_t<grammar::job>(&nodes, parent)
-                                .try_get_child<grammar::variable_assignments, 1>();
-                        break;
-                    case symbol_job_continuation:
-                        variable_assignments =
-                            tnode_t<grammar::job_continuation>(&nodes, parent)
-                                .try_get_child<grammar::variable_assignments, 2>();
-                        break;
-                    case symbol_not_statement:
-                        variable_assignments =
-                            tnode_t<grammar::not_statement>(&nodes, parent)
-                                .try_get_child<grammar::variable_assignments, 1>();
-                        break;
-                }
-            }
-            tnode_t<grammar::variable_assignment> variable_assignment;
-            tnode_t<grammar::tok_string> assignment_tok;
-            if (variable_assignments &&
-                (variable_assignment =
-                     variable_assignments.try_get_child<grammar::variable_assignment, 0>()) &&
-                (assignment_tok = variable_assignment.try_get_child<grammar::tok_string, 0>())) {
-                parse_token_t token(parse_token_type_string);
-                token.source_start = assignment_tok.source_range()->start;
-                token.source_length = assignment_tok.source_range()->length;
-                parse_error(token, parse_error_bare_variable_assignment,
-                            L" " /* won't be printed but must be non-empty, see
-                                    describe_with_prefix */
-                );
-            } else {
-                parse_error_failed_production(stack_elem, token1);
-            }
-            // The above set fatal_errored, which ends the loop.
-        } else {
-            bool is_terminate = (token1.type == parse_token_type_terminate);
-
-            // When a job_list encounters something like 'else', it returns an empty production to
-            // return control to the outer block. But if it's unbalanced, then we'll end up with an
-            // empty stack! So make sure that doesn't happen. This is the primary mechanism by which
-            // we detect e.g. unbalanced end. However, if we get a true terminate token, then we
-            // allow (expect) this to empty the stack.
-            if (symbol_stack.size() == 1 && production_is_empty(production) && !is_terminate) {
-                this->parse_error_unbalancing_token(token1);
-                break;
-            }
-
-            // Manipulate the symbol stack. Note that stack_elem is invalidated by popping the
-            // stack.
-            symbol_stack_pop_push_production(production);
-
-            // Expect to not have an empty stack, unless this was the terminate type. Note we may
-            // not have an empty stack with the terminate type (i.e. incomplete input).
-            assert(is_terminate || !symbol_stack.empty());
-
-            if (symbol_stack.empty()) {
-                break;
-            }
-        }
-    }
-}
-
 // Given an expanded string, returns any keyword it matches.
 static inline parse_keyword_t keyword_with_name(const wchar_t *name) {
     return str_to_enum(name, keyword_enum_map, keyword_enum_map_len);
@@ -1066,9 +281,6 @@ static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token
     return result;
 }
 
-/// Placeholder invalid token.
-static constexpr parse_token_t kInvalidToken{token_type_invalid};
-
 /// Terminal token.
 static constexpr parse_token_t kTerminalToken = {parse_token_type_terminate};
 
@@ -1111,109 +323,6 @@ parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcst
     return result;
 }
 
-bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
-                            parse_node_tree_t *output, parse_error_list_t *errors,
-                            parse_token_type_t goal) {
-    parse_ll_t parser(goal);
-    parser.set_should_generate_error_messages(errors != nullptr);
-
-    // A string whose storage we reuse.
-    wcstring storage;
-
-    // Construct the tokenizer.
-    tok_flags_t tok_options = 0;
-    if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS;
-    if (parse_flags & parse_flag_accept_incomplete_tokens) tok_options |= TOK_ACCEPT_UNFINISHED;
-    if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES;
-    if (parse_flags & parse_flag_continue_after_error) tok_options |= TOK_CONTINUE_AFTER_ERROR;
-
-    tokenizer_t tok(str.c_str(), tok_options);
-
-    // We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our
-    // queue with an initial token at index 1.
-    parse_token_t queue[2] = {kInvalidToken, kInvalidToken};
-
-    // Loop until we have a terminal token.
-    maybe_t<tok_t> tokenizer_token{};
-    for (size_t token_count = 0; queue[0].type != parse_token_type_terminate; token_count++) {
-        // Push a new token onto the queue.
-        queue[0] = queue[1];
-        queue[1] = next_parse_token(&tok, &tokenizer_token, &storage);
-
-        // If we are leaving things unterminated, then don't pass parse_token_type_terminate.
-        if (queue[0].type == parse_token_type_terminate &&
-            (parse_flags & parse_flag_leave_unterminated)) {
-            break;
-        }
-
-        // Pass these two tokens, unless we're still loading the queue. We know that queue[0] is
-        // valid; queue[1] may be invalid.
-        if (token_count > 0) {
-            parser.accept_tokens(queue[0], queue[1]);
-        }
-
-        // Handle tokenizer errors. This is a hack because really the parser should report this for
-        // itself; but it has no way of getting the tokenizer message.
-        if (queue[1].type == parse_special_type_tokenizer_error) {
-            parser.report_tokenizer_error(*tokenizer_token);
-        }
-
-        if (!parser.has_fatal_error()) {
-            continue;
-        }
-
-        // Handle errors.
-        if (!(parse_flags & parse_flag_continue_after_error)) {
-            break;  // bail out
-        }
-        // Hack. Typically the parse error is due to the first token. However, if it's a
-        // tokenizer error, then has_fatal_error was set due to the check above; in that
-        // case the second token is what matters.
-        size_t error_token_idx = 0;
-        if (queue[1].type == parse_special_type_tokenizer_error) {
-            error_token_idx = (queue[1].type == parse_special_type_tokenizer_error ? 1 : 0);
-            token_count = -1;  // so that it will be 0 after incrementing, and our tokenizer
-                               // error will be ignored
-        }
-
-        // Mark a special error token, and then keep going.
-        parse_token_t token = {parse_special_type_parse_error};
-        token.source_start = queue[error_token_idx].source_start;
-        token.source_length = queue[error_token_idx].source_length;
-        parser.accept_tokens(token, kInvalidToken);
-        parser.reset_symbols(goal);
-    }
-
-    // Teach each node where its source range is.
-    parser.determine_node_ranges();
-
-    // Acquire the output from the parser.
-    parser.acquire_output(output, errors);
-
-    // Indicate if we had a fatal error.
-    return !parser.has_fatal_error();
-}
-
-const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which,
-                                                 parse_token_type_t expected_type) const {
-    const parse_node_t *result = nullptr;
-
-    // We may get nodes with no children if we had an incomplete parse. Don't consider than an
-    // error.
-    if (parent.child_count > 0) {
-        PARSE_ASSERT(which < parent.child_count);
-        node_offset_t child_offset = parent.child_offset(which);
-        if (child_offset < this->size()) {
-            result = &this->at(child_offset);
-
-            // If we are given an expected type, then the node must be null or that type.
-            assert(expected_type == token_type_invalid || expected_type == result->type);
-        }
-    }
-
-    return result;
-}
-
 parsed_source_t::parsed_source_t(wcstring s, ast::ast_t &&ast)
     : src(std::move(s)), ast(make_unique<ast::ast_t>(std::move(ast))) {}
 
@@ -1228,66 +337,3 @@ parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
     }
     return std::make_shared<parsed_source_t>(std::move(src), std::move(ast));
 }
-
-const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent,
-                                                  parse_token_type_t type) const {
-    for (node_offset_t i = 0; i < parent.child_count; i++) {
-        const parse_node_t *child = this->get_child(parent, i);
-        if (child->type == type) {
-            return *child;
-        }
-    }
-    DIE("failed to find child node");
-}
-
-const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node,
-                                                  parse_token_type_t expected_type) const {
-    const parse_node_t *result = nullptr;
-    if (node.parent != NODE_OFFSET_INVALID) {
-        PARSE_ASSERT(node.parent < this->size());
-        const parse_node_t &parent = this->at(node.parent);
-        if (expected_type == token_type_invalid || expected_type == parent.type) {
-            // The type matches (or no type was requested).
-            result = &parent;
-        }
-    }
-    return result;
-}
-
-/// Return true if the given node has the proposed ancestor as an ancestor (or is itself that
-/// ancestor).
-static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t &node,
-                              const parse_node_t &proposed_ancestor) {
-    if (&node == &proposed_ancestor) {
-        return true;  // found it
-    } else if (node.parent == NODE_OFFSET_INVALID) {
-        return false;  // no more parents
-    }
-
-    // Recurse to the parent.
-    return node_has_ancestor(tree, tree.at(node.parent), proposed_ancestor);
-}
-
-const parse_node_t *parse_node_tree_t::find_node_matching_source_location(
-    parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const {
-    const parse_node_t *result = nullptr;
-    // Find nodes of the given type in the tree, working backwards.
-    const size_t len = this->size();
-    for (size_t idx = 0; idx < len && result == nullptr; idx++) {
-        const parse_node_t &node = this->at(idx);
-
-        // Types must match.
-        if (node.type != type) continue;
-
-        // Must contain source location.
-        if (!node.location_in_or_at_end_of_source_range(source_loc)) continue;
-
-        // If a parent is given, it must be an ancestor.
-        if (parent != nullptr && !node_has_ancestor(*this, node, *parent)) continue;
-
-        // Found it.
-        result = &node;
-    }
-
-    return result;
-}
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 5e10d9e1a..6df420a29 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -13,15 +13,8 @@
 #include "common.h"
 #include "maybe.h"
 #include "parse_constants.h"
-#include "parse_grammar.h"
 #include "tokenizer.h"
 
-class parse_node_tree_t;
-
-typedef uint32_t node_offset_t;
-
-#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
-
 typedef uint32_t source_offset_t;
 
 constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
@@ -61,8 +54,6 @@ struct parse_token_t {
 /// Return a new parse token, advancing the tokenizer.
 parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
 
-wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
-
 const wchar_t *token_type_description(parse_token_type_t type);
 const wchar_t *keyword_description(parse_keyword_t type);
 
@@ -83,129 +74,6 @@ typedef uint8_t parse_node_flags_t;
 /// Node-type specific tag value.
 typedef uint8_t parse_node_tag_t;
 
-/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
-/// is important.
-class parse_node_t {
-   public:
-    // Start in the source code.
-    source_offset_t source_start{SOURCE_OFFSET_INVALID};
-    // Length of our range in the source code.
-    source_offset_t source_length{0};
-    // Parent
-    node_offset_t parent{NODE_OFFSET_INVALID};
-    // Children
-    node_offset_t child_start{0};
-    // Number of children.
-    uint8_t child_count{0};
-    // Type of the node.
-    enum parse_token_type_t type;
-    // Keyword associated with node.
-    enum parse_keyword_t keyword { parse_keyword_t::none };
-    // Node flags.
-    parse_node_flags_t flags : 4;
-    // This is used to store e.g. the statement decoration.
-    parse_node_tag_t tag : 4;
-    // Description
-    wcstring describe() const;
-
-    // Constructor
-    explicit parse_node_t(parse_token_type_t ty) : type(ty), flags(0), tag(0) {}
-
-    node_offset_t child_offset(node_offset_t which) const {
-        PARSE_ASSERT(which < child_count);
-        return child_start + which;
-    }
-
-    /// Indicate if this node has a range of source code associated with it.
-    bool has_source() const {
-        // Should never have a nonempty range with an invalid offset.
-        assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
-        return this->source_length > 0;
-    }
-
-    /// Indicate if the node has comment nodes.
-    bool has_comments() const { return this->flags & parse_node_flag_has_comments; }
-
-    /// Indicates if we have a preceding escaped newline.
-    bool has_preceding_escaped_newline() const {
-        return this->flags & parse_node_flag_preceding_escaped_nl;
-    }
-
-    source_range_t source_range() const {
-        assert(has_source());
-        return {source_start, source_length};
-    }
-
-    /// Gets source for the node, or the empty string if it has no source.
-    wcstring get_source(const wcstring &str) const {
-        if (!has_source())
-            return wcstring();
-        else
-            return wcstring(str, this->source_start, this->source_length);
-    }
-
-    /// Returns whether the given location is within the source range or at its end.
-    bool location_in_or_at_end_of_source_range(size_t loc) const {
-        return has_source() && source_start <= loc && loc - source_start <= source_length;
-    }
-};
-
-template <typename Type>
-class tnode_t;
-
-/// The parse tree itself.
-class parse_node_tree_t : public std::vector<parse_node_t> {
-   public:
-    parse_node_tree_t() {}
-    parse_node_tree_t(parse_node_tree_t &&) = default;
-    parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
-    parse_node_tree_t(const parse_node_tree_t &) = delete;             // no copying
-    parse_node_tree_t &operator=(const parse_node_tree_t &) = delete;  // no copying
-
-    // Get the node corresponding to a child of the given node, or NULL if there is no such child.
-    // If expected_type is provided, assert that the node has that type.
-    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
-                                  parse_token_type_t expected_type = token_type_invalid) const;
-
-    // Find the first direct child of the given node of the given type. asserts on failure.
-    const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;
-
-    template <typename Type>
-    tnode_t<Type> find_child(const parse_node_t &parent) const;
-
-    // Get the node corresponding to the parent of the given node, or NULL if there is no such
-    // child. If expected_type is provided, only returns the parent if it is of that type. Note the
-    // asymmetry: get_child asserts since the children are known, but get_parent does not, since the
-    // parent may not be known.
-    const parse_node_t *get_parent(const parse_node_t &node,
-                                   parse_token_type_t expected_type = token_type_invalid) const;
-
-    // Finds a node containing the given source location. If 'parent' is not NULL, it must be an
-    // ancestor.
-    const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
-                                                           size_t source_loc,
-                                                           const parse_node_t *parent) const;
-    // Utilities
-
-    /// Given a node, return all of its comment nodes.
-    std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &parent) const;
-
-   private:
-    template <typename Type>
-    friend class tnode_t;
-    /// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
-    /// the next element of the given type in that list, and the tail (by reference). Returns NULL
-    /// if we've exhausted the list.
-    const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
-                                               parse_token_type_t entry_type,
-                                               const parse_node_t **list_tail) const;
-};
-
-/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
-bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
-                            parse_node_tree_t *output, parse_error_list_t *errors,
-                            parse_token_type_t goal = symbol_job_list);
-
 namespace ast {
 class ast_t;
 }
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index 4ee438946..7d55f1263 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -23,7 +23,6 @@
 #include "parse_constants.h"
 #include "parse_util.h"
 #include "parser.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "wcstringutil.h"
 #include "wildcard.h"
@@ -1203,7 +1202,6 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                   parse_error_list_t *out_errors,
                                                   bool allow_incomplete,
                                                   parsed_source_ref_t *out_pstree) {
-    namespace g = grammar;
     parse_error_list_t parse_errors;
 
     parser_test_error_bits_t res = 0;
diff --git a/src/parse_util.h b/src/parse_util.h
index fd348ab9b..ba3b353e4 100644
--- a/src/parse_util.h
+++ b/src/parse_util.h
@@ -131,7 +131,6 @@ std::vector<int> parse_util_compute_indents(const wcstring &src);
 /// incomplete (e.g. an unclosed quote), an error is not returned and the PARSER_TEST_INCOMPLETE bit
 /// is set in the return value. If allow_incomplete is not set, then incomplete strings result in an
 /// error. If out_pstree is not NULL, the resulting tree is returned by reference.
-class parse_node_tree_t;
 parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                   parse_error_list_t *out_errors = nullptr,
                                                   bool allow_incomplete = true,
diff --git a/src/parser.cpp b/src/parser.cpp
index 12f670548..caa627680 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -26,7 +26,6 @@
 #include "proc.h"
 #include "reader.h"
 #include "sanity.h"
-#include "tnode.h"
 #include "wutil.h"  // IWYU pragma: keep
 
 class io_chain_t;
diff --git a/src/proc.h b/src/proc.h
index c17dc216a..a702547a1 100644
--- a/src/proc.h
+++ b/src/proc.h
@@ -21,7 +21,6 @@
 #include "global_safety.h"
 #include "io.h"
 #include "parse_tree.h"
-#include "tnode.h"
 #include "topic_monitor.h"
 
 /// Types of processes.
diff --git a/src/reader.cpp b/src/reader.cpp
index 063e3b379..d9ef77452 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -75,7 +75,6 @@
 #include "screen.h"
 #include "signal.h"
 #include "termsize.h"
-#include "tnode.h"
 #include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
 
diff --git a/src/tnode.cpp b/src/tnode.cpp
deleted file mode 100644
index ed64dcda5..000000000
--- a/src/tnode.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-#include "tnode.h"
-
-const parse_node_t *parse_node_tree_t::next_node_in_node_list(
-    const parse_node_t &node_list, parse_token_type_t entry_type,
-    const parse_node_t **out_list_tail) const {
-    parse_token_type_t list_type = node_list.type;
-
-    // Paranoia - it doesn't make sense for a list type to contain itself.
-    assert(list_type != entry_type);
-
-    const parse_node_t *list_cursor = &node_list;
-    const parse_node_t *list_entry = nullptr;
-
-    // Loop while we don't have an item but do have a list. Note that some nodes may contain
-    // nothing; e.g. job_list contains blank lines as a production.
-    while (list_entry == nullptr && list_cursor != nullptr) {
-        const parse_node_t *next_cursor = nullptr;
-
-        // Walk through the children.
-        for (node_offset_t i = 0; i < list_cursor->child_count; i++) {
-            const parse_node_t *child = this->get_child(*list_cursor, i);
-            if (child->type == entry_type) {
-                // This is the list entry.
-                list_entry = child;
-            } else if (child->type == list_type) {
-                // This is the next in the list.
-                next_cursor = child;
-            }
-        }
-        // Go to the next entry, even if it's NULL.
-        list_cursor = next_cursor;
-    }
-
-    // Return what we got.
-    assert(list_cursor == nullptr || list_cursor->type == list_type);
-    assert(list_entry == nullptr || list_entry->type == entry_type);
-    if (out_list_tail != nullptr) *out_list_tail = list_cursor;
-    return list_entry;
-}
-
-enum parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt) {
-    parse_statement_decoration_t decoration = parse_statement_decoration_none;
-    if (auto decorated_statement = stmt.try_get_parent<grammar::decorated_statement>()) {
-        decoration = static_cast<parse_statement_decoration_t>(decorated_statement.tag());
-    }
-    return decoration;
-}
-
-enum parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt) {
-    return static_cast<parse_job_decoration_t>(stmt.tag());
-}
-
-enum parse_job_decoration_t bool_statement_type(
-    tnode_t<grammar::job_conjunction_continuation> cont) {
-    return static_cast<parse_job_decoration_t>(cont.tag());
-}
-
-maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
-                                              const wcstring &src, wcstring *out_target) {
-    assert(redirection && "redirection is missing");
-    tnode_t<grammar::tok_redirection> prim = redirection.child<0>();  // like 2>
-    assert(prim && "expected to have primitive");
-
-    maybe_t<pipe_or_redir_t> result{};
-    if (prim.has_source()) {
-        result = pipe_or_redir_t::from_string(prim.get_source(src));
-        assert(result.has_value() && "Failed to parse valid redirection");
-        assert(!result->is_pipe && "Should not be a pipe");
-    }
-    if (out_target != nullptr) {
-        tnode_t<grammar::tok_string> target = redirection.child<1>();  // like 1 or file path
-        *out_target = target.has_source() ? target.get_source(src) : wcstring();
-    }
-    return result;
-}
-
-std::vector<tnode_t<grammar::comment>> parse_node_tree_t::comment_nodes_for_node(
-    const parse_node_t &parent) const {
-    std::vector<tnode_t<grammar::comment>> result;
-    if (parent.has_comments()) {
-        // Walk all our nodes, looking for comment nodes that have the given node as a parent.
-        for (size_t i = 0; i < this->size(); i++) {
-            const parse_node_t &potential_comment = this->at(i);
-            if (potential_comment.type == parse_special_type_comment &&
-                this->get_parent(potential_comment) == &parent) {
-                result.emplace_back(this, &potential_comment);
-            }
-        }
-    }
-    return result;
-}
-
-variable_assignment_node_list_t get_variable_assignment_nodes(
-    tnode_t<grammar::variable_assignments> list, size_t max) {
-    return list.descendants<grammar::variable_assignment>(max);
-}
-
-maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
-                                              const wcstring &src) {
-    tnode_t<grammar::tok_string> cmd = stmt.child<0>();
-    if (cmd && cmd.has_source()) {
-        return cmd.get_source(src);
-    }
-    return none();
-}
-
-arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list> list, size_t max) {
-    return list.descendants<grammar::argument>(max);
-}
-
-arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list> list,
-                                         size_t max) {
-    return list.descendants<grammar::argument>(max);
-}
-
-bool job_node_is_background(tnode_t<grammar::job> job) {
-    tnode_t<grammar::optional_background> bg = job.child<4>();
-    return bg.tag() == parse_background;
-}
-
-parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction> conj) {
-    using namespace grammar;
-    tnode_t<job_decorator> dec;
-    // We have two possible parents: job_list and andor_job_list.
-    if (auto p = conj.try_get_parent<job_list>()) {
-        dec = p.require_get_child<job_decorator, 0>();
-    } else if (auto p = conj.try_get_parent<andor_job_list>()) {
-        dec = p.require_get_child<job_decorator, 0>();
-    }
-    // note this returns 0 (none) if dec is empty.
-    return bool_statement_type(dec);
-}
-
-pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st) {
-    using namespace grammar;
-    if (!st) {
-        return pipeline_position_t::none;
-    }
-
-    // If we're part of a job continuation, we're definitely in a pipeline.
-    if (st.try_get_parent<job_continuation>()) {
-        return pipeline_position_t::subsequent;
-    }
-
-    // Check if we're the beginning of a job, and if so, whether that job
-    // has a non-empty continuation.
-    tnode_t<job_continuation> jc = st.try_get_parent<job>().child<3>();
-    if (jc.try_get_child<statement, 3>()) {
-        return pipeline_position_t::first;
-    }
-    return pipeline_position_t::none;
-}
diff --git a/src/tnode.h b/src/tnode.h
deleted file mode 100644
index ef2f59a1f..000000000
--- a/src/tnode.h
+++ /dev/null
@@ -1,278 +0,0 @@
-// Type-safe access to fish parse trees.
-#ifndef FISH_TNODE_H
-#define FISH_TNODE_H
-
-#include "parse_grammar.h"
-#include "parse_tree.h"
-
-// Check if a child type is possible for a parent type at a given index.
-template <typename Parent, typename Child, size_t Index>
-constexpr bool child_type_possible_at_index() {
-    return Parent::template type_possible<Child, Index>();
-}
-
-// Check if a child type is possible for a parent type at any index.
-// The number of cases here should match MAX_PRODUCTION_LENGTH.
-template <typename Parent, typename Child>
-constexpr bool child_type_possible() {
-    return child_type_possible_at_index<Parent, Child, 0>() ||
-           child_type_possible_at_index<Parent, Child, 1>() ||
-           child_type_possible_at_index<Parent, Child, 2>() ||
-           child_type_possible_at_index<Parent, Child, 3>() ||
-           child_type_possible_at_index<Parent, Child, 4>() ||
-           child_type_possible_at_index<Parent, Child, 5>();
-}
-
-/// tnode_t ("typed node") is type-safe access to a parse_tree. A tnode_t holds both a pointer to a
-/// parse_node_tree_t and a pointer to a parse_node_t. (Note that the parse_node_tree_t is unowned;
-/// the caller must ensure that the tnode does not outlive the tree.
-///
-/// tnode_t is a lightweight value-type class. It ought to be passed by value. A tnode_t may also be
-/// "missing", associated with a null parse_node_t pointer. operator bool() may be used to check if
-/// a tnode_t is misisng.
-///
-/// A tnode_t is parametrized by a grammar element, and uses the fish grammar to statically
-/// type-check accesses to children and parents. Any particular tnode either corresponds to a
-/// sequence (a single child) or an alternation (multiple possible children). A sequence may have
-/// its children accessed directly via child(), which is templated on the index  (and returns a
-/// tnode of the proper type). Alternations may be disambiguated via try_get_child(), which returns
-/// an empty child if the child has the wrong type, or require_get_child() which aborts if the child
-/// has the wrong type.
-template <typename Type>
-class tnode_t {
-    /// The tree containing our node.
-    const parse_node_tree_t *tree = nullptr;
-
-    /// The node in the tree
-    const parse_node_t *nodeptr = nullptr;
-
-    // Helper to get a child type at a given index.
-    template <class Element, uint32_t Index>
-    using child_at = typename std::tuple_element<Index, typename Element::type_tuple>::type;
-
-   public:
-    tnode_t() = default;
-
-    tnode_t(const parse_node_tree_t *t, const parse_node_t *n) : tree(t), nodeptr(n) {
-        assert(t && "tree cannot be null in this constructor");
-        assert((!n || n->type == Type::token) && "node has wrong type");
-    }
-
-    // Try to create a tnode from the given tree and parse node.
-    // Returns an empty node if the parse node is null, or has the wrong type.
-    static tnode_t try_create(const parse_node_tree_t *tree, const parse_node_t *node) {
-        assert(tree && "tree cannot be null");
-        return tnode_t(tree, node && node->type == Type::token ? node : nullptr);
-    }
-
-    /// Temporary conversion to parse_node_t to assist in migration.
-    /* implicit */ operator const parse_node_t &() const {
-        assert(nodeptr && "Empty tnode_t");
-        return *nodeptr;
-    }
-
-    /* implicit */ operator const parse_node_t *() const { return nodeptr; }
-
-    /// \return the underlying (type-erased) node.
-    const parse_node_t *node() const { return nodeptr; }
-
-    /// Check whether we're populated.
-    explicit operator bool() const { return nodeptr != nullptr; }
-
-    bool operator==(const tnode_t &rhs) const { return tree == rhs.tree && nodeptr == rhs.nodeptr; }
-
-    bool operator!=(const tnode_t &rhs) const { return !(*this == rhs); }
-
-    // Helper to return whether the given tree is the same as ours.
-    bool matches_node_tree(const parse_node_tree_t &t) const { return &t == tree; }
-
-    const parse_node_tree_t *get_tree() const { return tree; }
-
-    bool has_source() const { return nodeptr && nodeptr->has_source(); }
-
-    // return the tag, or 0 if missing.
-    parse_node_tag_t tag() const { return nodeptr ? nodeptr->tag : 0; }
-
-    // return the number of children, or 0 if missing.
-    uint8_t child_count() const { return nodeptr ? nodeptr->child_count : 0; }
-
-    maybe_t<source_range_t> source_range() const {
-        if (!nodeptr || nodeptr->source_start == NODE_OFFSET_INVALID) return none();
-        return source_range_t{nodeptr->source_start, nodeptr->source_length};
-    }
-
-    wcstring get_source(const wcstring &str) const {
-        if (!nodeptr) {
-            return L"";
-        }
-        return nodeptr->get_source(str);
-    }
-
-    bool location_in_or_at_end_of_source_range(size_t loc) const {
-        return nodeptr && nodeptr->location_in_or_at_end_of_source_range(loc);
-    }
-
-    static tnode_t find_node_matching_source_location(const parse_node_tree_t *tree,
-                                                      size_t source_loc,
-                                                      const parse_node_t *parent) {
-        assert(tree && "null tree");
-        return tnode_t{tree,
-                       tree->find_node_matching_source_location(Type::token, source_loc, parent)};
-    }
-
-    /// Type-safe access to a child at the given index.
-    template <node_offset_t Index>
-    tnode_t<child_at<Type, Index>> child() const {
-        using child_type = child_at<Type, Index>;
-        const parse_node_t *child = nullptr;
-        if (nodeptr) child = tree->get_child(*nodeptr, Index, child_type::token);
-        return tnode_t<child_type>{tree, child};
-    }
-
-    /// Return a parse_node_t for a child.
-    /// This is used to disambiguate alts.
-    template <node_offset_t Index>
-    const parse_node_t &get_child_node() const {
-        assert(nodeptr && "receiver is missing in get_child_node");
-        return *tree->get_child(*nodeptr, Index);
-    }
-
-    /// If the child at the given index has the given type, return it; otherwise return an empty
-    /// child. Note this will refuse to compile if the child type is not possible.
-    /// This is used for e.g. alternations.
-    template <class ChildType, node_offset_t Index>
-    tnode_t<ChildType> try_get_child() const {
-        static_assert(child_type_possible_at_index<Type, ChildType, Index>(),
-                      "Cannot contain a child of this type");
-        const parse_node_t *child = nullptr;
-        if (nodeptr) child = tree->get_child(*nodeptr, Index);
-        if (child && child->type == ChildType::token) return {tree, child};
-        return {tree, nullptr};
-    }
-
-    /// assert that this is not empty and that the child at index Index has the given type, then
-    /// return that child. Note this will refuse to compile if the child type is not possible.
-    template <class ChildType, node_offset_t Index>
-    tnode_t<ChildType> require_get_child() const {
-        assert(nodeptr && "receiver is missing in require_get_child()");
-        auto result = try_get_child<ChildType, Index>();
-        assert(result && "require_get_child(): wrong child type");
-        return result;
-    }
-
-    /// Find the first direct child of the given node of the given type. asserts on failure.
-    template <class ChildType>
-    tnode_t<ChildType> find_child() const {
-        static_assert(child_type_possible<Type, ChildType>(), "Cannot have that type as a child");
-        assert(nodeptr && "receiver is missing in find_child()");
-        tnode_t<ChildType> result{tree, &tree->find_child(*nodeptr, ChildType::token)};
-        assert(result && "cannot find child");
-        return result;
-    }
-
-    /// Type-safe access to a node's parent.
-    /// If the parent exists and has type ParentType, return it.
-    /// Otherwise return a missing tnode.
-    template <class ParentType>
-    tnode_t<ParentType> try_get_parent() const {
-        static_assert(child_type_possible<ParentType, Type>(), "Parent cannot have us as a child");
-        if (!nodeptr) return {};
-        return {tree, tree->get_parent(*nodeptr, ParentType::token)};
-    }
-
-    /// Finds all descendants (up to max_count) under this node of the given type.
-    template <typename DescendantType>
-    std::vector<tnode_t<DescendantType>> descendants(size_t max_count = -1) const {
-        if (!nodeptr) return {};
-        std::vector<tnode_t<DescendantType>> result;
-        std::vector<const parse_node_t *> stack{nodeptr};
-        while (!stack.empty() && result.size() < max_count) {
-            const parse_node_t *node = stack.back();
-            if (node->type == DescendantType::token) result.emplace_back(tree, node);
-            stack.pop_back();
-            node_offset_t index = node->child_count;
-            while (index--) {
-                stack.push_back(tree->get_child(*node, index));
-            }
-        }
-        return result;
-    }
-
-    /// Given that we are a list type, \return the next node of some Item in some node list,
-    /// adjusting 'this' to be the remainder of the list.
-    /// Returns an empty item on failure.
-    template <class ItemType>
-    tnode_t<ItemType> next_in_list() {
-        // We require that we can contain ourselves, and ItemType as well.
-        static_assert(child_type_possible<Type, Type>(), "Is not a list");
-        static_assert(child_type_possible<Type, ItemType>(), "Is not a list of that type");
-        if (!nodeptr) return {tree, nullptr};
-        const parse_node_t *next =
-            tree->next_node_in_node_list(*nodeptr, ItemType::token, &nodeptr);
-        return {tree, next};
-    }
-};
-
-template <typename Type>
-tnode_t<Type> parse_node_tree_t::find_child(const parse_node_t &parent) const {
-    return tnode_t<Type>(this, &this->find_child(parent, Type::token));
-}
-
-/// Return the arguments under an arguments_list or arguments_or_redirection_list
-/// Do not return more than max.
-using variable_assignment_node_list_t = std::vector<tnode_t<grammar::variable_assignment>>;
-variable_assignment_node_list_t get_variable_assignment_nodes(
-    tnode_t<grammar::variable_assignments>, size_t max = -1);
-
-/// Given a plain statement, get the command from the child node. Returns the command string on
-/// success, none on failure.
-maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
-                                              const wcstring &src);
-
-/// Return the decoration for a plain statement.
-parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt);
-
-/// Return the type for a boolean statement.
-parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt);
-
-parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_conjunction_continuation> cont);
-
-/// Given a redirection node, get the parsed redirection and target of the redirection (file path,
-/// or fd).
-maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
-                                              const wcstring &src, wcstring *out_target);
-
-/// Return the arguments under an arguments_list or arguments_or_redirection_list
-/// Do not return more than max.
-using arguments_node_list_t = std::vector<tnode_t<grammar::argument>>;
-arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list>, size_t max = -1);
-arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list>,
-                                         size_t max = -1);
-
-/// Return whether the given job is background because it has a & symbol.
-bool job_node_is_background(tnode_t<grammar::job>);
-
-/// If the conjunction is has a decorator (and/or), return it; otherwise return none. This only
-/// considers the leading conjunction, e.g. in `and true || false` only the 'true' conjunction will
-/// return 'and'.
-parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction>);
-
-/// Return whether the statement is part of a pipeline.
-/// This doesn't detect e.g. pipelines involving our parent's block statements.
-enum class pipeline_position_t {
-    none,       // not part of a pipeline
-    first,      // first command in a pipeline
-    subsequent  // second or further command in a pipeline
-};
-pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st);
-
-/// Check whether an argument_list is a root list.
-inline bool argument_list_is_root(tnode_t<grammar::argument_list> list) {
-    return !list.try_get_parent<grammar::argument_list>();
-}
-
-inline bool argument_list_is_root(tnode_t<grammar::arguments_or_redirections_list> list) {
-    return !list.try_get_parent<grammar::arguments_or_redirections_list>();
-}
-
-#endif