Clean up some tokenization

Remove TOK_NONE. Turn token_type into an enum class (token_type_t). Make next() return a maybe_t<tok_t> instead of a bool.
2026-05-23 04:51:16 -03:00 · 2019-10-13 16:06:16 -07:00
parent 82eca4bc86
commit 1a65e18ba8
9 changed files with 210 additions and 201 deletions
--- a/src/builtin_commandline.cpp
+++ b/src/builtin_commandline.cpp
@@ -100,12 +100,11 @@ static void write_part(const wchar_t *begin, const wchar_t *end, int cut_at_curs
        wcstring out;
        wcstring buff(begin, end - begin);
        tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED);
-        tok_t token;
-        while (tok.next(&token)) {
-            if ((cut_at_cursor) && (token.offset + token.length >= pos)) break;
+        while (auto token = tok.next()) {
+            if ((cut_at_cursor) && (token->offset + token->length >= pos)) break;

-            if (token.type == TOK_STRING) {
-                wcstring tmp = tok.text_of(token);
+            if (token->type == token_type_t::string) {
+                wcstring tmp = tok.text_of(*token);
                unescape_string_in_place(&tmp, UNESCAPE_INCOMPLETE);
                out.append(tmp);
                out.push_back(L'\n');
--- a/src/fish_test_helper.cpp
+++ b/src/fish_test_helper.cpp
@@ -39,6 +39,12 @@ static void sigint_parent() {
    fprintf(stderr, "Sent SIGINT to %d\n", parent);
 }

+static void print_stdout_stderr() {
+    fprintf(stdout, "stdout\n");
+    fprintf(stderr, "stderr\n");
+    fflush(nullptr);
+}
+
 int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "No commands given.\n");
@@ -51,6 +57,8 @@ int main(int argc, char *argv[]) {
            report_foreground();
        } else if (!strcmp(argv[i], "sigint_parent")) {
            sigint_parent();
+        } else if (!strcmp(argv[i], "print_stdout_stderr")) {
+            print_stdout_stderr();
        } else {
            fprintf(stderr, "%s: Unknown command: %s\n", argv[0], argv[i]);
            return EXIT_FAILURE;
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -564,29 +564,27 @@ static void test_convert_nulls() {
 /// Test the tokenizer.
 static void test_tokenizer() {
    say(L"Testing tokenizer");
-    tok_t token;
-
    {
-        bool got = false;
        const wchar_t *str = L"alpha beta";
        tokenizer_t t(str, 0);
+        maybe_t<tok_t> token{};

-        got = t.next(&token);  // alpha
-        do_test(got);
-        do_test(token.type == TOK_STRING);
-        do_test(token.offset == 0);
-        do_test(token.length == 5);
-        do_test(t.text_of(token) == L"alpha");
+        token = t.next();  // alpha
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::string);
+        do_test(token->offset == 0);
+        do_test(token->length == 5);
+        do_test(t.text_of(*token) == L"alpha");

-        got = t.next(&token);  // beta
-        do_test(got);
-        do_test(token.type == TOK_STRING);
-        do_test(token.offset == 6);
-        do_test(token.length == 4);
-        do_test(t.text_of(token) == L"beta");
+        token = t.next();  // beta
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::string);
+        do_test(token->offset == 6);
+        do_test(token->length == 4);
+        do_test(t.text_of(*token) == L"beta");

-        got = t.next(&token);
-        do_test(!got);
+        token = t.next();
+        do_test(!token.has_value());
    }

    const wchar_t *str =
@@ -595,30 +593,31 @@ static void test_tokenizer() {
        L"&&& ||| "
        L"&& || & |"
        L"Compress_Newlines\n  \n\t\n   \nInto_Just_One";
-    const int types[] = {TOK_STRING, TOK_REDIRECT,   TOK_STRING,   TOK_REDIRECT, TOK_STRING,
-                         TOK_STRING, TOK_STRING,     TOK_REDIRECT, TOK_REDIRECT, TOK_STRING,
-                         TOK_ANDAND, TOK_BACKGROUND, TOK_OROR,     TOK_PIPE,     TOK_ANDAND,
-                         TOK_OROR,   TOK_BACKGROUND, TOK_PIPE,     TOK_STRING,   TOK_END,
-                         TOK_STRING};
+    using tt = token_type_t;
+    const token_type_t types[] = {
+        tt::string, tt::redirect, tt::string,   tt::redirect, tt::string,     tt::string,
+        tt::string, tt::redirect, tt::redirect, tt::string,   tt::andand,     tt::background,
+        tt::oror,   tt::pipe,     tt::andand,   tt::oror,     tt::background, tt::pipe,
+        tt::string, tt::end,      tt::string};

    say(L"Test correct tokenization");

    {
        tokenizer_t t(str, 0);
        size_t i = 0;
-        while (t.next(&token)) {
+        while (auto token = t.next()) {
            if (i >= sizeof types / sizeof *types) {
                err(L"Too many tokens returned from tokenizer");
-                std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token.type);
+                std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type);
                break;
            }
-            if (types[i] != token.type) {
+            if (types[i] != token->type) {
                err(L"Tokenization error:");
                std::fwprintf(
                    stdout,
                    L"Token number %zu of string \n'%ls'\n, expected type %ld, got token type "
                    L"%ld\n",
-                    i + 1, str, (long)types[i], (long)token.type);
+                    i + 1, str, (long)types[i], (long)token->type);
            }
            i++;
        }
@@ -630,37 +629,44 @@ static void test_tokenizer() {
    // Test some errors.
    {
        tokenizer_t t(L"abc\\", 0);
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_escape);
-        do_test(token.error_offset == 3);
+        auto token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_escape);
+        do_test(token->error_offset == 3);
    }

    {
        tokenizer_t t(L"abc )defg(hij", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::closing_unopened_subshell);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::closing_unopened_subshell);
+        do_test(token->error_offset == 4);
    }

    {
        tokenizer_t t(L"abc defg(hij (klm)", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_subshell);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_subshell);
+        do_test(token->error_offset == 4);
    }

    {
        tokenizer_t t(L"abc defg[hij (klm)", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_slice);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_slice);
+        do_test(token->error_offset == 4);
    }

    // Test redirection_type_for_string.
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -18,6 +18,7 @@
 #include "flog.h"
 #include "parse_constants.h"
 #include "parse_productions.h"
+#include "parse_tree.h"
 #include "proc.h"
 #include "tnode.h"
 #include "tokenizer.h"
@@ -235,28 +236,25 @@ wcstring parse_token_t::user_presentable_description() const {

 /// Convert from tokenizer_t's token type to a parse_token_t type.
 static inline parse_token_type_t parse_token_type_from_tokenizer_token(
-    enum token_type tokenizer_token_type) {
+    enum token_type_t tokenizer_token_type) {
    switch (tokenizer_token_type) {
-        case TOK_NONE:
-            DIE("TOK_NONE passed to parse_token_type_from_tokenizer_token");
-            return token_type_invalid;
-        case TOK_STRING:
+        case token_type_t::string:
            return parse_token_type_string;
-        case TOK_PIPE:
+        case token_type_t::pipe:
            return parse_token_type_pipe;
-        case TOK_ANDAND:
+        case token_type_t::andand:
            return parse_token_type_andand;
-        case TOK_OROR:
+        case token_type_t::oror:
            return parse_token_type_oror;
-        case TOK_END:
+        case token_type_t::end:
            return parse_token_type_end;
-        case TOK_BACKGROUND:
+        case token_type_t::background:
            return parse_token_type_background;
-        case TOK_REDIRECT:
+        case token_type_t::redirect:
            return parse_token_type_redirection;
-        case TOK_ERROR:
+        case token_type_t::error:
            return parse_special_type_tokenizer_error;
-        case TOK_COMMENT:
+        case token_type_t::comment:
            return parse_special_type_comment;
    }
    FLOGF(error, L"Bad token type %d passed to %s", (int)tokenizer_token_type, __FUNCTION__);
@@ -960,9 +958,9 @@ static bool is_keyword_char(wchar_t c) {
 }

 /// Given a token, returns the keyword it matches, or parse_keyword_none.
-static parse_keyword_t keyword_for_token(token_type tok, const wcstring &token) {
+static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token) {
    /* Only strings can be keywords */
-    if (tok != TOK_STRING) {
+    if (tok != token_type_t::string) {
        return parse_keyword_none;
    }

@@ -1009,32 +1007,35 @@ static inline bool is_help_argument(const wcstring &txt) {
 }

 /// Return a new parse token, advancing the tokenizer.
-static inline parse_token_t next_parse_token(tokenizer_t *tok, tok_t *token, wcstring *storage) {
-    if (!tok->next(token)) {
+static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token,
+                                             wcstring *storage) {
+    *out_token = tok->next();
+    if (!out_token->has_value()) {
        return kTerminalToken;
    }
+    const tok_t &token = **out_token;

    // Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy,
    // because it ignores quotes. This is the historical behavior. For example, `builtin --names`
    // lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of
    // this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it
    // even starts to look like a feature.
-    parse_token_t result{parse_token_type_from_tokenizer_token(token->type)};
-    const wcstring &text = tok->copy_text_of(*token, storage);
-    result.keyword = keyword_for_token(token->type, text);
+    parse_token_t result{parse_token_type_from_tokenizer_token(token.type)};
+    const wcstring &text = tok->copy_text_of(token, storage);
+    result.keyword = keyword_for_token(token.type, text);
    result.has_dash_prefix = !text.empty() && text.at(0) == L'-';
    result.is_help_argument = result.has_dash_prefix && is_help_argument(text);
    result.is_newline = (result.type == parse_token_type_end && text == L"\n");
-    result.preceding_escaped_nl = token->preceding_escaped_nl;
+    result.preceding_escaped_nl = token.preceding_escaped_nl;

    // These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
    // uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
    // crash.
-    assert(token->offset < SOURCE_OFFSET_INVALID);
-    result.source_start = (source_offset_t)token->offset;
+    assert(token.offset < SOURCE_OFFSET_INVALID);
+    result.source_start = (source_offset_t)token.offset;

-    assert(token->length <= SOURCE_OFFSET_INVALID);
-    result.source_length = (source_offset_t)token->length;
+    assert(token.length <= SOURCE_OFFSET_INVALID);
+    result.source_length = (source_offset_t)token.length;

    return result;
 }
@@ -1063,7 +1064,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
    parse_token_t queue[2] = {kInvalidToken, kInvalidToken};

    // Loop until we have a terminal token.
-    tok_t tokenizer_token;
+    maybe_t<tok_t> tokenizer_token{};
    for (size_t token_count = 0; queue[0].type != parse_token_type_terminate; token_count++) {
        // Push a new token onto the queue.
        queue[0] = queue[1];
@@ -1084,7 +1085,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
        // Handle tokenizer errors. This is a hack because really the parser should report this for
        // itself; but it has no way of getting the tokenizer message.
        if (queue[1].type == parse_special_type_tokenizer_error) {
-            parser.report_tokenizer_error(tokenizer_token);
+            parser.report_tokenizer_error(*tokenizer_token);
        }

        if (!parser.has_fatal_error()) {
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -20,6 +20,7 @@
 #include "fallback.h"  // IWYU pragma: keep
 #include "future_feature_flags.h"
 #include "parse_constants.h"
+#include "parse_util.h"
 #include "parser.h"
 #include "tnode.h"
 #include "tokenizer.h"
@@ -310,32 +311,32 @@ static void job_or_process_extent(const wchar_t *buff, size_t cursor_pos, const
    assert(buffcpy != NULL);

    tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token) && !finished) {
-        size_t tok_begin = token.offset;
+    maybe_t<tok_t> token{};
+    while ((token = tok.next()) && !finished) {
+        size_t tok_begin = token->offset;

-        switch (token.type) {
-            case TOK_PIPE: {
+        switch (token->type) {
+            case token_type_t::pipe: {
                if (!process) {
                    break;
                }
            }
            /* FALLTHROUGH */
-            case TOK_END:
-            case TOK_BACKGROUND: {
+            case token_type_t::end:
+            case token_type_t::background: {
                if (tok_begin >= pos) {
                    finished = 1;
                    if (b) *b = (wchar_t *)begin + tok_begin;
                } else {
                    if (a) *a = (wchar_t *)begin + tok_begin + 1;
                }
                break;
            }
            default: {
                break;
            }
        }
    }

    free(buffcpy);
 }
@@ -380,14 +381,13 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
    const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);

    tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token)) {
-        size_t tok_begin = token.offset;
+    while (maybe_t<tok_t> token = tok.next()) {
+        size_t tok_begin = token->offset;
        size_t tok_end = tok_begin;

        // Calculate end of token.
-        if (token.type == TOK_STRING) {
-            tok_end += token.length;
+        if (token->type == token_type_t::string) {
+            tok_end += token->length;
        }

        // Cursor was before beginning of this token, means that the cursor is between two tokens,
@@ -399,16 +399,16 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar

        // If cursor is inside the token, this is the token we are looking for. If so, set a and b
        // and break.
-        if (token.type == TOK_STRING && tok_end >= offset_within_cmdsubst) {
-            a = cmdsubst_begin + token.offset;
-            b = a + token.length;
+        if (token->type == token_type_t::string && tok_end >= offset_within_cmdsubst) {
+            a = cmdsubst_begin + token->offset;
+            b = a + token->length;
            break;
        }

        // Remember previous string token.
-        if (token.type == TOK_STRING) {
-            pa = cmdsubst_begin + token.offset;
-            pb = pa + token.length;
+        if (token->type == token_type_t::string) {
+            pa = cmdsubst_begin + token->offset;
+            pb = pa + token->length;
        }
    }

@@ -482,21 +482,20 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) {
 }

 void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote,
-                                   size_t *offset, enum token_type *out_type) {
+                                   size_t *offset, token_type_t *out_type) {
    size_t prev_pos = 0;
    wchar_t last_quote = L'\0';

    tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token)) {
-        if (token.offset > pos) break;
+    while (auto token = tok.next()) {
+        if (token->offset > pos) break;

-        if (token.type == TOK_STRING)
-            last_quote = get_quote(tok.text_of(token), pos - token.offset);
+        if (token->type == token_type_t::string)
+            last_quote = get_quote(tok.text_of(*token), pos - token->offset);

-        if (out_type != NULL) *out_type = token.type;
+        if (out_type != NULL) *out_type = token->type;

-        prev_pos = token.offset;
+        prev_pos = token->offset;
    }

    wchar_t *cmd_tmp = wcsdup(cmd.c_str());
--- a/src/parse_util.h
+++ b/src/parse_util.h
@@ -110,7 +110,7 @@ bool parse_util_argument_is_help(const wchar_t *s);
 /// \param offset If not NULL, get_param will store the offset to the beginning of the parameter.
 /// \param out_type If not NULL, get_param will store the token type.
 void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote,
-                                   size_t *offset, enum token_type *out_type);
+                                   size_t *offset, token_type_t *out_type);

 /// Attempts to escape the string 'cmd' using the given quote type, as determined by the quote
 /// character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -199,12 +199,11 @@ class reader_history_search_t {
        } else if (mode_ == token) {
            const wcstring &needle = search_string();
            tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED);
-            tok_t token;

            wcstring_list_t local_tokens;
-            while (tok.next(&token)) {
-                if (token.type != TOK_STRING) continue;
-                wcstring text = tok.text_of(token);
+            while (auto token = tok.next()) {
+                if (token->type != token_type_t::string) continue;
+                wcstring text = tok.text_of(*token);
                if (text.find(needle) != wcstring::npos) {
                    local_tokens.emplace_back(std::move(text));
                }
@@ -2346,11 +2345,11 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) {
 /// Returns true if the last token is a comment.
 static bool text_ends_in_comment(const wcstring &text) {
    tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS);
-    tok_t token;
-    while (tok.next(&token)) {
-        ;  // pass
+    bool is_comment = false;
+    while (auto token = tok.next()) {
+        is_comment = token->type == token_type_t::comment;
    }
-    return token.type == TOK_COMMENT;
+    return is_comment;
 }

 /// \return true if an event is a normal character that should be inserted into the buffer.
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -16,6 +16,7 @@
 #include "common.h"
 #include "fallback.h"  // IWYU pragma: keep
 #include "future_feature_flags.h"
+#include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep

 // _(s) is already wgettext(s).c_str(), so let's not convert back to wcstring
@@ -64,8 +65,7 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token

    this->has_next = false;

-    tok_t result;
-    result.type = TOK_ERROR;
+    tok_t result{token_type_t::error};
    result.error = error_type;
    result.offset = token_start - this->start;
    result.length = this->buff - token_start;
@@ -81,15 +81,7 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start),
    this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
 }

-bool tokenizer_t::next(struct tok_t *result) {
-    assert(result != NULL);
-    maybe_t<tok_t> tok = this->tok_next();
-    if (!tok) {
-        return false;
-    }
-    *result = std::move(*tok);
-    return true;
-}
+tok_t::tok_t(token_type_t type) : type(type) {}

 /// Tests if this character can be a part of a string. The redirect ^ is allowed unless it's the
 /// first character. Hash (#) starts a comment if it's the first character in a token; otherwise it
@@ -252,31 +244,30 @@ tok_t tokenizer_t::read_string() {
    }

    if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) {
-        tok_t error;
-        if ((mode & tok_modes::char_escape) == tok_modes::char_escape) {
-            error = this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
-                                     this->buff - 1);
-        } else if ((mode & tok_modes::array_brackets) == tok_modes::array_brackets) {
-            error = this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
-                                     this->start + slice_offset);
-        } else if ((mode & tok_modes::subshell) == tok_modes::subshell) {
+        if (mode & tok_modes::char_escape) {
+            return this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
+                                    this->buff - 1);
+        } else if (mode & tok_modes::array_brackets) {
+            return this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
+                                    this->start + slice_offset);
+        } else if (mode & tok_modes::subshell) {
            assert(paran_offsets.size() > 0);
            size_t offset_of_open_paran = paran_offsets.back();

-            error = this->call_error(tokenizer_error_t::unterminated_subshell, buff_start,
-                                     this->start + offset_of_open_paran);
-        } else if ((mode & tok_modes::curly_braces) == tok_modes::curly_braces) {
+            return this->call_error(tokenizer_error_t::unterminated_subshell, buff_start,
+                                    this->start + offset_of_open_paran);
+        } else if (mode & tok_modes::curly_braces) {
            assert(brace_offsets.size() > 0);
            size_t offset_of_open_brace = brace_offsets.back();

-            error = this->call_error(tokenizer_error_t::unterminated_brace, buff_start,
-                                     this->start + offset_of_open_brace);
+            return this->call_error(tokenizer_error_t::unterminated_brace, buff_start,
+                                    this->start + offset_of_open_brace);
+        } else {
+            DIE("Unknown non-regular-text mode");
        }
-        return error;
    }

-    tok_t result;
-    result.type = TOK_STRING;
+    tok_t result(token_type_t::string);
    result.offset = buff_start - this->start;
    result.length = this->buff - buff_start;
    return result;
@@ -289,7 +280,7 @@ struct parsed_redir_or_pipe_t {
    size_t consumed{0};

-    // The token type, always either TOK_PIPE or TOK_REDIRECT.
-    token_type type{TOK_REDIRECT};
+    // The token type, always either token_type_t::pipe or token_type_t::redirect.
+    token_type_t type{token_type_t::redirect};

-    // The redirection mode if the type is TOK_REDIRECT.
+    // The redirection mode if the type is token_type_t::redirect.
    redirection_type_t redirection_mode{redirection_type_t::overwrite};
@@ -373,7 +364,7 @@ static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t
    } else if (opt_char == L'|') {
        // So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets
        // handled elsewhere.
-        result.type = TOK_PIPE;
+        result.type = token_type_t::pipe;
        idx++;
    }

@@ -384,7 +375,7 @@ static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t
 maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd) {
    auto v = read_redirection_or_fd_pipe(str.c_str());
    // Redirections only, no pipes.
-    if (!v || v->type != TOK_REDIRECT || v->fd < 0) return none();
+    if (!v || v->type != token_type_t::redirect || v->fd < 0) return none();
    if (out_fd) *out_fd = v->fd;
    return v->redirection_mode;
 }
@@ -395,7 +386,7 @@ int fd_redirected_by_pipe(const wcstring &str) {
        return STDOUT_FILENO;
    }
    auto v = read_redirection_or_fd_pipe(str.c_str());
-    return (v && v->type == TOK_PIPE) ? v->fd : -1;
+    return (v && v->type == token_type_t::pipe) ? v->fd : -1;
 }

 int oflags_for_redirection_type(redirection_type_t type) {
@@ -434,7 +425,7 @@ static bool iswspace_not_nl(wchar_t c) {
    }
 }

-maybe_t<tok_t> tokenizer_t::tok_next() {
+maybe_t<tok_t> tokenizer_t::next() {
    if (!this->has_next) {
        return none();
    }
@@ -464,8 +455,7 @@ maybe_t<tok_t> tokenizer_t::tok_next() {

        // Maybe return the comment.
        if (this->show_comments) {
-            tok_t result;
-            result.type = TOK_COMMENT;
+            tok_t result(token_type_t::comment);
            result.offset = comment_start - this->start;
            result.length = comment_len;
            result.preceding_escaped_nl = preceding_escaped_nl;
@@ -476,10 +466,9 @@ maybe_t<tok_t> tokenizer_t::tok_next() {

    // We made it past the comments and ate any trailing newlines we wanted to ignore.
    this->continue_line_after_comment = false;
-    size_t start_pos = this->buff - this->start;
+    const size_t start_pos = this->buff - this->start;

-    tok_t result;
-    result.offset = start_pos;
+    maybe_t<tok_t> result{};
    switch (*this->buff) {
        case L'\0': {
            this->has_next = false;
@@ -488,8 +477,9 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
        case L'\r':  // carriage-return
        case L'\n':  // newline
        case L';': {
-            result.type = TOK_END;
-            result.length = 1;
+            result.emplace(token_type_t::end);
+            result->offset = start_pos;
+            result->length = 1;
            this->buff++;
            // Hack: when we get a newline, swallow as many as we can. This compresses multiple
            // subsequent newlines into a single one.
@@ -503,25 +493,29 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
        }
        case L'&': {
            if (this->buff[1] == L'&') {
-                result.type = TOK_ANDAND;
-                result.length = 2;
+                result.emplace(token_type_t::andand);
+                result->offset = start_pos;
+                result->length = 2;
                this->buff += 2;
            } else {
-                result.type = TOK_BACKGROUND;
-                result.length = 1;
+                result.emplace(token_type_t::background);
+                result->offset = start_pos;
+                result->length = 1;
                this->buff++;
            }
            break;
        }
        case L'|': {
            if (this->buff[1] == L'|') {
-                result.type = TOK_OROR;
-                result.length = 2;
+                result.emplace(token_type_t::oror);
+                result->offset = start_pos;
+                result->length = 2;
                this->buff += 2;
            } else {
-                result.type = TOK_PIPE;
-                result.redirected_fd = 1;
-                result.length = 1;
+                result.emplace(token_type_t::pipe);
+                result->redirected_fd = 1;
+                result->offset = start_pos;
+                result->length = 1;
                this->buff++;
            }
            break;
@@ -535,9 +529,10 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
                return this->call_error(tokenizer_error_t::invalid_redirect, this->buff,
                                        this->buff);
            }
-            result.type = redir_or_pipe->type;
-            result.redirected_fd = redir_or_pipe->fd;
-            result.length = redir_or_pipe->consumed;
+            result.emplace(redir_or_pipe->type);
+            result->offset = start_pos;
+            result->redirected_fd = redir_or_pipe->fd;
+            result->length = redir_or_pipe->consumed;
            this->buff += redir_or_pipe->consumed;
            break;
        }
@@ -553,13 +548,14 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
                // It looks like a redirection or a pipe. But we don't support piping fd 0. Note
                // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer
                // error.
-                if (redir_or_pipe->type == TOK_PIPE && redir_or_pipe->fd == 0) {
+                if (redir_or_pipe->type == token_type_t::pipe && redir_or_pipe->fd == 0) {
                    return this->call_error(tokenizer_error_t::invalid_pipe, error_location,
                                            error_location);
                }
-                result.type = redir_or_pipe->type;
-                result.redirected_fd = redir_or_pipe->fd;
-                result.length = redir_or_pipe->consumed;
+                result.emplace(redir_or_pipe->type);
+                result->redirected_fd = redir_or_pipe->fd;
+                result->offset = start_pos;
+                result->length = redir_or_pipe->consumed;
                this->buff += redir_or_pipe->consumed;
            } else {
                // Not a redirection or pipe, so just a string.
@@ -568,15 +564,17 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
            break;
        }
    }
-    result.preceding_escaped_nl = preceding_escaped_nl;
+    assert(result.has_value() && "Should have a token");
+    result->preceding_escaped_nl = preceding_escaped_nl;
    return result;
 }

 wcstring tok_first(const wcstring &str) {
    tokenizer_t t(str.c_str(), 0);
-    tok_t token;
-    if (t.next(&token) && token.type == TOK_STRING) {
-        return t.text_of(token);
+    if (auto token = t.next()) {
+        if (token->type == token_type_t::string) {
+            return t.text_of(*token);
+        }
    }
    return {};
 }
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -10,17 +10,16 @@
 #include "parse_constants.h"

 /// Token types.
-enum token_type {
-    TOK_NONE,        /// Tokenizer not yet constructed
-    TOK_ERROR,       /// Error reading token
-    TOK_STRING,      /// String token
-    TOK_PIPE,        /// Pipe token
-    TOK_ANDAND,      /// && token
-    TOK_OROR,        /// || token
-    TOK_END,         /// End token (semicolon or newline, not literal end)
-    TOK_REDIRECT,    /// redirection token
-    TOK_BACKGROUND,  /// send job to bg token
-    TOK_COMMENT      /// comment token
+enum class token_type_t {
+    error,       /// Error reading token
+    string,      /// String token
+    pipe,        /// Pipe token
+    andand,      /// && token
+    oror,        /// || token
+    end,         /// End token (semicolon or newline, not literal end)
+    redirect,    /// redirection token
+    background,  /// send job to bg token
+    comment,     /// comment token
 };

 enum class redirection_type_t {
@@ -65,7 +64,7 @@ const wchar_t *tokenizer_get_error_message(tokenizer_error_t err);

 struct tok_t {
    // The type of the token.
-    token_type type{TOK_NONE};
+    token_type_t type;

    // Offset of the token.
    size_t offset{0};
@@ -85,7 +84,8 @@ struct tok_t {
    // at 'offset'.
    size_t error_offset{size_t(-1)};

-    tok_t() = default;
+    // Construct from a token type.
+    explicit tok_t(token_type_t type);
 };

 /// The tokenizer struct.
@@ -112,7 +112,6 @@ class tokenizer_t {
    tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start,
                     const wchar_t *error_loc);
    tok_t read_string();
-    maybe_t<tok_t> tok_next();

   public:
    /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
@@ -124,8 +123,8 @@ class tokenizer_t {
    /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens
    tokenizer_t(const wchar_t *b, tok_flags_t flags);

-    /// Returns the next token by reference. Returns true if we got one, false if we're at the end.
-    bool next(struct tok_t *result);
+    /// Returns the next token, or none() if we are at the end.
+    maybe_t<tok_t> next();

    /// Returns the text of a token, as a string.
    wcstring text_of(const tok_t &tok) const { return wcstring(start + tok.offset, tok.length); }