From 1a65e18ba8cfbfede331b04d8a091be8490c5a80 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 13 Oct 2019 16:06:16 -0700
Subject: [PATCH] Clean up some tokenization

Remove TOK_NONE
Turn token_type into an enum class
Make next() return a maybe_t<tok_t> instead of a bool
---
 src/builtin_commandline.cpp |   9 ++-
 src/fish_test_helper.cpp    |   8 +++
 src/fish_tests.cpp          |  96 +++++++++++++++--------------
 src/parse_tree.cpp          |  55 ++++++++---------
 src/parse_util.cpp          |  79 ++++++++++++------------
 src/parse_util.h            |   2 +-
 src/reader.cpp              |  15 +++--
 src/tokenizer.cpp           | 116 ++++++++++++++++++------------------
 src/tokenizer.h             |  31 +++++-----
 9 files changed, 210 insertions(+), 201 deletions(-)

diff --git a/src/builtin_commandline.cpp b/src/builtin_commandline.cpp
index 0c849612b..cf309b498 100644
--- a/src/builtin_commandline.cpp
+++ b/src/builtin_commandline.cpp
@@ -100,12 +100,11 @@ static void write_part(const wchar_t *begin, const wchar_t *end, int cut_at_curs
         wcstring out;
         wcstring buff(begin, end - begin);
         tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED);
-        tok_t token;
-        while (tok.next(&token)) {
-            if ((cut_at_cursor) && (token.offset + token.length >= pos)) break;
+        while (auto token = tok.next()) {
+            if ((cut_at_cursor) && (token->offset + token->length >= pos)) break;
 
-            if (token.type == TOK_STRING) {
-                wcstring tmp = tok.text_of(token);
+            if (token->type == token_type_t::string) {
+                wcstring tmp = tok.text_of(*token);
                 unescape_string_in_place(&tmp, UNESCAPE_INCOMPLETE);
                 out.append(tmp);
                 out.push_back(L'\n');
diff --git a/src/fish_test_helper.cpp b/src/fish_test_helper.cpp
index 5ee10407e..dd70b3ed6 100644
--- a/src/fish_test_helper.cpp
+++ b/src/fish_test_helper.cpp
@@ -39,6 +39,12 @@ static void sigint_parent() {
     fprintf(stderr, "Sent SIGINT to %d\n", parent);
 }
 
+static void print_stdout_stderr() {
+    fprintf(stdout, "stdout\n");
+    fprintf(stderr, "stderr\n");
+    fflush(nullptr);
+}
+
 int main(int argc, char *argv[]) {
     if (argc <= 1) {
         fprintf(stderr, "No commands given.\n");
@@ -51,6 +57,8 @@ int main(int argc, char *argv[]) {
             report_foreground();
         } else if (!strcmp(argv[i], "sigint_parent")) {
             sigint_parent();
+        } else if (!strcmp(argv[i], "print_stdout_stderr")) {
+            print_stdout_stderr();
         } else {
             fprintf(stderr, "%s: Unknown command: %s\n", argv[0], argv[i]);
             return EXIT_FAILURE;
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index e5837704e..2a566fd23 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -564,29 +564,27 @@ static void test_convert_nulls() {
 /// Test the tokenizer.
 static void test_tokenizer() {
     say(L"Testing tokenizer");
-    tok_t token;
-
     {
-        bool got = false;
         const wchar_t *str = L"alpha beta";
         tokenizer_t t(str, 0);
+        maybe_t<tok_t> token{};
 
-        got = t.next(&token);  // alpha
-        do_test(got);
-        do_test(token.type == TOK_STRING);
-        do_test(token.offset == 0);
-        do_test(token.length == 5);
-        do_test(t.text_of(token) == L"alpha");
+        token = t.next();  // alpha
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::string);
+        do_test(token->offset == 0);
+        do_test(token->length == 5);
+        do_test(t.text_of(*token) == L"alpha");
 
-        got = t.next(&token);  // beta
-        do_test(got);
-        do_test(token.type == TOK_STRING);
-        do_test(token.offset == 6);
-        do_test(token.length == 4);
-        do_test(t.text_of(token) == L"beta");
+        token = t.next();  // beta
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::string);
+        do_test(token->offset == 6);
+        do_test(token->length == 4);
+        do_test(t.text_of(*token) == L"beta");
 
-        got = t.next(&token);
-        do_test(!got);
+        token = t.next();
+        do_test(!token.has_value());
     }
 
     const wchar_t *str =
@@ -595,30 +593,31 @@ static void test_tokenizer() {
         L"&&& ||| "
         L"&& || & |"
         L"Compress_Newlines\n  \n\t\n   \nInto_Just_One";
-    const int types[] = {TOK_STRING, TOK_REDIRECT,   TOK_STRING,   TOK_REDIRECT, TOK_STRING,
-                         TOK_STRING, TOK_STRING,     TOK_REDIRECT, TOK_REDIRECT, TOK_STRING,
-                         TOK_ANDAND, TOK_BACKGROUND, TOK_OROR,     TOK_PIPE,     TOK_ANDAND,
-                         TOK_OROR,   TOK_BACKGROUND, TOK_PIPE,     TOK_STRING,   TOK_END,
-                         TOK_STRING};
+    using tt = token_type_t;
+    const token_type_t types[] = {
+        tt::string, tt::redirect, tt::string,   tt::redirect, tt::string,     tt::string,
+        tt::string, tt::redirect, tt::redirect, tt::string,   tt::andand,     tt::background,
+        tt::oror,   tt::pipe,     tt::andand,   tt::oror,     tt::background, tt::pipe,
+        tt::string, tt::end,      tt::string};
 
     say(L"Test correct tokenization");
 
     {
         tokenizer_t t(str, 0);
         size_t i = 0;
-        while (t.next(&token)) {
+        while (auto token = t.next()) {
             if (i >= sizeof types / sizeof *types) {
                 err(L"Too many tokens returned from tokenizer");
-                std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token.type);
+                std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type);
                 break;
             }
-            if (types[i] != token.type) {
+            if (types[i] != token->type) {
                 err(L"Tokenization error:");
                 std::fwprintf(
                     stdout,
                     L"Token number %zu of string \n'%ls'\n, expected type %ld, got token type "
                     L"%ld\n",
-                    i + 1, str, (long)types[i], (long)token.type);
+                    i + 1, str, (long)types[i], (long)token->type);
             }
             i++;
         }
@@ -630,37 +629,44 @@ static void test_tokenizer() {
     // Test some errors.
     {
         tokenizer_t t(L"abc\\", 0);
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_escape);
-        do_test(token.error_offset == 3);
+        auto token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_escape);
+        do_test(token->error_offset == 3);
     }
 
     {
         tokenizer_t t(L"abc )defg(hij", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::closing_unopened_subshell);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::closing_unopened_subshell);
+        do_test(token->error_offset == 4);
     }
 
     {
         tokenizer_t t(L"abc defg(hij (klm)", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_subshell);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_subshell);
+        do_test(token->error_offset == 4);
     }
 
     {
         tokenizer_t t(L"abc defg[hij (klm)", 0);
-        do_test(t.next(&token));
-        do_test(t.next(&token));
-        do_test(token.type == TOK_ERROR);
-        do_test(token.error == tokenizer_error_t::unterminated_slice);
-        do_test(token.error_offset == 4);
+        auto token = t.next();
+        do_test(token.has_value());
+        token = t.next();
+        do_test(token.has_value());
+        do_test(token->type == token_type_t::error);
+        do_test(token->error == tokenizer_error_t::unterminated_slice);
+        do_test(token->error_offset == 4);
     }
 
     // Test redirection_type_for_string.
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index d4e261416..6b216a135 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -18,6 +18,7 @@
 #include "flog.h"
 #include "parse_constants.h"
 #include "parse_productions.h"
+#include "parse_tree.h"
 #include "proc.h"
 #include "tnode.h"
 #include "tokenizer.h"
@@ -235,28 +236,25 @@ wcstring parse_token_t::user_presentable_description() const {
 
 /// Convert from tokenizer_t's token type to a parse_token_t type.
 static inline parse_token_type_t parse_token_type_from_tokenizer_token(
-    enum token_type tokenizer_token_type) {
+    token_type_t tokenizer_token_type) {
     switch (tokenizer_token_type) {
-        case TOK_NONE:
-            DIE("TOK_NONE passed to parse_token_type_from_tokenizer_token");
-            return token_type_invalid;
-        case TOK_STRING:
+        case token_type_t::string:
             return parse_token_type_string;
-        case TOK_PIPE:
+        case token_type_t::pipe:
             return parse_token_type_pipe;
-        case TOK_ANDAND:
+        case token_type_t::andand:
             return parse_token_type_andand;
-        case TOK_OROR:
+        case token_type_t::oror:
             return parse_token_type_oror;
-        case TOK_END:
+        case token_type_t::end:
             return parse_token_type_end;
-        case TOK_BACKGROUND:
+        case token_type_t::background:
             return parse_token_type_background;
-        case TOK_REDIRECT:
+        case token_type_t::redirect:
             return parse_token_type_redirection;
-        case TOK_ERROR:
+        case token_type_t::error:
             return parse_special_type_tokenizer_error;
-        case TOK_COMMENT:
+        case token_type_t::comment:
             return parse_special_type_comment;
     }
     FLOGF(error, L"Bad token type %d passed to %s", (int)tokenizer_token_type, __FUNCTION__);
@@ -960,9 +958,9 @@ static bool is_keyword_char(wchar_t c) {
 }
 
 /// Given a token, returns the keyword it matches, or parse_keyword_none.
-static parse_keyword_t keyword_for_token(token_type tok, const wcstring &token) {
+static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token) {
     /* Only strings can be keywords */
-    if (tok != TOK_STRING) {
+    if (tok != token_type_t::string) {
         return parse_keyword_none;
     }
 
@@ -1009,32 +1007,35 @@ static inline bool is_help_argument(const wcstring &txt) {
 }
 
 /// Return a new parse token, advancing the tokenizer.
-static inline parse_token_t next_parse_token(tokenizer_t *tok, tok_t *token, wcstring *storage) {
-    if (!tok->next(token)) {
+static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token,
+                                             wcstring *storage) {
+    *out_token = tok->next();
+    if (!out_token->has_value()) {
         return kTerminalToken;
     }
+    const tok_t &token = **out_token;
 
     // Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy,
     // because it ignores quotes. This is the historical behavior. For example, `builtin --names`
     // lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of
     // this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it
     // even starts to look like a feature.
-    parse_token_t result{parse_token_type_from_tokenizer_token(token->type)};
-    const wcstring &text = tok->copy_text_of(*token, storage);
-    result.keyword = keyword_for_token(token->type, text);
+    parse_token_t result{parse_token_type_from_tokenizer_token(token.type)};
+    const wcstring &text = tok->copy_text_of(token, storage);
+    result.keyword = keyword_for_token(token.type, text);
     result.has_dash_prefix = !text.empty() && text.at(0) == L'-';
     result.is_help_argument = result.has_dash_prefix && is_help_argument(text);
     result.is_newline = (result.type == parse_token_type_end && text == L"\n");
-    result.preceding_escaped_nl = token->preceding_escaped_nl;
+    result.preceding_escaped_nl = token.preceding_escaped_nl;
 
     // These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
     // uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
     // crash.
-    assert(token->offset < SOURCE_OFFSET_INVALID);
-    result.source_start = (source_offset_t)token->offset;
+    assert(token.offset < SOURCE_OFFSET_INVALID);
+    result.source_start = (source_offset_t)token.offset;
 
-    assert(token->length <= SOURCE_OFFSET_INVALID);
-    result.source_length = (source_offset_t)token->length;
+    assert(token.length <= SOURCE_OFFSET_INVALID);
+    result.source_length = (source_offset_t)token.length;
 
     return result;
 }
@@ -1063,7 +1064,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
     parse_token_t queue[2] = {kInvalidToken, kInvalidToken};
 
     // Loop until we have a terminal token.
-    tok_t tokenizer_token;
+    maybe_t<tok_t> tokenizer_token{};
     for (size_t token_count = 0; queue[0].type != parse_token_type_terminate; token_count++) {
         // Push a new token onto the queue.
         queue[0] = queue[1];
@@ -1084,7 +1085,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
         // Handle tokenizer errors. This is a hack because really the parser should report this for
         // itself; but it has no way of getting the tokenizer message.
         if (queue[1].type == parse_special_type_tokenizer_error) {
-            parser.report_tokenizer_error(tokenizer_token);
+            parser.report_tokenizer_error(*tokenizer_token);
         }
 
         if (!parser.has_fatal_error()) {
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index 388baae62..a3c9e2616 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -20,6 +20,7 @@
 #include "fallback.h"  // IWYU pragma: keep
 #include "future_feature_flags.h"
 #include "parse_constants.h"
+#include "parse_util.h"
 #include "parser.h"
 #include "tnode.h"
 #include "tokenizer.h"
@@ -310,32 +311,32 @@ static void job_or_process_extent(const wchar_t *buff, size_t cursor_pos, const
     assert(buffcpy != NULL);
 
     tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token) && !finished) {
-        size_t tok_begin = token.offset;
+    maybe_t<tok_t> token{};
+    while ((token = tok.next()) && !finished) {
+        size_t tok_begin = token->offset;
 
-        switch (token.type) {
-            case TOK_PIPE: {
+        switch (token->type) {
+            case token_type_t::pipe: {
                 if (!process) {
                     break;
                 }
             }
             /* FALLTHROUGH */
-            case TOK_END:
-            case TOK_BACKGROUND: {
+            case token_type_t::end:
+            case token_type_t::background: {
                 if (tok_begin >= pos) {
                     finished = 1;
                     if (b) *b = (wchar_t *)begin + tok_begin;
                 } else {
                     if (a) *a = (wchar_t *)begin + tok_begin + 1;
                 }
                 break;
             }
             default: {
                 break;
             }
         }
     }
 
     free(buffcpy);
 }
@@ -380,14 +381,13 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
     const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
 
     tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token)) {
-        size_t tok_begin = token.offset;
+    while (maybe_t<tok_t> token = tok.next()) {
+        size_t tok_begin = token->offset;
         size_t tok_end = tok_begin;
 
         // Calculate end of token.
-        if (token.type == TOK_STRING) {
-            tok_end += token.length;
+        if (token->type == token_type_t::string) {
+            tok_end += token->length;
         }
 
         // Cursor was before beginning of this token, means that the cursor is between two tokens,
@@ -399,16 +399,16 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
 
         // If cursor is inside the token, this is the token we are looking for. If so, set a and b
         // and break.
-        if (token.type == TOK_STRING && tok_end >= offset_within_cmdsubst) {
-            a = cmdsubst_begin + token.offset;
-            b = a + token.length;
+        if (token->type == token_type_t::string && tok_end >= offset_within_cmdsubst) {
+            a = cmdsubst_begin + token->offset;
+            b = a + token->length;
             break;
         }
 
         // Remember previous string token.
-        if (token.type == TOK_STRING) {
-            pa = cmdsubst_begin + token.offset;
-            pb = pa + token.length;
+        if (token->type == token_type_t::string) {
+            pa = cmdsubst_begin + token->offset;
+            pb = pa + token->length;
         }
     }
 
@@ -482,21 +482,20 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) {
 }
 
 void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote,
-                                   size_t *offset, enum token_type *out_type) {
+                                   size_t *offset, token_type_t *out_type) {
     size_t prev_pos = 0;
     wchar_t last_quote = L'\0';
 
     tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
-    tok_t token;
-    while (tok.next(&token)) {
-        if (token.offset > pos) break;
+    while (auto token = tok.next()) {
+        if (token->offset > pos) break;
 
-        if (token.type == TOK_STRING)
-            last_quote = get_quote(tok.text_of(token), pos - token.offset);
+        if (token->type == token_type_t::string)
+            last_quote = get_quote(tok.text_of(*token), pos - token->offset);
 
-        if (out_type != NULL) *out_type = token.type;
+        if (out_type != NULL) *out_type = token->type;
 
-        prev_pos = token.offset;
+        prev_pos = token->offset;
     }
 
     wchar_t *cmd_tmp = wcsdup(cmd.c_str());
diff --git a/src/parse_util.h b/src/parse_util.h
index e862b796f..a3e7a849e 100644
--- a/src/parse_util.h
+++ b/src/parse_util.h
@@ -110,7 +110,7 @@ bool parse_util_argument_is_help(const wchar_t *s);
 /// \param offset If not NULL, get_param will store the offset to the beginning of the parameter.
 /// \param out_type If not NULL, get_param will store the token type.
 void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote,
-                                   size_t *offset, enum token_type *out_type);
+                                   size_t *offset, token_type_t *out_type);
 
 /// Attempts to escape the string 'cmd' using the given quote type, as determined by the quote
 /// character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and
diff --git a/src/reader.cpp b/src/reader.cpp
index 5377e0647..13713bd92 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -199,12 +199,11 @@ class reader_history_search_t {
         } else if (mode_ == token) {
             const wcstring &needle = search_string();
             tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED);
-            tok_t token;
 
             wcstring_list_t local_tokens;
-            while (tok.next(&token)) {
-                if (token.type != TOK_STRING) continue;
-                wcstring text = tok.text_of(token);
+            while (auto token = tok.next()) {
+                if (token->type != token_type_t::string) continue;
+                wcstring text = tok.text_of(*token);
                 if (text.find(needle) != wcstring::npos) {
                     local_tokens.emplace_back(std::move(text));
                 }
@@ -2346,11 +2345,11 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) {
 /// Returns true if the last token is a comment.
 static bool text_ends_in_comment(const wcstring &text) {
     tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS);
-    tok_t token;
-    while (tok.next(&token)) {
-        ;  // pass
+    bool is_comment = false;
+    while (auto token = tok.next()) {
+        is_comment = token->type == token_type_t::comment;
     }
-    return token.type == TOK_COMMENT;
+    return is_comment;
 }
 
 /// \return true if an event is a normal character that should be inserted into the buffer.
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 14287bcf2..5e9235e0a 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -16,6 +16,7 @@
 #include "common.h"
 #include "fallback.h"  // IWYU pragma: keep
 #include "future_feature_flags.h"
+#include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
 
 // _(s) is already wgettext(s).c_str(), so let's not convert back to wcstring
@@ -64,8 +65,7 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token
 
     this->has_next = false;
 
-    tok_t result;
-    result.type = TOK_ERROR;
+    tok_t result{token_type_t::error};
     result.error = error_type;
     result.offset = token_start - this->start;
     result.length = this->buff - token_start;
@@ -81,15 +81,7 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start),
     this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
 }
 
-bool tokenizer_t::next(struct tok_t *result) {
-    assert(result != NULL);
-    maybe_t<tok_t> tok = this->tok_next();
-    if (!tok) {
-        return false;
-    }
-    *result = std::move(*tok);
-    return true;
-}
+tok_t::tok_t(token_type_t type) : type(type) {}
 
 /// Tests if this character can be a part of a string. The redirect ^ is allowed unless it's the
 /// first character. Hash (#) starts a comment if it's the first character in a token; otherwise it
@@ -252,31 +244,30 @@ tok_t tokenizer_t::read_string() {
     }
 
     if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) {
-        tok_t error;
-        if ((mode & tok_modes::char_escape) == tok_modes::char_escape) {
-            error = this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
-                                     this->buff - 1);
-        } else if ((mode & tok_modes::array_brackets) == tok_modes::array_brackets) {
-            error = this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
-                                     this->start + slice_offset);
-        } else if ((mode & tok_modes::subshell) == tok_modes::subshell) {
+        if (mode & tok_modes::char_escape) {
+            return this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
+                                    this->buff - 1);
+        } else if (mode & tok_modes::array_brackets) {
+            return this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
+                                    this->start + slice_offset);
+        } else if (mode & tok_modes::subshell) {
             assert(paran_offsets.size() > 0);
             size_t offset_of_open_paran = paran_offsets.back();
 
-            error = this->call_error(tokenizer_error_t::unterminated_subshell, buff_start,
-                                     this->start + offset_of_open_paran);
-        } else if ((mode & tok_modes::curly_braces) == tok_modes::curly_braces) {
+            return this->call_error(tokenizer_error_t::unterminated_subshell, buff_start,
+                                    this->start + offset_of_open_paran);
+        } else if (mode & tok_modes::curly_braces) {
             assert(brace_offsets.size() > 0);
             size_t offset_of_open_brace = brace_offsets.back();
 
-            error = this->call_error(tokenizer_error_t::unterminated_brace, buff_start,
-                                     this->start + offset_of_open_brace);
+            return this->call_error(tokenizer_error_t::unterminated_brace, buff_start,
+                                    this->start + offset_of_open_brace);
+        } else {
+            DIE("Unknown non-regular-text mode");
         }
-        return error;
     }
 
-    tok_t result;
-    result.type = TOK_STRING;
+    tok_t result(token_type_t::string);
     result.offset = buff_start - this->start;
     result.length = this->buff - buff_start;
     return result;
@@ -289,7 +280,7 @@ struct parsed_redir_or_pipe_t {
     size_t consumed{0};
 
-    // The token type, always either TOK_PIPE or TOK_REDIRECT.
-    token_type type{TOK_REDIRECT};
+    // The token type, always either token_type_t::pipe or token_type_t::redirect.
+    token_type_t type{token_type_t::redirect};
 
-    // The redirection mode if the type is TOK_REDIRECT.
+    // The redirection mode if the type is token_type_t::redirect.
     redirection_type_t redirection_mode{redirection_type_t::overwrite};
@@ -373,7 +364,7 @@ static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t
     } else if (opt_char == L'|') {
         // So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets
         // handled elsewhere.
-        result.type = TOK_PIPE;
+        result.type = token_type_t::pipe;
         idx++;
     }
 
@@ -384,7 +375,7 @@ static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t
 maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd) {
     auto v = read_redirection_or_fd_pipe(str.c_str());
     // Redirections only, no pipes.
-    if (!v || v->type != TOK_REDIRECT || v->fd < 0) return none();
+    if (!v || v->type != token_type_t::redirect || v->fd < 0) return none();
     if (out_fd) *out_fd = v->fd;
     return v->redirection_mode;
 }
@@ -395,7 +386,7 @@ int fd_redirected_by_pipe(const wcstring &str) {
         return STDOUT_FILENO;
     }
     auto v = read_redirection_or_fd_pipe(str.c_str());
-    return (v && v->type == TOK_PIPE) ? v->fd : -1;
+    return (v && v->type == token_type_t::pipe) ? v->fd : -1;
 }
 
 int oflags_for_redirection_type(redirection_type_t type) {
@@ -434,7 +425,7 @@ static bool iswspace_not_nl(wchar_t c) {
     }
 }
 
-maybe_t<tok_t> tokenizer_t::tok_next() {
+maybe_t<tok_t> tokenizer_t::next() {
     if (!this->has_next) {
         return none();
     }
@@ -464,8 +455,7 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
 
         // Maybe return the comment.
         if (this->show_comments) {
-            tok_t result;
-            result.type = TOK_COMMENT;
+            tok_t result(token_type_t::comment);
             result.offset = comment_start - this->start;
             result.length = comment_len;
             result.preceding_escaped_nl = preceding_escaped_nl;
@@ -476,10 +466,9 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
 
     // We made it past the comments and ate any trailing newlines we wanted to ignore.
     this->continue_line_after_comment = false;
-    size_t start_pos = this->buff - this->start;
+    const size_t start_pos = this->buff - this->start;
 
-    tok_t result;
-    result.offset = start_pos;
+    maybe_t<tok_t> result{};
     switch (*this->buff) {
         case L'\0': {
             this->has_next = false;
@@ -488,8 +477,9 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
         case L'\r':  // carriage-return
         case L'\n':  // newline
         case L';': {
-            result.type = TOK_END;
-            result.length = 1;
+            result.emplace(token_type_t::end);
+            result->offset = start_pos;
+            result->length = 1;
             this->buff++;
             // Hack: when we get a newline, swallow as many as we can. This compresses multiple
             // subsequent newlines into a single one.
@@ -503,25 +493,29 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
         }
         case L'&': {
             if (this->buff[1] == L'&') {
-                result.type = TOK_ANDAND;
-                result.length = 2;
+                result.emplace(token_type_t::andand);
+                result->offset = start_pos;
+                result->length = 2;
                 this->buff += 2;
             } else {
-                result.type = TOK_BACKGROUND;
-                result.length = 1;
+                result.emplace(token_type_t::background);
+                result->offset = start_pos;
+                result->length = 1;
                 this->buff++;
             }
             break;
         }
         case L'|': {
             if (this->buff[1] == L'|') {
-                result.type = TOK_OROR;
-                result.length = 2;
+                result.emplace(token_type_t::oror);
+                result->offset = start_pos;
+                result->length = 2;
                 this->buff += 2;
             } else {
-                result.type = TOK_PIPE;
-                result.redirected_fd = 1;
-                result.length = 1;
+                result.emplace(token_type_t::pipe);
+                result->redirected_fd = 1;
+                result->offset = start_pos;
+                result->length = 1;
                 this->buff++;
             }
             break;
@@ -535,9 +529,10 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
                 return this->call_error(tokenizer_error_t::invalid_redirect, this->buff,
                                         this->buff);
             }
-            result.type = redir_or_pipe->type;
-            result.redirected_fd = redir_or_pipe->fd;
-            result.length = redir_or_pipe->consumed;
+            result.emplace(redir_or_pipe->type);
+            result->offset = start_pos;
+            result->redirected_fd = redir_or_pipe->fd;
+            result->length = redir_or_pipe->consumed;
             this->buff += redir_or_pipe->consumed;
             break;
         }
@@ -553,13 +548,14 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
                 // It looks like a redirection or a pipe. But we don't support piping fd 0. Note
                 // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer
                 // error.
-                if (redir_or_pipe->type == TOK_PIPE && redir_or_pipe->fd == 0) {
+                if (redir_or_pipe->type == token_type_t::pipe && redir_or_pipe->fd == 0) {
                     return this->call_error(tokenizer_error_t::invalid_pipe, error_location,
                                             error_location);
                 }
-                result.type = redir_or_pipe->type;
-                result.redirected_fd = redir_or_pipe->fd;
-                result.length = redir_or_pipe->consumed;
+                result.emplace(redir_or_pipe->type);
+                result->redirected_fd = redir_or_pipe->fd;
+                result->offset = start_pos;
+                result->length = redir_or_pipe->consumed;
                 this->buff += redir_or_pipe->consumed;
             } else {
                 // Not a redirection or pipe, so just a string.
@@ -568,15 +564,17 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
             break;
         }
     }
-    result.preceding_escaped_nl = preceding_escaped_nl;
+    assert(result.has_value() && "Should have a token");
+    result->preceding_escaped_nl = preceding_escaped_nl;
     return result;
 }
 
 wcstring tok_first(const wcstring &str) {
     tokenizer_t t(str.c_str(), 0);
-    tok_t token;
-    if (t.next(&token) && token.type == TOK_STRING) {
-        return t.text_of(token);
+    if (auto token = t.next()) {
+        if (token->type == token_type_t::string) {
+            return t.text_of(*token);
+        }
     }
     return {};
 }
diff --git a/src/tokenizer.h b/src/tokenizer.h
index 0d0527e86..35b3aeb3e 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -10,17 +10,16 @@
 #include "parse_constants.h"
 
 /// Token types.
-enum token_type {
-    TOK_NONE,        /// Tokenizer not yet constructed
-    TOK_ERROR,       /// Error reading token
-    TOK_STRING,      /// String token
-    TOK_PIPE,        /// Pipe token
-    TOK_ANDAND,      /// && token
-    TOK_OROR,        /// || token
-    TOK_END,         /// End token (semicolon or newline, not literal end)
-    TOK_REDIRECT,    /// redirection token
-    TOK_BACKGROUND,  /// send job to bg token
-    TOK_COMMENT      /// comment token
+enum class token_type_t {
+    error,       /// Error reading token
+    string,      /// String token
+    pipe,        /// Pipe token
+    andand,      /// && token
+    oror,        /// || token
+    end,         /// End token (semicolon or newline, not literal end)
+    redirect,    /// redirection token
+    background,  /// send job to bg token
+    comment,     /// comment token
 };
 
 enum class redirection_type_t {
@@ -65,7 +64,7 @@ const wchar_t *tokenizer_get_error_message(tokenizer_error_t err);
 
 struct tok_t {
     // The type of the token.
-    token_type type{TOK_NONE};
+    token_type_t type;
 
     // Offset of the token.
     size_t offset{0};
@@ -85,7 +84,8 @@ struct tok_t {
     // at 'offset'.
     size_t error_offset{size_t(-1)};
 
-    tok_t() = default;
+    // Construct from a token type.
+    explicit tok_t(token_type_t type);
 };
 
 /// The tokenizer struct.
@@ -112,7 +112,6 @@ class tokenizer_t {
     tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start,
                      const wchar_t *error_loc);
     tok_t read_string();
-    maybe_t<tok_t> tok_next();
 
    public:
     /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
@@ -124,8 +123,8 @@ class tokenizer_t {
     /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens
     tokenizer_t(const wchar_t *b, tok_flags_t flags);
 
-    /// Returns the next token by reference. Returns true if we got one, false if we're at the end.
-    bool next(struct tok_t *result);
+    /// Returns the next token, or none() if we are at the end.
+    maybe_t<tok_t> next();
 
     /// Returns the text of a token, as a string.
     wcstring text_of(const tok_t &tok) const { return wcstring(start + tok.offset, tok.length); }