Eliminate class parse_t

2026-06-07 18:21:16 -03:00 · 2014-01-12 22:39:12 -08:00
parent ec469782c8
commit 096f850433
9 changed files with 86 additions and 167 deletions
--- a/builtin.cpp
+++ b/builtin.cpp
@@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
        const wcstring src = str2wcstring(&txt.at(0), txt.size());
        parse_node_tree_t parse_tree;
        parse_error_list_t errors;
-        bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true);
+        bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true);
        if (! success)
        {
            stdout_buffer.append(L"Parsing failed:\n");
@@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
            stdout_buffer.append(L"(Reparsed with continue after error)\n");
            parse_tree.clear();
            errors.clear();
-            parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
+            parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
        }
        const wcstring dump = parse_dump_tree(parse_tree, src);
        stdout_buffer.append(dump);
--- a/complete.cpp
+++ b/complete.cpp
@@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector<completion_t> &comps
        //const wcstring prev_token(prev_begin, prev_token_len);
        
        parse_node_tree_t tree;
-        parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
+        parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
        
        /* Find the plain statement that contains the position */
        const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL);
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -660,11 +660,11 @@ static void test_parser()
        err(L"Invalid block mode when evaluating undetected");
    }
    
-    /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
-#if 1
    /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
    say(L"Testing recursion detection");
    parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
+#if 0
+    /* This is disabled since it produces a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
    parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
 #endif
 }
@@ -720,10 +720,9 @@ static void test_cancellation()
    test_1_cancellation(L"while true ; end");
    fprintf(stderr, ".");
    
-    test_1_cancellation(L"for i in (whiel true ; end) ; end");
+    test_1_cancellation(L"for i in (while true ; end) ; end");
    fprintf(stderr, ".");

-    
    fprintf(stderr, "\n");
    
    /* Restore signal handling */
@@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void)
        const parser_test_t *test = &parser_tests[i];

        parse_node_tree_t parse_tree;
-        bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL);
+        bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL);
        say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no");
        if (success && ! test->ok)
        {
@@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void)
    say(L"Parse tests complete");
 }

-struct parser_fuzz_token_t
+/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */
+static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str)
 {
-    parse_token_type_t token_type;
-    parse_keyword_t keyword;
-
-    parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
+    out_str->clear();
+    
+    size_t remaining_permutation = permutation;
+    for (size_t i=0; i < len; i++)
    {
+        size_t idx = remaining_permutation % fuzz_count;
+        remaining_permutation /= fuzz_count;
+        
+        out_str->append(fuzzes[idx]);
+        out_str->push_back(L' ');
    }
-};
-
-static bool increment(std::vector<parser_fuzz_token_t> &tokens)
-{
-    size_t i, end = tokens.size();
-    for (i=0; i < end; i++)
-    {
-        bool wrapped = false;
-
-        struct parser_fuzz_token_t &token = tokens[i];
-        bool incremented_in_keyword = false;
-        if (token.token_type == parse_token_type_string)
-        {
-            // try incrementing the keyword
-            token.keyword++;
-            if (token.keyword <= LAST_KEYWORD)
-            {
-                incremented_in_keyword = true;
-            }
-            else
-            {
-                token.keyword = parse_keyword_none;
-                incremented_in_keyword = false;
-            }
-        }
-
-        if (! incremented_in_keyword)
-        {
-            token.token_type++;
-            // Skip the very special parse_token_type_terminate, since that's always the last thing delivered
-            if (token.token_type == parse_token_type_terminate)
-            {
-                token.token_type++;
-            }
-            
-            if (token.token_type > LAST_TERMINAL_TYPE)
-            {
-                token.token_type = FIRST_TERMINAL_TYPE;
-                wrapped = true;
-            }
-        }
-
-        if (! wrapped)
-        {
-            break;
-        }
-    }
-    return i == end;
+    // Return false if we wrapped
+    return remaining_permutation == 0;
 }

 static void test_new_parser_fuzzing(void)
 {
    say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
+    const wcstring fuzzes[] =
+    {
+        L"if",
+        L"else",
+        L"for",
+        L"in",
+        L"while",
+        L"begin",
+        L"function",
+        L"switch",
+        L"case",
+        L"end",
+        L"and",
+        L"or",
+        L"not",
+        L"command",
+        L"builtin",
+        L"foo",
+        L"|",
+        L"^",
+        L"&",
+        L";",
+    };
+    
+    /* Generate a list of strings of all keyword / token combinations. */
+    wcstring src;
+    src.reserve(128);
+    
+    parse_node_tree_t node_tree;
+    parse_error_list_t errors;
+
    double start = timef();
-    bool log_it = false;
-    // ensure nothing crashes
-    size_t max = 4;
-    for (size_t len=1; len <= max; len++)
+    bool log_it = true;
+    size_t max_len = 5;
+    for (size_t len = 0; len < max_len; len++)
    {
        if (log_it)
-            fprintf(stderr, "%lu / %lu...", len, max);
-        std::vector<parser_fuzz_token_t> tokens(len);
-        size_t count = 0;
-        parse_t parser;
-        parse_node_tree_t parse_tree;
-        do
-        {
-            parser.clear();
-            parse_tree.clear();
-            count++;
-            for (size_t i=0; i < len; i++)
-            {
-                const parser_fuzz_token_t &token = tokens[i];
-                parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL);
-            }
+            fprintf(stderr, "%lu / %lu...", len, max_len);

-            // keep going until we wrap
+        /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */
+        size_t permutation = 0;
+        while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src))
+        {
+            parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
        }
-        while (! increment(tokens));
        if (log_it)
-            fprintf(stderr, "done (%lu)\n", count);
+            fprintf(stderr, "done (%lu)\n", permutation);
+        
    }
    double end = timef();
    if (log_it)
@@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
    
    bool result = false;
    parse_node_tree_t tree;
-    if (parse_t::parse(src, parse_flag_none, &tree, NULL))
+    if (parse_tree_from_string(src, parse_flag_none, &tree, NULL))
    {
        /* Get the statement. Should only have one */
        const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement);
@@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc()
    /* Ensure that 'case' terminates a job list */
    const wcstring src = L"switch foo ; case bar; case baz; end";
    parse_node_tree_t parse_tree;
-    bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
+    bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
    if (! success)
    {
        err(L"Parsing failed");
@@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void)
        
        parse_error_list_t errors;
        parse_node_tree_t parse_tree;
-        bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors);
+        bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
        if (success)
        {
            err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
--- a/highlight.cpp
+++ b/highlight.cpp
@@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand
    
    /* Parse the buffer */
    parse_node_tree_t parse_tree;
-    parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+    parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
    
    /* Find the last statement */
    const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
@@ -1716,7 +1716,7 @@ class highlighter_t
    {
        /* Parse the tree */
        this->parse_tree.clear();
-        parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
+        parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
    }
    
    /* Perform highlighting, returning an array of colors */
@@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
    
    /* Parse the buffer */
    parse_node_tree_t parse_tree;
-    parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
+    parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);

 #if 0
    const wcstring dump = parse_dump_tree(parse_tree, buff);
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
    }
 }

-parse_t::parse_t() : parser(new parse_ll_t())
-{
-}
-
-parse_t::~parse_t()
-{
-    delete parser;
-}
-
 static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
 {
    parse_keyword_t result = parse_keyword_none;
@@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok)
    return result;
 }

-bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
 {
-    this->parser->set_should_generate_error_messages(errors != NULL);
+    parse_ll_t parser;
+    parser.set_should_generate_error_messages(errors != NULL);

    /* Construct the tokenizer */
    tok_flags_t tok_options = 0;
@@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
        }
        
        /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */
-        this->parser->accept_tokens(queue[0], queue[1]);
+        parser.accept_tokens(queue[0], queue[1]);
        
        /* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
        if (queue[1].type == parse_special_type_tokenizer_error)
        {
-            this->parser->report_tokenizer_error(queue[1], tok_last(&tok));
+            parser.report_tokenizer_error(queue[1], tok_last(&tok));
        }
        
        /* Handle errors */
-        if (this->parser->has_fatal_error())
+        if (parser.has_fatal_error())
        {
            if (parse_flags & parse_flag_continue_after_error)
            {
@@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags

                /* Mark a special error token, and then keep going */
                const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length};
-                this->parser->accept_tokens(token, kInvalidToken);
-                this->parser->reset_symbols();
+                parser.accept_tokens(token, kInvalidToken);
+                parser.reset_symbols();
            }
            else
            {
@@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags


    // Teach each node where its source range is
-    this->parser->determine_node_ranges();
+    parser.determine_node_ranges();
    
    // Acquire the output from the parser
-    this->parser->acquire_output(output, errors);
+    parser.acquire_output(output, errors);

 #if 0
    //wcstring result = dump_tree(this->parser->nodes, str);
@@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
 #endif
    
    // Indicate if we had a fatal error
-    return ! this->parser->has_fatal_error();
-}
-
-bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
-{
-    parse_t parse;
-    return parse.parse_internal(str, flags, output, errors, log_it);
-}
-
-bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
-{
-    const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1};
-    
-    // Only strings can have keywords. So if we have a keyword, the type must be a string
-    assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
-
-    parse_token_t token;
-    token.type = token_type;
-    token.keyword = keyword;
-    token.source_start = -1;
-    token.source_length = 0;
-    
-    bool wants_errors = (errors != NULL);
-    this->parser->set_should_generate_error_messages(wants_errors);
-
-    /* Passing invalid_token here is totally wrong. This code is only used in testing however. */
-    this->parser->accept_tokens(token, invalid_token);
-
-    return ! this->parser->has_fatal_error();
-}
-
-void parse_t::clear()
-{
-    this->parser->reset_symbols_and_nodes();
+    return ! parser.has_fatal_error();
 }

 const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
--- a/parse_tree.h
+++ b/parse_tree.h
@@ -74,28 +74,6 @@ enum
 };
 typedef unsigned int parse_tree_flags_t;

-class parse_ll_t;
-class parse_t
-{
-    parse_ll_t * const parser;
-
-    bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
-public:
-    parse_t();
-    ~parse_t();
-
-    /* Parse a string all at once */
-    static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
-    /* Parse a single token */
-    bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
-    
-    /* Reset, ready to parse something else */
-    void clear();
-
-};
-
 wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);

 wcstring token_type_description(parse_token_type_t type);
@@ -218,6 +196,9 @@ class parse_node_tree_t : public std::vector<parse_node_t>
    parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
 };

+/* The big entry point. Parse a string! */
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
+
 /* Fish grammar:

 # A job_list is a list of jobs, separated by semicolons or newlines
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -878,7 +878,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
    
    /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */
    parse_node_tree_t tree;
-    parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
+    parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
    
    /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */
    node_offset_t start_node_idx = 0;
@@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars
    
    // Parse the input string into a parse tree
    // Some errors are detected here
-    bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
+    bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
    if (! parsed)
    {
        errored = true;
--- a/parser.cpp
+++ b/parser.cpp
@@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl
    
    /* Parse the source into a tree, if we can */
    parse_node_tree_t tree;
-    if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL))
+    if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL))
    {
        return 1;
    }
--- a/reader.cpp
+++ b/reader.cpp
@@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso
    
    /* Parse this subcmd */
    parse_node_tree_t parse_tree;
-    parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+    parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
    
    /* Look for plain statements where the cursor is at the end of the command */
    const parse_node_t *matching_cmd_node = NULL;