Eliminate class parse_t

This commit is contained in:
ridiculousfish
2014-01-12 22:39:12 -08:00
parent ec469782c8
commit 096f850433
9 changed files with 86 additions and 167 deletions

View File

@@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
const wcstring src = str2wcstring(&txt.at(0), txt.size());
parse_node_tree_t parse_tree;
parse_error_list_t errors;
bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true);
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true);
if (! success)
{
stdout_buffer.append(L"Parsing failed:\n");
@@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
stdout_buffer.append(L"(Reparsed with continue after error)\n");
parse_tree.clear();
errors.clear();
parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
}
const wcstring dump = parse_dump_tree(parse_tree, src);
stdout_buffer.append(dump);

View File

@@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector<completion_t> &comps
//const wcstring prev_token(prev_begin, prev_token_len);
parse_node_tree_t tree;
parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
/* Find the plain statement that contains the position */
const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL);

View File

@@ -660,11 +660,11 @@ static void test_parser()
err(L"Invalid block mode when evaluating undetected");
}
/* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
#if 1
/* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
say(L"Testing recursion detection");
parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
#if 0
/* This is disabled since it produces a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
#endif
}
@@ -720,10 +720,9 @@ static void test_cancellation()
test_1_cancellation(L"while true ; end");
fprintf(stderr, ".");
test_1_cancellation(L"for i in (whiel true ; end) ; end");
test_1_cancellation(L"for i in (while true ; end) ; end");
fprintf(stderr, ".");
fprintf(stderr, "\n");
/* Restore signal handling */
@@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void)
const parser_test_t *test = &parser_tests[i];
parse_node_tree_t parse_tree;
bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL);
bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL);
say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no");
if (success && ! test->ok)
{
@@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void)
say(L"Parse tests complete");
}
struct parser_fuzz_token_t
/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */
static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str)
{
parse_token_type_t token_type;
parse_keyword_t keyword;
parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
out_str->clear();
size_t remaining_permutation = permutation;
for (size_t i=0; i < len; i++)
{
size_t idx = remaining_permutation % fuzz_count;
remaining_permutation /= fuzz_count;
out_str->append(fuzzes[idx]);
out_str->push_back(L' ');
}
};
static bool increment(std::vector<parser_fuzz_token_t> &tokens)
{
size_t i, end = tokens.size();
for (i=0; i < end; i++)
{
bool wrapped = false;
struct parser_fuzz_token_t &token = tokens[i];
bool incremented_in_keyword = false;
if (token.token_type == parse_token_type_string)
{
// try incrementing the keyword
token.keyword++;
if (token.keyword <= LAST_KEYWORD)
{
incremented_in_keyword = true;
}
else
{
token.keyword = parse_keyword_none;
incremented_in_keyword = false;
}
}
if (! incremented_in_keyword)
{
token.token_type++;
// Skip the very special parse_token_type_terminate, since that's always the last thing delivered
if (token.token_type == parse_token_type_terminate)
{
token.token_type++;
}
if (token.token_type > LAST_TERMINAL_TYPE)
{
token.token_type = FIRST_TERMINAL_TYPE;
wrapped = true;
}
}
if (! wrapped)
{
break;
}
}
return i == end;
// Return false if we wrapped
return remaining_permutation == 0;
}
static void test_new_parser_fuzzing(void)
{
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
const wcstring fuzzes[] =
{
L"if",
L"else",
L"for",
L"in",
L"while",
L"begin",
L"function",
L"switch",
L"case",
L"end",
L"and",
L"or",
L"not",
L"command",
L"builtin",
L"foo",
L"|",
L"^",
L"&",
L";",
};
/* Generate a list of strings of all keyword / token combinations. */
wcstring src;
src.reserve(128);
parse_node_tree_t node_tree;
parse_error_list_t errors;
double start = timef();
bool log_it = false;
// ensure nothing crashes
size_t max = 4;
for (size_t len=1; len <= max; len++)
bool log_it = true;
size_t max_len = 5;
for (size_t len = 0; len < max_len; len++)
{
if (log_it)
fprintf(stderr, "%lu / %lu...", len, max);
std::vector<parser_fuzz_token_t> tokens(len);
size_t count = 0;
parse_t parser;
parse_node_tree_t parse_tree;
do
{
parser.clear();
parse_tree.clear();
count++;
for (size_t i=0; i < len; i++)
{
const parser_fuzz_token_t &token = tokens[i];
parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL);
}
fprintf(stderr, "%lu / %lu...", len, max_len);
// keep going until we wrap
/* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */
size_t permutation = 0;
while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src))
{
parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
}
while (! increment(tokens));
if (log_it)
fprintf(stderr, "done (%lu)\n", count);
fprintf(stderr, "done (%lu)\n", permutation);
}
double end = timef();
if (log_it)
@@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
bool result = false;
parse_node_tree_t tree;
if (parse_t::parse(src, parse_flag_none, &tree, NULL))
if (parse_tree_from_string(src, parse_flag_none, &tree, NULL))
{
/* Get the statement. Should only have one */
const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement);
@@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc()
/* Ensure that 'case' terminates a job list */
const wcstring src = L"switch foo ; case bar; case baz; end";
parse_node_tree_t parse_tree;
bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
if (! success)
{
err(L"Parsing failed");
@@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void)
parse_error_list_t errors;
parse_node_tree_t parse_tree;
bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors);
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
if (success)
{
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());

View File

@@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand
/* Parse the buffer */
parse_node_tree_t parse_tree;
parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Find the last statement */
const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
@@ -1716,7 +1716,7 @@ class highlighter_t
{
/* Parse the tree */
this->parse_tree.clear();
parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
}
/* Perform highlighting, returning an array of colors */
@@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
/* Parse the buffer */
parse_node_tree_t parse_tree;
parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
#if 0
const wcstring dump = parse_dump_tree(parse_tree, buff);

View File

@@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
}
}
parse_t::parse_t() : parser(new parse_ll_t())
{
}
parse_t::~parse_t()
{
delete parser;
}
static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
{
parse_keyword_t result = parse_keyword_none;
@@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok)
return result;
}
bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
{
this->parser->set_should_generate_error_messages(errors != NULL);
parse_ll_t parser;
parser.set_should_generate_error_messages(errors != NULL);
/* Construct the tokenizer */
tok_flags_t tok_options = 0;
@@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
}
/* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */
this->parser->accept_tokens(queue[0], queue[1]);
parser.accept_tokens(queue[0], queue[1]);
/* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
if (queue[1].type == parse_special_type_tokenizer_error)
{
this->parser->report_tokenizer_error(queue[1], tok_last(&tok));
parser.report_tokenizer_error(queue[1], tok_last(&tok));
}
/* Handle errors */
if (this->parser->has_fatal_error())
if (parser.has_fatal_error())
{
if (parse_flags & parse_flag_continue_after_error)
{
@@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
/* Mark a special error token, and then keep going */
const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length};
this->parser->accept_tokens(token, kInvalidToken);
this->parser->reset_symbols();
parser.accept_tokens(token, kInvalidToken);
parser.reset_symbols();
}
else
{
@@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
// Teach each node where its source range is
this->parser->determine_node_ranges();
parser.determine_node_ranges();
// Acquire the output from the parser
this->parser->acquire_output(output, errors);
parser.acquire_output(output, errors);
#if 0
//wcstring result = dump_tree(this->parser->nodes, str);
@@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
#endif
// Indicate if we had a fatal error
return ! this->parser->has_fatal_error();
}
bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
{
parse_t parse;
return parse.parse_internal(str, flags, output, errors, log_it);
}
bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
{
const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1};
// Only strings can have keywords. So if we have a keyword, the type must be a string
assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
parse_token_t token;
token.type = token_type;
token.keyword = keyword;
token.source_start = -1;
token.source_length = 0;
bool wants_errors = (errors != NULL);
this->parser->set_should_generate_error_messages(wants_errors);
/* Passing invalid_token here is totally wrong. This code is only used in testing however. */
this->parser->accept_tokens(token, invalid_token);
return ! this->parser->has_fatal_error();
}
void parse_t::clear()
{
this->parser->reset_symbols_and_nodes();
return ! parser.has_fatal_error();
}
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const

View File

@@ -74,28 +74,6 @@ enum
};
typedef unsigned int parse_tree_flags_t;
class parse_ll_t;
class parse_t
{
parse_ll_t * const parser;
bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
public:
parse_t();
~parse_t();
/* Parse a string all at once */
static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
/* Parse a single token */
bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
/* Reset, ready to parse something else */
void clear();
};
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type);
@@ -218,6 +196,9 @@ class parse_node_tree_t : public std::vector<parse_node_t>
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
};
/* The big entry point. Parse a string! */
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
/* Fish grammar:
# A job_list is a list of jobs, separated by semicolons or newlines

View File

@@ -878,7 +878,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
/* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */
parse_node_tree_t tree;
parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
/* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */
node_offset_t start_node_idx = 0;
@@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars
// Parse the input string into a parse tree
// Some errors are detected here
bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
if (! parsed)
{
errored = true;

View File

@@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl
/* Parse the source into a tree, if we can */
parse_node_tree_t tree;
if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL))
if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL))
{
return 1;
}

View File

@@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso
/* Parse this subcmd */
parse_node_tree_t parse_tree;
parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Look for plain statements where the cursor is at the end of the command */
const parse_node_t *matching_cmd_node = NULL;