From 54f683fc6b41bd5575a8b3eb7307b69a1b725df6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 26 May 2013 12:12:16 -0700 Subject: [PATCH 001/177] Initial work on parser bringup (squash) --- expression.cpp | 9 +++ expression.h | 103 +++++++++++++++++++++++++++++++++ fish.xcodeproj/project.pbxproj | 12 +++- 3 files changed, 121 insertions(+), 3 deletions(-) create mode 100644 expression.cpp create mode 100644 expression.h diff --git a/expression.cpp b/expression.cpp new file mode 100644 index 000000000..d8d5386e9 --- /dev/null +++ b/expression.cpp @@ -0,0 +1,9 @@ +// +// expression.cpp +// fish +// +// Created by Peter Ammon on 5/25/13. +// +// + +#include "expression.h" diff --git a/expression.h b/expression.h new file mode 100644 index 000000000..390c1bb79 --- /dev/null +++ b/expression.h @@ -0,0 +1,103 @@ +/**\file expression.h + + Programmatic representation of fish code. + +*/ + +#ifndef FISH_EXPRESSION_H +#define FISH_EXPRESSION_H + +#include + +#include "config.h" +#include "util.h" +#include "common.h" + + +/* Fish grammar: + +# A statement_list is a list of statements, separated by semicolons or newlines + + statement_list = | statement | statement statement_list + +# A statement is a normal job, or an if / while / and etc. 
+ + statement = boolean_statement | block_statement | decorated_statement + +# A block is a conditional, loop, or begin/end + + block_statement = block_header statement_list END arguments_or_redirections_list + block_header = if_header | for_header | while_header | function_header | begin_header + if_header = IF statement + for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + while_header = WHILE statement + begin_header = BEGIN STATEMENT_TERMINATOR + function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + +# A boolean statement is AND or OR or NOT + + boolean_statement = AND statement | OR statement | NOT statement + +# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" + + decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement + plain_statement = command arguments_or_redirections_list terminator + + arguments_or_redirections_list = | argument_or_redirection + argument_or_redirection = redirection | + redirection = REDIRECTION + +*/ + + +class parse_command_t; + +/** Root of a parse tree */ +class parse_tree_t +{ + /** Literal source code */ + wcstring source; + + /** Initial node */ + parse_command_list_t *child; +}; + +/** Base class for nodes of a parse tree */ +class parse_node_base_t +{ + /* Backreference to the tree */ + parse_tree_t * const tree; + + /* Start in the source code */ + const unsigned int source_start; + + /* Length of our range in the source code */ + const unsigned int source_length; +}; + +class parse_statement_list_t : public parse_node_base_t +{ + std::vector statements; +}; + +class parse_statement_t : public parse_node_base_t +{ + +}; + +class parse_boolean_statement_t : public parse_statement_t +{ + +}; + +class parse_plain_statement_t : public parse_statement_t +{ + +}; + +class parse_block_statement_t : public parse_statement_t +{ + +}; + +#endif diff --git 
a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index b48ea5771..23b06b5ed 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -72,6 +72,7 @@ D07D266D15E33B86009E43F6 /* functions in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; + D07FEA311751E6AF003066C3 /* expression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D07FEA2F1751E6AF003066C3 /* expression.cpp */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; @@ -336,6 +337,8 @@ D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; + D07FEA2F1751E6AF003066C3 /* expression.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expression.cpp; sourceTree = ""; }; + D07FEA301751E6AF003066C3 /* expression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = expression.h; 
sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; @@ -582,6 +585,8 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D07FEA301751E6AF003066C3 /* expression.h */, + D07FEA2F1751E6AF003066C3 /* expression.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1102,6 +1107,7 @@ D0D02A7A15983916008E62BD /* env_universal.cpp in Sources */, D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, + D07FEA311751E6AF003066C3 /* expression.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1189,7 +1195,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; @@ -1339,7 +1345,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -1365,7 +1371,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 
GCC_WARN_ABOUT_RETURN_TYPE = YES; From d54346b2055935cc54aa212e2dccd40c5487b18f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 1 Jun 2013 22:14:47 -0700 Subject: [PATCH 002/177] Stuff --- expression.cpp | 614 ++++++++++++++++++++++++++++++++++++++++++++++++- expression.h | 75 ++---- parser.cpp | 1 + tokenizer.h | 2 +- 4 files changed, 632 insertions(+), 60 deletions(-) diff --git a/expression.cpp b/expression.cpp index d8d5386e9..955621c60 100644 --- a/expression.cpp +++ b/expression.cpp @@ -1,9 +1,607 @@ -// -// expression.cpp -// fish -// -// Created by Peter Ammon on 5/25/13. -// -// - #include "expression.h" +#include +#include + +/* Fish grammar: + +# A statement_list is a list of statements, separated by semicolons or newlines + + statement_list = | + statement statement_list + +# A statement is a normal job, or an if / while / and etc. + + statement = boolean_statement | block_statement | decorated_statement + +# A block is a conditional, loop, or begin/end + + block_statement = block_header statement_list END arguments_or_redirections_list + block_header = if_header | for_header | while_header | function_header | begin_header + if_header = IF statement + for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + while_header = WHILE statement + begin_header = BEGIN STATEMENT_TERMINATOR + function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + +# A boolean statement is AND or OR or NOT + + boolean_statement = AND statement | OR statement | NOT statement + +# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" + + decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement + plain_statement = command arguments_or_redirections_list terminator + + arguments_or_redirections_list = | + argument_or_redirection arguments_or_redirections_list + argument_or_redirection = redirection | + redirection = + + terminator = | 
+ +*/ + +#define PARSE_ASSERT(a) assert(a) + +#define PARSER_DIE() assert(0) + +#if 1 +class parse_command_t; + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_statement_list, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_if_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + + // Terminal types + parse_token_type_string, + parse_token_type_pipe, + parse_token_type_redirection, + parse_token_background, + parse_token_type_end, + parse_token_type_terminate, +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin +}; + +struct parse_stack_element_t +{ + enum parse_token_type_t type; + enum parse_keyword_t keyword; + + // Construct a token type, with no keyword + parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) + { + } + + // Construct a string type from a keyword + parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k) + { + } +}; + +struct parse_token_t +{ + enum parse_token_type_t type; // The type of the token as represnted by the parser + enum token_type tokenizer_type; // The type of the token as represented by the tokenizer + enum parse_keyword_t keyword; // Any keyword represented by this parser + size_t source_start; + size_t source_end; +}; + +// Convert from tokenizer_t's token type to our token +static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) +{ + parse_token_t 
result = {}; + result.tokenizer_type = tokenizer_token_type; + switch (tokenizer_token_type) + { + case TOK_STRING: + result.type = parse_token_type_string; + break; + + case TOK_PIPE: + result.type = parse_token_type_pipe; + break; + + case TOK_END: + result.type = parse_token_type_end; + break; + + case TOK_BACKGROUND: + result.type = parse_token_background; + break; + + default: + fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__); + assert(0); + break; + } + return result; +} + +/** Root of a parse tree */ +class parse_statement_list_t; +class parse_tree_t +{ + friend class parse_ll_t; + + parse_statement_list_t *root; +}; + +/** Base class for nodes of a parse tree */ +class parse_node_base_t +{ + /* Backreference to the tree */ + parse_tree_t * const tree; + + /* Type of the node */ + const enum parse_token_type_t type; + + /* Start in the source code */ + const unsigned int source_start; + + /* Length of our range in the source code */ + const unsigned int source_length; + + public: + parse_node_base_t(parse_tree_t *tr, parse_token_type_t ty) : tree(tr), type(ty), source_start(0), source_length(0) + { + } + + virtual ~parse_node_base_t() + { + } +}; + +class parse_statement_t; +class parse_statement_list_t : public parse_node_base_t +{ + std::vector statements; //deleted by destructor + public: + parse_statement_list_t(parse_tree_t *t) : parse_node_base_t(t, symbol_statement_list) + { + } +}; + +class parse_statement_t : public parse_node_base_t +{ + // abstract class + + public: + parse_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_node_base_t(t, ty) + { + } +}; + +class parse_boolean_statement_t : public parse_statement_t +{ + enum { + boolean_and, + boolean_or, + boolean_not + }; + parse_statement_t *subject; + + parse_boolean_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_boolean_statement) + { + } +}; + +class parse_plain_statement_t; +class parse_decorated_statement_t : 
public parse_statement_t +{ + enum { + decoration_command, + decoration_builtin + } decoration; + + parse_plain_statement_t *subject; + + parse_decorated_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_decorated_statement) + { + } + +}; + +class parse_plain_statement_t : public parse_statement_t +{ + wcstring_list_t arguments; + wcstring_list_t redirections; + + parse_plain_statement_t(parse_tree_t *t) : parse_statement_t(t, symbol_plain_statement) + { + } +}; + +class parse_block_statement_t : public parse_statement_t +{ + // abstract class + parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(t, ty) + { + } +}; + +class parse_ll_t +{ + friend class parse_t; + + std::stack symbol_stack; // LL parser stack + std::stack node_stack; // stack of nodes we are constructing; owned by the tree (not by us!) + parse_tree_t *tree; //tree we are constructing + + // Constructor + parse_ll_t() + { + this->tree = new parse_tree_t(); + tree->root = new parse_statement_list_t(this->tree);; + + symbol_stack.push(symbol_statement_list); // goal token + node_stack.push(tree->root); //outermost node + } + + // implementation of certain parser constructions + void accept_token(parse_token_t token); + void accept_token_statement_list(parse_token_t token); + void accept_token_statement(parse_token_t token); + void accept_token_block_header(parse_token_t token); + void accept_token_boolean_statement(parse_token_t token); + void accept_token_decorated_statement(parse_token_t token); + void accept_token_arguments_or_redirections_list(parse_token_t token); + void accept_token_argument_or_redirection(parse_token_t token); + + void token_unhandled(parse_token_t token, const char *function); + + void parse_error(const wchar_t *expected, parse_token_t token); + + parse_token_type_t stack_top_type() const + { + return symbol_stack.top().type; + } + + // Pop from the top of the symbol stack, then push. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack + inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + { + symbol_stack.pop(); + if (tok5.type != token_type_invalid) symbol_stack.push(tok5); + if (tok4.type != token_type_invalid) symbol_stack.push(tok4); + if (tok3.type != token_type_invalid) symbol_stack.push(tok3); + if (tok2.type != token_type_invalid) symbol_stack.push(tok2); + if (tok1.type != token_type_invalid) symbol_stack.push(tok1); + } +}; + +void parse_ll_t::token_unhandled(parse_token_t token, const char *function) +{ + fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); + PARSER_DIE(); +} + +void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) +{ + fprintf(stderr, "Expected a %ls, instead got a token of type %d\n", expected, (int)token.type); +} + +void parse_ll_t::accept_token_statement_list(parse_token_t token) +{ + PARSE_ASSERT(symbol_stack.top().type == symbol_statement_list); + switch (token.type) + { + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + symbol_stack_pop_push(symbol_statement, symbol_statement_list); + + break; + + case parse_token_type_terminate: + // no more commands, just transition to empty + symbol_stack_pop_push(); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + 
symbol_stack_pop_push(symbol_boolean_statement); + break; + + case parse_keyword_if: + case parse_keyword_else: + case parse_keyword_for: + case parse_keyword_in: + case parse_keyword_while: + case parse_keyword_begin: + case parse_keyword_function: + case parse_keyword_switch: + symbol_stack_pop_push(symbol_block_statement); + break; + + case parse_keyword_end: + // TODO + break; + + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + symbol_stack_pop_push(symbol_decorated_statement); + break; + + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + case parse_token_type_terminate: + parse_error(L"command", token); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_block_header(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_block_header); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_header); + break; + + case parse_keyword_else: + //todo + break; + + case parse_keyword_for: + symbol_stack_pop_push(symbol_for_header); + break; + + + case parse_keyword_while: + symbol_stack_pop_push(symbol_while_header); + break; + + case parse_keyword_begin: + symbol_stack_pop_push(symbol_begin_header); + break; + + case parse_keyword_function: + symbol_stack_pop_push(symbol_function_header); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + + } + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_boolean_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_and: + symbol_stack_pop_push(parse_keyword_and, symbol_statement); + break; + case parse_keyword_or: 
+ symbol_stack_pop_push(parse_keyword_or, symbol_statement); + break; + case parse_keyword_not: + symbol_stack_pop_push(parse_keyword_not, symbol_statement); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_decorated_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_command: + symbol_stack_pop_push(parse_keyword_command, symbol_statement); + break; + case parse_keyword_builtin: + symbol_stack_pop_push(parse_keyword_builtin, symbol_statement); + break; + default: + symbol_stack_pop_push(symbol_plain_statement); + break; + } + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); + switch (token.type) + { + case parse_token_type_string: + case parse_token_type_redirection: + symbol_stack_pop_push(symbol_argument_or_redirection, symbol_arguments_or_redirections_list); + break; + + default: + // Some other token, end of list + symbol_stack_pop_push(); + break; + } +} + +void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); + switch (token.type) + { + case parse_token_type_string: + symbol_stack_pop_push(); + // Got an argument + break; + + case parse_token_type_redirection: + symbol_stack_pop_push(); + // Got a redirection + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token(parse_token_t token) +{ + assert(! 
symbol_stack.empty()); + switch (stack_top_type()) + { + case symbol_statement_list: + accept_token_statement_list(token); + break; + + case symbol_statement: + accept_token_statement(token); + break; + + case symbol_block_statement: + symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + break; + + case symbol_block_header: + accept_token_block_header(token); + break; + + case symbol_if_header: + break; + + case symbol_for_header: + symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_while_header: + symbol_stack_pop_push(parse_keyword_while, symbol_statement); + break; + + case symbol_begin_header: + symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + break; + + case symbol_function_header: + symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_boolean_statement: + accept_token_boolean_statement(token); + break; + + case symbol_decorated_statement: + accept_token_decorated_statement(token); + break; + + case symbol_plain_statement: + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_arguments_or_redirections_list: + accept_token_arguments_or_redirections_list(token); + break; + + case symbol_argument_or_redirection: + accept_token_argument_or_redirection(token); + break; + } +} +#endif + + +class parse_sr_t +{ + friend class parse_t; + + std::vector node_stack; + void accept_token(parse_token_t token); +}; + +parse_t::parse_t() : parser(new parse_sr_t()) +{ +} diff --git a/expression.h b/expression.h index 390c1bb79..c458800a7 100644 --- a/expression.h +++ b/expression.h @@ -12,13 +12,23 @@ #include "config.h" #include "util.h" #include "common.h" +#include "tokenizer.h" +class parse_ll_t; +class 
parse_sr_t; +class parse_t +{ + parse_sr_t * const parser; + parse_t(); +}; + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines - statement_list = | statement | statement statement_list + statement_list = | + statement statement_list # A statement is a normal job, or an if / while / and etc. @@ -43,61 +53,24 @@ decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement plain_statement = command arguments_or_redirections_list terminator - arguments_or_redirections_list = | argument_or_redirection - argument_or_redirection = redirection | - redirection = REDIRECTION + arguments_or_redirections_list = | + argument_or_redirection arguments_or_redirections_list + argument_or_redirection = redirection | + redirection = + + terminator = | */ -class parse_command_t; +/* fish Shift-Reduce grammar: -/** Root of a parse tree */ -class parse_tree_t -{ - /** Literal source code */ - wcstring source; - - /** Initial node */ - parse_command_list_t *child; -}; + + IF <- if_statement + FOR <- for_statement + + -/** Base class for nodes of a parse tree */ -class parse_node_base_t -{ - /* Backreference to the tree */ - parse_tree_t * const tree; - - /* Start in the source code */ - const unsigned int source_start; - - /* Length of our range in the source code */ - const unsigned int source_length; -}; - -class parse_statement_list_t : public parse_node_base_t -{ - std::vector statements; -}; - -class parse_statement_t : public parse_node_base_t -{ - -}; - -class parse_boolean_statement_t : public parse_statement_t -{ - -}; - -class parse_plain_statement_t : public parse_statement_t -{ - -}; - -class parse_block_statement_t : public parse_statement_t -{ - -}; +*/ #endif diff --git a/parser.cpp b/parser.cpp index e0a79ea7b..89c1e31b8 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1311,6 +1311,7 @@ void parser_t::parse_job_argument_list(process_t *p, case TOK_BACKGROUND: { job_set_flag(j, JOB_FOREGROUND, 0); + 
// PCA note fall through, this is deliberate. The background modifier & terminates a command } case TOK_END: diff --git a/tokenizer.h b/tokenizer.h index 4357757dc..40390bcf8 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -22,7 +22,7 @@ enum token_type TOK_INVALID,/**< Invalid token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ - TOK_END,/**< End token */ + TOK_END,/**< End token (semicolon or newline, not literal end) */ TOK_REDIRECT_OUT, /**< redirection token */ TOK_REDIRECT_APPEND,/**< redirection append token */ TOK_REDIRECT_IN,/**< input redirection token */ From 99494afd08081fe90c16777117b2434c3051acc3 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 6 Jun 2013 21:49:40 -0700 Subject: [PATCH 003/177] Parser work. Not sure whether to have a different class for each node or not. --- expression.cpp | 283 ++++++++++++++++++++++++++++++++++++++++--------- expression.h | 2 +- 2 files changed, 234 insertions(+), 51 deletions(-) diff --git a/expression.cpp b/expression.cpp index 955621c60..bbc83fe10 100644 --- a/expression.cpp +++ b/expression.cpp @@ -97,21 +97,6 @@ enum parse_keyword_t parse_keyword_builtin }; -struct parse_stack_element_t -{ - enum parse_token_type_t type; - enum parse_keyword_t keyword; - - // Construct a token type, with no keyword - parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) - { - } - - // Construct a string type from a keyword - parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k) - { - } -}; struct parse_token_t { @@ -165,9 +150,6 @@ class parse_tree_t /** Base class for nodes of a parse tree */ class parse_node_base_t { - /* Backreference to the tree */ - parse_tree_t * const tree; - /* Type of the node */ const enum parse_token_type_t type; @@ -177,8 +159,12 @@ class parse_node_base_t /* Length of our range in the source code */ const unsigned int source_length; + protected: + /* Index of the production used */ + unsigned char branch; + 
public: - parse_node_base_t(parse_tree_t *tr, parse_token_type_t ty) : tree(tr), type(ty), source_start(0), source_length(0) + parse_node_base_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0) { } @@ -187,12 +173,25 @@ class parse_node_base_t } }; +class parse_node_t : public parse_node_base_t +{ + public: + parse_node_t *p1; + parse_node_t *p2; + uint32_t c1; + + parse_node_t(parse_token_type_t ty) : parse_node_base_t(ty), p1(NULL), p2(NULL), c1(0) + { + } +}; + class parse_statement_t; class parse_statement_list_t : public parse_node_base_t { - std::vector statements; //deleted by destructor public: - parse_statement_list_t(parse_tree_t *t) : parse_node_base_t(t, symbol_statement_list) + parse_statement_t *statement; + parse_statement_list_t *next; + parse_statement_list_t() : parse_node_base_t(symbol_statement_list), statement(NULL), next(NULL) { } }; @@ -200,49 +199,82 @@ class parse_statement_list_t : public parse_node_base_t class parse_statement_t : public parse_node_base_t { // abstract class - public: - parse_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_node_base_t(t, ty) + parse_statement_t(parse_token_type_t ty) : parse_node_base_t(ty) + { + } +}; + +class parse_abstract_statement_t : public parse_statement_t +{ + public: + parse_statement_t *subject; + parse_abstract_statement_t() : parse_statement_t(symbol_statement), subject(NULL) { } }; class parse_boolean_statement_t : public parse_statement_t { + public: enum { + boolean_invalid, boolean_and, boolean_or, boolean_not - }; - parse_statement_t *subject; + } condition; - parse_boolean_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_boolean_statement) + parse_boolean_statement_t() : parse_statement_t(symbol_boolean_statement), condition(boolean_invalid) { +#if 0 + switch (keyword) + { + case parse_keyword_and: + condition = boolean_and; + break; + + case parse_keyword_or: + condition = boolean_or; + break; + + case parse_keyword_not: + 
condition = boolean_not; + break; + + default: + PARSE_ASSERT(0 && "Unknown keyword"); + break; + } +#endif } }; class parse_plain_statement_t; class parse_decorated_statement_t : public parse_statement_t { +public: enum { + decoration_none, decoration_command, decoration_builtin } decoration; parse_plain_statement_t *subject; - parse_decorated_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_decorated_statement) + parse_decorated_statement_t() : parse_statement_t(symbol_decorated_statement), subject(NULL), decoration(decoration_none) { } }; +class parse_string_t; class parse_plain_statement_t : public parse_statement_t { - wcstring_list_t arguments; - wcstring_list_t redirections; + parse_string_t *command; + parse_arguments_or_redirection_list_t *arguments_or_redirections_list; - parse_plain_statement_t(parse_tree_t *t) : parse_statement_t(t, symbol_plain_statement) + public: + parse_plain_statement_t() : parse_statement_t(symbol_plain_statement) { } }; @@ -250,27 +282,107 @@ class parse_plain_statement_t : public parse_statement_t class parse_block_statement_t : public parse_statement_t { // abstract class - parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(t, ty) + parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(ty) { } }; +class parse_string_t : public parse_node_base_t +{ +}; + +class parse_arguments_or_redirection_list_t : public parse_node_base_t +{ +}; + + +struct parse_stack_element_t +{ + enum parse_token_type_t type; + enum parse_keyword_t keyword; + parse_node_base_t *node; + + private: + void allocate_node(void) + { + assert(node == NULL); + switch (type) + { + // Set up our node + case symbol_statement_list: + node = new parse_statement_list_t(); + break; + + case symbol_statement: + node = new parse_abstract_statement_t(); + break; + + case symbol_block_statement: + case symbol_block_header: + case symbol_if_header: + case symbol_for_header: + case 
symbol_while_header: + case symbol_begin_header: + case symbol_function_header: + break; + + case symbol_boolean_statement: + node = new parse_boolean_statement_t(); + break; + + case symbol_decorated_statement: + node = new parse_decorated_statement_t(); + break; + + case symbol_plain_statement: + node = new parse_plain_statement_t(); + break; + + case symbol_arguments_or_redirections_list: + case symbol_argument_or_redirection: + + default: + ; + // nothing + } + } + + + public: + + // Construct a token type, with no keyword + parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) + { + allocate_node(); + } + + // Construct a string type from a keyword + parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k), node(NULL) + { + allocate_node(); + } +}; + +template +static T* cast_node(parse_node_base_t *node) +{ + return static_cast(node); +} + class parse_ll_t { friend class parse_t; - std::stack symbol_stack; // LL parser stack - std::stack node_stack; // stack of nodes we are constructing; owned by the tree (not by us!) 
+ std::vector symbol_stack; // LL parser stack parse_tree_t *tree; //tree we are constructing // Constructor parse_ll_t() { this->tree = new parse_tree_t(); - tree->root = new parse_statement_list_t(this->tree);; - symbol_stack.push(symbol_statement_list); // goal token - node_stack.push(tree->root); //outermost node + symbol_stack.push_back(symbol_statement_list); // goal token + tree->root = stack_get_node_cast(0); } // implementation of certain parser constructions @@ -280,6 +392,7 @@ class parse_ll_t void accept_token_block_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); + void accept_token_plain_statement(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); @@ -289,18 +402,33 @@ class parse_ll_t parse_token_type_t stack_top_type() const { - return symbol_stack.top().type; + return symbol_stack.back().type; + } + + template + T* stack_get_node_cast(unsigned int idx) + { + assert(idx < symbol_stack.size()); + parse_node_base_t *base_node = symbol_stack.at(symbol_stack.size() - idx - 1).node; + return static_cast(base_node); + + } + + parse_node_base_t *stack_get_node(unsigned int idx) const + { + assert(idx < symbol_stack.size()); + return symbol_stack.at(symbol_stack.size() - idx - 1).node; } // Pop from the top of the symbol stack, then push. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { - symbol_stack.pop(); - if (tok5.type != token_type_invalid) symbol_stack.push(tok5); - if (tok4.type != token_type_invalid) symbol_stack.push(tok4); - if (tok3.type != token_type_invalid) symbol_stack.push(tok3); - if (tok2.type != token_type_invalid) symbol_stack.push(tok2); - if (tok1.type != token_type_invalid) symbol_stack.push(tok1); + symbol_stack.pop_back(); + if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); + if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); + if (tok3.type != token_type_invalid) symbol_stack.push_back(tok3); + if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); + if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } }; @@ -317,7 +445,8 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) void parse_ll_t::accept_token_statement_list(parse_token_t token) { - PARSE_ASSERT(symbol_stack.top().type == symbol_statement_list); + PARSE_ASSERT(stack_top_type() == symbol_statement_list); + parse_statement_list_t *list = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -326,7 +455,8 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) case parse_token_background: case parse_token_type_end: symbol_stack_pop_push(symbol_statement, symbol_statement_list); - + list->next = stack_get_node_cast(0); + list->statement = stack_get_node_cast(1); break; case parse_token_type_terminate: @@ -343,6 +473,7 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) void parse_ll_t::accept_token_statement(parse_token_t token) { 
PARSE_ASSERT(stack_top_type() == symbol_statement); + parse_abstract_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -352,6 +483,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_or: case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); + statement->subject = stack_get_node_cast(0); break; case parse_keyword_if: @@ -363,6 +495,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_function: case parse_keyword_switch: symbol_stack_pop_push(symbol_block_statement); + assert(0 && "Need assignment"); break; case parse_keyword_end: @@ -373,6 +506,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: symbol_stack_pop_push(symbol_decorated_statement); + statement->subject = stack_get_node_cast(0); break; } @@ -441,18 +575,22 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); + parse_boolean_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_and: + statement->condition = parse_boolean_statement_t::boolean_and; symbol_stack_pop_push(parse_keyword_and, symbol_statement); break; case parse_keyword_or: + statement->condition = parse_boolean_statement_t::boolean_or; symbol_stack_pop_push(parse_keyword_or, symbol_statement); break; case parse_keyword_not: + statement->condition = parse_boolean_statement_t::boolean_not; symbol_stack_pop_push(parse_keyword_not, symbol_statement); break; @@ -470,19 +608,29 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + parse_decorated_statement_t 
*statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_command: - symbol_stack_pop_push(parse_keyword_command, symbol_statement); + symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_command; + break; + case parse_keyword_builtin: - symbol_stack_pop_push(parse_keyword_builtin, symbol_statement); + symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_builtin; break; + default: symbol_stack_pop_push(symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_none; break; } @@ -492,6 +640,14 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) } } +void parse_ll_t::accept_token_plain_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + parse_plain_statement_t *statement = stack_get_node_cast(0); + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + statement-> +} + void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); @@ -579,7 +735,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; case symbol_plain_statement: - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + accept_token_plain_statement(token); break; case symbol_arguments_or_redirections_list: @@ -593,15 +749,42 @@ void parse_ll_t::accept_token(parse_token_t token) } #endif - +#if 0 class parse_sr_t { friend class parse_t; std::vector node_stack; void accept_token(parse_token_t token); + + void 
accept_token_string(parse_token_t token); }; -parse_t::parse_t() : parser(new parse_sr_t()) +void parse_sr_t::accept_token_string(parse_token_t token) +{ + assert(token.type == parse_token_type_string); +} + +void parse_sr_t::accept_token(parse_token_t token) +{ + // We are a SR parser. Our action depends on a combination of the top element(s) of our node stack and the token type. + // Switch on the token type to make progress + switch (token.type) + { + case parse_token_type_string: + accept_token_string(token); + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + case parse_token_type_terminate: + } +} + +#endif + +parse_t::parse_t() : parser(new parse_ll_t()) { } diff --git a/expression.h b/expression.h index c458800a7..bb79a2d04 100644 --- a/expression.h +++ b/expression.h @@ -19,7 +19,7 @@ class parse_ll_t; class parse_sr_t; class parse_t { - parse_sr_t * const parser; + parse_ll_t * const parser; parse_t(); }; From f3e5262dc0d0676e77374a77a0835329a20af03d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 8 Jun 2013 19:20:26 -0700 Subject: [PATCH 004/177] Parser limps into existence --- Makefile.in | 2 +- configure.ac | 2 +- expression.cpp | 665 ++++++++++++++++++++++++------------------------- expression.h | 5 + fish_tests.cpp | 12 +- tokenizer.h | 1 - 6 files changed, 350 insertions(+), 337 deletions(-) diff --git a/Makefile.in b/Makefile.in index c59ac0de3..61a9ef279 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o + builtin_test.o expression.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/configure.ac b/configure.ac index 
34f25e1f4..631ba9c7c 100644 --- a/configure.ac +++ b/configure.ac @@ -92,7 +92,7 @@ fi # So ensure this happens before we modify CXXFLAGS below # -AC_PROG_CXX([g++ c++]) +AC_PROG_CXX([clang++ g++ c++]) AC_PROG_CPP AC_PROG_INSTALL diff --git a/expression.cpp b/expression.cpp index bbc83fe10..75245bd13 100644 --- a/expression.cpp +++ b/expression.cpp @@ -1,7 +1,8 @@ #include "expression.h" -#include +#include "tokenizer.h" #include + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines @@ -41,11 +42,12 @@ */ +typedef size_t node_offset_t; + #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() assert(0) -#if 1 class parse_command_t; enum parse_token_type_t @@ -67,7 +69,6 @@ enum parse_token_type_t symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - // Terminal types parse_token_type_string, @@ -76,8 +77,42 @@ enum parse_token_type_t parse_token_background, parse_token_type_end, parse_token_type_terminate, + + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; +static wcstring token_type_description(parse_token_type_t type) +{ + switch (type) + { + case token_type_invalid: return L"invalid"; + + case symbol_statement_list: return L"statement_list"; + case symbol_statement: return L"statement"; + case symbol_block_statement: return L"block_statement"; + case symbol_block_header: return L"block_header"; + case symbol_if_header: return L"if_header"; + case symbol_for_header: return L"for_header"; + case symbol_while_header: return L"while_header"; + case symbol_begin_header: return L"begin_header"; + case symbol_function_header: return L"function_header"; + case symbol_boolean_statement: return L"boolean_statement"; + case symbol_decorated_statement: return L"decorated_statement"; + case symbol_plain_statement: return L"plain_statement"; + case symbol_arguments_or_redirections_list: return L"arguments_or_redirections_list"; + case symbol_argument_or_redirection: return 
L"argument_or_redirection"; + + case parse_token_type_string: return L"token_string"; + case parse_token_type_pipe: return L"token_pipe"; + case parse_token_type_redirection: return L"token_redirection"; + case parse_token_background: return L"token_background"; + case parse_token_type_end: return L"token_end"; + case parse_token_type_terminate: return L"token_terminate"; + + default: return format_string(L"Unknown token type %ld", static_cast(type)); + } +} + enum parse_keyword_t { parse_keyword_none, @@ -100,11 +135,11 @@ enum parse_keyword_t struct parse_token_t { - enum parse_token_type_t type; // The type of the token as represnted by the parser + enum parse_token_type_t type; // The type of the token as represented by the parser enum token_type tokenizer_type; // The type of the token as represented by the tokenizer enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; - size_t source_end; + size_t source_length; }; // Convert from tokenizer_t's token type to our token @@ -138,253 +173,114 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } -/** Root of a parse tree */ -class parse_statement_list_t; -class parse_tree_t -{ - friend class parse_ll_t; - - parse_statement_list_t *root; -}; - /** Base class for nodes of a parse tree */ -class parse_node_base_t +class parse_node_t { - /* Type of the node */ - const enum parse_token_type_t type; - + public: + /* Start in the source code */ - const unsigned int source_start; + size_t source_start; /* Length of our range in the source code */ - const unsigned int source_length; + size_t source_length; + + /* Children */ + node_offset_t child_start; + node_offset_t child_count; - protected: - /* Index of the production used */ - unsigned char branch; + /* Type-dependent data */ + uint32_t tag; - public: - parse_node_base_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0) + /* Type of the node */ + enum 
parse_token_type_t type; + + + /* Description */ + wcstring describe(void) const { + wcstring result = token_type_description(type); + return result; } - virtual ~parse_node_base_t() + /* Constructor */ + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } }; -class parse_node_t : public parse_node_base_t +static void dump_tree_recursive(const std::vector &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { - public: - parse_node_t *p1; - parse_node_t *p2; - uint32_t c1; + assert(start < nodes.size()); + const parse_node_t &node = nodes.at(start); - parse_node_t(parse_token_type_t ty) : parse_node_base_t(ty), p1(NULL), p2(NULL), c1(0) + append_format(*result, L"%2lu ", *line); + result->append(indent, L' ');; + result->append(node.describe()); + if (node.child_count > 0) { + append_format(*result, L" <%lu children>", node.child_count); } -}; - -class parse_statement_t; -class parse_statement_list_t : public parse_node_base_t -{ - public: - parse_statement_t *statement; - parse_statement_list_t *next; - parse_statement_list_t() : parse_node_base_t(symbol_statement_list), statement(NULL), next(NULL) + if (node.type == parse_token_type_string) { + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); } -}; - -class parse_statement_t : public parse_node_base_t -{ - // abstract class - public: - parse_statement_t(parse_token_type_t ty) : parse_node_base_t(ty) + result->push_back(L'\n'); + ++*line; + for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) { + dump_tree_recursive(nodes, src, child_idx, indent + 2, result, line); } -}; +} -class parse_abstract_statement_t : public parse_statement_t +static wcstring dump_tree(const std::vector &nodes, const wcstring &src) { - public: - parse_statement_t *subject; - parse_abstract_statement_t() : 
parse_statement_t(symbol_statement), subject(NULL) - { - } -}; - -class parse_boolean_statement_t : public parse_statement_t -{ - public: - enum { - boolean_invalid, - boolean_and, - boolean_or, - boolean_not - } condition; + if (nodes.empty()) + return L"(empty!)"; - parse_boolean_statement_t() : parse_statement_t(symbol_boolean_statement), condition(boolean_invalid) - { -#if 0 - switch (keyword) - { - case parse_keyword_and: - condition = boolean_and; - break; - - case parse_keyword_or: - condition = boolean_or; - break; - - case parse_keyword_not: - condition = boolean_not; - break; - - default: - PARSE_ASSERT(0 && "Unknown keyword"); - break; - } -#endif - } -}; - -class parse_plain_statement_t; -class parse_decorated_statement_t : public parse_statement_t -{ -public: - enum { - decoration_none, - decoration_command, - decoration_builtin - } decoration; - - parse_plain_statement_t *subject; - - parse_decorated_statement_t() : parse_statement_t(symbol_decorated_statement), subject(NULL), decoration(decoration_none) - { - } - -}; - -class parse_string_t; -class parse_plain_statement_t : public parse_statement_t -{ - parse_string_t *command; - parse_arguments_or_redirection_list_t *arguments_or_redirections_list; - - public: - parse_plain_statement_t() : parse_statement_t(symbol_plain_statement) - { - } -}; - -class parse_block_statement_t : public parse_statement_t -{ - // abstract class - parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(ty) - { - } -}; - -class parse_string_t : public parse_node_base_t -{ -}; - -class parse_arguments_or_redirection_list_t : public parse_node_base_t -{ -}; + size_t line = 0; + wcstring result; + dump_tree_recursive(nodes, src, 0, 0, &result, &line); + return result; +} struct parse_stack_element_t { enum parse_token_type_t type; enum parse_keyword_t keyword; - parse_node_base_t *node; + node_offset_t node_idx; - private: - void allocate_node(void) + parse_stack_element_t(parse_token_type_t t) : 
type(t), keyword(parse_keyword_none), node_idx(-1) { - assert(node == NULL); - switch (type) - { - // Set up our node - case symbol_statement_list: - node = new parse_statement_list_t(); - break; - - case symbol_statement: - node = new parse_abstract_statement_t(); - break; - - case symbol_block_statement: - case symbol_block_header: - case symbol_if_header: - case symbol_for_header: - case symbol_while_header: - case symbol_begin_header: - case symbol_function_header: - break; - - case symbol_boolean_statement: - node = new parse_boolean_statement_t(); - break; - - case symbol_decorated_statement: - node = new parse_decorated_statement_t(); - break; - - case symbol_plain_statement: - node = new parse_plain_statement_t(); - break; - - case symbol_arguments_or_redirections_list: - case symbol_argument_or_redirection: - - default: - ; - // nothing - } - } - - - public: - - // Construct a token type, with no keyword - parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) - { - allocate_node(); } - // Construct a string type from a keyword - parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k), node(NULL) + parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { - allocate_node(); } }; -template -static T* cast_node(parse_node_base_t *node) -{ - return static_cast(node); -} - class parse_ll_t { friend class parse_t; std::vector symbol_stack; // LL parser stack - parse_tree_t *tree; //tree we are constructing + std::vector nodes; + bool errored; // Constructor - parse_ll_t() + parse_ll_t() : errored(false) { - this->tree = new parse_tree_t(); - - symbol_stack.push_back(symbol_statement_list); // goal token - tree->root = stack_get_node_cast(0); + // initial node + parse_stack_element_t elem = symbol_statement_list; + elem.node_idx = 0; + symbol_stack.push_back(elem); // goal token + nodes.push_back(parse_node_t(symbol_statement_list)); } + bool 
top_node_match_token(parse_token_t token); + // implementation of certain parser constructions void accept_token(parse_token_t token); void accept_token_statement_list(parse_token_t token); @@ -395,34 +291,63 @@ class parse_ll_t void accept_token_plain_statement(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); + bool accept_token_string(parse_token_t token); void token_unhandled(parse_token_t token, const char *function); void parse_error(const wchar_t *expected, parse_token_t token); + // Get the node corresponding to the top element of the stack + parse_node_t &node_for_top_symbol() + { + PARSE_ASSERT(! symbol_stack.empty()); + const parse_stack_element_t &top_symbol = symbol_stack.back(); + PARSE_ASSERT(top_symbol.node_idx != -1); + PARSE_ASSERT(top_symbol.node_idx < nodes.size()); + return nodes.at(top_symbol.node_idx); + } + parse_token_type_t stack_top_type() const { return symbol_stack.back().type; } - template - T* stack_get_node_cast(unsigned int idx) + void top_node_set_tag(uint32_t tag) { - assert(idx < symbol_stack.size()); - parse_node_base_t *base_node = symbol_stack.at(symbol_stack.size() - idx - 1).node; - return static_cast(base_node); + this->node_for_top_symbol().tag = tag; + } + + inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) + { + PARSE_ASSERT(tok->type != token_type_invalid); + tok->node_idx = nodes.size(); + nodes.push_back(parse_node_t(tok->type)); + nodes.at(parent_node_idx).child_count += 1; + } - } - parse_node_base_t *stack_get_node(unsigned int idx) const - { - assert(idx < symbol_stack.size()); - return symbol_stack.at(symbol_stack.size() - idx - 1).node; - } - - // Pop from the top of the symbol stack, then push. Note that these are pushed in reverse order, so the first argument will be on the top of the stack + // Pop from the top of the symbol stack, then push, updating node counts. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { + // Get the node for the top symbol and tell it about its children + size_t node_idx = symbol_stack.back().node_idx; + parse_node_t &node = nodes.at(node_idx); + + // Should have no children yet + PARSE_ASSERT(node.child_count == 0); + + // Tell the node where its children start + node.child_start = nodes.size(); + + // Add nodes for the children + // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); + if (tok2.type != token_type_invalid) add_child_to_node(node_idx, &tok2); + if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); + if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); + if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); + + // The above set the node_idx. Now replace the top of the stack. 
symbol_stack.pop_back(); if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); @@ -446,7 +371,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) void parse_ll_t::accept_token_statement_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement_list); - parse_statement_list_t *list = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -455,8 +379,6 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) case parse_token_background: case parse_token_type_end: symbol_stack_pop_push(symbol_statement, symbol_statement_list); - list->next = stack_get_node_cast(0); - list->statement = stack_get_node_cast(1); break; case parse_token_type_terminate: @@ -473,7 +395,6 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) void parse_ll_t::accept_token_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement); - parse_abstract_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -483,7 +404,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_or: case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); - statement->subject = stack_get_node_cast(0); break; case parse_keyword_if: @@ -506,7 +426,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: symbol_stack_pop_push(symbol_decorated_statement); - statement->subject = stack_get_node_cast(0); break; } @@ -575,23 +494,16 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - parse_boolean_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case 
parse_keyword_and: - statement->condition = parse_boolean_statement_t::boolean_and; - symbol_stack_pop_push(parse_keyword_and, symbol_statement); - break; case parse_keyword_or: - statement->condition = parse_boolean_statement_t::boolean_or; - symbol_stack_pop_push(parse_keyword_or, symbol_statement); - break; case parse_keyword_not: - statement->condition = parse_boolean_statement_t::boolean_not; - symbol_stack_pop_push(parse_keyword_not, symbol_statement); + top_node_set_tag(token.keyword); + symbol_stack_pop_push(token.keyword, symbol_statement); break; default: @@ -608,31 +520,27 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - parse_decorated_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_command: + top_node_set_tag(parse_keyword_command); symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_command; - break; case parse_keyword_builtin: + top_node_set_tag(parse_keyword_builtin); symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_builtin; break; default: + top_node_set_tag(parse_keyword_none); symbol_stack_pop_push(symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_none; break; } + break; default: token_unhandled(token, __FUNCTION__); @@ -642,10 +550,8 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token) { - PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - 
parse_plain_statement_t *statement = stack_get_node_cast(0); + PARSE_ASSERT(stack_top_type() == symbol_plain_statement); symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); - statement-> } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) @@ -671,12 +577,12 @@ void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) switch (token.type) { case parse_token_type_string: - symbol_stack_pop_push(); + symbol_stack_pop_push(parse_token_type_string); // Got an argument break; case parse_token_type_redirection: - symbol_stack_pop_push(); + symbol_stack_pop_push(parse_token_type_redirection); // Got a redirection break; @@ -686,105 +592,198 @@ void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) } } -void parse_ll_t::accept_token(parse_token_t token) +bool parse_ll_t::accept_token_string(parse_token_t token) { - assert(! symbol_stack.empty()); - switch (stack_top_type()) - { - case symbol_statement_list: - accept_token_statement_list(token); - break; - - case symbol_statement: - accept_token_statement(token); - break; - - case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); - break; - - case symbol_block_header: - accept_token_block_header(token); - break; - - case symbol_if_header: - break; - - case symbol_for_header: - symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); - break; - - case symbol_while_header: - symbol_stack_pop_push(parse_keyword_while, symbol_statement); - break; - - case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); - break; - - case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); - break; - - case 
symbol_boolean_statement: - accept_token_boolean_statement(token); - break; - - case symbol_decorated_statement: - accept_token_decorated_statement(token); - break; - - case symbol_plain_statement: - accept_token_plain_statement(token); - break; - - case symbol_arguments_or_redirections_list: - accept_token_arguments_or_redirections_list(token); - break; - - case symbol_argument_or_redirection: - accept_token_argument_or_redirection(token); - break; - } -} -#endif - -#if 0 -class parse_sr_t -{ - friend class parse_t; - - std::vector node_stack; - void accept_token(parse_token_t token); - - void accept_token_string(parse_token_t token); -}; - -void parse_sr_t::accept_token_string(parse_token_t token) -{ - assert(token.type == parse_token_type_string); -} - -void parse_sr_t::accept_token(parse_token_t token) -{ - // We are a SR parser. Our action depends on a combination of the top element(s) of our node stack and the token type. - // Switch on the token type to make progress + PARSE_ASSERT(stack_top_type() == parse_token_type_string); + bool result = false; switch (token.type) { case parse_token_type_string: - accept_token_string(token); + // Got our string + symbol_stack_pop_push(); + result = true; break; - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - case parse_token_type_end: - case parse_token_type_terminate: + default: + token_unhandled(token, __FUNCTION__); + break; } + return result; } -#endif +bool parse_ll_t::top_node_match_token(parse_token_t token) +{ + PARSE_ASSERT(! symbol_stack.empty()); + PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + bool result = false; + parse_stack_element_t &stack_top = symbol_stack.back(); + if (stack_top.type == token.type) + { + // So far so good. See if we need a particular keyword. + if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword) + { + // Success. 
Tell the node that it matched this token + parse_node_t &node = node_for_top_symbol(); + node.source_start = token.source_start; + node.source_length = token.source_length; + + // We consumed this symbol + symbol_stack.pop_back(); + result = true; + } + } + return result; +} + +void parse_ll_t::accept_token(parse_token_t token) +{ + PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + PARSE_ASSERT(! symbol_stack.empty()); + bool consumed = false; + while (! consumed && ! this->errored) + { + fprintf(stderr, "Top type %ls\n", token_type_description(stack_top_type()).c_str()); + if (top_node_match_token(token)) + { + consumed = true; + break; + } + + switch (stack_top_type()) + { + /* Symbols */ + case symbol_statement_list: + accept_token_statement_list(token); + break; + + case symbol_statement: + accept_token_statement(token); + break; + + case symbol_block_statement: + symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + break; + + case symbol_block_header: + accept_token_block_header(token); + break; + + case symbol_if_header: + break; + + case symbol_for_header: + symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_while_header: + symbol_stack_pop_push(parse_keyword_while, symbol_statement); + break; + + case symbol_begin_header: + symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + break; + + case symbol_function_header: + symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_boolean_statement: + accept_token_boolean_statement(token); + break; + + case symbol_decorated_statement: + accept_token_decorated_statement(token); + break; + + case symbol_plain_statement: + accept_token_plain_statement(token); + break; + + case symbol_arguments_or_redirections_list: + 
accept_token_arguments_or_redirections_list(token); + break; + + case symbol_argument_or_redirection: + accept_token_argument_or_redirection(token); + break; + + /* Tokens */ + case parse_token_type_string: + consumed = accept_token_string(token); + break; + + default: + fprintf(stderr, "Bailing with token type %d\n", (int)token.type); + break; + } + } +} parse_t::parse_t() : parser(new parse_ll_t()) { } + +static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) +{ + parse_keyword_t result = parse_keyword_none; + if (tok == TOK_STRING) + { + + const struct { + const wchar_t *txt; + parse_keyword_t keyword; + } keywords[] = { + {L"if", parse_keyword_if}, + {L"else", parse_keyword_else}, + {L"for", parse_keyword_for}, + {L"in", parse_keyword_in}, + {L"while", parse_keyword_while}, + {L"begin", parse_keyword_begin}, + {L"function", parse_keyword_function}, + {L"switch", parse_keyword_switch}, + {L"end", parse_keyword_end}, + {L"and", parse_keyword_and}, + {L"or", parse_keyword_or}, + {L"not", parse_keyword_not}, + {L"command", parse_keyword_command}, + {L"builtin", parse_keyword_builtin} + }; + + for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) + { + if (! 
wcscmp(keywords[i].txt, tok_txt)) + { + result = keywords[i].keyword; + break; + } + } + } + return result; +} + +void parse_t::parse(const wcstring &str) +{ + tokenizer_t tok = tokenizer_t(str.c_str(), 0); + for (; tok_has_next(&tok); tok_next(&tok)) + { + token_type tok_type = static_cast(tok_last_type(&tok)); + const wchar_t *tok_txt = tok_last(&tok); + int tok_start = tok_get_pos(&tok); + + if (tok_type == TOK_ERROR) + { + fprintf(stderr, "Tokenizer error\n"); + break; + } + + parse_token_t token = parse_token_from_tokenizer_token(tok_type); + token.tokenizer_type = tok_type; + token.source_start = (size_t)tok_start; + token.source_length = wcslen(tok_txt); + token.keyword = keyword_for_token(tok_type, tok_txt); + this->parser->accept_token(token); + } + wcstring result = dump_tree(this->parser->nodes, str); + fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); + fprintf(stderr, "node size %ld", sizeof(parse_node_t)); +} diff --git a/expression.h b/expression.h index bb79a2d04..379314cdf 100644 --- a/expression.h +++ b/expression.h @@ -20,9 +20,14 @@ class parse_sr_t; class parse_t { parse_ll_t * const parser; + + public: parse_t(); + void parse(const wcstring &str); }; + + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines diff --git a/fish_tests.cpp b/fish_tests.cpp index 5146e88e0..fb35f15ef 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -60,6 +60,7 @@ #include "postfork.h" #include "signal.h" #include "highlight.h" +#include "expression.h" /** The number of tests to run @@ -1717,6 +1718,12 @@ void history_tests_t::test_history_speed(void) delete hist; } +static void test_new_parser(void) +{ + say(L"Testing new parser!"); + parse_t parser; + parser.parse(L"echo hello"); +} /** Main test @@ -1733,12 +1740,15 @@ int main(int argc, char **argv) say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given 
broken input, and can be\nignored. All actual errors begin with 'Error:'."); set_main_thread(); setup_fork_guards(); - proc_init(); + //proc_init(); event_init(); function_init(); builtin_init(); reader_init(); env_init(); + + test_new_parser(); + return 0; test_format(); test_escape(); diff --git a/tokenizer.h b/tokenizer.h index 40390bcf8..f2d6c0c0b 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -19,7 +19,6 @@ enum token_type { TOK_NONE, /**< Tokenizer not yet constructed */ TOK_ERROR, /**< Error reading token */ - TOK_INVALID,/**< Invalid token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ TOK_END,/**< End token (semicolon or newline, not literal end) */ From 048f08080d3d2fe60bcc6690c8ef124194980c32 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 9 Jun 2013 14:21:24 -0700 Subject: [PATCH 005/177] Rename expression.h to parse_tree.h and parse_exec.h --- Makefile.in | 2 +- fish.xcodeproj/project.pbxproj | 16 ++++--- fish_tests.cpp | 3 +- parse_exec.cpp | 1 + parse_exec.h | 11 +++++ expression.cpp => parse_tree.cpp | 76 +++++++++++++------------------- expression.h => parse_tree.h | 18 ++------ parser.cpp | 9 ++-- parser.h | 4 +- 9 files changed, 63 insertions(+), 77 deletions(-) create mode 100644 parse_exec.cpp create mode 100644 parse_exec.h rename expression.cpp => parse_tree.cpp (92%) rename expression.h => parse_tree.h (91%) diff --git a/Makefile.in b/Makefile.in index 61a9ef279..56e32e59d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o expression.o + builtin_test.o parse_tree.o parse_exec.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj 
index 23b06b5ed..165fb2785 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -72,10 +72,10 @@ D07D266D15E33B86009E43F6 /* functions in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; - D07FEA311751E6AF003066C3 /* expression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D07FEA2F1751E6AF003066C3 /* expression.cpp */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -337,8 +337,6 @@ D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; 
D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; - D07FEA2F1751E6AF003066C3 /* expression.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expression.cpp; sourceTree = ""; }; - D07FEA301751E6AF003066C3 /* expression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = expression.h; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; @@ -444,6 +442,10 @@ D0B6B0FE14E88BA400AD6C10 /* color.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = color.cpp; sourceTree = ""; }; D0B6B0FF14E88BA400AD6C10 /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; D0C4FD9415A7D7EE00212EF1 /* config.fish */ = {isa = PBXFileReference; lastKnownFileType = text; name = config.fish; path = etc/config.fish; sourceTree = ""; }; + D0C52F331765281F00BFAB82 /* parse_exec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_exec.cpp; sourceTree = ""; }; + D0C52F341765281F00BFAB82 /* parse_exec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_exec.h; sourceTree = ""; }; + D0C52F351765284C00BFAB82 /* parse_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
parse_tree.cpp; sourceTree = ""; }; + D0C52F361765284C00BFAB82 /* parse_tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree.h; sourceTree = ""; }; D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = autoload.cpp; sourceTree = ""; }; D0C6FCCB14CFA4B7004CE8AD /* autoload.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = autoload.h; sourceTree = ""; }; D0C861EA16CC7054003B5A04 /* builtin_set_color.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_set_color.cpp; sourceTree = ""; }; @@ -585,8 +587,10 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, - D07FEA301751E6AF003066C3 /* expression.h */, - D07FEA2F1751E6AF003066C3 /* expression.cpp */, + D0C52F361765284C00BFAB82 /* parse_tree.h */, + D0C52F351765284C00BFAB82 /* parse_tree.cpp */, + D0C52F341765281F00BFAB82 /* parse_exec.h */, + D0C52F331765281F00BFAB82 /* parse_exec.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1107,7 +1111,7 @@ D0D02A7A15983916008E62BD /* env_universal.cpp in Sources */, D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, - D07FEA311751E6AF003066C3 /* expression.cpp in Sources */, + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/fish_tests.cpp b/fish_tests.cpp index 69368993b..35c1e3277 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -60,7 +60,8 @@ #include "postfork.h" #include "signal.h" #include "highlight.h" -#include "expression.h" +#include "parse_tree.h" +#include "parse_exec.h" /** The number of tests to run diff --git a/parse_exec.cpp b/parse_exec.cpp new 
file mode 100644 index 000000000..5f4cf2d1d --- /dev/null +++ b/parse_exec.cpp @@ -0,0 +1 @@ +#include "parse_exec.h" diff --git a/parse_exec.h b/parse_exec.h new file mode 100644 index 000000000..50a0ddd68 --- /dev/null +++ b/parse_exec.h @@ -0,0 +1,11 @@ +/**\file parse_exec.h + + Programmatic execution of a parse tree +*/ + +#ifndef FISH_PARSE_TREE_H +#define FISH_PARSE_TREE_H + +#include "parse_tree.h" + +#endif diff --git a/expression.cpp b/parse_tree.cpp similarity index 92% rename from expression.cpp rename to parse_tree.cpp index 75245bd13..bdc3cf2bd 100644 --- a/expression.cpp +++ b/parse_tree.cpp @@ -2,47 +2,9 @@ #include "tokenizer.h" #include - -/* Fish grammar: - -# A statement_list is a list of statements, separated by semicolons or newlines - - statement_list = | - statement statement_list - -# A statement is a normal job, or an if / while / and etc. - - statement = boolean_statement | block_statement | decorated_statement - -# A block is a conditional, loop, or begin/end - - block_statement = block_header statement_list END arguments_or_redirections_list - block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF statement - for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR - while_header = WHILE statement - begin_header = BEGIN STATEMENT_TERMINATOR - function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR - -# A boolean statement is AND or OR or NOT - - boolean_statement = AND statement | OR statement | NOT statement - -# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" - - decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list terminator - - arguments_or_redirections_list = | - argument_or_redirection arguments_or_redirections_list - argument_or_redirection = redirection | - redirection = - - 
terminator = | - -*/ - +struct parse_node_t; typedef size_t node_offset_t; +typedef std::vector parse_node_tree_t; #define PARSE_ASSERT(a) assert(a) @@ -208,7 +170,7 @@ class parse_node_t } }; -static void dump_tree_recursive(const std::vector &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); @@ -234,7 +196,7 @@ static void dump_tree_recursive(const std::vector &nodes, const wc } } -static wcstring dump_tree(const std::vector &nodes, const wcstring &src) +static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; @@ -245,7 +207,6 @@ static wcstring dump_tree(const std::vector &nodes, const wcstring return result; } - struct parse_stack_element_t { enum parse_token_type_t type; @@ -261,12 +222,36 @@ struct parse_stack_element_t } }; +class parse_execution_context_t +{ + wcstring src; + const parse_node_tree_t nodes; + size_t node_idx; + + public: + parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) : src(s), nodes(n), node_idx(0) + { + } + + wcstring simulate(void); +}; + +wcstring parse_execution_context_t::simulate() +{ + if (nodes.empty()) + return L"(empty!"); + + PARSE_ASSERT(node_idx < nodes.size()); + PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + +} + class parse_ll_t { friend class parse_t; std::vector symbol_stack; // LL parser stack - std::vector nodes; + parse_node_tree_t nodes; bool errored; // Constructor @@ -642,7 +627,6 @@ void parse_ll_t::accept_token(parse_token_t token) bool consumed = false; while (! consumed && ! 
this->errored) { - fprintf(stderr, "Top type %ls\n", token_type_description(stack_top_type()).c_str()); if (top_node_match_token(token)) { consumed = true; @@ -785,5 +769,5 @@ void parse_t::parse(const wcstring &str) } wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "node size %ld", sizeof(parse_node_t)); + fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); } diff --git a/expression.h b/parse_tree.h similarity index 91% rename from expression.h rename to parse_tree.h index 379314cdf..9536479bd 100644 --- a/expression.h +++ b/parse_tree.h @@ -1,11 +1,10 @@ -/**\file expression.h +/**\file parse_tree.h Programmatic representation of fish code. - */ -#ifndef FISH_EXPRESSION_H -#define FISH_EXPRESSION_H +#ifndef FISH_PARSE_TREE_H +#define FISH_PARSE_TREE_H #include @@ -65,17 +64,6 @@ class parse_t terminator = | -*/ - - -/* fish Shift-Reduce grammar: - - - IF <- if_statement - FOR <- for_statement - - - */ #endif diff --git a/parser.cpp b/parser.cpp index 53c68c3cd..d2b0f0690 100644 --- a/parser.cpp +++ b/parser.cpp @@ -722,7 +722,7 @@ void parser_t::print_errors_stderr() } -int parser_t::eval_args(const wchar_t *line, std::vector &args) +void parser_t::eval_args(const wchar_t *line, std::vector &args) { expand_flags_t eflags = 0; @@ -731,10 +731,9 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type != PARSER_TYPE_GENERAL) eflags |= EXPAND_SKIP_CMDSUBST; - int do_loop=1; + bool do_loop=1; - CHECK(line, 1); -// CHECK( args, 1 ); + if (! line) return; // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. 
if (this->parser_type == PARSER_TYPE_GENERAL) @@ -803,8 +802,6 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type == PARSER_TYPE_GENERAL) proc_pop_interactive(); - - return 1; } void parser_t::stack_trace(block_t *b, wcstring &buff) diff --git a/parser.h b/parser.h index c2ad6c9b7..0c90641ba 100644 --- a/parser.h +++ b/parser.h @@ -396,7 +396,7 @@ class parser_t /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. - The output is inserted into output, and should be freed by the caller. + The output is inserted into output. \param line Line to evaluate \param output List to insert output to @@ -405,7 +405,7 @@ class parser_t \param line Line to evaluate \param output List to insert output to */ - int eval_args(const wchar_t *line, std::vector &output); + void eval_args(const wchar_t *line, std::vector &output); /** Sets the current evaluation error. This function should only be used by libraries that are called by From e2a506e54a297dcbd13cdb881936e50bbefceb28 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 11 Jun 2013 09:37:51 -0700 Subject: [PATCH 006/177] More work on new parser --- fish.xcodeproj/project.pbxproj | 2 + parse_exec.cpp | 147 +++++++++++++++++++++++++++++++++ parse_exec.h | 15 +++- parse_tree.cpp | 121 ++------------------------- parse_tree.h | 90 ++++++++++++++++++++ 5 files changed, 257 insertions(+), 118 deletions(-) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 165fb2785..6f6c83ba1 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -76,6 +76,7 @@ D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; + D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -1112,6 +1113,7 @@ D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, + D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/parse_exec.cpp b/parse_exec.cpp index 5f4cf2d1d..30503857e 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1 +1,148 @@ #include "parse_exec.h" +#include + + +struct exec_node_t +{ + node_offset_t parse_node_idx; + + exec_node_t(size_t pni) : parse_node_idx(pni) + { + } + + virtual ~exec_node_t(); +}; + +exec_node_t::~exec_node_t() +{ +} + +struct exec_redirection_t : public exec_node_t +{ + +}; + +struct exec_argument_t : public exec_node_t +{ + +}; + +struct exec_statement_t +{ + enum + { + decoration_plain, + decoration_command, + decoration_builtin + } decoration; + + std::vector arguments; + std::vector redirections; +}; + +class parse_exec_t +{ + parse_node_tree_t parse_tree; + wcstring src; + std::vector exec_nodes; + + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) + { + } + + void pop_push(uint32_t child_idx) + { + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + PARSE_ASSERT(child_idx < parse_node.child_count); + 
node_offset_t child_node_idx = parse_node.child_start + child_idx; + exec_nodes.pop_back(); + exec_nodes.push_back(child_node_idx); + + } + + void simulate(void); + void enter_parse_node(size_t idx); + void run_top_node(void); +}; + +void parse_exec_t::run_top_node() +{ + PARSE_ASSERT(! exec_nodes.empty()); + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + + switch (parse_node.type) + { + case symbol_statement_list: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // Statement list done + exec_nodes.pop_back(); + } + else + { + // First child is a statement, next is the rest of the list + node_offset_t head = parse_node.child_start; + node_offset_t tail = parse_node.child_start + 1; + exec_nodes.pop_back(); + exec_nodes.push_back(tail); + exec_nodes.push_back(head); + } + break; + + case symbol_statement: + PARSE_ASSERT(parse_node.child_count == 1); + pop_push(0); + break; + + case decorated_statement: + PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); + pop_push(0); + break; + + default: + fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); + PARSER_DIE(); + break; + + } +} + +void parse_exec_t::enter_parse_node(size_t idx) +{ + PARSE_ASSERT(idx < parse_tree.size()); + exec_node_t exec(idx); + exec_nodes.push_back(exec); +} + +wcstring parse_exec_t::simulate(void) +{ + PARSE_ASSERT(exec_nodes.empty()); + assemble_statement_list(0); + enter_parse_node(0); + run_node(); +} + +wcstring parse_execution_context_t::simulate() +{ + if (parse_tree.empty()) + return L"(empty!)"; + + PARSE_ASSERT(node_idx < nodes.size()); + PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + + wcstring result; + +} + +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) +{ + ctx = new parse_exec_t(n, s); +} + +wcstring parse_execution_context_t::simulate(void) 
+{ + return ctx->simulate(); +} diff --git a/parse_exec.h b/parse_exec.h index 50a0ddd68..635ebb836 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -3,9 +3,20 @@ Programmatic execution of a parse tree */ -#ifndef FISH_PARSE_TREE_H -#define FISH_PARSE_TREE_H +#ifndef FISH_PARSE_EXEC_H +#define FISH_PARSE_EXEC_H #include "parse_tree.h" +class parse_exec_t; +class parse_execution_context_t +{ + parse_exec_t *ctx; + + public: + parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); + wcstring simulate(void); +}; + + #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index bdc3cf2bd..8c38ff0cd 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,48 +1,10 @@ -#include "expression.h" +#include "parse_tree.h" #include "tokenizer.h" #include -struct parse_node_t; -typedef size_t node_offset_t; -typedef std::vector parse_node_tree_t; - -#define PARSE_ASSERT(a) assert(a) - -#define PARSER_DIE() assert(0) class parse_command_t; -enum parse_token_type_t -{ - token_type_invalid, - - // Non-terminal tokens - symbol_statement_list, - symbol_statement, - symbol_block_statement, - symbol_block_header, - symbol_if_header, - symbol_for_header, - symbol_while_header, - symbol_begin_header, - symbol_function_header, - symbol_boolean_statement, - symbol_decorated_statement, - symbol_plain_statement, - symbol_arguments_or_redirections_list, - symbol_argument_or_redirection, - - // Terminal types - parse_token_type_string, - parse_token_type_pipe, - parse_token_type_redirection, - parse_token_background, - parse_token_type_end, - parse_token_type_terminate, - - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string -}; - static wcstring token_type_description(parse_token_type_t type) { switch (type) @@ -75,25 +37,11 @@ static wcstring token_type_description(parse_token_type_t type) } } -enum parse_keyword_t +wcstring parse_node_t::describe(void) const { - parse_keyword_none, - parse_keyword_if, - parse_keyword_else, - parse_keyword_for, - parse_keyword_in, - 
parse_keyword_while, - parse_keyword_begin, - parse_keyword_function, - parse_keyword_switch, - parse_keyword_end, - parse_keyword_and, - parse_keyword_or, - parse_keyword_not, - parse_keyword_command, - parse_keyword_builtin -}; - + wcstring result = token_type_description(type); + return result; +} struct parse_token_t { @@ -135,41 +83,6 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } -/** Base class for nodes of a parse tree */ -class parse_node_t -{ - public: - - /* Start in the source code */ - size_t source_start; - - /* Length of our range in the source code */ - size_t source_length; - - /* Children */ - node_offset_t child_start; - node_offset_t child_count; - - /* Type-dependent data */ - uint32_t tag; - - /* Type of the node */ - enum parse_token_type_t type; - - - /* Description */ - wcstring describe(void) const - { - wcstring result = token_type_description(type); - return result; - } - - /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) - { - } -}; - static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); @@ -222,30 +135,6 @@ struct parse_stack_element_t } }; -class parse_execution_context_t -{ - wcstring src; - const parse_node_tree_t nodes; - size_t node_idx; - - public: - parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) : src(s), nodes(n), node_idx(0) - { - } - - wcstring simulate(void); -}; - -wcstring parse_execution_context_t::simulate() -{ - if (nodes.empty()) - return L"(empty!"); - - PARSE_ASSERT(node_idx < nodes.size()); - PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); - -} - class parse_ll_t { friend class parse_t; diff --git a/parse_tree.h b/parse_tree.h index 9536479bd..892c36cdd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ 
-12,6 +12,10 @@ #include "util.h" #include "common.h" #include "tokenizer.h" +#include + +#define PARSE_ASSERT(a) assert(a) +#define PARSER_DIE() assert(0) class parse_ll_t; @@ -25,6 +29,92 @@ class parse_t void parse(const wcstring &str); }; +class parse_node_t; +typedef std::vector parse_node_tree_t; +typedef size_t node_offset_t; + + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_statement_list, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_if_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + // Terminal types + parse_token_type_string, + parse_token_type_pipe, + parse_token_type_redirection, + parse_token_background, + parse_token_type_end, + parse_token_type_terminate, + + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin +}; + +/** Base class for nodes of a parse tree */ +class parse_node_t +{ + public: + + /* Type of the node */ + enum parse_token_type_t type; + + /* Start in the source code */ + size_t source_start; + + /* Length of our range in the source code */ + size_t source_length; + + /* Children */ + node_offset_t child_start; + node_offset_t child_count; + + /* Type-dependent data */ + uint32_t tag; + + + /* Description */ + wcstring describe(void) const; + + /* Constructor */ + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) + { + 
} +}; + /* Fish grammar: From 827a9d640c97b95931280e55b8567f69985c2ff1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 15 Jun 2013 14:32:38 -0700 Subject: [PATCH 007/177] Added parse builtin for testing. Lots of work on simulated execution. --- builtin.cpp | 28 ++++- fish_tests.cpp | 10 +- parse_exec.cpp | 289 +++++++++++++++++++++++++++++++++++++++++-------- parse_tree.cpp | 10 +- parse_tree.h | 11 +- 5 files changed, 295 insertions(+), 53 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 9796d356f..12b331b82 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -64,6 +64,8 @@ #include "expand.h" #include "path.h" #include "history.h" +#include "parse_tree.h" +#include "parse_exec.h" /** The default prompt for the read command @@ -3938,6 +3940,30 @@ static int builtin_history(parser_t &parser, wchar_t **argv) return STATUS_BUILTIN_ERROR; } +int builtin_parse(parser_t &parser, wchar_t **argv) +{ + std::vector txt; + for (;;) + { + char buff[256]; + ssize_t amt = read_loop(builtin_stdin, buff, sizeof buff); + if (amt <= 0) break; + txt.insert(txt.end(), buff, buff + amt); + } + if (! 
txt.empty()) + { + const wcstring src = str2wcstring(&txt.at(0), txt.size()); + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + stdout_buffer.append(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + stdout_buffer.append(simulation); + stdout_buffer.push_back(L'\n'); + } + return STATUS_BUILTIN_OK; +} /* END OF BUILTIN COMMANDS @@ -3985,6 +4011,7 @@ static const builtin_data_t builtin_datas[]= { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") }, { L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, + { L"parse", &builtin_parse, N_(L"Try out the new parser") }, { L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"random", &builtin_random, N_(L"Generate random number") }, @@ -4144,4 +4171,3 @@ void builtin_pop_io(parser_t &parser) builtin_stdin = 0; } } - diff --git a/fish_tests.cpp b/fish_tests.cpp index 35c1e3277..20d79288b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1722,8 +1722,16 @@ void history_tests_t::test_history_speed(void) static void test_new_parser(void) { say(L"Testing new parser!"); + const wcstring src = L"echo hello world"; + parse_node_tree_t parse_tree; parse_t parser; - parser.parse(L"echo hello"); + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + say(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + printf("%ls\n", simulation.c_str()); + + } /** diff --git a/parse_exec.cpp b/parse_exec.cpp index 30503857e..04601e5ba 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1,34 +1,41 @@ #include "parse_exec.h" #include +typedef uint16_t sanity_id_t; +static sanity_id_t next_sanity_id() +{ + static sanity_id_t last_sanity_id; + return ++last_sanity_id; +} struct exec_node_t { node_offset_t parse_node_idx; + 
sanity_id_t command_sanity_id; exec_node_t(size_t pni) : parse_node_idx(pni) { } - virtual ~exec_node_t(); }; -exec_node_t::~exec_node_t() +struct exec_argument_t { -} + node_offset_t parse_node_idx; + sanity_id_t command_sanity_id; +}; -struct exec_redirection_t : public exec_node_t +struct exec_redirection_t { }; -struct exec_argument_t : public exec_node_t +struct exec_basic_statement_t { + // Node containing the command + node_offset_t command_idx; -}; - -struct exec_statement_t -{ + // Decoration enum { decoration_plain, @@ -38,68 +45,260 @@ struct exec_statement_t std::vector arguments; std::vector redirections; + uint16_t sanity_id; + + exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) + { + sanity_id = next_sanity_id(); + } + + void set_decoration(uint32_t k) + { + PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); + switch (k) + { + case parse_keyword_none: + decoration = decoration_plain; + break; + case parse_keyword_command: + decoration = decoration_command; + break; + case parse_keyword_builtin: + decoration = decoration_builtin; + break; + default: + PARSER_DIE(); + break; + } + + } }; class parse_exec_t { parse_node_tree_t parse_tree; wcstring src; + + bool simulating; + wcstring_list_t simulation_result; + + /* The stack of nodes as we execute them */ std::vector exec_nodes; - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) + /* The stack of commands being built */ + std::vector assembling_statements; + + void get_node_string(node_offset_t idx, wcstring *output) const { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.source_start <= src.size()); + PARSE_ASSERT(node.source_start + node.source_length <= src.size()); + output->assign(src, node.source_start, node.source_length); + } + + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) + { + PARSE_ASSERT(! 
exec_nodes.empty()); + if (child_count == 0) + { + // No children, just remove the top node + exec_nodes.pop_back(); + } + else + { + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + PARSE_ASSERT(child_idx < parse_node.child_count); + node_offset_t child_node_idx = parse_node.child_start + child_idx; + + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; + node_offset_t cursor = child_count; + while (cursor--) + { + exec_nodes.push_back(child_node_idx + cursor); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } } - void pop_push(uint32_t child_idx) + void pop() + { + PARSE_ASSERT(! exec_nodes.empty()); + exec_nodes.pop_back(); + } + + void pop_push_all() { exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - PARSE_ASSERT(child_idx < parse_node.child_count); - node_offset_t child_node_idx = parse_node.child_start + child_idx; - exec_nodes.pop_back(); - exec_nodes.push_back(child_node_idx); + pop_push(0, parse_node.child_count); + } + + void assemble_command(node_offset_t idx) + { + // Set the command for our top basic statement + PARSE_ASSERT(! assembling_statements.empty()); + assembling_statements.back().command_idx = idx; + } + + void assemble_argument_or_redirection(node_offset_t idx) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(! 
assembling_statements.empty()); + exec_basic_statement_t &statement = assembling_statements.back(); + switch (node.type) + { + case parse_token_type_string: + // Argument + { + exec_argument_t arg = exec_argument_t(); + arg.parse_node_idx = idx; + arg.command_sanity_id = statement.sanity_id; + statement.arguments.push_back(arg); + } + break; + + case parse_token_type_redirection: + // Redirection + break; + + default: + PARSER_DIE(); + break; + } } - void simulate(void); + void assembly_complete() + { + // Finished building a command + PARSE_ASSERT(! assembling_statements.empty()); + const exec_basic_statement_t &statement = assembling_statements.back(); + + if (simulating) + { + simulate_statement(statement); + } + assembling_statements.pop_back(); + } + + void simulate_statement(const exec_basic_statement_t &statement) + { + PARSE_ASSERT(simulating); + wcstring line; + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; + + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); + line.append(tmp); + for (size_t i=0; i < statement.arguments.size(); i++) + { + const exec_argument_t &arg = statement.arguments.at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } + simulation_result.push_back(line); + } + void enter_parse_node(size_t idx); void run_top_node(void); + exec_basic_statement_t *create_basic_statement(void); + + public: + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) + { + } + wcstring simulate(void); }; +exec_basic_statement_t *parse_exec_t::create_basic_statement() +{ + assembling_statements.push_back(exec_basic_statement_t()); + return &assembling_statements.back(); +} + void parse_exec_t::run_top_node() { PARSE_ASSERT(! 
exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + bool log = false; + + if (log) + { + wcstring tmp; + tmp.append(exec_nodes.size(), L' '); + tmp.append(parse_node.describe()); + printf("%ls\n", tmp.c_str()); + } switch (parse_node.type) { case symbol_statement_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // Statement list done - exec_nodes.pop_back(); - } - else - { - // First child is a statement, next is the rest of the list - node_offset_t head = parse_node.child_start; - node_offset_t tail = parse_node.child_start + 1; - exec_nodes.pop_back(); - exec_nodes.push_back(tail); - exec_nodes.push_back(head); - } + pop_push_all(); break; case symbol_statement: PARSE_ASSERT(parse_node.child_count == 1); - pop_push(0); + pop_push_all(); break; - case decorated_statement: + case symbol_decorated_statement: + { PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - pop_push(0); + exec_basic_statement_t *cmd = create_basic_statement(); + cmd->set_decoration(parse_node.tag); + + // Push the last node (skip any decoration) + pop_push(parse_node.child_count - 1, 1); + break; + } + + case symbol_plain_statement: + PARSE_ASSERT(parse_node.child_count == 3); + // Extract the command + PARSE_ASSERT(! 
assembling_statements.empty()); + assemble_command(parse_node.child_start + 0); + // Jump to statement list, then terminator + pop_push(1, 2); + break; + + case symbol_arguments_or_redirections_list: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + pop_push_all(); + break; + + case symbol_argument_or_redirection: + PARSE_ASSERT(parse_node.child_count == 1); + assemble_argument_or_redirection(parse_node.child_start + 0); + pop(); + break; + + case parse_token_type_end: + PARSE_ASSERT(parse_node.child_count == 0); + assembly_complete(); + pop(); break; default: @@ -118,23 +317,27 @@ void parse_exec_t::enter_parse_node(size_t idx) } wcstring parse_exec_t::simulate(void) -{ - PARSE_ASSERT(exec_nodes.empty()); - assemble_statement_list(0); - enter_parse_node(0); - run_node(); -} - -wcstring parse_execution_context_t::simulate() { if (parse_tree.empty()) return L"(empty!)"; - PARSE_ASSERT(node_idx < nodes.size()); - PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + PARSE_ASSERT(exec_nodes.empty()); + simulating = true; + + enter_parse_node(0); + while (! 
exec_nodes.empty()) + { + run_top_node(); + } wcstring result; + for (size_t i=0; i < simulation_result.size(); i++) + { + result.append(simulation_result.at(i)); + result.append(L"\n"); + } + return result; } parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) diff --git a/parse_tree.cpp b/parse_tree.cpp index 8c38ff0cd..a58b0ff80 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -634,7 +634,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -void parse_t::parse(const wcstring &str) +void parse_t::parse(const wcstring &str, parse_node_tree_t *output) { tokenizer_t tok = tokenizer_t(str.c_str(), 0); for (; tok_has_next(&tok); tok_next(&tok)) @@ -658,5 +658,11 @@ void parse_t::parse(const wcstring &str) } wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); + + if (output != NULL) + { + output->swap(this->parser->nodes); + this->parser->nodes.clear(); + } } diff --git a/parse_tree.h b/parse_tree.h index 892c36cdd..0b63efa82 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -17,22 +17,21 @@ #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() assert(0) +class parse_node_t; +typedef std::vector parse_node_tree_t; +typedef size_t node_offset_t; + class parse_ll_t; -class parse_sr_t; class parse_t { parse_ll_t * const parser; public: parse_t(); - void parse(const wcstring &str); + void parse(const wcstring &str, parse_node_tree_t *output); }; -class parse_node_t; -typedef std::vector parse_node_tree_t; -typedef size_t node_offset_t; - enum parse_token_type_t { From 4ca46b7883291fd78469369ce87dce451b73838b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 15 
Jun 2013 15:21:35 -0700 Subject: [PATCH 008/177] Beginning support for new parser error messages --- builtin.cpp | 25 ++++++++++++++----- fish_tests.cpp | 19 +++++++++------ parse_tree.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++------ parse_tree.h | 15 +++++++++++- 4 files changed, 103 insertions(+), 21 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 12b331b82..fe09f4f68 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3954,13 +3954,26 @@ int builtin_parse(parser_t &parser, wchar_t **argv) { const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; + parse_error_list_t errors; parse_t parser; - parser.parse(src, &parse_tree); - parse_execution_context_t ctx(parse_tree, src); - stdout_buffer.append(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - stdout_buffer.append(simulation); - stdout_buffer.push_back(L'\n'); + bool success = parser.parse(src, &parse_tree, &errors); + if (! success) + { + stdout_buffer.append(L"Parsing failed:\n"); + for (size_t i=0; i < errors.size(); i++) + { + stdout_buffer.append(errors.at(i).describe(src)); + stdout_buffer.push_back(L'\n'); + } + } + else + { + parse_execution_context_t ctx(parse_tree, src); + stdout_buffer.append(L"Simulating execution:\n"); + wcstring simulation = ctx.simulate(); + stdout_buffer.append(simulation); + stdout_buffer.push_back(L'\n'); + } } return STATUS_BUILTIN_OK; } diff --git a/fish_tests.cpp b/fish_tests.cpp index 20d79288b..85ead75ac 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1725,13 +1725,18 @@ static void test_new_parser(void) const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; - parser.parse(src, &parse_tree); - parse_execution_context_t ctx(parse_tree, src); - say(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - printf("%ls\n", simulation.c_str()); - - + bool success = parser.parse(src, &parse_tree, NULL); + if (! 
success) + { + say(L"Parsing failed"); + } + else + { + parse_execution_context_t ctx(parse_tree, src); + say(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + say(simulation.c_str()); + } } /** diff --git a/parse_tree.cpp b/parse_tree.cpp index a58b0ff80..22a178408 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -3,7 +3,40 @@ #include -class parse_command_t; +wcstring parse_error_t::describe(const wcstring &src) const +{ + wcstring result = text; + if (source_start < src.size() && source_start + source_length <= src.size()) + { + // Locate the beginning of this line of source + size_t line_start = 0; + + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline + size_t newline = src.find_last_of(L'\n', source_start); + if (newline != wcstring::npos) + { + line_start = newline + 1; + } + + size_t line_end = src.find(L'\n', source_start + source_length); + if (line_end == wcstring::npos) + { + line_end = src.size(); + } + assert(line_end >= line_start); + assert(source_start >= line_start); + + // Append the line of text + result.push_back(L'\n'); + result.append(src, line_start, line_end - line_start); + + // Append the caret line + result.push_back(L'\n'); + result.append(source_start - line_start, L' '); + result.push_back(L'^'); + } + return result; +} static wcstring token_type_description(parse_token_type_t type) { @@ -141,10 +174,12 @@ class parse_ll_t std::vector symbol_stack; // LL parser stack parse_node_tree_t nodes; - bool errored; + + bool fatal_errored; + parse_error_list_t errors; // Constructor - parse_ll_t() : errored(false) + parse_ll_t() : fatal_errored(false) { // initial node parse_stack_element_t elem = symbol_statement_list; @@ -170,6 +205,7 @@ class parse_ll_t void token_unhandled(parse_token_t token, const char *function); void parse_error(const wchar_t *expected, parse_token_t token); + void append_error_callout(wcstring 
&error_message, parse_token_t token); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -239,7 +275,13 @@ void parse_ll_t::token_unhandled(parse_token_t token, const char *function) void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { - fprintf(stderr, "Expected a %ls, instead got a token of type %d\n", expected, (int)token.type); + wcstring desc = token_type_description(token.type); + parse_error_t error; + error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + error.source_start = token.source_start; + error.source_start = token.source_length; + errors.push_back(error); + fatal_errored = true; } void parse_ll_t::accept_token_statement_list(parse_token_t token) @@ -514,7 +556,7 @@ void parse_ll_t::accept_token(parse_token_t token) PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! symbol_stack.empty()); bool consumed = false; - while (! consumed && ! this->errored) + while (! consumed && ! this->fatal_errored) { if (top_node_match_token(token)) { @@ -634,10 +676,10 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -void parse_t::parse(const wcstring &str, parse_node_tree_t *output) +bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors) { tokenizer_t tok = tokenizer_t(str.c_str(), 0); - for (; tok_has_next(&tok); tok_next(&tok)) + for (; tok_has_next(&tok) && ! 
this->parser->fatal_errored; tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); @@ -656,6 +698,7 @@ void parse_t::parse(const wcstring &str, parse_node_tree_t *output) token.keyword = keyword_for_token(tok_type, tok_txt); this->parser->accept_token(token); } + wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); @@ -665,4 +708,12 @@ void parse_t::parse(const wcstring &str, parse_node_tree_t *output) output->swap(this->parser->nodes); this->parser->nodes.clear(); } + + if (errors != NULL) + { + errors->swap(this->parser->errors); + this->parser->errors.clear(); + } + + return ! this->parser->fatal_errored; } diff --git a/parse_tree.h b/parse_tree.h index 0b63efa82..271f8cf80 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -21,6 +21,19 @@ class parse_node_t; typedef std::vector parse_node_tree_t; typedef size_t node_offset_t; +struct parse_error_t +{ + /** Text of the error */ + wcstring text; + + /** Offset and length of the token in the source code that triggered this error */ + size_t source_start; + size_t source_length; + + /** Return a string describing the error, suitable for presentation to the user */ + wcstring describe(const wcstring &src) const; +}; +typedef std::vector parse_error_list_t; class parse_ll_t; class parse_t @@ -29,7 +42,7 @@ class parse_t public: parse_t(); - void parse(const wcstring &str, parse_node_tree_t *output); + bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; From b771e97ac61d3377eb74f037db4424c6c4010729 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 18 Jun 2013 23:35:04 -0700 Subject: [PATCH 009/177] More work on simulating execution of parse trees. 
Pipes are totally screwed up. --- parse_exec.cpp | 15 +++++++++++- parse_tree.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++-------- parse_tree.h | 4 ++-- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index 04601e5ba..6ab90117e 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -260,9 +260,22 @@ void parse_exec_t::run_top_node() break; case symbol_statement: + { PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); + // See if we're just an empty statement + const parse_node_t &child = parse_tree.at(parse_node.child_start + 0); + if (child.type == parse_token_type_end) + { + // Empty statement + pop(); + } + else + { + // We have a statement to execute + pop_push_all(); + } break; + } case symbol_decorated_statement: { diff --git a/parse_tree.cpp b/parse_tree.cpp index 22a178408..d2c34b3bb 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -121,8 +121,16 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); + const size_t spacesPerIndent = 2; + + // unindent statement lists by 1 to flatten them + if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) + { + if (indent > 0) indent -= 1; + } + append_format(*result, L"%2lu ", *line); - result->append(indent, L' ');; + result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) { @@ -138,7 +146,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & ++*line; for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) { - dump_tree_recursive(nodes, src, child_idx, indent + 2, result, line); + dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line); } } @@ -239,6 +247,18 @@ class parse_ll_t // Pop from the top of the symbol stack, then push, updating node counts. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { + + // Logging? + if (1) + { + fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); + if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok5.type).c_str()); + if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok4.type).c_str()); + if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok3.type).c_str()); + if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok2.type).c_str()); + if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok1.type).c_str()); + } + // Get the node for the top symbol and tell it about its children size_t node_idx = symbol_stack.back().node_idx; parse_node_t &node = nodes.at(node_idx); @@ -323,21 +343,38 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_if: - case parse_keyword_else: + symbol_stack_pop_push(symbol_if_header); + break; + case parse_keyword_for: - case parse_keyword_in: + symbol_stack_pop_push(symbol_for_header); + break; + case parse_keyword_while: + symbol_stack_pop_push(symbol_while_header); + break; + case parse_keyword_begin: + symbol_stack_pop_push(symbol_begin_header); + break; + case parse_keyword_function: + symbol_stack_pop_push(symbol_function_header); + break; + + case parse_keyword_else: case parse_keyword_switch: symbol_stack_pop_push(symbol_block_statement); - assert(0 && "Need assignment"); + fprintf(stderr, "Unimplemented type\n"); + PARSER_DIE(); break; case 
parse_keyword_end: // TODO break; - + + // 'in' is only special within a for_header + case parse_keyword_in: case parse_keyword_none: case parse_keyword_command: case parse_keyword_builtin: @@ -347,12 +384,16 @@ void parse_ll_t::accept_token_statement(parse_token_t token) } break; + case parse_token_type_end: + // Empty line, or just a semicolon + symbol_stack_pop_push(parse_token_type_end); + break; + case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: - case parse_token_type_end: case parse_token_type_terminate: - parse_error(L"command", token); + parse_error(L"statement", token); break; default: @@ -553,6 +594,10 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) void parse_ll_t::accept_token(parse_token_t token) { + if (1) + { + fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); + } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! symbol_stack.empty()); bool consumed = false; @@ -628,7 +673,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; default: - fprintf(stderr, "Bailing with token type %d\n", (int)token.type); + fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); break; } } diff --git a/parse_tree.h b/parse_tree.h index 271f8cf80..b95e371a6 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -136,9 +136,9 @@ class parse_node_t statement_list = | statement statement_list -# A statement is a normal job, or an if / while / and etc. +# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. 
newline) - statement = boolean_statement | block_statement | decorated_statement + statement = boolean_statement | block_statement | decorated_statement | # A block is a conditional, loop, or begin/end From d840643cb33a12ea9433b12fe7443e15db2df75c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 23 Jun 2013 02:09:46 -0700 Subject: [PATCH 010/177] More work on new parser --- parse_exec.cpp | 328 ++++++++++++++++++++++++++++++++++++------------- parse_tree.cpp | 173 +++++++++++++++++++------- parse_tree.h | 43 +++++-- 3 files changed, 403 insertions(+), 141 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index 6ab90117e..88fdd00a6 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -24,10 +24,18 @@ struct exec_argument_t node_offset_t parse_node_idx; sanity_id_t command_sanity_id; }; +typedef std::vector exec_argument_list_t; struct exec_redirection_t { + node_offset_t parse_node_idx; +}; +typedef std::vector exec_redirection_list_t; +struct exec_arguments_and_redirections_t +{ + exec_argument_list_t arguments; + exec_redirection_list_t redirections; }; struct exec_basic_statement_t @@ -35,6 +43,9 @@ struct exec_basic_statement_t // Node containing the command node_offset_t command_idx; + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + // Decoration enum { @@ -43,8 +54,6 @@ struct exec_basic_statement_t decoration_builtin } decoration; - std::vector arguments; - std::vector redirections; uint16_t sanity_id; exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) @@ -70,10 +79,48 @@ struct exec_basic_statement_t PARSER_DIE(); break; } - + } + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; } }; +struct exec_block_statement_t +{ + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + + const exec_argument_list_t 
&arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; + } + +}; + +struct exec_job_t +{ + // List of statements (separated with pipes) + std::vector statements; + + void add_statement(const exec_basic_statement_t &statement) + { + statements.push_back(statement); + } +}; + + class parse_exec_t { parse_node_tree_t parse_tree; @@ -85,6 +132,9 @@ class parse_exec_t /* The stack of nodes as we execute them */ std::vector exec_nodes; + /* The stack of jobs being built */ + std::vector assembling_jobs; + /* The stack of commands being built */ std::vector assembling_statements; @@ -95,7 +145,39 @@ class parse_exec_t PARSE_ASSERT(node.source_start + node.source_length <= src.size()); output->assign(src, node.source_start, node.source_length); } + + const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const + { + return parse_tree.at(parent.child_offset(which)); + } + + void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) + { + PARSE_ASSERT(! exec_nodes.empty()); + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + node_offset_t child_node_idx = parse_node.child_start; + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 
0 : assembling_statements.back().sanity_id; + const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; + for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) + { + node_offset_t idx = idxs[q]; + if (idx != (node_offset_t)(-1)) + { + PARSE_ASSERT(idx < parse_node.child_count); + exec_nodes.push_back(child_node_idx + idx); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } + + } + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -139,90 +221,130 @@ class parse_exec_t pop_push(0, parse_node.child_count); } - void assemble_command(node_offset_t idx) - { - // Set the command for our top basic statement - PARSE_ASSERT(! assembling_statements.empty()); - assembling_statements.back().command_idx = idx; - } - - void assemble_argument_or_redirection(node_offset_t idx) + void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const { const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(! 
assembling_statements.empty()); - exec_basic_statement_t &statement = assembling_statements.back(); - switch (node.type) + PARSE_ASSERT(output != NULL); + PARSE_ASSERT(node.type == symbol_argument_or_redirection); + PARSE_ASSERT(node.child_count == 1); + node_offset_t child_idx = node.child_offset(0); + const parse_node_t &child = parse_tree.at(child_idx); + switch (child.type) { case parse_token_type_string: // Argument { exec_argument_t arg = exec_argument_t(); arg.parse_node_idx = idx; - arg.command_sanity_id = statement.sanity_id; - statement.arguments.push_back(arg); + output->arguments.push_back(arg); } break; case parse_token_type_redirection: // Redirection + { + exec_redirection_t redirect = exec_redirection_t(); + redirect.parse_node_idx = idx; + output->redirections.push_back(redirect); + } break; default: PARSER_DIE(); break; } - } - void assembly_complete() + void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const { - // Finished building a command - PARSE_ASSERT(! 
assembling_statements.empty()); - const exec_basic_statement_t &statement = assembling_statements.back(); + node_offset_t idx = start_idx; + for (;;) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list); + PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); + if (node.child_count == 0) + { + // No more children + break; + } + else + { + // Skip to next child + assemble_1_argument_or_redirection(node.child_offset(0), output); + idx = node.child_offset(1); + } + } + } + + void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_plain_statement); + PARSE_ASSERT(node.child_count == 2); + exec_basic_statement_t statement; + statement.set_decoration(decoration); + statement.command_idx = node.child_offset(0); + assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); + assembling_jobs.back().add_statement(statement); + } + + void job_assembly_complete() + { + PARSE_ASSERT(! 
assembling_jobs.empty()); + const exec_job_t &job = assembling_jobs.back(); if (simulating) { - simulate_statement(statement); + simulate_job(job); } - assembling_statements.pop_back(); + assembling_jobs.pop_back(); } - void simulate_statement(const exec_basic_statement_t &statement) + void simulate_job(const exec_job_t &job) { PARSE_ASSERT(simulating); wcstring line; - switch (statement.decoration) + for (size_t i=0; i < job.statements.size(); i++) { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; + if (i > 0) + { + line.append(L" "); + } + const exec_basic_statement_t &statement = job.statements.at(i); + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; - default: - break; - } - - wcstring tmp; - get_node_string(statement.command_idx, &tmp); - line.append(L"cmd:"); - line.append(tmp); - for (size_t i=0; i < statement.arguments.size(); i++) - { - const exec_argument_t &arg = statement.arguments.at(i); - get_node_string(arg.parse_node_idx, &tmp); - line.append(L" "); - line.append(L"arg:"); + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); line.append(tmp); + for (size_t i=0; i < statement.arguments().size(); i++) + { + const exec_argument_t &arg = statement.arguments().at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } } - simulation_result.push_back(line); + simulation_result.push_back(line); } void enter_parse_node(size_t idx); void run_top_node(void); - exec_basic_statement_t *create_basic_statement(void); + exec_job_t *create_job(void); public: parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) @@ -231,10 +353,10 
@@ class parse_exec_t wcstring simulate(void); }; -exec_basic_statement_t *parse_exec_t::create_basic_statement() +exec_job_t *parse_exec_t::create_job() { - assembling_statements.push_back(exec_basic_statement_t()); - return &assembling_statements.back(); + assembling_jobs.push_back(exec_job_t()); + return &assembling_jobs.back(); } void parse_exec_t::run_top_node() @@ -242,7 +364,7 @@ void parse_exec_t::run_top_node() PARSE_ASSERT(! exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - bool log = false; + bool log = true; if (log) { @@ -254,68 +376,102 @@ void parse_exec_t::run_top_node() switch (parse_node.type) { - case symbol_statement_list: + case symbol_job_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // No more jobs, done + pop(); + } + else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) + { + // Empty job, so just skip it + pop_push(1, 1); + } + else + { + // Normal job + pop_push(0, 2); + } + break; + + case symbol_job: + { + PARSE_ASSERT(parse_node.child_count == 2); + exec_job_t *job = create_job(); pop_push_all(); break; - - case symbol_statement: - { - PARSE_ASSERT(parse_node.child_count == 1); - // See if we're just an empty statement - const parse_node_t &child = parse_tree.at(parse_node.child_start + 0); - if (child.type == parse_token_type_end) + } + + case symbol_job_continuation: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); + if (parse_node.child_count == 0) { - // Empty statement + // All done with this job + job_assembly_complete(); pop(); } else { - // We have a statement to execute - pop_push_all(); + // Skip the pipe + pop_push(1, 2); } + break; + + case symbol_statement: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_block_statement: + { + PARSE_ASSERT(parse_node.child_count == 5); + 
pop_push_specific(0, 2, 4); + break; + } + + case symbol_block_header: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_function_header: + { + PARSE_ASSERT(parse_node.child_count == 3); + //pop_push_all(); + pop(); break; } case symbol_decorated_statement: { - PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - exec_basic_statement_t *cmd = create_basic_statement(); - cmd->set_decoration(parse_node.tag); + PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - // Push the last node (skip any decoration) - pop_push(parse_node.child_count - 1, 1); + node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); + parse_keyword_t decoration = static_cast(parse_node.tag); + assemble_command_for_plain_statement(plain_statement_idx, decoration); + pop(); break; } - + + // The following symbols should be handled by their parents, i.e. never pushed on our stack case symbol_plain_statement: - PARSE_ASSERT(parse_node.child_count == 3); - // Extract the command - PARSE_ASSERT(! 
assembling_statements.empty()); - assemble_command(parse_node.child_start + 0); - // Jump to statement list, then terminator - pop_push(1, 2); - break; - case symbol_arguments_or_redirections_list: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - pop_push_all(); - break; - case symbol_argument_or_redirection: - PARSE_ASSERT(parse_node.child_count == 1); - assemble_argument_or_redirection(parse_node.child_start + 0); - pop(); + PARSER_DIE(); break; case parse_token_type_end: PARSE_ASSERT(parse_node.child_count == 0); - assembly_complete(); pop(); break; default: - fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx); PARSER_DIE(); break; diff --git a/parse_tree.cpp b/parse_tree.cpp index d2c34b3bb..019afc53b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -38,13 +38,16 @@ wcstring parse_error_t::describe(const wcstring &src) const return result; } -static wcstring token_type_description(parse_token_type_t type) +wcstring token_type_description(parse_token_type_t type) { switch (type) { case token_type_invalid: return L"invalid"; - case symbol_statement_list: return L"statement_list"; + case symbol_job_list: return L"job_list"; + case symbol_job: return L"job"; + case symbol_job_continuation: return L"job_continuation"; + case symbol_statement: return L"statement"; case symbol_block_statement: return L"block_statement"; case symbol_block_header: return L"block_header"; @@ -70,6 +73,30 @@ static wcstring token_type_description(parse_token_type_t type) } } +wcstring keyword_description(parse_keyword_t k) +{ + switch (k) + { + case parse_keyword_none: return L"none"; + case parse_keyword_if: return L"if"; + case parse_keyword_else: return L"else"; + case parse_keyword_for: return L"for"; + case parse_keyword_in: return L"in"; + case parse_keyword_while: return L"while"; + case 
parse_keyword_begin: return L"begin"; + case parse_keyword_function: return L"function"; + case parse_keyword_switch: return L"switch"; + case parse_keyword_end: return L"end"; + case parse_keyword_and: return L"and"; + case parse_keyword_or: return L"or"; + case parse_keyword_not: return L"not"; + case parse_keyword_command: return L"command"; + case parse_keyword_builtin: return L"builtin"; + default: + return format_string(L"Unknown keyword type %ld", static_cast(k)); + } +} + wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); @@ -83,8 +110,20 @@ struct parse_token_t enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; + + wcstring describe() const; }; +wcstring parse_token_t::describe(void) const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; +} + // Convert from tokenizer_t's token type to our token static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) { @@ -124,12 +163,12 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & const size_t spacesPerIndent = 2; // unindent statement lists by 1 to flatten them - if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) + if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) { if (indent > 0) indent -= 1; } - append_format(*result, L"%2lu ", *line); + append_format(*result, L"%2lu - %l2u ", *line, start); result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) @@ -190,17 +229,19 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_statement_list; + parse_stack_element_t elem = symbol_job_list; elem.node_idx = 0; 
symbol_stack.push_back(elem); // goal token - nodes.push_back(parse_node_t(symbol_statement_list)); + nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); // implementation of certain parser constructions - void accept_token(parse_token_t token); - void accept_token_statement_list(parse_token_t token); + void accept_token(parse_token_t token, const wcstring &src); + void accept_token_job_list(parse_token_t token); + void accept_token_job(parse_token_t token); + void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); @@ -289,7 +330,7 @@ class parse_ll_t void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { - fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); + fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); PARSER_DIE(); } @@ -304,17 +345,33 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_statement_list(parse_token_t token) +void parse_ll_t::accept_token_job_list(parse_token_t token) { - PARSE_ASSERT(stack_top_type() == symbol_statement_list); + PARSE_ASSERT(stack_top_type() == symbol_job_list); switch (token.type) { case parse_token_type_string: + // 'end' is special + if (token.keyword == parse_keyword_end) + { + // End this job list + symbol_stack_pop_push(); + } + else + { + // Normal string + symbol_stack_pop_push(symbol_job, symbol_job_list); + } + break; + case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: + symbol_stack_pop_push(symbol_job, symbol_job_list); + break; + case parse_token_type_end: - symbol_stack_pop_push(symbol_statement, symbol_statement_list); + 
symbol_stack_pop_push(parse_token_type_end, symbol_job_list); break; case parse_token_type_terminate: @@ -328,6 +385,30 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) } } +void parse_ll_t::accept_token_job(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job); + symbol_stack_pop_push(symbol_statement, symbol_job_continuation); +} + +void parse_ll_t::accept_token_job_continuation(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job_continuation); + switch (token.type) + { + case parse_token_type_pipe: + // Pipe, continuation + symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation); + break; + + default: + // Not a pipe, no job continuation + symbol_stack_pop_push(); + break; + } +} + + void parse_ll_t::accept_token_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement); @@ -341,25 +422,13 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); break; - + case parse_keyword_if: - symbol_stack_pop_push(symbol_if_header); - break; - case parse_keyword_for: - symbol_stack_pop_push(symbol_for_header); - break; - case parse_keyword_while: - symbol_stack_pop_push(symbol_while_header); - break; - - case parse_keyword_begin: - symbol_stack_pop_push(symbol_begin_header); - break; - case parse_keyword_function: - symbol_stack_pop_push(symbol_function_header); + case parse_keyword_begin: + symbol_stack_pop_push(symbol_block_statement); break; case parse_keyword_else: @@ -370,7 +439,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_end: - // TODO + PARSER_DIE(); //todo break; // 'in' is only special within a for_header @@ -384,11 +453,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) } break; - case parse_token_type_end: - // Empty line, or just a semicolon - symbol_stack_pop_push(parse_token_type_end); - break; - case 
parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: @@ -415,7 +479,7 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) break; case parse_keyword_else: - //todo + PARSER_DIE(); //todo break; case parse_keyword_for: @@ -508,7 +572,7 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) @@ -588,15 +652,23 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) symbol_stack.pop_back(); result = true; } + else if (token.type == parse_token_type_pipe) + { + // Pipes are primitive + symbol_stack.pop_back(); + result = true; + } } return result; } -void parse_ll_t::accept_token(parse_token_t token) +void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) { - if (1) + bool logit = true; + if (logit) { - fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); + const wcstring txt = wcstring(src, token.source_start, token.source_length); + fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! 
symbol_stack.empty()); @@ -605,6 +677,10 @@ void parse_ll_t::accept_token(parse_token_t token) { if (top_node_match_token(token)) { + if (logit) + { + fprintf(stderr, "Consumed token %ls\n", token.describe().c_str()); + } consumed = true; break; } @@ -612,16 +688,24 @@ void parse_ll_t::accept_token(parse_token_t token) switch (stack_top_type()) { /* Symbols */ - case symbol_statement_list: - accept_token_statement_list(token); + case symbol_job_list: + accept_token_job_list(token); break; + case symbol_job: + accept_token_job(token); + break; + + case symbol_job_continuation: + accept_token_job_continuation(token); + break; + case symbol_statement: accept_token_statement(token); break; case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); break; case symbol_block_header: @@ -644,7 +728,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list); break; case symbol_boolean_statement: @@ -673,7 +757,8 @@ void parse_ll_t::accept_token(parse_token_t token) break; default: - fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); + fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); + exit_without_destructors(EXIT_FAILURE); break; } } @@ -741,7 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ token.source_start = (size_t)tok_start; token.source_length = wcslen(tok_txt); token.keyword = 
keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token); + this->parser->accept_token(token, str); } wcstring result = dump_tree(this->parser->nodes, str); diff --git a/parse_tree.h b/parse_tree.h index b95e371a6..49ec6a132 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -20,6 +20,7 @@ class parse_node_t; typedef std::vector parse_node_tree_t; typedef size_t node_offset_t; +#define NODE_OFFSET_INVALID (static_cast(-1)) struct parse_error_t { @@ -51,7 +52,9 @@ enum parse_token_type_t token_type_invalid, // Non-terminal tokens - symbol_statement_list, + symbol_job_list, + symbol_job, + symbol_job_continuation, symbol_statement, symbol_block_statement, symbol_block_header, @@ -96,6 +99,9 @@ enum parse_keyword_t parse_keyword_builtin }; +wcstring token_type_description(parse_token_type_t type); +wcstring keyword_description(parse_keyword_t type); + /** Base class for nodes of a parse tree */ class parse_node_t { @@ -125,31 +131,46 @@ class parse_node_t explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } + + node_offset_t child_offset(node_offset_t which) const + { + PARSE_ASSERT(which < child_count); + return child_start + which; + } }; /* Fish grammar: -# A statement_list is a list of statements, separated by semicolons or newlines +# A job_list is a list of jobs, separated by semicolons or newlines - statement_list = | - statement statement_list + job_list = | + job_list | + job job_list -# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. newline) +# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation - statement = boolean_statement | block_statement | decorated_statement | + job = statement job_continuation + job_continuation = | + statement job_continuation + +# A statement is a normal command, or an if / while / and etc + + statement = boolean_statement | block_statement | decorated_statement # A block is a conditional, loop, or begin/end - block_statement = block_header statement_list END arguments_or_redirections_list + block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = if_header | for_header | while_header | function_header | begin_header if_header = IF statement - for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement - begin_header = BEGIN STATEMENT_TERMINATOR - function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + begin_header = BEGIN + function_header = FUNCTION function_name arguments_or_redirections_list +#(TODO: functions should not support taking redirections in their arguments) + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement @@ -157,7 +178,7 @@ class parse_node_t # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list terminator + plain_statement = command arguments_or_redirections_list arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 66af0c1a53b8108e6509a24921a068870120244c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 24 Jun 2013 12:33:40 -0700 Subject: [PATCH 011/177] More work on the AST. block statements worked out a bit more. 
--- builtin.cpp | 112 ++++++++++++++- parse_exec.cpp | 366 ++++++++++++++++++++----------------------------- parse_exec.h | 125 ++++++++++++++++- parse_tree.cpp | 2 + parse_tree.h | 8 +- proc.h | 6 - 6 files changed, 389 insertions(+), 230 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index fe09f4f68..d77d6361b 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3940,6 +3940,105 @@ static int builtin_history(parser_t &parser, wchar_t **argv) return STATUS_BUILTIN_ERROR; } +#pragma mark Simulator + +struct parse_execution_simulator_t : public parse_execution_visitor_t +{ + wcstring_list_t result; + + wcstring &back() + { + assert(! result.empty()); + return result.back(); + } + + void append_src(node_offset_t idx) + { + wcstring tmp; + context->get_source(idx, &tmp); + back().append(tmp); + } + + void append(const wchar_t *s) + { + back().append(s); + } + + bool enter_job_list(void) + { + return true; + } + + bool enter_job(void) + { + result.resize(result.size() + 1); + return true; + } + + void visit_statement(void) + { + } + + virtual void visit_boolean_statement(void) + { + } + + virtual void enter_if_header(const if_header_t &statement) + { + } + + virtual void exit_if_header(const if_header_t &statement) + { + append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); + } + + void visit_basic_statement(const exec_basic_statement_t &statement) + { + wcstring &line = this->back(); + if (! 
line.empty()) + { + line.append(L" "); + } + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; + + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + line.append(L"cmd:"); + this->append_src(statement.command_idx); + for (size_t i=0; i < statement.arguments().size(); i++) + { + const exec_argument_t &arg = statement.arguments().at(i); + append(L" "); + append(L"arg:"); + append_src(arg.parse_node_idx); + } + } + + void visit_function(const exec_function_header_t &function) { + wcstring &line = this->back(); + line.append(L"define function: "); + wcstring tmp; + context->get_source(function.name_idx, &tmp); + line.append(tmp); + } + + void exit_job_list(void) + { + } +}; + + + int builtin_parse(parser_t &parser, wchar_t **argv) { std::vector txt; @@ -3969,10 +4068,17 @@ int builtin_parse(parser_t &parser, wchar_t **argv) else { parse_execution_context_t ctx(parse_tree, src); + parse_execution_simulator_t sim; + sim.context = &ctx; + while (ctx.visit_next_node(&sim)) + { + } stdout_buffer.append(L"Simulating execution:\n"); - wcstring simulation = ctx.simulate(); - stdout_buffer.append(simulation); - stdout_buffer.push_back(L'\n'); + for (size_t i=0; i < sim.result.size(); i++) + { + stdout_buffer.append(sim.result.at(i)); + stdout_buffer.push_back(L'\n'); + } } } return STATUS_BUILTIN_OK; diff --git a/parse_exec.cpp b/parse_exec.cpp index 88fdd00a6..f12f757a7 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1,124 +1,25 @@ #include "parse_exec.h" #include -typedef uint16_t sanity_id_t; -static sanity_id_t next_sanity_id() -{ - static sanity_id_t last_sanity_id; - return ++last_sanity_id; -} - struct exec_node_t { node_offset_t parse_node_idx; - sanity_id_t command_sanity_id; + node_offset_t body_parse_node_idx; + bool visited; - exec_node_t(size_t pni) : parse_node_idx(pni) + explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), 
body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) { } + explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) + { + } }; -struct exec_argument_t +exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) { - node_offset_t parse_node_idx; - sanity_id_t command_sanity_id; -}; -typedef std::vector exec_argument_list_t; - -struct exec_redirection_t -{ - node_offset_t parse_node_idx; -}; -typedef std::vector exec_redirection_list_t; - -struct exec_arguments_and_redirections_t -{ - exec_argument_list_t arguments; - exec_redirection_list_t redirections; -}; - -struct exec_basic_statement_t -{ - // Node containing the command - node_offset_t command_idx; - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - // Decoration - enum - { - decoration_plain, - decoration_command, - decoration_builtin - } decoration; - - uint16_t sanity_id; - - exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) - { - sanity_id = next_sanity_id(); - } - - void set_decoration(uint32_t k) - { - PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); - switch (k) - { - case parse_keyword_none: - decoration = decoration_plain; - break; - case parse_keyword_command: - decoration = decoration_command; - break; - case parse_keyword_builtin: - decoration = decoration_builtin; - break; - default: - PARSER_DIE(); - break; - } - } - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } -}; - -struct exec_block_statement_t -{ - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const 
exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } - -}; - -struct exec_job_t -{ - // List of statements (separated with pipes) - std::vector statements; - - void add_statement(const exec_basic_statement_t &statement) - { - statements.push_back(statement); - } -}; +} class parse_exec_t @@ -126,26 +27,15 @@ class parse_exec_t parse_node_tree_t parse_tree; wcstring src; - bool simulating; - wcstring_list_t simulation_result; - /* The stack of nodes as we execute them */ std::vector exec_nodes; - /* The stack of jobs being built */ - std::vector assembling_jobs; - /* The stack of commands being built */ std::vector assembling_statements; - void get_node_string(node_offset_t idx, wcstring *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.source_start <= src.size()); - PARSE_ASSERT(node.source_start + node.source_length <= src.size()); - output->assign(src, node.source_start, node.source_length); - } - + /* Current visitor (very transient) */ + struct parse_execution_visitor_t * visitor; + const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); @@ -163,7 +53,6 @@ class parse_exec_t exec_nodes.pop_back(); // Append the given children, backwards - sanity_id_t command_sanity_id = assembling_statements.empty() ? 
0 : assembling_statements.back().sanity_id; const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) { @@ -171,13 +60,23 @@ class parse_exec_t if (idx != (node_offset_t)(-1)) { PARSE_ASSERT(idx < parse_node.child_count); - exec_nodes.push_back(child_node_idx + idx); - exec_nodes.back().command_sanity_id = command_sanity_id; + exec_nodes.push_back(exec_node_t(child_node_idx + idx)); } } } + void push(node_offset_t global_idx) + { + exec_nodes.push_back(exec_node_t(global_idx)); + } + + void push(const exec_node_t &node) + { + exec_nodes.push_back(node); + } + + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -198,12 +97,10 @@ class parse_exec_t exec_nodes.pop_back(); // Append the given children, backwards - sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; node_offset_t cursor = child_count; while (cursor--) { - exec_nodes.push_back(child_node_idx + cursor); - exec_nodes.back().command_sanity_id = command_sanity_id; + exec_nodes.push_back(exec_node_t(child_node_idx + cursor)); } } } @@ -235,7 +132,7 @@ class parse_exec_t // Argument { exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = idx; + arg.parse_node_idx = child_idx; output->arguments.push_back(arg); } break; @@ -244,7 +141,7 @@ class parse_exec_t // Redirection { exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = idx; + redirect.parse_node_idx = child_idx; output->redirections.push_back(redirect); } break; @@ -286,84 +183,106 @@ class parse_exec_t statement.set_decoration(decoration); statement.command_idx = node.child_offset(0); assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); - assembling_jobs.back().add_statement(statement); + visitor->visit_basic_statement(statement); } - void job_assembly_complete() + void 
assemble_block_statement(node_offset_t parse_node_idx) { - PARSE_ASSERT(! assembling_jobs.empty()); - const exec_job_t &job = assembling_jobs.back(); - if (simulating) - { - simulate_job(job); - } - assembling_jobs.pop_back(); + const parse_node_t &node = parse_tree.at(parse_node_idx); + PARSE_ASSERT(node.type == symbol_block_statement); + PARSE_ASSERT(node.child_count == 5); + + // Fetch arguments and redirections. These ought to be evaluated before the job list + exec_block_statement_t statement; + assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); + + // Generic visit + visitor->enter_block_statement(statement); + + // Dig into the header to discover the type + const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); + PARSE_ASSERT(header_parent.type == symbol_block_header); + PARSE_ASSERT(header_parent.child_count == 1); + const node_offset_t header_idx = header_parent.child_offset(0); + + // Fetch body (job list) + node_offset_t body_idx = node.child_offset(2); + PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); + + pop(); + push(exec_node_t(header_idx, body_idx)); } - void simulate_job(const exec_job_t &job) + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { - PARSE_ASSERT(simulating); - wcstring line; - for (size_t i=0; i < job.statements.size(); i++) + PARSE_ASSERT(header.type == symbol_function_header); + PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); + PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); + exec_function_header_t function_info; + function_info.name_idx = header.child_offset(1); + function_info.body_idx = exec_node.body_parse_node_idx; + assemble_arguments_and_redirections(header.child_offset(2), &function_info.arguments_and_redirections); + visitor->visit_function(function_info); + + // Always pop + pop(); + } + + void assemble_if_header(exec_node_t &exec_node, const parse_node_t &header) + 
{ + PARSE_ASSERT(header.type == symbol_if_header); + PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); + PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); + if_header_t if_header; + if_header.body = exec_node.body_parse_node_idx; + // We may hit this on enter or exit + if (! exec_node.visited) { - if (i > 0) - { - line.append(L" "); - } - const exec_basic_statement_t &statement = job.statements.at(i); - switch (statement.decoration) - { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; - - default: - break; - } - - wcstring tmp; - get_node_string(statement.command_idx, &tmp); - line.append(L"cmd:"); - line.append(tmp); - for (size_t i=0; i < statement.arguments().size(); i++) - { - const exec_argument_t &arg = statement.arguments().at(i); - get_node_string(arg.parse_node_idx, &tmp); - line.append(L" "); - line.append(L"arg:"); - line.append(tmp); - } + // Entry. Don't pop the header - just push the job. We'll pop it on exit. + exec_node.visited = true; + visitor->enter_if_header(if_header); + push(header.child_offset(1)); } - simulation_result.push_back(line); + else + { + // Exit. Pop it. 
+ visitor->exit_if_header(if_header); + pop(); + } + } void enter_parse_node(size_t idx); void run_top_node(void); - exec_job_t *create_job(void); public: - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) + + void get_node_string(node_offset_t idx, wcstring *output) const { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.source_start <= src.size()); + PARSE_ASSERT(node.source_start + node.source_length <= src.size()); + output->assign(src, node.source_start, node.source_length); + } + + bool visit_next_node(parse_execution_visitor_t *v); + + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) + { + if (! parse_tree.empty()) + { + exec_nodes.push_back(exec_node_t(0)); + } } - wcstring simulate(void); }; -exec_job_t *parse_exec_t::create_job() -{ - assembling_jobs.push_back(exec_job_t()); - return &assembling_jobs.back(); -} - void parse_exec_t::run_top_node() { PARSE_ASSERT(! 
exec_nodes.empty()); - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + exec_node_t &exec_node = exec_nodes.back(); + const node_offset_t parse_node_idx = exec_node.parse_node_idx; + const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); bool log = true; if (log) @@ -381,6 +300,7 @@ void parse_exec_t::run_top_node() if (parse_node.child_count == 0) { // No more jobs, done + visitor->exit_job_list(); pop(); } else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) @@ -391,6 +311,7 @@ void parse_exec_t::run_top_node() else { // Normal job + visitor->enter_job_list(); pop_push(0, 2); } break; @@ -398,7 +319,7 @@ void parse_exec_t::run_top_node() case symbol_job: { PARSE_ASSERT(parse_node.child_count == 2); - exec_job_t *job = create_job(); + visitor->enter_job(); pop_push_all(); break; } @@ -408,7 +329,7 @@ void parse_exec_t::run_top_node() if (parse_node.child_count == 0) { // All done with this job - job_assembly_complete(); + visitor->exit_job(); pop(); } else @@ -428,7 +349,7 @@ void parse_exec_t::run_top_node() case symbol_block_statement: { PARSE_ASSERT(parse_node.child_count == 5); - pop_push_specific(0, 2, 4); + assemble_block_statement(parse_node_idx); break; } @@ -442,8 +363,14 @@ void parse_exec_t::run_top_node() case symbol_function_header: { PARSE_ASSERT(parse_node.child_count == 3); - //pop_push_all(); - pop(); + assemble_function_header(exec_node, parse_node); + break; + } + + case symbol_if_header: + { + PARSE_ASSERT(parse_node.child_count == 2); + assemble_if_header(exec_node, parse_node); break; } @@ -462,6 +389,7 @@ void parse_exec_t::run_top_node() case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; @@ -471,13 +399,28 @@ void 
parse_exec_t::run_top_node() break; default: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx); + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; } } +bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) +{ + PARSE_ASSERT(v != NULL); + PARSE_ASSERT(visitor == NULL); + if (exec_nodes.empty()) + { + return false; + } + + visitor = v; + run_top_node(); + visitor = NULL; + return true; +} + void parse_exec_t::enter_parse_node(size_t idx) { PARSE_ASSERT(idx < parse_tree.size()); @@ -485,36 +428,27 @@ void parse_exec_t::enter_parse_node(size_t idx) exec_nodes.push_back(exec); } -wcstring parse_exec_t::simulate(void) -{ - if (parse_tree.empty()) - return L"(empty!)"; - - PARSE_ASSERT(exec_nodes.empty()); - simulating = true; - - enter_parse_node(0); - while (! exec_nodes.empty()) - { - run_top_node(); - } - - wcstring result; - for (size_t i=0; i < simulation_result.size(); i++) - { - result.append(simulation_result.at(i)); - result.append(L"\n"); - } - - return result; -} parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) { ctx = new parse_exec_t(n, s); } -wcstring parse_execution_context_t::simulate(void) +parse_execution_context_t::~parse_execution_context_t() { - return ctx->simulate(); + delete ctx; } + +bool parse_execution_context_t::visit_next_node(parse_execution_visitor_t *visitor) +{ + return ctx->visit_next_node(visitor); +} + +void parse_execution_context_t::get_source(node_offset_t idx, wcstring *result) const +{ + return ctx->get_node_string(idx, result); +} + + + + diff --git a/parse_exec.h b/parse_exec.h index 635ebb836..533051993 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -8,15 +8,136 @@ #include "parse_tree.h" +struct parse_execution_visitor_t; class parse_exec_t; class parse_execution_context_t { - 
parse_exec_t *ctx; + parse_exec_t *ctx; //owned public: parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); - wcstring simulate(void); + ~parse_execution_context_t(); + + bool visit_next_node(parse_execution_visitor_t *visitor); + + // Gets the source for a node at a given index + void get_source(node_offset_t idx, wcstring *result) const; }; +struct exec_argument_t +{ + node_offset_t parse_node_idx; +}; +typedef std::vector exec_argument_list_t; + +struct exec_redirection_t +{ + node_offset_t parse_node_idx; +}; +typedef std::vector exec_redirection_list_t; + +struct exec_arguments_and_redirections_t +{ + exec_argument_list_t arguments; + exec_redirection_list_t redirections; +}; + +struct exec_basic_statement_t +{ + // Node containing the command + node_offset_t command_idx; + + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + + // Decoration + enum + { + decoration_plain, + decoration_command, + decoration_builtin + } decoration; + + exec_basic_statement_t(); + + void set_decoration(uint32_t k) + { + PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); + switch (k) + { + case parse_keyword_none: + decoration = decoration_plain; + break; + case parse_keyword_command: + decoration = decoration_command; + break; + case parse_keyword_builtin: + decoration = decoration_builtin; + break; + default: + PARSER_DIE(); + break; + } + } + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; + } +}; + +struct exec_function_header_t +{ + // Node containing the function name + node_offset_t name_idx; + + // Node containing the function body + node_offset_t body_idx; + + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; +}; + +struct exec_block_statement_t +{ + // Arguments + 
exec_arguments_and_redirections_t arguments_and_redirections; +}; + +struct if_header_t +{ + // Node containing the body of the if statement + node_offset_t body; +}; + +struct parse_execution_visitor_t +{ + node_offset_t node_idx; + parse_execution_context_t *context; + + parse_execution_visitor_t() : node_idx(0), context(NULL) + { + } + + virtual bool enter_job_list(void) { return true; } + virtual bool enter_job(void) { return true; } + virtual void visit_statement(void) { } + virtual void visit_function(const exec_function_header_t &function) { } + virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } + + virtual void enter_if_header(const if_header_t &statement) { } + virtual void exit_if_header(const if_header_t &statement) { } + + virtual void visit_boolean_statement(void) { } + virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } + virtual void exit_job(void) { } + virtual void exit_job_list(void) { } +}; + #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index 019afc53b..6f6982e19 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -244,6 +244,7 @@ class parse_ll_t void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t token); + void accept_token_if_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); @@ -713,6 +714,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_if_header: + symbol_stack_pop_push(parse_keyword_if, symbol_job); break; case symbol_for_header: diff --git a/parse_tree.h b/parse_tree.h index 49ec6a132..29365209c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -18,7 +18,7 @@ #define PARSER_DIE() assert(0) class parse_node_t; -typedef std::vector parse_node_tree_t; +class 
parse_node_tree_t; typedef size_t node_offset_t; #define NODE_OFFSET_INVALID (static_cast(-1)) @@ -123,7 +123,6 @@ class parse_node_t /* Type-dependent data */ uint32_t tag; - /* Description */ wcstring describe(void) const; @@ -139,6 +138,9 @@ class parse_node_t } }; +class parse_node_tree_t : public std::vector +{ +}; /* Fish grammar: @@ -163,7 +165,7 @@ class parse_node_t block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF statement + if_header = IF job for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN diff --git a/proc.h b/proc.h index 163831116..1d4d210d4 100644 --- a/proc.h +++ b/proc.h @@ -482,18 +482,12 @@ void job_free(job_t* j); */ void job_promote(job_t *job); -/** - Create a new job. -*/ -job_t *job_create(); - /** Return the job with the specified job id. If id is 0 or less, return the last job used. */ job_t *job_get(job_id_t id); - /** Return the job with the specified pid. 
*/ From eba5b0e4c5c079aaf9b5e1cf69d096ca9965342a Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 27 Jun 2013 15:12:27 -0700 Subject: [PATCH 012/177] If statements parse --- parse_exec.cpp | 31 ++----------- parse_tree.cpp | 124 ++++++++++++++++++++++++++++++++++++++++++------- parse_tree.h | 24 +++++++--- 3 files changed, 130 insertions(+), 49 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index f12f757a7..c5c15a076 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -228,30 +228,7 @@ class parse_exec_t // Always pop pop(); } - - void assemble_if_header(exec_node_t &exec_node, const parse_node_t &header) - { - PARSE_ASSERT(header.type == symbol_if_header); - PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); - PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); - if_header_t if_header; - if_header.body = exec_node.body_parse_node_idx; - // We may hit this on enter or exit - if (! exec_node.visited) - { - // Entry. Don't pop the header - just push the job. We'll pop it on exit. - exec_node.visited = true; - visitor->enter_if_header(if_header); - push(header.child_offset(1)); - } - else - { - // Exit. Pop it. 
- visitor->exit_if_header(if_header); - pop(); - } - } void enter_parse_node(size_t idx); void run_top_node(void); @@ -367,11 +344,11 @@ void parse_exec_t::run_top_node() break; } - case symbol_if_header: + case symbol_if_statement: { - PARSE_ASSERT(parse_node.child_count == 2); - assemble_if_header(exec_node, parse_node); - break; + PARSE_ASSERT(parse_node.child_count == 3); + + } case symbol_decorated_statement: diff --git a/parse_tree.cpp b/parse_tree.cpp index 6f6982e19..bdf77dc55 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -51,11 +51,16 @@ wcstring token_type_description(parse_token_type_t type) case symbol_statement: return L"statement"; case symbol_block_statement: return L"block_statement"; case symbol_block_header: return L"block_header"; - case symbol_if_header: return L"if_header"; case symbol_for_header: return L"for_header"; case symbol_while_header: return L"while_header"; case symbol_begin_header: return L"begin_header"; case symbol_function_header: return L"function_header"; + + case symbol_if_statement: return L"if_statement"; + case symbol_if_clause: return L"if_clause"; + case symbol_else_clause: return L"else_clause"; + case symbol_else_continuation: return L"else_continuation"; + case symbol_boolean_statement: return L"boolean_statement"; case symbol_decorated_statement: return L"decorated_statement"; case symbol_plain_statement: return L"plain_statement"; @@ -213,6 +218,17 @@ struct parse_stack_element_t parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { } + + wcstring describe(void) const + { + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; + } + }; class parse_ll_t @@ -244,7 +260,8 @@ class parse_ll_t void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t 
token); - void accept_token_if_header(parse_token_t token); + void accept_token_else_clause(parse_token_t token); + void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); @@ -257,6 +274,8 @@ class parse_ll_t void parse_error(const wchar_t *expected, parse_token_t token); void append_error_callout(wcstring &error_message, parse_token_t token); + void dump_stack(void) const; + // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() { @@ -294,11 +313,11 @@ class parse_ll_t if (1) { fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); - if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok5.type).c_str()); - if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok4.type).c_str()); - if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok3.type).c_str()); - if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok2.type).c_str()); - if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok1.type).c_str()); + if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); + if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); + if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); + if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); + if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); } // Get the node for the top symbol and tell it about its children @@ -329,9 +348,41 @@ class parse_ll_t } }; +void parse_ll_t::dump_stack(void) const +{ + // Walk 
backwards from the top, looking for parents + wcstring_list_t lines; + if (symbol_stack.empty()) + { + lines.push_back(L"(empty)"); + } + else + { + node_offset_t child = symbol_stack.back().node_idx; + node_offset_t cursor = child; + lines.push_back(nodes.at(cursor).describe()); + while (cursor--) + { + const parse_node_t &node = nodes.at(cursor); + if (node.child_start <= child && node.child_start + node.child_count > child) + { + lines.push_back(node.describe()); + child = cursor; + } + } + } + + fprintf(stderr, "Stack dump (%lu elements):\n", symbol_stack.size()); + for (size_t idx = 0; idx < lines.size(); idx++) + { + fprintf(stderr, " %ls\n", lines.at(idx).c_str()); + } +} + void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); + this->dump_stack(); PARSER_DIE(); } @@ -424,13 +475,16 @@ void parse_ll_t::accept_token_statement(parse_token_t token) symbol_stack_pop_push(symbol_boolean_statement); break; - case parse_keyword_if: case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: symbol_stack_pop_push(symbol_block_statement); break; + + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_statement); + break; case parse_keyword_else: case parse_keyword_switch: @@ -475,10 +529,6 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) case parse_token_type_string: switch (token.keyword) { - case parse_keyword_if: - symbol_stack_pop_push(symbol_if_header); - break; - case parse_keyword_else: PARSER_DIE(); //todo break; @@ -513,6 +563,36 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) } } +void parse_ll_t::accept_token_else_clause(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_else_clause); + switch (token.keyword) + { + case parse_keyword_else: + symbol_stack_pop_push(parse_keyword_else, symbol_else_continuation); + 
break; + + default: + symbol_stack_pop_push(); + break; + } +} + +void parse_ll_t::accept_token_else_continuation(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_else_continuation); + switch (token.keyword) + { + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_clause, symbol_else_clause); + break; + + default: + symbol_stack_pop_push(parse_token_type_end, symbol_job_list); + break; + } +} + void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); @@ -705,6 +785,22 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_statement(token); break; + case symbol_if_statement: + symbol_stack_pop_push(symbol_if_clause, symbol_else_clause, parse_keyword_end); + break; + + case symbol_if_clause: + symbol_stack_pop_push(parse_keyword_if, symbol_job, parse_token_type_end, symbol_job_list); + break; + + case symbol_else_clause: + accept_token_else_clause(token); + break; + + case symbol_else_continuation: + accept_token_else_continuation(token); + break; + case symbol_block_statement: symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); break; @@ -713,10 +809,6 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_block_header(token); break; - case symbol_if_header: - symbol_stack_pop_push(parse_keyword_if, symbol_job); - break; - case symbol_for_header: symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); break; diff --git a/parse_tree.h b/parse_tree.h index 29365209c..4530a6326 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -15,7 +15,7 @@ #include #define PARSE_ASSERT(a) assert(a) -#define PARSER_DIE() assert(0) +#define PARSER_DIE() exit_without_destructors(-1) class parse_node_t; class parse_node_tree_t; @@ -58,11 +58,16 @@ enum 
parse_token_type_t symbol_statement, symbol_block_statement, symbol_block_header, - symbol_if_header, symbol_for_header, symbol_while_header, symbol_begin_header, symbol_function_header, + + symbol_if_statement, + symbol_if_clause, + symbol_else_clause, + symbol_else_continuation, + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, @@ -159,17 +164,24 @@ class parse_node_tree_t : public std::vector # A statement is a normal command, or an if / while / and etc - statement = boolean_statement | block_statement | decorated_statement + statement = boolean_statement | block_statement | if_statement | decorated_statement # A block is a conditional, loop, or begin/end + if_statement = if_clause else_clause + if_clause = job STATEMENT_TERMINATOR job_list + else_clause = | + else_continuation + else_continuation = if_clause else_clause | + STATEMENT_TERMINATOR job_list + block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list - block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF job - for_header = FOR var_name IN arguments_or_redirections_list + block_header = for_header | while_header | function_header | begin_header + for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN function_header = FUNCTION function_name arguments_or_redirections_list + #(TODO: functions should not support taking redirections in their arguments) From 70b83a3bbbb7b4b7809d7164fc4ed62342355eb3 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 30 Jun 2013 15:38:31 -0700 Subject: [PATCH 013/177] Implement support for switch --- builtin.cpp | 10 ++- parse_exec.cpp | 172 +++++++++++++++++++++++++++++++++++++++++++++++-- parse_exec.h | 29 +++++++-- parse_tree.cpp | 94 +++++++++++++++++++++++---- parse_tree.h | 23 +++++-- 5 files changed, 298 insertions(+), 30 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 
d77d6361b..331f96308 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3983,11 +3983,11 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t { } - virtual void enter_if_header(const if_header_t &statement) + virtual void enter_if_clause(const exec_if_clause_t &statement) { } - virtual void exit_if_header(const if_header_t &statement) + virtual void exit_if_clause(const exec_if_clause_t &statement) { append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); } @@ -4041,6 +4041,12 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t int builtin_parse(parser_t &parser, wchar_t **argv) { + struct sigaction act; + sigemptyset(& act.sa_mask); + act.sa_flags=0; + act.sa_handler=SIG_DFL; + sigaction(SIGINT, &act, 0); + std::vector txt; for (;;) { diff --git a/parse_exec.cpp b/parse_exec.cpp index c5c15a076..0028d530c 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -36,7 +36,7 @@ class parse_exec_t /* Current visitor (very transient) */ struct parse_execution_visitor_t * visitor; - const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const + const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); } @@ -214,6 +214,118 @@ class parse_exec_t push(exec_node_t(header_idx, body_idx)); } + /* which: 0 -> if, 1 -> else if, 2 -> else */ + void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) + { + if (which == 0) + { + PARSE_ASSERT(node.type == symbol_if_clause); + PARSE_ASSERT(node.child_count == 4); + } + else if (which == 2) + { + PARSE_ASSERT(node.type == symbol_else_continuation); + PARSE_ASSERT(node.child_count == 2); + } + + struct exec_if_clause_t clause; + if (which == 0) + { + clause.body = node.child_offset(3); + } + else + { + clause.body = node.child_offset(1); + } + if (! 
exec_node.visited) + { + visitor->enter_if_clause(clause); + exec_node.visited = true; + if (which == 0) + { + push(node.child_offset(1)); + } + } + else + { + visitor->exit_if_clause(clause); + pop(); + } + } + + void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const + { + node_offset_t idx = start_idx; + for (;;) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_argument_list || node.type == symbol_argument_list_nonempty); + if (node.type == symbol_argument_list) + { + // argument list, may be empty + PARSE_ASSERT(node.child_count == 0 || node.child_count == 1); + if (node.child_count == 0) + { + break; + } + else + { + idx = node.child_offset(0); + } + } + else + { + // nonempty argument list + PARSE_ASSERT(node.child_count == 2); + output->push_back(exec_argument_t(node.child_offset(0))); + idx = node.child_offset(1); + } + } + } + + void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) + { + const parse_node_t &node = parse_tree.at(node_idx); + PARSE_ASSERT(node.type == symbol_case_item); + + // add a new case + size_t len = statement->cases.size(); + statement->cases.resize(len + 1); + exec_switch_case_t &new_case = statement->cases.back(); + + // assemble it + new_case.body = node.child_offset(3); + assemble_arguments(node.child_offset(1), &new_case.arguments); + + + } + + void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) + { + const parse_node_t &node = parse_tree.at(node_idx); + PARSE_ASSERT(node.type == symbol_case_item_list); + PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); + if (node.child_count == 2) + { + assemble_1_case_item(statement, node.child_offset(0)); + assemble_case_item_list(statement, node.child_offset(1)); + } + } + + void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) + { + PARSE_ASSERT(parse_node.type == symbol_switch_statement); + 
exec_switch_statement_t statement; + + statement.argument.parse_node_idx = parse_node.child_offset(1); + assemble_case_item_list(&statement, parse_node.child_offset(3)); + + visitor->visit_switch_statement(statement); + + // pop off the switch + pop(); + } + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { PARSE_ASSERT(header.type == symbol_function_header); @@ -222,7 +334,7 @@ class parse_exec_t exec_function_header_t function_info; function_info.name_idx = header.child_offset(1); function_info.body_idx = exec_node.body_parse_node_idx; - assemble_arguments_and_redirections(header.child_offset(2), &function_info.arguments_and_redirections); + assemble_arguments(header.child_offset(2), &function_info.arguments); visitor->visit_function(function_info); // Always pop @@ -347,10 +459,59 @@ void parse_exec_t::run_top_node() case symbol_if_statement: { PARSE_ASSERT(parse_node.child_count == 3); - - + pop_push(0, 2); + break; } - + + case symbol_if_clause: + { + PARSE_ASSERT(parse_node.child_count == 4); + assemble_if_else_clause(exec_node, parse_node, 0); + pop(); + break; + } + + case symbol_else_clause: + { + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // No else + pop(); + } + else + { + // We have an else + pop_push(1); + } + break; + } + + case symbol_else_continuation: + { + // Figure out if this is an else if or a terminating else + PARSE_ASSERT(parse_node.child_count == 2); + const parse_node_t &first_child = get_child(parse_node, 1); + PARSE_ASSERT(first_child.type == symbol_if_clause || first_child.type == parse_token_type_end); + if (first_child.type == symbol_if_clause) + { + pop_push_all(); + } + else + { + // else + assemble_if_else_clause(exec_node, parse_node, 2); + pop(); + } + break; + } + + case symbol_switch_statement: + { + assemble_switch_statement(exec_node, parse_node); + break; + } + case symbol_decorated_statement: { 
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); @@ -363,6 +524,7 @@ void parse_exec_t::run_top_node() } // The following symbols should be handled by their parents, i.e. never pushed on our stack + case symbol_case_item_list: case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: diff --git a/parse_exec.h b/parse_exec.h index 533051993..197f656d3 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -28,6 +28,12 @@ class parse_execution_context_t struct exec_argument_t { node_offset_t parse_node_idx; + exec_argument_t(node_offset_t p) : parse_node_idx(p) + { + } + exec_argument_t() + { + } }; typedef std::vector exec_argument_list_t; @@ -101,7 +107,7 @@ struct exec_function_header_t node_offset_t body_idx; // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; + exec_argument_list_t arguments; }; struct exec_block_statement_t @@ -110,12 +116,24 @@ struct exec_block_statement_t exec_arguments_and_redirections_t arguments_and_redirections; }; -struct if_header_t +struct exec_if_clause_t { // Node containing the body of the if statement node_offset_t body; }; +struct exec_switch_case_t +{ + exec_argument_list_t arguments; + node_offset_t body; +}; + +struct exec_switch_statement_t +{ + exec_argument_t argument; + std::vector cases; +}; + struct parse_execution_visitor_t { node_offset_t node_idx; @@ -131,8 +149,11 @@ struct parse_execution_visitor_t virtual void visit_function(const exec_function_header_t &function) { } virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } - virtual void enter_if_header(const if_header_t &statement) { } - virtual void exit_if_header(const if_header_t &statement) { } + virtual void enter_if_clause(const exec_if_clause_t &statement) { } + virtual void exit_if_clause(const exec_if_clause_t &statement) { } + + virtual void visit_switch_statement(const exec_switch_statement_t &header) { } + virtual void 
visit_boolean_statement(void) { } virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } diff --git a/parse_tree.cpp b/parse_tree.cpp index bdf77dc55..0f5395c78 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -61,6 +61,13 @@ wcstring token_type_description(parse_token_type_t type) case symbol_else_clause: return L"else_clause"; case symbol_else_continuation: return L"else_continuation"; + case symbol_switch_statement: return L"switch_statement"; + case symbol_case_item_list: return L"case_item_list"; + case symbol_case_item: return L"case_item"; + + case symbol_argument_list_nonempty: return L"argument_list_nonempty"; + case symbol_argument_list: return L"argument_list"; + case symbol_boolean_statement: return L"boolean_statement"; case symbol_decorated_statement: return L"decorated_statement"; case symbol_plain_statement: return L"plain_statement"; @@ -263,8 +270,10 @@ class parse_ll_t void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); + void accept_token_case_item_list(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); + void accept_token_argument_list(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); bool accept_token_string(parse_token_t token); @@ -312,7 +321,7 @@ class parse_ll_t // Logging? 
if (1) { - fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); + fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); @@ -404,15 +413,18 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) { case parse_token_type_string: // 'end' is special - if (token.keyword == parse_keyword_end) + switch (token.keyword) { - // End this job list - symbol_stack_pop_push(); - } - else - { - // Normal string - symbol_stack_pop_push(symbol_job, symbol_job_list); + case parse_keyword_end: + case parse_keyword_else: + // End this job list + symbol_stack_pop_push(); + break; + + default: + // Normal string + symbol_stack_pop_push(symbol_job, symbol_job_list); + break; } break; @@ -487,10 +499,11 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_else: + symbol_stack_pop_push(); + break; + case parse_keyword_switch: - symbol_stack_pop_push(symbol_block_statement); - fprintf(stderr, "Unimplemented type\n"); - PARSER_DIE(); + symbol_stack_pop_push(symbol_switch_statement); break; case parse_keyword_end: @@ -502,6 +515,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_none: case parse_keyword_command: case parse_keyword_builtin: + case parse_keyword_case: symbol_stack_pop_push(symbol_decorated_statement); break; @@ -612,6 +626,7 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) token_unhandled(token, __FUNCTION__); break; } + break; default: token_unhandled(token, __FUNCTION__); @@ -619,6 +634,22 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) } } +void 
parse_ll_t::accept_token_case_item_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_case_item_list); + switch (token.keyword) + { + case parse_keyword_case: + symbol_stack_pop_push(symbol_case_item, symbol_case_item_list); + break; + + default: + // empty list + symbol_stack_pop_push(); + break; + } +} + void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); @@ -656,6 +687,20 @@ void parse_ll_t::accept_token_plain_statement(parse_token_t token) symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); } +void parse_ll_t::accept_token_argument_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_argument_list); + if (token.type == parse_token_type_string) + { + symbol_stack_pop_push(symbol_argument_list_nonempty); + } + else + { + symbol_stack_pop_push(); + } +} + + void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); @@ -822,7 +867,19 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list); + symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_argument_list); + break; + + case symbol_switch_statement: + symbol_stack_pop_push(parse_keyword_switch, parse_token_type_string, parse_token_type_end, symbol_case_item_list, parse_keyword_end); + break; + + case symbol_case_item_list: + accept_token_case_item_list(token); + break; + + case symbol_case_item: + symbol_stack_pop_push(parse_keyword_case, symbol_argument_list, parse_token_type_end, symbol_job_list); break; case symbol_boolean_statement: @@ -837,6 +894,14 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_plain_statement(token); break; + 
case symbol_argument_list_nonempty: + symbol_stack_pop_push(parse_token_type_string, symbol_argument_list); + break; + + case symbol_argument_list: + accept_token_argument_list(token); + break; + case symbol_arguments_or_redirections_list: accept_token_arguments_or_redirections_list(token); break; @@ -844,7 +909,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) case symbol_argument_or_redirection: accept_token_argument_or_redirection(token); break; - + /* Tokens */ case parse_token_type_string: consumed = accept_token_string(token); @@ -880,6 +945,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"begin", parse_keyword_begin}, {L"function", parse_keyword_function}, {L"switch", parse_keyword_switch}, + {L"case", parse_keyword_case}, {L"end", parse_keyword_end}, {L"and", parse_keyword_and}, {L"or", parse_keyword_or}, diff --git a/parse_tree.h b/parse_tree.h index 4530a6326..525480f3a 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -68,11 +68,18 @@ enum parse_token_type_t symbol_else_clause, symbol_else_continuation, + symbol_switch_statement, + symbol_case_item_list, + symbol_case_item, + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, + + symbol_argument_list_nonempty, + symbol_argument_list, // Terminal types parse_token_type_string, @@ -96,6 +103,7 @@ enum parse_keyword_t parse_keyword_begin, parse_keyword_function, parse_keyword_switch, + parse_keyword_case, parse_keyword_end, parse_keyword_and, parse_keyword_or, @@ -164,7 +172,7 @@ class parse_node_tree_t : public std::vector # A statement is a normal command, or an if / while / and etc - statement = boolean_statement | block_statement | if_statement | decorated_statement + statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement # A block is a conditional, loop, or begin/end @@ -174,16 +182,21 @@ class 
parse_node_tree_t : public std::vector else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list + + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list + case_item_list = | + case_item case_item_list + case_item = CASE argument_list STATEMENT_TERMINATOR job_list + + argument_list_nonempty = argument_list + argument_list = | argument_list_nonempty block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN - function_header = FUNCTION function_name arguments_or_redirections_list - - -#(TODO: functions should not support taking redirections in their arguments) + function_header = FUNCTION function_name argument_list # A boolean statement is AND or OR or NOT From 66d5436789083f0c1a4d8b32a4264f38ce6fd7c1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 4 Jul 2013 14:05:42 -0700 Subject: [PATCH 014/177] Fix for extra token_end --- parse_tree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 0f5395c78..d3e14fd2f 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -863,7 +863,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + symbol_stack_pop_push(parse_keyword_begin); break; case symbol_function_header: From 93f27666db09107561c2500c7b5e2047dbbc72fb Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 10 Jul 2013 23:45:09 -0700 Subject: [PATCH 015/177] More work --- parse_tree.cpp | 34 +++++-- parse_tree.h | 263 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 289 insertions(+), 8 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index d3e14fd2f..d812c67ed 100644 --- a/parse_tree.cpp +++ 
b/parse_tree.cpp @@ -2,6 +2,7 @@ #include "tokenizer.h" #include +using namespace parse_symbols; wcstring parse_error_t::describe(const wcstring &src) const { @@ -355,6 +356,25 @@ class parse_ll_t if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } + + template + inline void symbol_stack_pop_push2() + { + symbol_stack_pop_push(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); + } + + template + inline void symbol_stack_pop_push_production(int which) + { + switch (which) + { + case 0: symbol_stack_pop_push2(); break; + case 1: symbol_stack_pop_push2(); break; + case 2: symbol_stack_pop_push2(); break; + case 3: symbol_stack_pop_push2(); break; + case 4: symbol_stack_pop_push2(); break; + } + } }; void parse_ll_t::dump_stack(void) const @@ -418,12 +438,12 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) case parse_keyword_end: case parse_keyword_else: // End this job list - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; default: // Normal string - symbol_stack_pop_push(symbol_job, symbol_job_list); + symbol_stack_pop_push_production(1); break; } break; @@ -431,16 +451,17 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: - symbol_stack_pop_push(symbol_job, symbol_job_list); + symbol_stack_pop_push_production(1); break; case parse_token_type_end: - symbol_stack_pop_push(parse_token_type_end, symbol_job_list); + // Empty line + symbol_stack_pop_push_production(2); break; case parse_token_type_terminate: // no more commands, just transition to empty - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; default: @@ -452,7 +473,8 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) void parse_ll_t::accept_token_job(parse_token_t token) { PARSE_ASSERT(stack_top_type() == 
symbol_job); - symbol_stack_pop_push(symbol_statement, symbol_job_continuation); + //symbol_stack_pop_push(symbol_statement, symbol_job_continuation); + symbol_stack_pop_push2(); } void parse_ll_t::accept_token_job_continuation(parse_token_t token) diff --git a/parse_tree.h b/parse_tree.h index 525480f3a..9e3f087e0 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -46,7 +46,6 @@ class parse_t bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; - enum parse_token_type_t { token_type_invalid, @@ -155,6 +154,266 @@ class parse_node_tree_t : public std::vector { }; +namespace parse_symbols +{ + + #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } + + /* Placeholder */ + struct none + { + SYMBOL(token_type_invalid); + }; + + struct EMPTY + { + typedef none t0; + typedef none t1; + typedef none t2; + typedef none t3; + typedef none t4; + typedef none t5; + }; + + template + struct Seq + { + typedef T0 t0; + typedef T1 t1; + typedef T2 t2; + typedef T3 t3; + typedef T4 t4; + typedef T5 t5; + }; + + template + struct OR + { + typedef P0 p0; + typedef P1 p1; + typedef P2 p2; + typedef P3 p3; + typedef P4 p4; + typedef P5 p5; + }; + + template + struct Token + { + SYMBOL(WHICH); + }; + + template + struct Keyword + { + static inline parse_keyword_t get_token() { return WHICH; } + }; + + struct job; + struct statement; + struct job_continuation; + struct boolean_statement; + struct block_statement; + struct if_statement; + struct if_clause; + struct else_clause; + struct else_continuation; + struct switch_statement; + struct decorated_statement; + struct else_clause; + struct else_continuation; + struct switch_statement; + struct case_item_list; + struct case_item; + struct argument_list_nonempty; + struct argument_list; + struct block_statement; + struct block_header; + struct for_header; + struct while_header; + struct begin_header; + struct function_header; + struct boolean_statement; + struct 
decorated_statement; + struct plain_statement; + struct arguments_or_redirections_list; + struct argument_or_redirection; + struct redirection; + struct statement_terminator; + + /* A job_list is a list of jobs, separated by semicolons or newlines */ + struct job_list : OR< + EMPTY, + Seq, + Seq, job_list> + > + { + SYMBOL(symbol_job_list) + }; + + /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ + struct job : Seq + { + SYMBOL(symbol_job); + }; + + struct job_continuation : OR< + EMPTY, + Seq, statement, job_continuation> + > + { + SYMBOL(symbol_job_continuation); + }; + + /* A statement is a normal command, or an if / while / and etc */ + struct statement : OR< + boolean_statement, + block_statement, + if_statement, + switch_statement, + decorated_statement + > + { + SYMBOL(symbol_statement); + }; + + struct if_statement : Seq > + { + SYMBOL(symbol_if_statement); + }; + + struct if_clause : Seq, job, statement_terminator, job_list> + { + SYMBOL(symbol_if_clause); + }; + + struct else_clause : OR< + EMPTY, + Keyword, else_continuation + > + { + SYMBOL(symbol_else_clause); + }; + + struct else_continuation : OR< + Seq, + Seq + > + { + SYMBOL(symbol_else_continuation); + }; + + struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword + > + { + SYMBOL(symbol_switch_statement); + }; + + struct case_item_list : OR + < + EMPTY, + case_item, case_item_list + > + { + SYMBOL(symbol_case_item_list); + }; + + struct case_item : Seq, argument_list, statement_terminator, job_list> + { + SYMBOL(symbol_case_item); + }; + + struct argument_list_nonempty : Seq, argument_list> + { + SYMBOL(symbol_argument_list_nonempty); + }; + + struct argument_list : OR + { + SYMBOL(symbol_argument_list); + }; + + struct block_statement : Seq, arguments_or_redirections_list> + { 
+ SYMBOL(symbol_block_statement); + }; + + struct block_header : OR + { + SYMBOL(symbol_block_header); + }; + + struct for_header : Seq, Token, Keyword, arguments_or_redirections_list> + { + SYMBOL(symbol_for_header); + }; + + struct while_header : Seq, statement> + { + SYMBOL(symbol_while_header); + }; + + struct begin_header : Keyword + { + SYMBOL(symbol_begin_header); + }; + + struct function_header : Keyword + { + SYMBOL(symbol_function_header); + }; + + /* A boolean statement is AND or OR or NOT */ + struct boolean_statement : OR< + Seq, statement>, + Seq, statement>, + Seq, statement> + > + { + SYMBOL(symbol_boolean_statement); + }; + + /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ + struct decorated_statement : OR< + Seq, plain_statement>, + Seq, plain_statement>, + plain_statement + > + { + SYMBOL(symbol_decorated_statement); + }; + + struct plain_statement : Seq, arguments_or_redirections_list> + { + SYMBOL(symbol_plain_statement); + }; + + struct arguments_or_redirections_list : OR< + EMPTY, + Seq > + { + SYMBOL(symbol_arguments_or_redirections_list); + }; + + struct argument_or_redirection : OR< + Token, + redirection + > + { + SYMBOL(symbol_argument_or_redirection); + }; + + struct redirection : Token + { + SYMBOL(parse_token_type_redirection); + }; + + struct statement_terminator : Token + { + SYMBOL(parse_token_type_end); + }; +} + /* Fish grammar: @@ -205,7 +464,7 @@ class parse_node_tree_t : public std::vector # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list + plain_statement = COMMAND arguments_or_redirections_list arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 77b6b0a9b2e7260884064dbac72f17bb5e86431f Mon Sep 17 
00:00:00 2001 From: ridiculousfish Date: Sun, 21 Jul 2013 15:22:11 -0700 Subject: [PATCH 016/177] Move production logic into templates --- parse_tree.cpp | 212 ++++++++++++------------------------------------- parse_tree.h | 116 ++++++++++++++++++++++++--- 2 files changed, 157 insertions(+), 171 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index d812c67ed..24bf41e7c 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -271,7 +271,6 @@ class parse_ll_t void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); - void accept_token_case_item_list(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); void accept_token_argument_list(parse_token_t token); @@ -313,10 +312,15 @@ class parse_ll_t nodes.push_back(parse_node_t(tok->type)); nodes.at(parent_node_idx).child_count += 1; } + + inline void symbol_stack_pop() + { + symbol_stack.pop_back(); + } // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. - inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { // Logging? 
@@ -360,7 +364,7 @@ class parse_ll_t template inline void symbol_stack_pop_push2() { - symbol_stack_pop_push(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); + symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); } template @@ -375,6 +379,12 @@ class parse_ll_t case 4: symbol_stack_pop_push2(); break; } } + + template + inline void symbol_stack_produce(parse_token_t tok) + { + symbol_stack_pop_push_production(T::production(tok.type, tok.keyword)); + } }; void parse_ll_t::dump_stack(void) const @@ -470,13 +480,6 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) } } -void parse_ll_t::accept_token_job(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job); - //symbol_stack_pop_push(symbol_statement, symbol_job_continuation); - symbol_stack_pop_push2(); -} - void parse_ll_t::accept_token_job_continuation(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_job_continuation); @@ -484,12 +487,12 @@ void parse_ll_t::accept_token_job_continuation(parse_token_t token) { case parse_token_type_pipe: // Pipe, continuation - symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation); + symbol_stack_pop_push_production(1); break; default: // Not a pipe, no job continuation - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; } } @@ -506,26 +509,26 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: - symbol_stack_pop_push(symbol_boolean_statement); + symbol_stack_pop_push_production(0); break; case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: - symbol_stack_pop_push(symbol_block_statement); + symbol_stack_pop_push_production(1); break; case parse_keyword_if: - symbol_stack_pop_push(symbol_if_statement); + 
symbol_stack_pop_push_production(2); break; case parse_keyword_else: - symbol_stack_pop_push(); + symbol_stack_pop(); break; case parse_keyword_switch: - symbol_stack_pop_push(symbol_switch_statement); + symbol_stack_pop_push_production(3); break; case parse_keyword_end: @@ -538,7 +541,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: - symbol_stack_pop_push(symbol_decorated_statement); + symbol_stack_pop_push_production(4); break; } @@ -570,22 +573,22 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) break; case parse_keyword_for: - symbol_stack_pop_push(symbol_for_header); + symbol_stack_pop_push_production(0); break; case parse_keyword_while: - symbol_stack_pop_push(symbol_while_header); + symbol_stack_pop_push_production(1); + break; + + case parse_keyword_function: + symbol_stack_pop_push_production(2); break; case parse_keyword_begin: - symbol_stack_pop_push(symbol_begin_header); + symbol_stack_pop_push_production(3); break; - case parse_keyword_function: - symbol_stack_pop_push(symbol_function_header); - break; - default: token_unhandled(token, __FUNCTION__); break; @@ -602,163 +605,52 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_else_clause(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_clause); - switch (token.keyword) - { - case parse_keyword_else: - symbol_stack_pop_push(parse_keyword_else, symbol_else_continuation); - break; - - default: - symbol_stack_pop_push(); - break; - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_else_continuation(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_continuation); - switch (token.keyword) - { - case parse_keyword_if: - symbol_stack_pop_push(symbol_if_clause, symbol_else_clause); - break; - - default: - symbol_stack_pop_push(parse_token_type_end, symbol_job_list); - break; - } + 
symbol_stack_produce(token); } void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - top_node_set_tag(token.keyword); - symbol_stack_pop_push(token.keyword, symbol_statement); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_case_item_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_case_item_list); - switch (token.keyword) - { - case parse_keyword_case: - symbol_stack_pop_push(symbol_case_item, symbol_case_item_list); - break; - - default: - // empty list - symbol_stack_pop_push(); - break; - } + top_node_set_tag(token.keyword); + symbol_stack_produce(token); } void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_command: - top_node_set_tag(parse_keyword_command); - symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); - break; - - case parse_keyword_builtin: - top_node_set_tag(parse_keyword_builtin); - symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); - break; - - default: - top_node_set_tag(parse_keyword_none); - symbol_stack_pop_push(symbol_plain_statement); - break; - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } + top_node_set_tag(token.keyword); + symbol_stack_produce(token); } void parse_ll_t::accept_token_plain_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); + 
symbol_stack_produce(token); } void parse_ll_t::accept_token_argument_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_argument_list); - if (token.type == parse_token_type_string) - { - symbol_stack_pop_push(symbol_argument_list_nonempty); - } - else - { - symbol_stack_pop_push(); - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); - switch (token.type) - { - case parse_token_type_string: - case parse_token_type_redirection: - symbol_stack_pop_push(symbol_argument_or_redirection, symbol_arguments_or_redirections_list); - break; - - default: - // Some other token, end of list - symbol_stack_pop_push(); - break; - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); - switch (token.type) - { - case parse_token_type_string: - symbol_stack_pop_push(parse_token_type_string); - // Got an argument - break; - - case parse_token_type_redirection: - symbol_stack_pop_push(parse_token_type_redirection); - // Got a redirection - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } + symbol_stack_produce(token); } bool parse_ll_t::accept_token_string(parse_token_t token) @@ -769,7 +661,7 @@ bool parse_ll_t::accept_token_string(parse_token_t token) { case parse_token_type_string: // Got our string - symbol_stack_pop_push(); + symbol_stack_pop(); result = true; break; @@ -841,7 +733,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_job: - accept_token_job(token); + symbol_stack_pop_push2(); break; case symbol_job_continuation: @@ -853,11 +745,11 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_if_statement: - symbol_stack_pop_push(symbol_if_clause, symbol_else_clause, parse_keyword_end); + 
symbol_stack_produce(token); break; case symbol_if_clause: - symbol_stack_pop_push(parse_keyword_if, symbol_job, parse_token_type_end, symbol_job_list); + symbol_stack_produce(token); break; case symbol_else_clause: @@ -869,39 +761,39 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); + symbol_stack_produce(token); break; case symbol_block_header: - accept_token_block_header(token); + symbol_stack_produce(token); break; case symbol_for_header: - symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_produce(token); break; case symbol_while_header: - symbol_stack_pop_push(parse_keyword_while, symbol_statement); + symbol_stack_produce(token); break; case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin); + symbol_stack_produce(token); break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_argument_list); + symbol_stack_produce(token); break; case symbol_switch_statement: - symbol_stack_pop_push(parse_keyword_switch, parse_token_type_string, parse_token_type_end, symbol_case_item_list, parse_keyword_end); + symbol_stack_produce(token); break; case symbol_case_item_list: - accept_token_case_item_list(token); + symbol_stack_produce(token); break; case symbol_case_item: - symbol_stack_pop_push(parse_keyword_case, symbol_argument_list, parse_token_type_end, symbol_job_list); + symbol_stack_produce(token); break; case symbol_boolean_statement: @@ -917,7 +809,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_argument_list_nonempty: - symbol_stack_pop_push(parse_token_type_string, symbol_argument_list); + symbol_stack_produce(token); break; case 
symbol_argument_list: diff --git a/parse_tree.h b/parse_tree.h index 9e3f087e0..39e370af6 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -159,12 +159,27 @@ namespace parse_symbols #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - /* Placeholder */ - struct none + #define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } + + #define NO_PRODUCTION (-1) + + + template + struct Token { - SYMBOL(token_type_invalid); + SYMBOL(WHICH); + + typedef Token t0; + typedef Token t1; + typedef Token t2; + typedef Token t3; + typedef Token t4; + typedef Token t5; }; + /* Placeholder */ + typedef Token none; + struct EMPTY { typedef none t0; @@ -197,12 +212,6 @@ namespace parse_symbols typedef P5 p5; }; - template - struct Token - { - SYMBOL(WHICH); - }; - template struct Keyword { @@ -220,8 +229,6 @@ namespace parse_symbols struct else_continuation; struct switch_statement; struct decorated_statement; - struct else_clause; - struct else_continuation; struct switch_statement; struct case_item_list; struct case_item; @@ -280,11 +287,13 @@ namespace parse_symbols struct if_statement : Seq > { SYMBOL(symbol_if_statement); + PRODUCE(0) }; struct if_clause : Seq, job, statement_terminator, job_list> { SYMBOL(symbol_if_clause); + PRODUCE(0) }; struct else_clause : OR< @@ -293,6 +302,15 @@ namespace parse_symbols > { SYMBOL(symbol_else_clause); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_else: return 1; + default: return 0; + } + } }; struct else_continuation : OR< @@ -301,6 +319,15 @@ namespace parse_symbols > { SYMBOL(symbol_else_continuation); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_if: return 0; + default: return 1; + } + } }; struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword @@ -316,6 +343,15 @@ namespace parse_symbols > { 
SYMBOL(symbol_case_item_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_case: return 1; + default: return 0; + } + } }; struct case_item : Seq, argument_list, statement_terminator, job_list> @@ -331,11 +367,20 @@ namespace parse_symbols struct argument_list : OR { SYMBOL(symbol_argument_list); + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: return 1; + default: return 0; + } + } }; struct block_statement : Seq, arguments_or_redirections_list> { SYMBOL(symbol_block_statement); + PRODUCE(0) }; struct block_header : OR @@ -371,6 +416,17 @@ namespace parse_symbols > { SYMBOL(symbol_boolean_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_and: return 0; + case parse_keyword_or: return 1; + case parse_keyword_not: return 2; + default: return NO_PRODUCTION; + } + } }; /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ @@ -381,11 +437,27 @@ namespace parse_symbols > { SYMBOL(symbol_decorated_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_command: return 0; + case parse_keyword_builtin: return 1; + default: return 2; + } + } }; struct plain_statement : Seq, arguments_or_redirections_list> { SYMBOL(symbol_plain_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + return 0; + } + }; struct arguments_or_redirections_list : OR< @@ -393,6 +465,18 @@ namespace parse_symbols Seq > { SYMBOL(symbol_arguments_or_redirections_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } + } }; struct argument_or_redirection : OR< @@ 
-401,6 +485,16 @@ namespace parse_symbols > { SYMBOL(symbol_argument_or_redirection); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: return 0; + case parse_token_type_redirection: return 1; + default: return NO_PRODUCTION; + } + } }; struct redirection : Token From 3e3eefc2dcb2e0e31b224703a063e05dc8c67996 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 22 Jul 2013 18:26:15 -0700 Subject: [PATCH 017/177] Improvements to new parser. All functions and completions now parse. --- builtin.cpp | 61 ++-- builtin_printf.cpp | 2 +- exec.cpp | 16 +- fish.xcodeproj/project.pbxproj | 2 + fish_tests.cpp | 6 +- parse_exec.cpp | 170 ++++----- parse_exec.h | 49 +-- parse_tree.cpp | 628 +++++++++++++++------------------ parse_tree.h | 414 ++-------------------- parse_tree_construction.h | 586 ++++++++++++++++++++++++++++++ parse_util.cpp | 10 +- tokenizer.cpp | 10 +- tokenizer.h | 5 +- 13 files changed, 1080 insertions(+), 879 deletions(-) create mode 100644 parse_tree_construction.h diff --git a/builtin.cpp b/builtin.cpp index 3b40be3c4..d2a80a8c4 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3946,53 +3946,53 @@ static int builtin_history(parser_t &parser, wchar_t **argv) struct parse_execution_simulator_t : public parse_execution_visitor_t { wcstring_list_t result; - + wcstring &back() { assert(! 
result.empty()); return result.back(); } - + void append_src(node_offset_t idx) { wcstring tmp; context->get_source(idx, &tmp); back().append(tmp); } - + void append(const wchar_t *s) { back().append(s); } - + bool enter_job_list(void) { return true; } - + bool enter_job(void) { result.resize(result.size() + 1); return true; } - + void visit_statement(void) { } - + virtual void visit_boolean_statement(void) { } - + virtual void enter_if_clause(const exec_if_clause_t &statement) { } - + virtual void exit_if_clause(const exec_if_clause_t &statement) { append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); } - + void visit_basic_statement(const exec_basic_statement_t &statement) { wcstring &line = this->back(); @@ -4005,34 +4005,35 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t case exec_basic_statement_t::decoration_builtin: line.append(L" "); break; - + case exec_basic_statement_t::decoration_command: line.append(L" "); break; - + default: - break; + break; } - + line.append(L"cmd:"); this->append_src(statement.command_idx); for (size_t i=0; i < statement.arguments().size(); i++) { const exec_argument_t &arg = statement.arguments().at(i); append(L" "); - append(L"arg:"); + append(L"arg:"); append_src(arg.parse_node_idx); } } - - void visit_function(const exec_function_header_t &function) { + + void visit_function(const exec_function_header_t &function) + { wcstring &line = this->back(); line.append(L"define function: "); wcstring tmp; context->get_source(function.name_idx, &tmp); line.append(tmp); } - + void exit_job_list(void) { } @@ -4074,17 +4075,19 @@ int builtin_parse(parser_t &parser, wchar_t **argv) } else { - parse_execution_context_t ctx(parse_tree, src); - parse_execution_simulator_t sim; - sim.context = &ctx; - while (ctx.visit_next_node(&sim)) - { - } - stdout_buffer.append(L"Simulating execution:\n"); - for (size_t i=0; i < sim.result.size(); i++) - { - stdout_buffer.append(sim.result.at(i)); - 
stdout_buffer.push_back(L'\n'); + if (0) { + parse_execution_context_t ctx(parse_tree, src); + parse_execution_simulator_t sim; + sim.context = &ctx; + while (ctx.visit_next_node(&sim)) + { + } + stdout_buffer.append(L"Simulating execution:\n"); + for (size_t i=0; i < sim.result.size(); i++) + { + stdout_buffer.append(sim.result.at(i)); + stdout_buffer.push_back(L'\n'); + } } } } diff --git a/builtin_printf.cpp b/builtin_printf.cpp index efe4a2118..b7df7fa82 100644 --- a/builtin_printf.cpp +++ b/builtin_printf.cpp @@ -632,7 +632,7 @@ int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, wch } break; } - + modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true); for (;; f++, direc_length++) diff --git a/exec.cpp b/exec.cpp index d5c7d4bf8..5f4676631 100644 --- a/exec.cpp +++ b/exec.cpp @@ -537,7 +537,7 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce return false; } } - + /* Now see if we have a redirection involving a file. The only one we allow is /dev/null, which we assume will not fail. */ bool result = true; for (size_t idx = 0; idx < job->io.size(); idx++) @@ -545,8 +545,8 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce const shared_ptr &io = job->io.at(idx); if (redirection_is_to_real_file(io.get())) { - result = false; - break; + result = false; + break; } } return result; @@ -1192,16 +1192,16 @@ void exec(parser_t &parser, job_t *j) forking is expensive, fish tries to avoid it when possible. */ - + bool fork_was_skipped = false; - + const shared_ptr stdout_io = io_chain_get(j->io, STDOUT_FILENO); const shared_ptr stderr_io = io_chain_get(j->io, STDERR_FILENO); - + /* If we are outputting to a file, we have to actually do it, even if we have no output, so that we can truncate the file. Does not apply to /dev/null. */ bool must_fork = redirection_is_to_real_file(stdout_io.get()) || redirection_is_to_real_file(stderr_io.get()); if (! 
must_fork) - { + { if (p->next == NULL) { const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; @@ -1250,7 +1250,7 @@ void exec(parser_t &parser, job_t *j) } } } - + if (fork_was_skipped) { diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 93293e50c..5ae10bfc4 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -465,6 +465,7 @@ D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; + D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree_construction.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -589,6 +590,7 @@ D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, + D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, D0C52F341765281F00BFAB82 /* parse_exec.h */, D0C52F331765281F00BFAB82 /* parse_exec.cpp */, diff --git a/fish_tests.cpp b/fish_tests.cpp index 739b47b02..6ebd3d220 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -534,7 +534,7 @@ static void test_utils() { say(L"Testing utils"); const wchar_t *a = L"echo (echo (echo hi"; - + const wchar_t *begin = NULL, *end = NULL; parse_util_cmdsubst_extent(a, 0, &begin, &end); if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); @@ -544,7 
+544,7 @@ static void test_utils() if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); parse_util_cmdsubst_extent(a, 3, &begin, &end); if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); - + parse_util_cmdsubst_extent(a, 8, &begin, &end); if (begin != a + wcslen(L"echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); @@ -1842,7 +1842,7 @@ int main(int argc, char **argv) builtin_init(); reader_init(); env_init(); - + test_new_parser(); return 0; diff --git a/parse_exec.cpp b/parse_exec.cpp index 0028d530c..3f2074f4a 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -6,11 +6,11 @@ struct exec_node_t node_offset_t parse_node_idx; node_offset_t body_parse_node_idx; bool visited; - + explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) { } - + explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) { } @@ -18,7 +18,7 @@ struct exec_node_t exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) { - + } @@ -26,21 +26,21 @@ class parse_exec_t { parse_node_tree_t parse_tree; wcstring src; - + /* The stack of nodes as we execute them */ std::vector exec_nodes; - + /* The stack of commands being built */ std::vector assembling_statements; - + /* Current visitor (very transient) */ struct parse_execution_visitor_t * visitor; - + const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); } - + void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) { PARSE_ASSERT(! 
exec_nodes.empty()); @@ -48,10 +48,10 @@ class parse_exec_t exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); node_offset_t child_node_idx = parse_node.child_start; - + // Remove the top node exec_nodes.pop_back(); - + // Append the given children, backwards const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) @@ -65,18 +65,18 @@ class parse_exec_t } } - + void push(node_offset_t global_idx) { exec_nodes.push_back(exec_node_t(global_idx)); } - + void push(const exec_node_t &node) { exec_nodes.push_back(node); } - + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -92,10 +92,10 @@ class parse_exec_t const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); PARSE_ASSERT(child_idx < parse_node.child_count); node_offset_t child_node_idx = parse_node.child_start + child_idx; - + // Remove the top node exec_nodes.pop_back(); - + // Append the given children, backwards node_offset_t cursor = child_count; while (cursor--) @@ -104,20 +104,20 @@ class parse_exec_t } } } - + void pop() { PARSE_ASSERT(! 
exec_nodes.empty()); exec_nodes.pop_back(); } - + void pop_push_all() { exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); pop_push(0, parse_node.child_count); } - + void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const { const parse_node_t &node = parse_tree.at(idx); @@ -130,28 +130,28 @@ class parse_exec_t { case parse_token_type_string: // Argument - { - exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = child_idx; - output->arguments.push_back(arg); - } - break; - + { + exec_argument_t arg = exec_argument_t(); + arg.parse_node_idx = child_idx; + output->arguments.push_back(arg); + } + break; + case parse_token_type_redirection: // Redirection - { - exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = child_idx; - output->redirections.push_back(redirect); - } - break; - + { + exec_redirection_t redirect = exec_redirection_t(); + redirect.parse_node_idx = child_idx; + output->redirections.push_back(redirect); + } + break; + default: PARSER_DIE(); break; } } - + void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const { node_offset_t idx = start_idx; @@ -173,7 +173,7 @@ class parse_exec_t } } } - + void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) { const parse_node_t &node = parse_tree.at(idx); @@ -185,10 +185,10 @@ class parse_exec_t assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); visitor->visit_basic_statement(statement); } - + void assemble_block_statement(node_offset_t parse_node_idx) { - + const parse_node_t &node = parse_tree.at(parse_node_idx); PARSE_ASSERT(node.type == symbol_block_statement); PARSE_ASSERT(node.child_count == 5); @@ -196,24 +196,24 @@ class parse_exec_t // Fetch arguments and redirections. 
These ought to be evaluated before the job list exec_block_statement_t statement; assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); - + // Generic visit visitor->enter_block_statement(statement); - + // Dig into the header to discover the type const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); PARSE_ASSERT(header_parent.type == symbol_block_header); - PARSE_ASSERT(header_parent.child_count == 1); + PARSE_ASSERT(header_parent.child_count == 1); const node_offset_t header_idx = header_parent.child_offset(0); - + // Fetch body (job list) node_offset_t body_idx = node.child_offset(2); PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); - + pop(); push(exec_node_t(header_idx, body_idx)); } - + /* which: 0 -> if, 1 -> else if, 2 -> else */ void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) { @@ -227,7 +227,7 @@ class parse_exec_t PARSE_ASSERT(node.type == symbol_else_continuation); PARSE_ASSERT(node.child_count == 2); } - + struct exec_if_clause_t clause; if (which == 0) { @@ -252,7 +252,7 @@ class parse_exec_t pop(); } } - + void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const { node_offset_t idx = start_idx; @@ -282,24 +282,24 @@ class parse_exec_t } } } - + void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) { const parse_node_t &node = parse_tree.at(node_idx); PARSE_ASSERT(node.type == symbol_case_item); - + // add a new case size_t len = statement->cases.size(); statement->cases.resize(len + 1); exec_switch_case_t &new_case = statement->cases.back(); - + // assemble it new_case.body = node.child_offset(3); assemble_arguments(node.child_offset(1), &new_case.arguments); - - + + } - + void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) { const parse_node_t &node = parse_tree.at(node_idx); @@ -311,21 +311,21 @@ class parse_exec_t 
assemble_case_item_list(statement, node.child_offset(1)); } } - + void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) { PARSE_ASSERT(parse_node.type == symbol_switch_statement); exec_switch_statement_t statement; - + statement.argument.parse_node_idx = parse_node.child_offset(1); assemble_case_item_list(&statement, parse_node.child_offset(3)); - + visitor->visit_switch_statement(statement); - + // pop off the switch pop(); } - + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { PARSE_ASSERT(header.type == symbol_function_header); @@ -336,17 +336,17 @@ class parse_exec_t function_info.body_idx = exec_node.body_parse_node_idx; assemble_arguments(header.child_offset(2), &function_info.arguments); visitor->visit_function(function_info); - + // Always pop pop(); } - + void enter_parse_node(size_t idx); void run_top_node(void); - - public: - + +public: + void get_node_string(node_offset_t idx, wcstring *output) const { const parse_node_t &node = parse_tree.at(idx); @@ -354,9 +354,9 @@ class parse_exec_t PARSE_ASSERT(node.source_start + node.source_length <= src.size()); output->assign(src, node.source_start, node.source_length); } - + bool visit_next_node(parse_execution_visitor_t *v); - + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) { if (! 
parse_tree.empty()) @@ -373,7 +373,7 @@ void parse_exec_t::run_top_node() const node_offset_t parse_node_idx = exec_node.parse_node_idx; const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); bool log = true; - + if (log) { wcstring tmp; @@ -381,7 +381,7 @@ void parse_exec_t::run_top_node() tmp.append(parse_node.describe()); printf("%ls\n", tmp.c_str()); } - + switch (parse_node.type) { case symbol_job_list: @@ -404,7 +404,7 @@ void parse_exec_t::run_top_node() pop_push(0, 2); } break; - + case symbol_job: { PARSE_ASSERT(parse_node.child_count == 2); @@ -412,7 +412,7 @@ void parse_exec_t::run_top_node() pop_push_all(); break; } - + case symbol_job_continuation: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); if (parse_node.child_count == 0) @@ -426,51 +426,51 @@ void parse_exec_t::run_top_node() // Skip the pipe pop_push(1, 2); } - break; - + break; + case symbol_statement: { PARSE_ASSERT(parse_node.child_count == 1); pop_push_all(); break; } - + case symbol_block_statement: { PARSE_ASSERT(parse_node.child_count == 5); assemble_block_statement(parse_node_idx); break; } - + case symbol_block_header: { PARSE_ASSERT(parse_node.child_count == 1); pop_push_all(); break; } - + case symbol_function_header: { PARSE_ASSERT(parse_node.child_count == 3); assemble_function_header(exec_node, parse_node); break; } - + case symbol_if_statement: { - PARSE_ASSERT(parse_node.child_count == 3); + PARSE_ASSERT(parse_node.child_count == 4); pop_push(0, 2); break; } - + case symbol_if_clause: { PARSE_ASSERT(parse_node.child_count == 4); assemble_if_else_clause(exec_node, parse_node, 0); pop(); - break; + break; } - + case symbol_else_clause: { PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); @@ -486,7 +486,7 @@ void parse_exec_t::run_top_node() } break; } - + case symbol_else_continuation: { // Figure out if this is an else if or a terminating else @@ -505,17 +505,17 @@ void parse_exec_t::run_top_node() } break; } - + 
case symbol_switch_statement: { assemble_switch_statement(exec_node, parse_node); break; } - + case symbol_decorated_statement: { PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - + node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); parse_keyword_t decoration = static_cast(parse_node.tag); assemble_command_for_plain_statement(plain_statement_idx, decoration); @@ -528,20 +528,20 @@ void parse_exec_t::run_top_node() case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); + fprintf(stderr, "Unexpected token type %ls at index %ld. This should have been handled by the parent.\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; - + case parse_token_type_end: PARSE_ASSERT(parse_node.child_count == 0); pop(); break; - + default: fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; - + } } @@ -553,7 +553,7 @@ bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) { return false; } - + visitor = v; run_top_node(); visitor = NULL; diff --git a/parse_exec.h b/parse_exec.h index 197f656d3..1eea99ab1 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -13,13 +13,13 @@ class parse_exec_t; class parse_execution_context_t { parse_exec_t *ctx; //owned - - public: + +public: parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); ~parse_execution_context_t(); - + bool visit_next_node(parse_execution_visitor_t *visitor); - + // Gets the source for a node at a given index void get_source(node_offset_t idx, wcstring *result) const; }; @@ -53,10 +53,10 @@ struct exec_basic_statement_t { // Node containing the command node_offset_t command_idx; - + // Arguments 
exec_arguments_and_redirections_t arguments_and_redirections; - + // Decoration enum { @@ -64,9 +64,9 @@ struct exec_basic_statement_t decoration_command, decoration_builtin } decoration; - + exec_basic_statement_t(); - + void set_decoration(uint32_t k) { PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); @@ -86,12 +86,12 @@ struct exec_basic_statement_t break; } } - + const exec_argument_list_t &arguments() const { return arguments_and_redirections.arguments; } - + const exec_redirection_list_t &redirections() const { return arguments_and_redirections.redirections; @@ -102,10 +102,10 @@ struct exec_function_header_t { // Node containing the function name node_offset_t name_idx; - + // Node containing the function body node_offset_t body_idx; - + // Arguments exec_argument_list_t arguments; }; @@ -138,23 +138,32 @@ struct parse_execution_visitor_t { node_offset_t node_idx; parse_execution_context_t *context; - + parse_execution_visitor_t() : node_idx(0), context(NULL) { } - - virtual bool enter_job_list(void) { return true; } - virtual bool enter_job(void) { return true; } + + virtual bool enter_job_list(void) + { + return true; + } + virtual bool enter_job(void) + { + return true; + } virtual void visit_statement(void) { } virtual void visit_function(const exec_function_header_t &function) { } - virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } - + virtual bool enter_block_statement(const exec_block_statement_t &statement) + { + return true; + } + virtual void enter_if_clause(const exec_if_clause_t &statement) { } virtual void exit_if_clause(const exec_if_clause_t &statement) { } - + virtual void visit_switch_statement(const exec_switch_statement_t &header) { } - + virtual void visit_boolean_statement(void) { } virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } virtual void exit_job(void) { } diff --git a/parse_tree.cpp b/parse_tree.cpp index 
24bf41e7c..aea3e729f 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,4 +1,4 @@ -#include "parse_tree.h" +#include "parse_tree_construction.h" #include "tokenizer.h" #include @@ -11,26 +11,28 @@ wcstring parse_error_t::describe(const wcstring &src) const { // Locate the beginning of this line of source size_t line_start = 0; - + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline size_t newline = src.find_last_of(L'\n', source_start); + fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); if (newline != wcstring::npos) { - line_start = newline + 1; + line_start = newline;// + 1; } - + size_t line_end = src.find(L'\n', source_start + source_length); if (line_end == wcstring::npos) { line_end = src.size(); } assert(line_end >= line_start); + fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); assert(source_start >= line_start); - + // Append the line of text result.push_back(L'\n'); result.append(src, line_start, line_end - line_start); - + // Append the caret line result.push_back(L'\n'); result.append(source_start - line_start, L' '); @@ -43,68 +45,115 @@ wcstring token_type_description(parse_token_type_t type) { switch (type) { - case token_type_invalid: return L"invalid"; - - case symbol_job_list: return L"job_list"; - case symbol_job: return L"job"; - case symbol_job_continuation: return L"job_continuation"; - - case symbol_statement: return L"statement"; - case symbol_block_statement: return L"block_statement"; - case symbol_block_header: return L"block_header"; - case symbol_for_header: return L"for_header"; - case symbol_while_header: return L"while_header"; - case symbol_begin_header: return L"begin_header"; - case symbol_function_header: return L"function_header"; - - case symbol_if_statement: return L"if_statement"; - case symbol_if_clause: return L"if_clause"; - case 
symbol_else_clause: return L"else_clause"; - case symbol_else_continuation: return L"else_continuation"; - - case symbol_switch_statement: return L"switch_statement"; - case symbol_case_item_list: return L"case_item_list"; - case symbol_case_item: return L"case_item"; - - case symbol_argument_list_nonempty: return L"argument_list_nonempty"; - case symbol_argument_list: return L"argument_list"; - - case symbol_boolean_statement: return L"boolean_statement"; - case symbol_decorated_statement: return L"decorated_statement"; - case symbol_plain_statement: return L"plain_statement"; - case symbol_arguments_or_redirections_list: return L"arguments_or_redirections_list"; - case symbol_argument_or_redirection: return L"argument_or_redirection"; - - case parse_token_type_string: return L"token_string"; - case parse_token_type_pipe: return L"token_pipe"; - case parse_token_type_redirection: return L"token_redirection"; - case parse_token_background: return L"token_background"; - case parse_token_type_end: return L"token_end"; - case parse_token_type_terminate: return L"token_terminate"; - - default: return format_string(L"Unknown token type %ld", static_cast(type)); + case token_type_invalid: + return L"invalid"; + + case symbol_job_list: + return L"job_list"; + case symbol_job: + return L"job"; + case symbol_job_continuation: + return L"job_continuation"; + + case symbol_statement: + return L"statement"; + case symbol_block_statement: + return L"block_statement"; + case symbol_block_header: + return L"block_header"; + case symbol_for_header: + return L"for_header"; + case symbol_while_header: + return L"while_header"; + case symbol_begin_header: + return L"begin_header"; + case symbol_function_header: + return L"function_header"; + + case symbol_if_statement: + return L"if_statement"; + case symbol_if_clause: + return L"if_clause"; + case symbol_else_clause: + return L"else_clause"; + case symbol_else_continuation: + return L"else_continuation"; + + case 
symbol_switch_statement: + return L"switch_statement"; + case symbol_case_item_list: + return L"case_item_list"; + case symbol_case_item: + return L"case_item"; + + case symbol_argument_list_nonempty: + return L"argument_list_nonempty"; + case symbol_argument_list: + return L"argument_list"; + + case symbol_boolean_statement: + return L"boolean_statement"; + case symbol_decorated_statement: + return L"decorated_statement"; + case symbol_plain_statement: + return L"plain_statement"; + case symbol_arguments_or_redirections_list: + return L"arguments_or_redirections_list"; + case symbol_argument_or_redirection: + return L"argument_or_redirection"; + + case parse_token_type_string: + return L"token_string"; + case parse_token_type_pipe: + return L"token_pipe"; + case parse_token_type_redirection: + return L"token_redirection"; + case parse_token_type_background: + return L"token_background"; + case parse_token_type_end: + return L"token_end"; + case parse_token_type_terminate: + return L"token_terminate"; + case symbol_optional_background: + return L"optional_background"; } + return format_string(L"Unknown token type %ld", static_cast(type)); } wcstring keyword_description(parse_keyword_t k) { switch (k) { - case parse_keyword_none: return L"none"; - case parse_keyword_if: return L"if"; - case parse_keyword_else: return L"else"; - case parse_keyword_for: return L"for"; - case parse_keyword_in: return L"in"; - case parse_keyword_while: return L"while"; - case parse_keyword_begin: return L"begin"; - case parse_keyword_function: return L"function"; - case parse_keyword_switch: return L"switch"; - case parse_keyword_end: return L"end"; - case parse_keyword_and: return L"and"; - case parse_keyword_or: return L"or"; - case parse_keyword_not: return L"not"; - case parse_keyword_command: return L"command"; - case parse_keyword_builtin: return L"builtin"; + case parse_keyword_none: + return L"none"; + case parse_keyword_if: + return L"if"; + case parse_keyword_else: + return 
L"else"; + case parse_keyword_for: + return L"for"; + case parse_keyword_in: + return L"in"; + case parse_keyword_while: + return L"while"; + case parse_keyword_begin: + return L"begin"; + case parse_keyword_function: + return L"function"; + case parse_keyword_switch: + return L"switch"; + case parse_keyword_end: + return L"end"; + case parse_keyword_and: + return L"and"; + case parse_keyword_or: + return L"or"; + case parse_keyword_not: + return L"not"; + case parse_keyword_command: + return L"command"; + case parse_keyword_builtin: + return L"builtin"; default: return format_string(L"Unknown keyword type %ld", static_cast(k)); } @@ -123,7 +172,7 @@ struct parse_token_t enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; - + wcstring describe() const; }; @@ -147,19 +196,28 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ case TOK_STRING: result.type = parse_token_type_string; break; - + case TOK_PIPE: result.type = parse_token_type_pipe; break; - + case TOK_END: result.type = parse_token_type_end; break; - + case TOK_BACKGROUND: - result.type = parse_token_background; + result.type = parse_token_type_background; break; + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_IN: + case TOK_REDIRECT_FD: + case TOK_REDIRECT_NOCLOB: + result.type = parse_token_type_redirection; + break; + + default: fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__); assert(0); @@ -172,15 +230,15 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & { assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); - + const size_t spacesPerIndent = 2; - + // unindent statement lists by 1 to flatten them if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) { if (indent > 0) indent -= 1; } - + append_format(*result, L"%2lu - %l2u ", *line, start); 
result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); @@ -206,7 +264,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; - + size_t line = 0; wcstring result; dump_tree_recursive(nodes, src, 0, 0, &result, &line); @@ -218,15 +276,15 @@ struct parse_stack_element_t enum parse_token_type_t type; enum parse_keyword_t keyword; node_offset_t node_idx; - + parse_stack_element_t(parse_token_type_t t) : type(t), keyword(parse_keyword_none), node_idx(-1) { } - + parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { } - + wcstring describe(void) const { wcstring result = token_type_description(type); @@ -242,13 +300,13 @@ struct parse_stack_element_t class parse_ll_t { friend class parse_t; - + std::vector symbol_stack; // LL parser stack parse_node_tree_t nodes; - + bool fatal_errored; parse_error_list_t errors; - + // Constructor parse_ll_t() : fatal_errored(false) { @@ -258,33 +316,30 @@ class parse_ll_t symbol_stack.push_back(elem); // goal token nodes.push_back(parse_node_t(symbol_job_list)); } - + bool top_node_match_token(parse_token_t token); - + // implementation of certain parser constructions void accept_token(parse_token_t token, const wcstring &src); void accept_token_job_list(parse_token_t token); void accept_token_job(parse_token_t token); void accept_token_job_continuation(parse_token_t token); - void accept_token_statement(parse_token_t token); - void accept_token_block_header(parse_token_t token); void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); - void accept_token_boolean_statement(parse_token_t token); - void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); void accept_token_argument_list(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void 
accept_token_argument_or_redirection(parse_token_t token); bool accept_token_string(parse_token_t token); - + void token_unhandled(parse_token_t token, const char *function); - + void parse_error(const wchar_t *expected, parse_token_t token); + void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); - + void dump_stack(void) const; - + // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() { @@ -294,17 +349,17 @@ class parse_ll_t PARSE_ASSERT(top_symbol.node_idx < nodes.size()); return nodes.at(top_symbol.node_idx); } - + parse_token_type_t stack_top_type() const { return symbol_stack.back().type; } - + void top_node_set_tag(uint32_t tag) { this->node_for_top_symbol().tag = tag; } - + inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) { PARSE_ASSERT(tok->type != token_type_invalid); @@ -312,19 +367,19 @@ class parse_ll_t nodes.push_back(parse_node_t(tok->type)); nodes.at(parent_node_idx).child_count += 1; } - + inline void symbol_stack_pop() { symbol_stack.pop_back(); } - + // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { - + // Logging? 
- if (1) + if (0) { fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); @@ -333,17 +388,17 @@ class parse_ll_t if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); } - + // Get the node for the top symbol and tell it about its children size_t node_idx = symbol_stack.back().node_idx; parse_node_t &node = nodes.at(node_idx); - + // Should have no children yet PARSE_ASSERT(node.child_count == 0); - + // Tell the node where its children start node.child_start = nodes.size(); - + // Add nodes for the children // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); @@ -351,7 +406,7 @@ class parse_ll_t if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); - + // The above set the node_idx. Now replace the top of the stack. 
symbol_stack.pop_back(); if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); @@ -360,31 +415,69 @@ class parse_ll_t if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } - + template - inline void symbol_stack_pop_push2() + inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0) { symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); } - + template - inline void symbol_stack_pop_push_production(int which) + inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0) { + symbol_stack_pop_push_int(T::get_token()); + } + + // Singular. Sole productions are always of type Seq. + template + inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0) + { + typedef typename T::sole_production seq; + symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token()); + } + + // Plural productions, of type Or. 
+ template + inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0) + { + typedef typename T::productions ors; + int which = T::production(tok.type, tok.keyword); switch (which) { - case 0: symbol_stack_pop_push2(); break; - case 1: symbol_stack_pop_push2(); break; - case 2: symbol_stack_pop_push2(); break; - case 3: symbol_stack_pop_push2(); break; - case 4: symbol_stack_pop_push2(); break; + case 0: + symbol_stack_pop_push2(); + break; + case 1: + symbol_stack_pop_push2(); + break; + case 2: + symbol_stack_pop_push2(); + break; + case 3: + symbol_stack_pop_push2(); + break; + case 4: + symbol_stack_pop_push2(); + break; + + case NO_PRODUCTION: + parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str()); + break; + + default: + parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str()); + break; } } + // Non-sequence basic productions template - inline void symbol_stack_produce(parse_token_t tok) + inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0) { - symbol_stack_pop_push_production(T::production(tok.type, tok.keyword)); + symbol_stack_pop_push_int(T::sole_production::get_token()); } + + }; void parse_ll_t::dump_stack(void) const @@ -410,7 +503,7 @@ void parse_ll_t::dump_stack(void) const } } } - + fprintf(stderr, "Stack dump (%lu elements):\n", symbol_stack.size()); for (size_t idx = 0; idx < lines.size(); idx++) { @@ -422,9 +515,31 @@ void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); this->dump_stack(); - PARSER_DIE(); + parse_error_t err; + err.text = format_string(L"Unhandled token with type %ls in function %s", token_type_description(token.type).c_str(), function); + err.source_start = 
token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + this->fatal_errored = true; } +void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +{ + this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + this->fatal_errored = true; +} + + void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { wcstring desc = token_type_description(token.type); @@ -436,172 +551,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_job_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job_list); - switch (token.type) - { - case parse_token_type_string: - // 'end' is special - switch (token.keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - symbol_stack_pop_push_production(0); - break; - - default: - // Normal string - symbol_stack_pop_push_production(1); - break; - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - symbol_stack_pop_push_production(1); - break; - - case parse_token_type_end: - // Empty line - symbol_stack_pop_push_production(2); - break; - - case parse_token_type_terminate: - // no more commands, just transition to empty - symbol_stack_pop_push_production(0); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_job_continuation(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job_continuation); - switch (token.type) - { - case parse_token_type_pipe: - // Pipe, continuation - symbol_stack_pop_push_production(1); - break; - - default: - // Not a pipe, no job continuation - symbol_stack_pop_push_production(0); - break; - } -} - 
- -void parse_ll_t::accept_token_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - symbol_stack_pop_push_production(0); - break; - - case parse_keyword_for: - case parse_keyword_while: - case parse_keyword_function: - case parse_keyword_begin: - symbol_stack_pop_push_production(1); - break; - - case parse_keyword_if: - symbol_stack_pop_push_production(2); - break; - - case parse_keyword_else: - symbol_stack_pop(); - break; - - case parse_keyword_switch: - symbol_stack_pop_push_production(3); - break; - - case parse_keyword_end: - PARSER_DIE(); //todo - break; - - // 'in' is only special within a for_header - case parse_keyword_in: - case parse_keyword_none: - case parse_keyword_command: - case parse_keyword_builtin: - case parse_keyword_case: - symbol_stack_pop_push_production(4); - break; - - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - case parse_token_type_terminate: - parse_error(L"statement", token); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_block_header(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_block_header); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_else: - PARSER_DIE(); //todo - break; - - case parse_keyword_for: - symbol_stack_pop_push_production(0); - break; - - - case parse_keyword_while: - symbol_stack_pop_push_production(1); - break; - - case parse_keyword_function: - symbol_stack_pop_push_production(2); - break; - - case parse_keyword_begin: - symbol_stack_pop_push_production(3); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } 
-} - void parse_ll_t::accept_token_else_clause(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_clause); @@ -614,25 +563,6 @@ void parse_ll_t::accept_token_else_continuation(parse_token_t token) symbol_stack_produce(token); } -void parse_ll_t::accept_token_boolean_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - top_node_set_tag(token.keyword); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_decorated_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - top_node_set_tag(token.keyword); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_plain_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_produce(token); -} void parse_ll_t::accept_token_argument_list(parse_token_t token) { @@ -664,7 +594,7 @@ bool parse_ll_t::accept_token_string(parse_token_t token) symbol_stack_pop(); result = true; break; - + default: token_unhandled(token, __FUNCTION__); break; @@ -687,7 +617,7 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; - + // We consumed this symbol symbol_stack.pop_back(); result = true; @@ -704,7 +634,7 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) { - bool logit = true; + bool logit = false; if (logit) { const wcstring txt = wcstring(src, token.source_start, token.source_length); @@ -724,107 +654,113 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - + switch (stack_top_type()) { - /* Symbols */ + /* Symbols */ case symbol_job_list: - accept_token_job_list(token); + symbol_stack_produce(token); break; - + case symbol_job: - symbol_stack_pop_push2(); + symbol_stack_produce(token); break; - 
+ case symbol_job_continuation: - accept_token_job_continuation(token); + symbol_stack_produce(token); break; case symbol_statement: - accept_token_statement(token); + symbol_stack_produce(token); break; - + case symbol_if_statement: symbol_stack_produce(token); break; - + case symbol_if_clause: symbol_stack_produce(token); break; - + case symbol_else_clause: accept_token_else_clause(token); break; - + case symbol_else_continuation: accept_token_else_continuation(token); break; - + case symbol_block_statement: symbol_stack_produce(token); break; - + case symbol_block_header: symbol_stack_produce(token); break; - + case symbol_for_header: symbol_stack_produce(token); break; - + case symbol_while_header: symbol_stack_produce(token); break; - + case symbol_begin_header: symbol_stack_produce(token); break; - + case symbol_function_header: symbol_stack_produce(token); break; - + case symbol_switch_statement: symbol_stack_produce(token); break; - + case symbol_case_item_list: symbol_stack_produce(token); break; - + case symbol_case_item: symbol_stack_produce(token); break; - + case symbol_boolean_statement: - accept_token_boolean_statement(token); + top_node_set_tag(token.keyword); + symbol_stack_produce(token); break; - + case symbol_decorated_statement: - accept_token_decorated_statement(token); + top_node_set_tag(token.keyword); + symbol_stack_produce(token); break; - + case symbol_plain_statement: - accept_token_plain_statement(token); + symbol_stack_produce(token); break; - + case symbol_argument_list_nonempty: symbol_stack_produce(token); break; - + case symbol_argument_list: accept_token_argument_list(token); break; - + case symbol_arguments_or_redirections_list: accept_token_arguments_or_redirections_list(token); break; - + case symbol_argument_or_redirection: accept_token_argument_or_redirection(token); break; - - /* Tokens */ + + case symbol_optional_background: + symbol_stack_produce(token); + break; + + /* Tokens */ case parse_token_type_string: consumed = 
accept_token_string(token); break; @@ -846,11 +782,13 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) parse_keyword_t result = parse_keyword_none; if (tok == TOK_STRING) { - - const struct { + + const struct + { const wchar_t *txt; parse_keyword_t keyword; - } keywords[] = { + } keywords[] = + { {L"if", parse_keyword_if}, {L"else", parse_keyword_else}, {L"for", parse_keyword_for}, @@ -867,7 +805,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"command", parse_keyword_command}, {L"builtin", parse_keyword_builtin} }; - + for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) { if (! wcscmp(keywords[i].txt, tok_txt)) @@ -888,36 +826,40 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); int tok_start = tok_get_pos(&tok); - + size_t tok_extent = tok_get_extent(&tok); + if (tok_type == TOK_ERROR) { fprintf(stderr, "Tokenizer error\n"); break; } - + parse_token_t token = parse_token_from_tokenizer_token(tok_type); token.tokenizer_type = tok_type; token.source_start = (size_t)tok_start; - token.source_length = wcslen(tok_txt); + token.source_length = tok_extent; token.keyword = keyword_for_token(tok_type, tok_txt); this->parser->accept_token(token, str); + + if (this->parser->fatal_errored) + break; } - wcstring result = dump_tree(this->parser->nodes, str); + wcstring result = L"";//dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); - + if (output != NULL) { output->swap(this->parser->nodes); this->parser->nodes.clear(); } - + if (errors != NULL) { errors->swap(this->parser->errors); this->parser->errors.clear(); } - + return ! 
this->parser->fatal_errored; } diff --git a/parse_tree.h b/parse_tree.h index 39e370af6..6b1fc0d19 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -26,11 +26,11 @@ struct parse_error_t { /** Text of the error */ wcstring text; - + /** Offset and length of the token in the source code that triggered this error */ size_t source_start; size_t source_length; - + /** Return a string describing the error, suitable for presentation to the user */ wcstring describe(const wcstring &src) const; }; @@ -40,8 +40,8 @@ class parse_ll_t; class parse_t { parse_ll_t * const parser; - - public: + +public: parse_t(); bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; @@ -49,7 +49,7 @@ class parse_t enum parse_token_type_t { token_type_invalid, - + // Non-terminal tokens symbol_job_list, symbol_job, @@ -61,33 +61,35 @@ enum parse_token_type_t symbol_while_header, symbol_begin_header, symbol_function_header, - + symbol_if_statement, symbol_if_clause, symbol_else_clause, symbol_else_continuation, - + symbol_switch_statement, symbol_case_item_list, symbol_case_item, - + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - + symbol_argument_list_nonempty, symbol_argument_list, + + symbol_optional_background, // Terminal types parse_token_type_string, parse_token_type_pipe, parse_token_type_redirection, - parse_token_background, + parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -117,32 +119,32 @@ wcstring keyword_description(parse_keyword_t type); /** Base class for nodes of a parse tree */ class parse_node_t { - public: - +public: + /* Type of the node */ enum parse_token_type_t type; - + /* Start in the source code */ size_t source_start; - + /* Length of our range in the source code */ size_t source_length; /* Children */ node_offset_t child_start; node_offset_t 
child_count; - + /* Type-dependent data */ uint32_t tag; - + /* Description */ wcstring describe(void) const; - + /* Constructor */ explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } - + node_offset_t child_offset(node_offset_t which) const { PARSE_ASSERT(which < child_count); @@ -154,360 +156,6 @@ class parse_node_tree_t : public std::vector { }; -namespace parse_symbols -{ - - #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - - #define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - - #define NO_PRODUCTION (-1) - - - template - struct Token - { - SYMBOL(WHICH); - - typedef Token t0; - typedef Token t1; - typedef Token t2; - typedef Token t3; - typedef Token t4; - typedef Token t5; - }; - - /* Placeholder */ - typedef Token none; - - struct EMPTY - { - typedef none t0; - typedef none t1; - typedef none t2; - typedef none t3; - typedef none t4; - typedef none t5; - }; - - template - struct Seq - { - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - }; - - template - struct OR - { - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - }; - - template - struct Keyword - { - static inline parse_keyword_t get_token() { return WHICH; } - }; - - struct job; - struct statement; - struct job_continuation; - struct boolean_statement; - struct block_statement; - struct if_statement; - struct if_clause; - struct else_clause; - struct else_continuation; - struct switch_statement; - struct decorated_statement; - struct switch_statement; - struct case_item_list; - struct case_item; - struct argument_list_nonempty; - struct argument_list; - struct block_statement; - struct block_header; - struct for_header; - struct while_header; - struct begin_header; - struct function_header; - struct boolean_statement; - struct 
decorated_statement; - struct plain_statement; - struct arguments_or_redirections_list; - struct argument_or_redirection; - struct redirection; - struct statement_terminator; - - /* A job_list is a list of jobs, separated by semicolons or newlines */ - struct job_list : OR< - EMPTY, - Seq, - Seq, job_list> - > - { - SYMBOL(symbol_job_list) - }; - - /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ - struct job : Seq - { - SYMBOL(symbol_job); - }; - - struct job_continuation : OR< - EMPTY, - Seq, statement, job_continuation> - > - { - SYMBOL(symbol_job_continuation); - }; - - /* A statement is a normal command, or an if / while / and etc */ - struct statement : OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > - { - SYMBOL(symbol_statement); - }; - - struct if_statement : Seq > - { - SYMBOL(symbol_if_statement); - PRODUCE(0) - }; - - struct if_clause : Seq, job, statement_terminator, job_list> - { - SYMBOL(symbol_if_clause); - PRODUCE(0) - }; - - struct else_clause : OR< - EMPTY, - Keyword, else_continuation - > - { - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: return 1; - default: return 0; - } - } - }; - - struct else_continuation : OR< - Seq, - Seq - > - { - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: return 0; - default: return 1; - } - } - }; - - struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword - > - { - SYMBOL(symbol_switch_statement); - }; - - struct case_item_list : OR - < - EMPTY, - case_item, case_item_list - > - { - SYMBOL(symbol_case_item_list); - - static int 
production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - default: return 0; - } - } - }; - - struct case_item : Seq, argument_list, statement_terminator, job_list> - { - SYMBOL(symbol_case_item); - }; - - struct argument_list_nonempty : Seq, argument_list> - { - SYMBOL(symbol_argument_list_nonempty); - }; - - struct argument_list : OR - { - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 1; - default: return 0; - } - } - }; - - struct block_statement : Seq, arguments_or_redirections_list> - { - SYMBOL(symbol_block_statement); - PRODUCE(0) - }; - - struct block_header : OR - { - SYMBOL(symbol_block_header); - }; - - struct for_header : Seq, Token, Keyword, arguments_or_redirections_list> - { - SYMBOL(symbol_for_header); - }; - - struct while_header : Seq, statement> - { - SYMBOL(symbol_while_header); - }; - - struct begin_header : Keyword - { - SYMBOL(symbol_begin_header); - }; - - struct function_header : Keyword - { - SYMBOL(symbol_function_header); - }; - - /* A boolean statement is AND or OR or NOT */ - struct boolean_statement : OR< - Seq, statement>, - Seq, statement>, - Seq, statement> - > - { - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: return 0; - case parse_keyword_or: return 1; - case parse_keyword_not: return 2; - default: return NO_PRODUCTION; - } - } - }; - - /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ - struct decorated_statement : OR< - Seq, plain_statement>, - Seq, plain_statement>, - plain_statement - > - { - SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: return 0; - case 
parse_keyword_builtin: return 1; - default: return 2; - } - } - }; - - struct plain_statement : Seq, arguments_or_redirections_list> - { - SYMBOL(symbol_plain_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - return 0; - } - - }; - - struct arguments_or_redirections_list : OR< - EMPTY, - Seq > - { - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } - }; - - struct argument_or_redirection : OR< - Token, - redirection - > - { - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 0; - case parse_token_type_redirection: return 1; - default: return NO_PRODUCTION; - } - } - }; - - struct redirection : Token - { - SYMBOL(parse_token_type_redirection); - }; - - struct statement_terminator : Token - { - SYMBOL(parse_token_type_end); - }; -} - /* Fish grammar: @@ -520,45 +168,45 @@ namespace parse_symbols # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation job = statement job_continuation - job_continuation = | + job_continuation = | statement job_continuation # A statement is a normal command, or an if / while / and etc statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement - + # A block is a conditional, loop, or begin/end - if_statement = if_clause else_clause + if_statement = if_clause else_clause arguments_or_redirections_list if_clause = job STATEMENT_TERMINATOR job_list else_clause = | else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list case_item_list = | case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - + argument_list_nonempty = argument_list argument_list = | argument_list_nonempty - block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list + block_statement = block_header job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN function_header = FUNCTION function_name argument_list - + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement - + # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list + plain_statement = COMMAND arguments_or_redirections_list optional_background arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list @@ -567,6 +215,8 @@ namespace parse_symbols terminator = | + optional_background = | + */ #endif diff 
--git a/parse_tree_construction.h b/parse_tree_construction.h new file mode 100644 index 000000000..fb9e8dfbf --- /dev/null +++ b/parse_tree_construction.h @@ -0,0 +1,586 @@ +/**\file parse_tree.h + + Programmatic representation of fish code. +*/ + +#ifndef FISH_PARSE_TREE_CONSTRUCTION_H +#define FISH_PARSE_TREE_CONSTRUCTION_H + +#include "parse_tree.h" + +/* Terrifying template black magic. */ + +namespace parse_symbols +{ + +#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } + +#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } + +#define NO_PRODUCTION (-1) + +struct Symbol +{ + typedef int magic_symbol_type_t; +}; + +template +struct Token : public Symbol +{ + SYMBOL(WHICH); +}; + +/* Placeholder */ +typedef Token none; + +typedef Token EMPTY; + +template +struct Seq +{ + typedef T0 t0; + typedef T1 t1; + typedef T2 t2; + typedef T3 t3; + typedef T4 t4; + typedef T5 t5; + + typedef int magic_seq_type_t; +}; + +template +struct OR +{ + typedef P0 p0; + typedef P1 p1; + typedef P2 p2; + typedef P3 p3; + typedef P4 p4; + typedef P5 p5; + + typedef int magic_or_type_t; +}; + +template +struct Keyword : public Symbol +{ + static inline parse_keyword_t get_token() + { + return WHICH; + } +}; + +struct job; +struct statement; +struct job_continuation; +struct boolean_statement; +struct block_statement; +struct if_statement; +struct if_clause; +struct else_clause; +struct else_continuation; +struct switch_statement; +struct decorated_statement; +struct switch_statement; +struct case_item_list; +struct case_item; +struct argument_list_nonempty; +struct argument_list; +struct block_statement; +struct block_header; +struct for_header; +struct while_header; +struct begin_header; +struct function_header; +struct boolean_statement; +struct decorated_statement; +struct plain_statement; +struct arguments_or_redirections_list; +struct argument_or_redirection; +struct redirection; +struct 
statement_terminator; +struct optional_background; + +/* A job_list is a list of jobs, separated by semicolons or newlines */ +struct job_list : public Symbol +{ + typedef OR< + EMPTY, + Seq, + Seq, job_list> + > productions; + + SYMBOL(symbol_job_list) + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End this job list + return 0; + + default: + // Normal string + return 1; + } + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; + + case parse_token_type_end: + // Empty line + return 2; + + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } + } + +}; + +/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ +struct job : public Symbol +{ + typedef Seq sole_production; + SYMBOL(symbol_job); +}; + +struct job_continuation : public Symbol +{ + typedef OR< + EMPTY, + Seq, statement, job_continuation> + > productions; + + SYMBOL(symbol_job_continuation); + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_pipe: + // Pipe, continuation + return 1; + + default: + // Not a pipe, no job continuation + return 0; + } + + } +}; + +/* A statement is a normal command, or an if / while / and etc */ +struct statement : public Symbol +{ + typedef OR< + boolean_statement, + block_statement, + if_statement, + switch_statement, + decorated_statement + > productions; + + SYMBOL(symbol_statement); + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + switch (token_keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + return 0; + + case parse_keyword_for: + case parse_keyword_while: + case parse_keyword_function: + case parse_keyword_begin: + return 1; + + case parse_keyword_if: + return 2; + + case parse_keyword_else: + //symbol_stack_pop(); + return NO_PRODUCTION; + + case parse_keyword_switch: + return 3; + + case parse_keyword_end: + PARSER_DIE(); //todo + return NO_PRODUCTION; + + // 'in' is only special within a for_header + case parse_keyword_in: + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + case parse_keyword_case: + return 4; + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_terminate: + return NO_PRODUCTION; + //parse_error(L"statement", token); + + default: + return NO_PRODUCTION; + } + } + +}; + +struct if_statement : public 
Symbol +{ + typedef Seq, arguments_or_redirections_list> sole_production; + SYMBOL(symbol_if_statement); +}; + +struct if_clause : public Symbol +{ + typedef Seq, job, statement_terminator, job_list> sole_production; + SYMBOL(symbol_if_clause); +}; + +struct else_clause : public Symbol +{ + typedef OR< + EMPTY, + Seq, else_continuation> + > productions; + + SYMBOL(symbol_else_clause); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_else: + return 1; + default: + return 0; + } + } +}; + +struct else_continuation : public Symbol +{ + typedef OR< + Seq, + Seq + > productions; + + SYMBOL(symbol_else_continuation); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_if: + return 0; + default: + return 1; + } + } +}; + +struct switch_statement : public Symbol +{ + typedef Seq, + Token, + statement_terminator, + case_item_list, + Keyword + > sole_production; + + SYMBOL(symbol_switch_statement); +}; + +struct case_item_list : public Symbol +{ + typedef OR + < + EMPTY, + Seq, + Seq, case_item_list> + > productions; + + SYMBOL(symbol_case_item_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_case: return 1; + + default: + if (tok == parse_token_type_end) + { + /* empty line */ + return 2; + } + else + { + return 0; + } + + } + } +}; + +struct case_item : public Symbol +{ + typedef Seq, argument_list, statement_terminator, job_list> sole_production; + + SYMBOL(symbol_case_item); +}; + +struct argument_list_nonempty : public Symbol +{ + typedef Seq, argument_list> sole_production; + SYMBOL(symbol_argument_list_nonempty); +}; + +struct argument_list : public Symbol +{ + typedef OR productions; + + SYMBOL(symbol_argument_list); + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + return 1; + default: + 
return 0; + } + } +}; + +struct block_statement : public Symbol +{ + typedef Seq, arguments_or_redirections_list> sole_production; + + SYMBOL(symbol_block_statement); +}; + +struct block_header : public Symbol +{ + typedef OR productions; + + SYMBOL(symbol_block_header); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + // todo + case parse_keyword_else: + return NO_PRODUCTION; + case parse_keyword_for: + return 0; + case parse_keyword_while: + return 1; + case parse_keyword_function: + return 2; + case parse_keyword_begin: + return 3; + default: + return NO_PRODUCTION; + } + } +}; + +struct for_header : public Symbol +{ + typedef Seq, Token, Keyword, arguments_or_redirections_list> sole_production; + + SYMBOL(symbol_for_header); +}; + +struct while_header : public Symbol +{ + typedef Seq, statement> sole_production; + + SYMBOL(symbol_while_header); +}; + +struct begin_header : public Symbol +{ + typedef Keyword sole_production; + SYMBOL(symbol_begin_header); +}; + +struct function_header : public Symbol +{ + typedef Seq< Keyword, Token, argument_list> sole_production; + SYMBOL(symbol_function_header); +}; + +/* A boolean statement is AND or OR or NOT */ +struct boolean_statement : public Symbol +{ + typedef OR< + Seq, statement>, + Seq, statement>, + Seq, statement> + > productions; + + SYMBOL(symbol_boolean_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_and: + return 0; + case parse_keyword_or: + return 1; + case parse_keyword_not: + return 2; + default: + return NO_PRODUCTION; + } + } +}; + +/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ +struct decorated_statement : public Symbol +{ + + typedef OR< + Seq, plain_statement>, + Seq, plain_statement>, + plain_statement + > productions; + + SYMBOL(symbol_decorated_statement); + + static int production(parse_token_type_t 
tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_command: + return 0; + case parse_keyword_builtin: + return 1; + default: + return 2; + } + } +}; + +struct plain_statement : public Symbol +{ + + typedef Seq, arguments_or_redirections_list, optional_background> sole_production; + + SYMBOL(symbol_plain_statement); + +}; + +struct arguments_or_redirections_list : public Symbol +{ + typedef OR< + EMPTY, + Seq > + productions; + + SYMBOL(symbol_arguments_or_redirections_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } + } +}; + +struct argument_or_redirection : public Symbol +{ + typedef OR< + Token, + redirection + > productions; + + + SYMBOL(symbol_argument_or_redirection); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } + } +}; + +struct redirection : public Symbol +{ + typedef Token production; + SYMBOL(parse_token_type_redirection); +}; + +struct statement_terminator : public Symbol +{ + typedef Token production; + SYMBOL(parse_token_type_end); +}; + +struct optional_background : public Symbol +{ + typedef OR< + EMPTY, + Token + > productions; + + SYMBOL(symbol_optional_background); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_background: + return 1; + default: + return 0; + } + } +}; + +} + +#endif diff --git a/parse_util.cpp b/parse_util.cpp index 5e6f4459b..6f2913455 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -245,10 +245,10 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc const wchar_t * const cursor = buff + cursor_pos; CHECK(buff,); - + const size_t bufflen = wcslen(buff); assert(cursor_pos 
<= bufflen); - + /* ap and bp are the beginning and end of the tightest command substitition found so far */ const wchar_t *ap = buff, *bp = buff + bufflen; const wchar_t *pos = buff; @@ -260,13 +260,13 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc /* No subshell found, all done */ break; } - + /* Intrepret NULL to mean the end */ if (end == NULL) { end = const_cast(buff) + bufflen; } - + if (begin < cursor && end >= cursor) { /* This command substitution surrounds the cursor, so it's a tighter fit */ @@ -288,7 +288,7 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc assert(pos <= buff + bufflen); } } - + if (a != NULL) *a = ap; if (b != NULL) *b = bp; } diff --git a/tokenizer.cpp b/tokenizer.cpp index 831197ee5..6d99b46cd 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -651,13 +651,19 @@ wcstring tok_first(const wchar_t *str) return result; } -int tok_get_pos(tokenizer_t *tok) +int tok_get_pos(const tokenizer_t *tok) { CHECK(tok, 0); - return (int)tok->last_pos; } +size_t tok_get_extent(const tokenizer_t *tok) +{ + CHECK(tok, 0); + size_t current_pos = tok->buff - tok->orig_buff; + return current_pos > tok->last_pos ? 
current_pos - tok->last_pos : 0; +} + void tok_set_pos(tokenizer_t *tok, int pos) { diff --git a/tokenizer.h b/tokenizer.h index f2d6c0c0b..0f3ff3693 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -142,7 +142,10 @@ int tok_has_next(tokenizer_t *tok); /** Returns the position of the beginning of the current token in the original string */ -int tok_get_pos(tokenizer_t *tok); +int tok_get_pos(const tokenizer_t *tok); + +/** Returns the extent of the current token */ +size_t tok_get_extent(const tokenizer_t *tok); /** Returns the original string to tokenizer From 4f8d4f378cffa71b4e80bfa2049e2152b429615c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 25 Jul 2013 15:24:22 -0700 Subject: [PATCH 018/177] AST no templates --- fish.xcodeproj/project.pbxproj | 8 ++- parse_productions.cpp | 63 +++++++++++++++++++ ...tree_construction.h => parse_productions.h | 61 ++++++++++++++++++ parse_tree.cpp | 2 +- parse_tree.h | 4 +- 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 parse_productions.cpp rename parse_tree_construction.h => parse_productions.h (93%) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 5ae10bfc4..708e65e30 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -152,6 +152,7 @@ D0F019FD15A977CA0034B3B1 /* config.fish in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0C4FD9415A7D7EE00212EF1 /* config.fish */; }; D0F01A0315A978910034B3B1 /* osx_fish_launcher.m in Sources */ = {isa = PBXBuildFile; fileRef = D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */; }; D0F01A0515A978A10034B3B1 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D0CBD583159EEE010024809C /* Foundation.framework */; }; + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -465,7 +466,8 @@ 
D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree_construction.h; sourceTree = ""; }; + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_productions.h; sourceTree = ""; }; + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_productions.cpp; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -589,8 +591,9 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */, + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, - D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, D0C52F341765281F00BFAB82 /* parse_exec.h */, D0C52F331765281F00BFAB82 /* parse_exec.cpp */, @@ -1116,6 +1119,7 @@ D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/parse_productions.cpp 
b/parse_productions.cpp new file mode 100644 index 000000000..82bdd0b91 --- /dev/null +++ b/parse_productions.cpp @@ -0,0 +1,63 @@ +#include "parse_productions.h" + +using namespace parse_productions; + +#define PRODUCTIONS(sym) static const Production_t sym##_productions + +PRODUCTIONS(job_list) = + { + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} + }; + + + +/* A job_list is a list of jobs, separated by semicolons or newlines */ + +DEC(job_list) { + symbol_job_list, + { + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} + }, + resolve_job_list +}; + +static int resolve_job_list(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End this job list + return 0; + + default: + // Normal string + return 1; + } + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; + + case parse_token_type_end: + // Empty line + return 2; + + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } + } \ No newline at end of file diff --git a/parse_tree_construction.h b/parse_productions.h similarity index 93% rename from parse_tree_construction.h rename to parse_productions.h index fb9e8dfbf..5ded6af00 100644 --- a/parse_tree_construction.h +++ b/parse_productions.h @@ -10,6 +10,67 @@ /* Terrifying template black magic. */ +/* + +- Get info for symbol +- Resolve production from info +- Get productions for children +- Get symbols for productions + +Production may be: + +1. Single value +2. Sequence of values (possibly empty) +3. Options of Single / Sequence + +Info to specify: + +1. Number of different productions +2. Resolver function +3. 
Symbols for associated productions + +Choice: should info be a class or a data? + +data: + +struct Symbol_t +{ + enum parse_token_type_t token_type; + int (*resolver)(parse_token_type_t tok, parse_keyword_t key); //may be trivial + production productions[5]; +} + +struct Production_t +{ + enum parse_token_type_t symbols[5]; +} + +*/ + +namespace parse_productions +{ + +#define MAX_PRODUCTIONS 5 +#define MAX_SYMBOLS_PER_PRODUCTION 5 + + + +struct Production_t +{ + enum parse_token_type_t symbols[MAX_SYMBOLS_PER_PRODUCTION]; +}; + +struct Symbol_t +{ + enum parse_token_type_t token_type; + int (*resolver)(parse_token_type_t tok, parse_keyword_t key); + Production_t productions[MAX_PRODUCTIONS]; +}; + + + +} + namespace parse_symbols { diff --git a/parse_tree.cpp b/parse_tree.cpp index aea3e729f..4df277d48 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,4 +1,4 @@ -#include "parse_tree_construction.h" +#include "parse_productions.h" #include "tokenizer.h" #include diff --git a/parse_tree.h b/parse_tree.h index 6b1fc0d19..dfe9f24e9 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -3,8 +3,8 @@ Programmatic representation of fish code. 
*/ -#ifndef FISH_PARSE_TREE_H -#define FISH_PARSE_TREE_H +#ifndef FISH_PARSE_PRODUCTIONS_H +#define FISH_PARSE_PRODUCTIONS_H #include From 9dc91925e7bf4dc43936f7657a1a85cbd1ec4909 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 26 Jul 2013 23:59:12 -0700 Subject: [PATCH 019/177] Fewer templates --- parse_productions.cpp | 387 ++++++++++++++++++++++++++++++++++++------ parse_productions.h | 10 +- parse_tree.h | 3 +- 3 files changed, 346 insertions(+), 54 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index 82bdd0b91..fba24c597 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -2,62 +2,351 @@ using namespace parse_productions; -#define PRODUCTIONS(sym) static const Production_t sym##_productions - -PRODUCTIONS(job_list) = - { - {}, - {symbol_job, symbol_job_list}, - {parse_token_type_end, symbol_job_list} - }; - - +#define PRODUCTIONS(sym) static const ProductionList_t sym##_productions +#define RESOLVE(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) +#define RESOLVE_ONLY(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } /* A job_list is a list of jobs, separated by semicolons or newlines */ - -DEC(job_list) { - symbol_job_list, - { - {}, - {symbol_job, symbol_job_list}, - {parse_token_type_end, symbol_job_list} - }, - resolve_job_list +PRODUCTIONS(job_list) = +{ + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} }; -static int resolve_job_list(parse_token_type_t token_type, parse_keyword_t token_keyword) +RESOLVE(job_list) +{ + switch (token_type) { - switch (token_type) - { - case parse_token_type_string: - // 'end' is special - switch (token_keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - return 0; + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End 
this job list + return 0; - default: - // Normal string - return 1; - } + default: + // Normal string + return 1; + } - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - return 1; + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; - case parse_token_type_end: - // Empty line - return 2; + case parse_token_type_end: + // Empty line + return 2; - case parse_token_type_terminate: - // no more commands, just transition to empty - return 0; - break; + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } +} + +/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ + +PRODUCTIONS(job) = +{ + {symbol_statement, symbol_job_continuation} +}; +RESOLVE_ONLY(job) + +PRODUCTIONS(job_continuation) = +{ + {}, + {parse_token_type_pipe, symbol_statement, symbol_job_continuation} +}; +RESOLVE(job_continuation) +{ + switch (token_type) + { + case parse_token_type_pipe: + // Pipe, continuation + return 1; + + default: + // Not a pipe, no job continuation + return 0; + } +} + +/* A statement is a normal command, or an if / while / and etc */ +PRODUCTIONS(statement) = +{ + {symbol_boolean_statement}, + {symbol_block_statement}, + {symbol_if_statement}, + {symbol_switch_statement}, + {symbol_decorated_statement} +}; +RESOLVE(statement) +{ + switch (token_type) + { + case parse_token_type_string: + switch (token_keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + return 0; + + case parse_keyword_for: + case parse_keyword_while: + case parse_keyword_function: + case parse_keyword_begin: + return 1; + + case parse_keyword_if: + return 2; + + case 
parse_keyword_else: + //symbol_stack_pop(); + return NO_PRODUCTION; + + case parse_keyword_switch: + return 3; + + case parse_keyword_end: + PARSER_DIE(); //todo + return NO_PRODUCTION; + + // 'in' is only special within a for_header + case parse_keyword_in: + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + case parse_keyword_case: + return 4; + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_terminate: + return NO_PRODUCTION; + //parse_error(L"statement", token); + + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(if_statement) = +{ + {symbol_if_clause, symbol_else_clause, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(if_statement) + +PRODUCTIONS(if_clause) = +{ + { PRODUCE_KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } +}; +RESOLVE_ONLY(if_clause) + +PRODUCTIONS(else_clause) = +{ + { }, + { PRODUCE_KEYWORD(parse_keyword_else), symbol_else_continuation } +}; +RESOLVE(else_clause) +{ + switch (token_keyword) + { + case parse_keyword_else: + return 1; + default: + return 0; + } +} + +PRODUCTIONS(else_continuation) = +{ + {symbol_if_clause, symbol_else_clause}, + {parse_token_type_end, symbol_job_list} +}; +RESOLVE(else_continuation) +{ + switch (token_keyword) + { + case parse_keyword_if: + return 0; + default: + return 1; + } +} + +PRODUCTIONS(switch_statement) = +{ + { PRODUCE_KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, PRODUCE_KEYWORD(parse_keyword_end)} +}; +RESOLVE_ONLY(switch_statement) + +PRODUCTIONS(case_item_list) = +{ + {}, + {symbol_case_item, symbol_case_item_list}, + {parse_token_type_end, symbol_case_item_list} +}; +RESOLVE(case_item_list) +{ + if (token_keyword == parse_keyword_case) return 1; + else if (token_type == parse_token_type_end) return 2; //empty line + else return 0; +} 
+ +PRODUCTIONS(case_item) = +{ + {PRODUCE_KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} +}; +RESOLVE_ONLY(case_item) + +PRODUCTIONS(argument_list_nonempty) = +{ + {parse_token_type_string, symbol_argument_list} +}; +RESOLVE_ONLY(argument_list_nonempty) + +PRODUCTIONS(argument_list) = +{ + {}, + {symbol_argument_list_nonempty} +}; +RESOLVE(argument_list) +{ + switch (token_type) + { + case parse_token_type_string: return 1; + default: return 0; + } +} + +PRODUCTIONS(block_statement) = +{ + {symbol_block_header, parse_token_type_end, symbol_job_list, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(block_statement) + +PRODUCTIONS(block_header) = +{ + {symbol_for_header}, + {symbol_while_header}, + {symbol_function_header}, + {symbol_begin_header} +}; +RESOLVE(block_header) +{ + switch (token_keyword) + { + case parse_keyword_else: + return NO_PRODUCTION; + case parse_keyword_for: + return 0; + case parse_keyword_while: + return 1; + case parse_keyword_function: + return 2; + case parse_keyword_begin: + return 3; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(for_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_for), parse_token_type_string, PRODUCE_KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(for_header) + +PRODUCTIONS(while_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_while), symbol_statement} +}; +RESOLVE_ONLY(while_header) + +PRODUCTIONS(begin_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_begin)} +}; +RESOLVE_ONLY(begin_header) + +PRODUCTIONS(function_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} +}; +RESOLVE_ONLY(function_header) + +/* A boolean statement is AND or OR or NOT */ +PRODUCTIONS(boolean_statement) = +{ + {PRODUCE_KEYWORD(parse_keyword_and), symbol_statement}, + {PRODUCE_KEYWORD(parse_keyword_or), symbol_statement}, + {PRODUCE_KEYWORD(parse_keyword_not), 
symbol_statement} +}; +RESOLVE(boolean_statement) +{ + switch (token_keyword) + { + case parse_keyword_and: + return 0; + case parse_keyword_or: + return 1; + case parse_keyword_not: + return 2; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(decorated_statement) = +{ + {PRODUCE_KEYWORD(parse_keyword_command), symbol_plain_statement}, + {PRODUCE_KEYWORD(parse_keyword_builtin), symbol_plain_statement}, + {symbol_plain_statement} +}; +RESOLVE(decorated_statement) +{ + switch (token_keyword) + { + case parse_keyword_command: + return 0; + case parse_keyword_builtin: + return 1; + default: + return 2; + } +} + +PRODUCTIONS(plain_statement) = +{ + {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background} +}; +RESOLVE_ONLY(plain_statement) + +PRODUCTIONS(arguments_or_redirections_list) = +{ + {}, + {symbol_argument_or_redirection, symbol_arguments_or_redirections_list} +}; +RESOLVE(arguments_or_redirections_list) +{ + switch (token_type) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } +} - default: - return NO_PRODUCTION; - } - } \ No newline at end of file diff --git a/parse_productions.h b/parse_productions.h index 5ded6af00..d3743014b 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -55,10 +55,12 @@ namespace parse_productions -struct Production_t -{ - enum parse_token_type_t symbols[MAX_SYMBOLS_PER_PRODUCTION]; -}; +/* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. 
*/ +typedef unsigned char Production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +typedef Production_t ProductionList_t[MAX_PRODUCTIONS]; + +#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) struct Symbol_t { diff --git a/parse_tree.h b/parse_tree.h index dfe9f24e9..ef7678f5c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -89,7 +89,8 @@ enum parse_token_type_t parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; From b133137a1f0341f9e21b622448bf5d5056c53046 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jul 2013 15:19:38 -0700 Subject: [PATCH 020/177] Removed templates (yay) --- parse_productions.cpp | 147 +++++++++- parse_productions.h | 661 +++--------------------------------------- parse_tree.cpp | 345 +++++----------------- parse_tree.h | 3 + 4 files changed, 252 insertions(+), 904 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index fba24c597..e63f56023 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -1,10 +1,34 @@ #include "parse_productions.h" using namespace parse_productions; +#define NO_PRODUCTION ((production_option_idx_t)(-1)) -#define PRODUCTIONS(sym) static const ProductionList_t sym##_productions -#define RESOLVE(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) -#define RESOLVE_ONLY(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } +static bool production_is_empty(const production_t production) +{ + return production[0] == token_type_invalid; +} + +// Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. 
it is either not empty or there is a non-empty production after it +static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) +{ + if (which < 0 || which >= MAX_PRODUCTIONS) + return false; + + bool nonempty_found = false; + for (int i=which; i < MAX_PRODUCTIONS; i++) + { + if (! production_is_empty(production_list[i])) + { + nonempty_found = true; + break; + } + } + return nonempty_found; +} + +#define PRODUCTIONS(sym) static const production_options_t productions_##sym +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = @@ -44,7 +68,6 @@ RESOLVE(job_list) case parse_token_type_terminate: // no more commands, just transition to empty return 0; - break; default: return NO_PRODUCTION; @@ -350,3 +373,119 @@ RESOLVE(arguments_or_redirections_list) } } +PRODUCTIONS(argument_or_redirection) = +{ + {parse_token_type_string}, + {parse_token_type_redirection} +}; +RESOLVE(argument_or_redirection) +{ + switch (token_type) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(optional_background) = +{ + {}, + { parse_token_type_background } +}; + +RESOLVE(optional_background) +{ + switch (token_type) + { + case parse_token_type_background: + return 1; + default: + return 0; + } +} + +#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, 
production_option_idx_t *out_which_production, production_tag_t *out_tag) +{ + bool log_it = false; + if (log_it) + { + fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); + } + + /* Fetch the list of productions and the function to resolve them */ + const production_options_t *production_list = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; + switch (node_type) + { + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list_nonempty) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(optional_background) + + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case token_type_invalid: + fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); + PARSER_DIE(); + break; + + } + PARSE_ASSERT(production_list != NULL); + PARSE_ASSERT(resolver != NULL); + + const production_t *result = NULL; + production_option_idx_t which = resolver(input_type, input_keyword, out_tag); + + if (log_it) + { + fprintf(stderr, "\tresolved to %u\n", (unsigned)which); + } + + + if (which == NO_PRODUCTION) + { + 
fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + result = NULL; + } + else + { + PARSE_ASSERT(production_is_valid(*production_list, which)); + result = &((*production_list)[which]); + } + *out_which_production = which; + return result; +} diff --git a/parse_productions.h b/parse_productions.h index d3743014b..d7b7c19d4 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -8,45 +8,6 @@ #include "parse_tree.h" -/* Terrifying template black magic. */ - -/* - -- Get info for symbol -- Resolve production from info -- Get productions for children -- Get symbols for productions - -Production may be: - -1. Single value -2. Sequence of values (possibly empty) -3. Options of Single / Sequence - -Info to specify: - -1. Number of different productions -2. Resolver function -3. Symbols for associated productions - -Choice: should info be a class or a data? - -data: - -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); //may be trivial - production productions[5]; -} - -struct Production_t -{ - enum parse_token_type_t symbols[5]; -} - -*/ - namespace parse_productions { @@ -54,596 +15,54 @@ namespace parse_productions #define MAX_SYMBOLS_PER_PRODUCTION 5 +typedef uint32_t production_tag_t; /* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. 
*/ -typedef unsigned char Production_t[MAX_SYMBOLS_PER_PRODUCTION]; +typedef uint8_t production_element_t; -typedef Production_t ProductionList_t[MAX_PRODUCTIONS]; +/* An index into a production option list */ +typedef uint8_t production_option_idx_t; + +inline parse_token_type_t production_element_type(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + return parse_token_type_string; + } + else + { + return static_cast(elem); + } +} + +inline parse_keyword_t production_element_keyword(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + // First keyword is LAST_TOKEN_OR_SYMBOL + 1 + return static_cast(elem - LAST_TOKEN_OR_SYMBOL - 1); + } + else + { + return parse_keyword_none; + } +} + + +inline bool production_element_is_valid(production_element_t elem) +{ + return elem != token_type_invalid; +} + +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +typedef production_t production_options_t[MAX_PRODUCTIONS]; #define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); - Production_t productions[MAX_PRODUCTIONS]; -}; - - +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag); } -namespace parse_symbols -{ - -#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - -#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - -#define NO_PRODUCTION (-1) - -struct Symbol -{ - typedef int magic_symbol_type_t; -}; - -template -struct Token : public Symbol -{ - SYMBOL(WHICH); -}; - -/* Placeholder */ -typedef Token none; - -typedef Token EMPTY; - -template -struct Seq -{ - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - - typedef 
int magic_seq_type_t; -}; - -template -struct OR -{ - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - - typedef int magic_or_type_t; -}; - -template -struct Keyword : public Symbol -{ - static inline parse_keyword_t get_token() - { - return WHICH; - } -}; - -struct job; -struct statement; -struct job_continuation; -struct boolean_statement; -struct block_statement; -struct if_statement; -struct if_clause; -struct else_clause; -struct else_continuation; -struct switch_statement; -struct decorated_statement; -struct switch_statement; -struct case_item_list; -struct case_item; -struct argument_list_nonempty; -struct argument_list; -struct block_statement; -struct block_header; -struct for_header; -struct while_header; -struct begin_header; -struct function_header; -struct boolean_statement; -struct decorated_statement; -struct plain_statement; -struct arguments_or_redirections_list; -struct argument_or_redirection; -struct redirection; -struct statement_terminator; -struct optional_background; - -/* A job_list is a list of jobs, separated by semicolons or newlines */ -struct job_list : public Symbol -{ - typedef OR< - EMPTY, - Seq, - Seq, job_list> - > productions; - - SYMBOL(symbol_job_list) - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - // 'end' is special - switch (token_keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - return 0; - - default: - // Normal string - return 1; - } - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - return 1; - - case parse_token_type_end: - // Empty line - return 2; - - case parse_token_type_terminate: - // no more commands, just transition to empty - return 0; - break; - - default: - return NO_PRODUCTION; - } - } - -}; - -/* A job is a non-empty list of statements, separated by pipes. 
(Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ -struct job : public Symbol -{ - typedef Seq sole_production; - SYMBOL(symbol_job); -}; - -struct job_continuation : public Symbol -{ - typedef OR< - EMPTY, - Seq, statement, job_continuation> - > productions; - - SYMBOL(symbol_job_continuation); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_pipe: - // Pipe, continuation - return 1; - - default: - // Not a pipe, no job continuation - return 0; - } - - } -}; - -/* A statement is a normal command, or an if / while / and etc */ -struct statement : public Symbol -{ - typedef OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > productions; - - SYMBOL(symbol_statement); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - switch (token_keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - return 0; - - case parse_keyword_for: - case parse_keyword_while: - case parse_keyword_function: - case parse_keyword_begin: - return 1; - - case parse_keyword_if: - return 2; - - case parse_keyword_else: - //symbol_stack_pop(); - return NO_PRODUCTION; - - case parse_keyword_switch: - return 3; - - case parse_keyword_end: - PARSER_DIE(); //todo - return NO_PRODUCTION; - - // 'in' is only special within a for_header - case parse_keyword_in: - case parse_keyword_none: - case parse_keyword_command: - case parse_keyword_builtin: - case parse_keyword_case: - return 4; - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - case parse_token_type_terminate: - return NO_PRODUCTION; - //parse_error(L"statement", token); - - 
default: - return NO_PRODUCTION; - } - } - -}; - -struct if_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - SYMBOL(symbol_if_statement); -}; - -struct if_clause : public Symbol -{ - typedef Seq, job, statement_terminator, job_list> sole_production; - SYMBOL(symbol_if_clause); -}; - -struct else_clause : public Symbol -{ - typedef OR< - EMPTY, - Seq, else_continuation> - > productions; - - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: - return 1; - default: - return 0; - } - } -}; - -struct else_continuation : public Symbol -{ - typedef OR< - Seq, - Seq - > productions; - - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: - return 0; - default: - return 1; - } - } -}; - -struct switch_statement : public Symbol -{ - typedef Seq, - Token, - statement_terminator, - case_item_list, - Keyword - > sole_production; - - SYMBOL(symbol_switch_statement); -}; - -struct case_item_list : public Symbol -{ - typedef OR - < - EMPTY, - Seq, - Seq, case_item_list> - > productions; - - SYMBOL(symbol_case_item_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - - default: - if (tok == parse_token_type_end) - { - /* empty line */ - return 2; - } - else - { - return 0; - } - - } - } -}; - -struct case_item : public Symbol -{ - typedef Seq, argument_list, statement_terminator, job_list> sole_production; - - SYMBOL(symbol_case_item); -}; - -struct argument_list_nonempty : public Symbol -{ - typedef Seq, argument_list> sole_production; - SYMBOL(symbol_argument_list_nonempty); -}; - -struct argument_list : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - 
switch (tok) - { - case parse_token_type_string: - return 1; - default: - return 0; - } - } -}; - -struct block_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_block_statement); -}; - -struct block_header : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_block_header); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - // todo - case parse_keyword_else: - return NO_PRODUCTION; - case parse_keyword_for: - return 0; - case parse_keyword_while: - return 1; - case parse_keyword_function: - return 2; - case parse_keyword_begin: - return 3; - default: - return NO_PRODUCTION; - } - } -}; - -struct for_header : public Symbol -{ - typedef Seq, Token, Keyword, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_for_header); -}; - -struct while_header : public Symbol -{ - typedef Seq, statement> sole_production; - - SYMBOL(symbol_while_header); -}; - -struct begin_header : public Symbol -{ - typedef Keyword sole_production; - SYMBOL(symbol_begin_header); -}; - -struct function_header : public Symbol -{ - typedef Seq< Keyword, Token, argument_list> sole_production; - SYMBOL(symbol_function_header); -}; - -/* A boolean statement is AND or OR or NOT */ -struct boolean_statement : public Symbol -{ - typedef OR< - Seq, statement>, - Seq, statement>, - Seq, statement> - > productions; - - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: - return 0; - case parse_keyword_or: - return 1; - case parse_keyword_not: - return 2; - default: - return NO_PRODUCTION; - } - } -}; - -/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ -struct decorated_statement : public Symbol -{ - - typedef OR< - Seq, plain_statement>, - Seq, plain_statement>, - plain_statement - > productions; - - 
SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: - return 0; - case parse_keyword_builtin: - return 1; - default: - return 2; - } - } -}; - -struct plain_statement : public Symbol -{ - - typedef Seq, arguments_or_redirections_list, optional_background> sole_production; - - SYMBOL(symbol_plain_statement); - -}; - -struct arguments_or_redirections_list : public Symbol -{ - typedef OR< - EMPTY, - Seq > - productions; - - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } -}; - -struct argument_or_redirection : public Symbol -{ - typedef OR< - Token, - redirection - > productions; - - - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - return 0; - case parse_token_type_redirection: - return 1; - default: - return NO_PRODUCTION; - } - } -}; - -struct redirection : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_redirection); -}; - -struct statement_terminator : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_end); -}; - -struct optional_background : public Symbol -{ - typedef OR< - EMPTY, - Token - > productions; - - SYMBOL(symbol_optional_background); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_background: - return 1; - default: - return 0; - } - } -}; - -} #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index 4df277d48..a25c549f6 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -2,7 +2,7 @@ #include "tokenizer.h" #include -using namespace parse_symbols; +using namespace parse_productions; wcstring 
parse_error_t::describe(const wcstring &src) const { @@ -260,6 +260,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } } +__attribute__((unused)) static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) @@ -277,11 +278,11 @@ struct parse_stack_element_t enum parse_keyword_t keyword; node_offset_t node_idx; - parse_stack_element_t(parse_token_type_t t) : type(t), keyword(parse_keyword_none), node_idx(-1) + explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) { } - - parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) + + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) { } @@ -311,26 +312,13 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_job_list; - elem.node_idx = 0; - symbol_stack.push_back(elem); // goal token + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); - // implementation of certain parser constructions void accept_token(parse_token_t token, const wcstring &src); - void accept_token_job_list(parse_token_t token); - void accept_token_job(parse_token_t token); - void accept_token_job_continuation(parse_token_t token); - void accept_token_else_clause(parse_token_t token); - void accept_token_else_continuation(parse_token_t token); - void accept_token_plain_statement(parse_token_t token); - void accept_token_argument_list(parse_token_t token); - void accept_token_arguments_or_redirections_list(parse_token_t token); - void accept_token_argument_or_redirection(parse_token_t token); - bool accept_token_string(parse_token_t token); void 
token_unhandled(parse_token_t token, const char *function); @@ -373,111 +361,67 @@ class parse_ll_t symbol_stack.pop_back(); } - - // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. - inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. + inline void symbol_stack_pop_push_production(const production_t *production) { - - // Logging? - if (0) + bool logit = false; + if (logit) { - fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); - if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); - if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); - if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); - if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); - if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); + size_t count = 0; + fprintf(stderr, "Applying production:\n"); + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type).c_str(), keyword_description(keyword).c_str()); + count++; + } + } + if (! 
count) fprintf(stderr, "\t\n"); } - // Get the node for the top symbol and tell it about its children - size_t node_idx = symbol_stack.back().node_idx; - parse_node_t &node = nodes.at(node_idx); - + + // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + const size_t child_start = nodes.size(); + size_t child_count = 0; + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + // Generate the parse node. Note that this push_back may invalidate node. + parse_token_type_t child_type = production_element_type(elem); + nodes.push_back(parse_node_t(child_type)); + child_count++; + } + } + + // Update the parent + const size_t parent_node_idx = symbol_stack.back().node_idx; + parse_node_t &parent_node = nodes.at(parent_node_idx); + // Should have no children yet - PARSE_ASSERT(node.child_count == 0); + PARSE_ASSERT(parent_node.child_count == 0); - // Tell the node where its children start - node.child_start = nodes.size(); - - // Add nodes for the children - // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) - if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); - if (tok2.type != token_type_invalid) add_child_to_node(node_idx, &tok2); - if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); - if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); - if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); - - // The above set the node_idx. Now replace the top of the stack. 
+ // Tell the node about its children + parent_node.child_start = child_start; + parent_node.child_count = child_count; + + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. symbol_stack.pop_back(); - if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); - if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); - if (tok3.type != token_type_invalid) symbol_stack.push_back(tok3); - if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); - if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0) - { - symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0) - { - symbol_stack_pop_push_int(T::get_token()); - } - - // Singular. Sole productions are always of type Seq. - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0) - { - typedef typename T::sole_production seq; - symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token()); - } - - // Plural productions, of type Or. 
- template - inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0) - { - typedef typename T::productions ors; - int which = T::production(tok.type, tok.keyword); - switch (which) + symbol_stack.reserve(symbol_stack.size() + child_count); + size_t idx = child_count; + while (idx--) { - case 0: - symbol_stack_pop_push2(); - break; - case 1: - symbol_stack_pop_push2(); - break; - case 2: - symbol_stack_pop_push2(); - break; - case 3: - symbol_stack_pop_push2(); - break; - case 4: - symbol_stack_pop_push2(); - break; - - case NO_PRODUCTION: - parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str()); - break; - - default: - parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str()); - break; + production_element_t elem = (*production)[idx]; + PARSE_ASSERT(production_element_is_valid(elem)); + symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); } } - // Non-sequence basic productions - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0) - { - symbol_stack_pop_push_int(T::sole_production::get_token()); - } - - }; void parse_ll_t::dump_stack(void) const @@ -551,57 +495,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_else_clause(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_clause); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_else_continuation(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_continuation); - symbol_stack_produce(token); -} - - -void parse_ll_t::accept_token_argument_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_list); - symbol_stack_produce(token); -} - - -void 
parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); - symbol_stack_produce(token); -} - -bool parse_ll_t::accept_token_string(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == parse_token_type_string); - bool result = false; - switch (token.type) - { - case parse_token_type_string: - // Got our string - symbol_stack_pop(); - result = true; - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } - return result; -} - bool parse_ll_t::top_node_match_token(parse_token_t token) { PARSE_ASSERT(! symbol_stack.empty()); @@ -654,122 +547,16 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - - switch (stack_top_type()) - { - /* Symbols */ - case symbol_job_list: - symbol_stack_produce(token); - break; - - case symbol_job: - symbol_stack_produce(token); - break; - - case symbol_job_continuation: - symbol_stack_produce(token); - break; - - case symbol_statement: - symbol_stack_produce(token); - break; - - case symbol_if_statement: - symbol_stack_produce(token); - break; - - case symbol_if_clause: - symbol_stack_produce(token); - break; - - case symbol_else_clause: - accept_token_else_clause(token); - break; - - case symbol_else_continuation: - accept_token_else_continuation(token); - break; - - case symbol_block_statement: - symbol_stack_produce(token); - break; - - case symbol_block_header: - symbol_stack_produce(token); - break; - - case symbol_for_header: - symbol_stack_produce(token); - break; - - case symbol_while_header: - symbol_stack_produce(token); - break; - - case symbol_begin_header: - symbol_stack_produce(token); - break; - - case symbol_function_header: - symbol_stack_produce(token); - break; - - case 
symbol_switch_statement: - symbol_stack_produce(token); - break; - - case symbol_case_item_list: - symbol_stack_produce(token); - break; - - case symbol_case_item: - symbol_stack_produce(token); - break; - - case symbol_boolean_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_decorated_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_plain_statement: - symbol_stack_produce(token); - break; - - case symbol_argument_list_nonempty: - symbol_stack_produce(token); - break; - - case symbol_argument_list: - accept_token_argument_list(token); - break; - - case symbol_arguments_or_redirections_list: - accept_token_arguments_or_redirections_list(token); - break; - - case symbol_argument_or_redirection: - accept_token_argument_or_redirection(token); - break; - - case symbol_optional_background: - symbol_stack_produce(token); - break; - - /* Tokens */ - case parse_token_type_string: - consumed = accept_token_string(token); - break; - - default: - fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); - exit_without_destructors(EXIT_FAILURE); - break; - } + + // Get the production for the top of the stack + parse_stack_element_t &stack_elem = symbol_stack.back(); + parse_node_t &node = nodes.at(stack_elem.node_idx); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag); + PARSE_ASSERT(production != NULL); + + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. 
+ symbol_stack_pop_push_production(production); } } diff --git a/parse_tree.h b/parse_tree.h index ef7678f5c..c53864258 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -137,6 +137,9 @@ class parse_node_t /* Type-dependent data */ uint32_t tag; + + /* Which production was used */ + uint8_t production_idx; /* Description */ wcstring describe(void) const; From 680ac41bb1ee83e708e7309f2141a76203d90c7c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jul 2013 15:44:09 -0700 Subject: [PATCH 021/177] Clean up some error handling --- parse_productions.cpp | 10 ++++++++-- parse_productions.h | 19 +++++++++++-------- parse_tree.cpp | 38 +++++++++++++++----------------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index e63f56023..61f7636de 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -30,6 +30,9 @@ static bool production_is_valid(const production_options_t production_list, prod #define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) #define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } +#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) + + /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = { @@ -409,7 +412,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, 
parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -478,7 +481,10 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n if (which == NO_PRODUCTION) { - fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + if (log_it) + { + fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + } result = NULL; } else diff --git a/parse_productions.h b/parse_productions.h index d7b7c19d4..a0d43f629 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -23,6 +23,13 @@ typedef uint8_t production_element_t; /* An index into a production option list */ typedef uint8_t production_option_idx_t; +/* A production is an array of production elements */ +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +/* A production options is an array of (possible) productions */ +typedef production_t production_options_t[MAX_PRODUCTIONS]; + +/* Resolve the type from a production element */ inline parse_token_type_t production_element_type(production_element_t elem) { if (elem > LAST_TOKEN_OR_SYMBOL) @@ -35,6 +42,7 @@ inline parse_token_type_t production_element_type(production_element_t elem) } } +/* Resolve the keyword from a production element */ inline parse_keyword_t production_element_keyword(production_element_t elem) { if (elem > LAST_TOKEN_OR_SYMBOL) @@ -48,19 +56,14 @@ inline parse_keyword_t production_element_keyword(production_element_t elem) } } - +/* Check if an element is valid */ inline bool 
production_element_is_valid(production_element_t elem) { return elem != token_type_invalid; } -typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; - -typedef production_t production_options_t[MAX_PRODUCTIONS]; - -#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) - -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag); +/* Fetch a production */ +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index a25c549f6..7a809167e 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -14,7 +14,7 @@ wcstring parse_error_t::describe(const wcstring &src) const // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline size_t newline = src.find_last_of(L'\n', source_start); - fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); + //fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); if (newline != wcstring::npos) { line_start = newline;// + 1; @@ -26,7 +26,7 @@ wcstring parse_error_t::describe(const wcstring &src) const line_end = src.size(); } assert(line_end >= line_start); - fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); + //fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); assert(source_start >= line_start); // Append the line of text @@ -320,8 +320,6 @@ class parse_ll_t void accept_token(parse_token_t token, const wcstring &src); - void token_unhandled(parse_token_t token, const char *function); - void parse_error(const wchar_t 
*expected, parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -455,18 +453,6 @@ void parse_ll_t::dump_stack(void) const } } -void parse_ll_t::token_unhandled(parse_token_t token, const char *function) -{ - fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); - this->dump_stack(); - parse_error_t err; - err.text = format_string(L"Unhandled token with type %ls in function %s", token_type_description(token.type).c_str(), function); - err.source_start = token.source_start; - err.source_length = token.source_length; - this->errors.push_back(err); - this->fatal_errored = true; -} - void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) { this->dump_stack(); @@ -551,12 +537,18 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag); - PARSE_ASSERT(production != NULL); - - // Manipulate the symbol stack. - // Note that stack_elem is invalidated by popping the stack. - symbol_stack_pop_push_production(production); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); + if (production == NULL) + { + this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + // parse_error sets fatal_errored, which ends the loop + } + else + { + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. 
+ symbol_stack_pop_push_production(production); + } } } @@ -632,7 +624,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ break; } - wcstring result = L"";//dump_tree(this->parser->nodes, str); + wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); From 876b9d49b4ae89c02275ad7c3c2e26f27e38f298 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 14:44:52 -0700 Subject: [PATCH 022/177] Added fish_tests target to fish.xcodeproj --- fish.xcodeproj/project.pbxproj | 215 +++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 708e65e30..c397f3216 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -73,6 +73,48 @@ D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D08A329317B4458D00F3A533 /* fish_tests.cpp */; }; + D08A329517B445C200F3A533 /* function.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854413B3ACEE0099B651 /* function.cpp */; }; + D08A329617B445FD00F3A533 /* builtin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853513B3ACEE0099B651 /* builtin.cpp */; }; + D08A329717B4463B00F3A533 /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* 
complete.cpp */; }; + D08A329817B4463B00F3A533 /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; + D08A329917B4463B00F3A533 /* exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853C13B3ACEE0099B651 /* exec.cpp */; }; + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853D13B3ACEE0099B651 /* expand.cpp */; }; + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854713B3ACEE0099B651 /* highlight.cpp */; }; + D08A329C17B4463B00F3A533 /* history.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854813B3ACEE0099B651 /* history.cpp */; }; + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854F13B3ACEE0099B651 /* kill.cpp */; }; + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855413B3ACEE0099B651 /* parser.cpp */; }; + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855713B3ACEE0099B651 /* proc.cpp */; }; + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855813B3ACEE0099B651 /* reader.cpp */; }; + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855913B3ACEE0099B651 /* sanity.cpp */; }; + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855D13B3ACEE0099B651 /* tokenizer.cpp */; }; + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855F13B3ACEE0099B651 /* wgetopt.cpp */; }; + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856013B3ACEE0099B651 /* wildcard.cpp */; }; + D08A32A517B4463B00F3A533 /* wutil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856113B3ACEE0099B651 /* wutil.cpp */; }; + D08A32A617B4464300F3A533 /* input.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
D0A0854A13B3ACEE0099B651 /* input.cpp */; }; + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */; }; + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */; }; + D08A32A917B446A300F3A533 /* color.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0B6B0FE14E88BA400AD6C10 /* color.cpp */; }; + D08A32AA17B446A300F3A533 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853613B3ACEE0099B651 /* common.cpp */; }; + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853813B3ACEE0099B651 /* env_universal_common.cpp */; }; + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853913B3ACEE0099B651 /* env_universal.cpp */; }; + D08A32AD17B446A300F3A533 /* event.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853B13B3ACEE0099B651 /* event.cpp */; }; + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854913B3ACEE0099B651 /* input_common.cpp */; }; + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854B13B3ACEE0099B651 /* intern.cpp */; }; + D08A32B017B446A300F3A533 /* io.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854C13B3ACEE0099B651 /* io.cpp */; }; + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854D13B3ACEE0099B651 /* iothread.cpp */; }; + D08A32B217B446A300F3A533 /* output.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855113B3ACEE0099B651 /* output.cpp */; }; + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855213B3ACEE0099B651 /* parse_util.cpp */; }; + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855313B3ACEE0099B651 /* 
parser_keywords.cpp */; }; + D08A32B517B446A300F3A533 /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855513B3ACEE0099B651 /* path.cpp */; }; + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D09B1C1914FC7B5B00F91077 /* postfork.cpp */; }; + D08A32B717B446A300F3A533 /* screen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855A13B3ACEE0099B651 /* screen.cpp */; }; + D08A32B817B446A300F3A533 /* signal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855C13B3ACEE0099B651 /* signal.cpp */; }; + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; + D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8C15983CFA008E62BD /* libncurses.dylib */; }; + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8A15983CDF008E62BD /* libiconv.dylib */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; @@ -295,6 +337,15 @@ name = "Copy Files"; runOnlyForDeploymentPostprocessing = 1; }; + D08A328B17B4455100F3A533 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; 
D0F019F015A977010034B3B1 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; @@ -340,6 +391,8 @@ D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; + D08A328D17B4455100F3A533 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; }; + D08A329317B4458D00F3A533 /* fish_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_tests.cpp; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; D0A0850313B3ACEE0099B651 /* builtin.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin.h; sourceTree = ""; }; @@ -471,6 +524,15 @@ /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + D08A328A17B4455100F3A533 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */, + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB915985EF9008E62BD /* Frameworks */ = { isa = 
PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -534,6 +596,13 @@ name = "Other Build Products"; sourceTree = ""; }; + D08A328E17B4455100F3A533 /* fish_tests */ = { + isa = PBXGroup; + children = ( + ); + path = fish_tests; + sourceTree = ""; + }; D0A084F013B3AC130099B651 = { isa = PBXGroup; children = ( @@ -543,6 +612,7 @@ D0D02A8E15983D5F008E62BD /* Libraries */, D0D02AAB15985C14008E62BD /* Resources */, D031890A15E36DB500D9CC39 /* Other Build Products */, + D08A328E17B4455100F3A533 /* fish_tests */, D0D2693215983562005D9B9C /* Products */, ); sourceTree = ""; @@ -672,6 +742,7 @@ D0A0856613B3ACEE0099B651 /* xdgmimemagic.cpp */, D0A0852F13B3ACEE0099B651 /* xdgmimeparent.h */, D0A0856713B3ACEE0099B651 /* xdgmimeparent.cpp */, + D08A329317B4458D00F3A533 /* fish_tests.cpp */, ); name = Sources; sourceTree = ""; @@ -713,6 +784,7 @@ D0D02ABC15985EF9008E62BD /* fishd */, D0D02AD01598642A008E62BD /* fish_indent */, D0D02AE415986537008E62BD /* fish_pager */, + D08A328D17B4455100F3A533 /* fish_tests */, ); name = Products; sourceTree = ""; @@ -745,6 +817,23 @@ /* End PBXLegacyTarget section */ /* Begin PBXNativeTarget section */ + D08A328C17B4455100F3A533 /* fish_tests */ = { + isa = PBXNativeTarget; + buildConfigurationList = D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */; + buildPhases = ( + D08A328917B4455100F3A533 /* Sources */, + D08A328A17B4455100F3A533 /* Frameworks */, + D08A328B17B4455100F3A533 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = fish_tests; + productName = fish_tests; + productReference = D08A328D17B4455100F3A533 /* fish_tests */; + productType = "com.apple.product-type.tool"; + }; D0D02A9915985A75008E62BD /* fish.app */ = { isa = PBXNativeTarget; buildConfigurationList = D0D02AA415985A75008E62BD /* Build configuration list for PBXNativeTarget "fish.app" */; @@ -854,6 +943,7 @@ D0D02ABB15985EF9008E62BD /* fishd */, D0D02ACF1598642A008E62BD /* fish_indent */, 
D0D02AE315986537008E62BD /* fish_pager */, + D08A328C17B4455100F3A533 /* fish_tests */, D0A564E6168CFDD800AF6161 /* man_pages */, D0A084F713B3AC130099B651 /* Makefile */, ); @@ -1034,6 +1124,53 @@ /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + D08A328917B4455100F3A533 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */, + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */, + D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */, + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */, + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */, + D08A32A917B446A300F3A533 /* color.cpp in Sources */, + D08A32AA17B446A300F3A533 /* common.cpp in Sources */, + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */, + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */, + D08A32AD17B446A300F3A533 /* event.cpp in Sources */, + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */, + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */, + D08A32B017B446A300F3A533 /* io.cpp in Sources */, + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */, + D08A32B217B446A300F3A533 /* output.cpp in Sources */, + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */, + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */, + D08A32B517B446A300F3A533 /* path.cpp in Sources */, + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */, + D08A32B717B446A300F3A533 /* screen.cpp in Sources */, + D08A32B817B446A300F3A533 /* signal.cpp in Sources */, + D08A32A617B4464300F3A533 /* input.cpp in Sources */, + D08A329717B4463B00F3A533 /* complete.cpp in Sources */, + D08A329817B4463B00F3A533 /* env.cpp in Sources */, + D08A329917B4463B00F3A533 /* exec.cpp in Sources */, + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */, + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */, + 
D08A329C17B4463B00F3A533 /* history.cpp in Sources */, + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */, + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */, + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */, + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */, + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */, + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */, + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */, + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */, + D08A32A517B4463B00F3A533 /* wutil.cpp in Sources */, + D08A329617B445FD00F3A533 /* builtin.cpp in Sources */, + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */, + D08A329517B445C200F3A533 /* function.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB815985EF9008E62BD /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -1345,6 +1482,74 @@ }; name = Release; }; + D08A328F17B4455100F3A533 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + D08A329017B4455100F3A533 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + 
CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + D08A329117B4455100F3A533 /* Release_C++11 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = "Release_C++11"; + }; D0A084F813B3AC130099B651 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -1617,6 +1822,16 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + D08A328F17B4455100F3A533 /* Debug */, + D08A329017B4455100F3A533 /* Release */, + D08A329117B4455100F3A533 /* Release_C++11 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; D0A084F513B3AC130099B651 /* Build configuration list for PBXProject "fish" */ = { isa = XCConfigurationList; buildConfigurations = ( From 6a6593335d2311432ca5d05d2081cb30c4fec34f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 15:06:32 -0700 Subject: [PATCH 023/177] Teach fish_tests about new parsing files in Makefile --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Makefile.in b/Makefile.in index b3e745a6a..c36cbe7d6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -100,7 +100,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o parse_tree.o parse_exec.o + builtin_test.o parse_tree.o parse_productions.o parse_exec.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o From 8e07e55c1f928ed8a0da6360e00b2b8139594a63 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 15:06:46 -0700 Subject: [PATCH 024/177] More work on new parser --- builtin.cpp | 2 +- common.cpp | 2 +- common.h | 4 +- fish_tests.cpp | 50 ++++- highlight.cpp | 412 ++++++++++++++++++++++++++++++++++++++++++ highlight.h | 1 + parse_productions.cpp | 26 ++- parse_tree.cpp | 200 ++++++++++++++++++-- parse_tree.h | 43 ++++- 9 files changed, 708 insertions(+), 32 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index d2a80a8c4..4ae9e5b51 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4063,7 +4063,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) parse_node_tree_t parse_tree; parse_error_list_t errors; parse_t parser; - bool success = parser.parse(src, &parse_tree, &errors); + bool success = parser.parse(src, parse_flag_none, &parse_tree, &errors, true); if (! 
success) { stdout_buffer.append(L"Parsing failed:\n"); diff --git a/common.cpp b/common.cpp index 7a9f7a514..c9a6b2279 100644 --- a/common.cpp +++ b/common.cpp @@ -507,7 +507,7 @@ const wchar_t *wcsfuncname(const wchar_t *str) } -int wcsvarchr(wchar_t chr) +bool wcsvarchr(wchar_t chr) { return iswalnum(chr) || chr == L'_'; } diff --git a/common.h b/common.h index 57fe7fa1a..abbf12f34 100644 --- a/common.h +++ b/common.h @@ -608,10 +608,10 @@ const wchar_t *wcsfuncname(const wchar_t *str); /** Test if the given string is valid in a variable name - \return 1 if this is a valid name, 0 otherwise + \return true if this is a valid name, false otherwise */ -int wcsvarchr(wchar_t chr); +bool wcsvarchr(wchar_t chr); /** diff --git a/fish_tests.cpp b/fish_tests.cpp index 6ebd3d220..dd16deb99 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1801,23 +1801,65 @@ void history_tests_t::test_history_speed(void) delete hist; } +static void test_new_parser_correctness(void) +{ + say(L"Testing new parser!"); + const struct parser_test_t + { + const wchar_t *src; + bool ok; + } + parser_tests[] = + { + {L"; ; ; ", true}, + {L"if ; end", false}, + {L"if true ; end", true}, + {L"if true; end ; end", false}, + {L"if end; end ; end", false}, + {L"end", false} + }; + + for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) + { + const parser_test_t *test = &parser_tests[i]; + + parse_node_tree_t parse_tree; + parse_t parser; + bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); + say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); + if (success && ! test->ok) + { + err(L"\"%ls\" should NOT have parsed, but did", test->src); + } + else if (! 
success && test->ok) + { + err(L"\"%ls\" should have parsed, but failed", test->src); + } + } + say(L"Parse tests complete"); + +} + +__attribute__((unused)) static void test_new_parser(void) { say(L"Testing new parser!"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; - bool success = parser.parse(src, &parse_tree, NULL); + bool success = parser.parse(src, parse_flag_none, &parse_tree, NULL); if (! success) { say(L"Parsing failed"); } else { +#if 0 parse_execution_context_t ctx(parse_tree, src); say(L"Simulating execution:"); wcstring simulation = ctx.simulate(); say(simulation.c_str()); +#endif } } @@ -1827,13 +1869,12 @@ static void test_new_parser(void) int main(int argc, char **argv) { setlocale(LC_ALL, ""); - srand(time(0)); + //srand(time(0)); configure_thread_assertions_for_testing(); program_name=L"(ignore)"; say(L"Testing low-level functionality"); - say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given broken input, and can be\nignored. 
All actual errors begin with 'Error:'."); set_main_thread(); setup_fork_guards(); //proc_init(); @@ -1843,7 +1884,8 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_new_parser(); + test_new_parser_correctness(); + //test_new_parser(); return 0; test_format(); diff --git a/highlight.cpp b/highlight.cpp index 606604386..3c0838902 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -34,6 +34,7 @@ #include "wildcard.h" #include "path.h" #include "history.h" +#include "parse_tree.h" /** Number of elements in the highlight_var array @@ -1307,11 +1308,16 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); // PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); + if (1) { + highlight_shell_magic(buff, color, pos, error, vars); + return; + } const size_t length = buff.size(); assert(buff.size() == color.size()); @@ -1440,7 +1446,413 @@ void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, } } +static void color_node(const parse_node_t &node, int color, std::vector &color_array) +{ + // Can only color nodes with valid source ranges + if (! 
node.has_source()) + return; + + // Fill the color array with our color in the corresponding range + size_t source_end = node.source_start + node.source_length; + assert(source_end >= node.source_start); + assert(source_end <= color_array.size()); + + std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); +} +static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) +{ + const size_t buff_len = buffstr.size(); + std::fill(colors, colors + buff_len, normal_status); + + enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; + int bracket_count=0; + for (size_t in_pos=0; in_pos < buff_len; in_pos++) + { + const wchar_t c = buffstr.at(in_pos); + switch (mode) + { + case e_unquoted: + { + if (c == L'\\') + { + int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR + const size_t backslash_pos = in_pos; + size_t fill_end = backslash_pos; + + // Move to the escaped character + in_pos++; + const wchar_t escaped_char = (in_pos < buff_len ? buffstr.at(in_pos) : L'\0'); + + if (escaped_char == L'\0') + { + fill_end = in_pos; + fill_color = HIGHLIGHT_ERROR; + } + else if (wcschr(L"~%", escaped_char)) + { + if (in_pos == 1) + { + fill_end = in_pos + 1; + } + } + else if (escaped_char == L',') + { + if (bracket_count) + { + fill_end = in_pos + 1; + } + } + else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", escaped_char)) + { + fill_end = in_pos + 1; + } + else if (wcschr(L"c", escaped_char)) + { + // Like \ci. 
So highlight three characters + fill_end = in_pos + 1; + } + else if (wcschr(L"uUxX01234567", escaped_char)) + { + long long res=0; + int chars=2; + int base=16; + + wchar_t max_val = ASCII_MAX; + + switch (escaped_char) + { + case L'u': + { + chars=4; + max_val = UCS2_MAX; + in_pos++; + break; + } + + case L'U': + { + chars=8; + max_val = WCHAR_MAX; + in_pos++; + break; + } + + case L'x': + { + in_pos++; + break; + } + + case L'X': + { + max_val = BYTE_MAX; + in_pos++; + break; + } + + default: + { + // a digit like \12 + base=8; + chars=3; + break; + } + } + + // Consume + for (int i=0; i < chars && in_pos < buff_len; i++) + { + long d = convert_digit(buffstr.at(in_pos), base); + if (d < 0) + break; + res = (res * base) + d; + in_pos++; + } + //in_pos is now at the first character that could not be converted (or buff_len) + assert(in_pos >= backslash_pos && in_pos <= buff_len); + fill_end = in_pos; + + // It's an error if we exceeded the max value + if (res > max_val) + fill_color = HIGHLIGHT_ERROR; + + // Subtract one from in_pos, so that the increment in the loop will move to the next character + in_pos--; + } + assert(fill_end >= backslash_pos); + std::fill(colors + backslash_pos, colors + fill_end, fill_color); + } + else + { + // Not a backslash + switch (c) + { + case L'~': + case L'%': + { + if (in_pos == 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + break; + } + + case L'$': + { + assert(in_pos < buff_len); + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + + case L'*': + case L'?': + case L'(': + case L')': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + break; + } + + case L'{': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count++; + break; + } + + case L'}': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count--; + break; + } + + case L',': + { + 
if (bracket_count > 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + + break; + } + + case L'\'': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_single_quoted; + break; + } + + case L'\"': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_double_quoted; + break; + } + + } + } + break; + } + + /* + Mode 1 means single quoted string, i.e 'foo' + */ + case e_single_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + if (c == L'\\') + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + } + else if (c == L'\'') + { + mode = e_unquoted; + } + break; + } + + /* + Mode 2 means double quoted string, i.e. "foo" + */ + case e_double_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + switch (c) + { + case L'"': + { + mode = e_unquoted; + break; + } + + case L'\\': + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'' || escaped_char == L'$') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + break; + } + + case L'$': + { + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + } + break; + } + } + } +} + +// Color all of the arguments of the given command +static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) +{ + const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); + + wcstring param; + for (node_offset_t 
i=0; i < nodes.size(); i++) + { + const parse_node_t *child = nodes.at(i); + assert(child != NULL && child->type == symbol_argument); + param.assign(src, child->source_start, child->source_length); + color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL); + } +} + +static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) +{ + for (node_offset_t idx=0; idx < parent.child_count; idx++) + { + const parse_node_t *child = tree.get_child(parent, idx); + if (child != NULL && child->type == type && child->has_source()) + { + color_node(*child, color, color_array); + } + } +} + +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + ASSERT_IS_BACKGROUND_THREAD(); + + const size_t length = buff.size(); + assert(buff.size() == color.size()); + + if (length == 0) + return; + + std::fill(color.begin(), color.end(), -1); + + /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ + const wcstring working_directory = env_get_pwd_slash(); + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + + /* Walk the node tree */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + + switch (node.type) + { + // Color direct string descendants, e.g. 'for' and 'in'. 
+ case symbol_for_header: + case symbol_while_header: + case symbol_begin_header: + case symbol_function_header: + case symbol_if_clause: + case symbol_else_clause: + case symbol_case_item: + case symbol_switch_statement: + case symbol_boolean_statement: + case symbol_decorated_statement: + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + break; + + case symbol_redirection: + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); + break; + + case parse_token_type_background: + case parse_token_type_end: + color_node(node, HIGHLIGHT_END, color); + break; + + case symbol_plain_statement: + { + // Color the command + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + + // Color arguments + const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); + if (arguments != NULL) + { + color_arguments(buff, parse_tree, *arguments, color); + } + } + break; + + + case symbol_arguments_or_redirections_list: + case symbol_argument_list: + /* Nothing, these are handled by their parents */ + break; + + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + color_node(node, HIGHLIGHT_ERROR, color); + break; + + case parse_special_type_comment: + color_node(node, HIGHLIGHT_COMMENT, color); + break; + + default: + break; + } + } +} /** Perform quote and parenthesis highlighting on the specified string. diff --git a/highlight.h b/highlight.h index 6747bba51..ea8557918 100644 --- a/highlight.h +++ b/highlight.h @@ -84,6 +84,7 @@ struct file_detection_context_t; \param error a list in which a description of each error will be inserted. May be 0, in whcich case no error descriptions will be generated. 
*/ void highlight_shell(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_magic(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); /** Perform syntax highlighting for the text in buff. Matching quotes and paranthesis are highlighted. The result is diff --git a/parse_productions.cpp b/parse_productions.cpp index 61f7636de..b5efa11ca 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -135,14 +135,12 @@ RESOLVE(statement) return 2; case parse_keyword_else: - //symbol_stack_pop(); return NO_PRODUCTION; case parse_keyword_switch: return 3; case parse_keyword_end: - PARSER_DIE(); //todo return NO_PRODUCTION; // 'in' is only special within a for_header @@ -378,7 +376,7 @@ RESOLVE(arguments_or_redirections_list) PRODUCTIONS(argument_or_redirection) = { - {parse_token_type_string}, + {symbol_argument}, {parse_token_type_redirection} }; RESOLVE(argument_or_redirection) @@ -394,6 +392,18 @@ RESOLVE(argument_or_redirection) } } +PRODUCTIONS(argument) = +{ + {parse_token_type_string} +}; +RESOLVE_ONLY(argument) + +PRODUCTIONS(redirection) = +{ + {parse_token_type_redirection} +}; +RESOLVE_ONLY(redirection) + PRODUCTIONS(optional_background) = { {}, @@ -449,6 +459,8 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n TEST(plain_statement) TEST(arguments_or_redirections_list) TEST(argument_or_redirection) + TEST(argument) + TEST(redirection) TEST(optional_background) case parse_token_type_string: @@ -461,6 +473,14 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSER_DIE(); break; + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + case parse_special_type_comment: + fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case 
token_type_invalid: fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); PARSER_DIE(); diff --git a/parse_tree.cpp b/parse_tree.cpp index 7a809167e..bab295042 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -101,6 +101,11 @@ wcstring token_type_description(parse_token_type_t type) return L"arguments_or_redirections_list"; case symbol_argument_or_redirection: return L"argument_or_redirection"; + case symbol_argument: + return L"symbol_argument"; + case symbol_redirection: + return L"symbol_redirection"; + case parse_token_type_string: return L"token_string"; @@ -116,6 +121,14 @@ wcstring token_type_description(parse_token_type_t type) return L"token_terminate"; case symbol_optional_background: return L"optional_background"; + + case parse_special_type_parse_error: + return L"parse_error"; + case parse_special_type_tokenizer_error: + return L"tokenizer_error"; + case parse_special_type_comment: + return L"comment"; + } return format_string(L"Unknown token type %ld", static_cast(type)); } @@ -216,6 +229,14 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ case TOK_REDIRECT_NOCLOB: result.type = parse_token_type_redirection; break; + + case TOK_ERROR: + result.type = parse_special_type_tokenizer_error; + break; + + case TOK_COMMENT: + result.type = parse_special_type_comment; + break; default: @@ -248,9 +269,16 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } if (node.type == parse_token_type_string) { - result->append(L": \""); - result->append(src, node.source_start, node.source_length); - result->append(L"\""); + if (node.source_start == -1) + { + append_format(*result, L" (no source)"); + } + else + { + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); + } } result->push_back(L'\n'); ++*line; @@ -311,20 +339,24 @@ class parse_ll_t // Constructor parse_ll_t() : fatal_errored(false) { - // initial node - 
symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token - nodes.push_back(parse_node_t(symbol_job_list)); + this->reset(); } bool top_node_match_token(parse_token_t token); void accept_token(parse_token_t token, const wcstring &src); + + // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. + void reset(void); void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); void dump_stack(void) const; + + // Figure out the ranges of intermediate nodes + void determine_node_ranges(); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -453,9 +485,41 @@ void parse_ll_t::dump_stack(void) const } } +// Give each node a source range equal to the union of the ranges of its children +// Terminal nodes already have source ranges (and no children) +// Since children always appear after their parents, we can implement this very simply by walking backwards +void parse_ll_t::determine_node_ranges(void) +{ + const size_t source_start_invalid = -1; + size_t idx = nodes.size(); + while (idx--) + { + parse_node_t *parent = &nodes.at(idx); + + // Skip nodes that already have a source range. These are terminal nodes. + if (parent->source_start != source_start_invalid) + continue; + + // Ok, this node needs a source range. Get all of its children, and then set its range. 
+ size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge + for (node_offset_t i=0; i < parent->child_count; i++) + { + const parse_node_t &child = nodes.at(parent->child_offset(i)); + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } + + if (min_start != source_start_invalid) { + assert(max_end >= min_start); + parent->source_start = min_start; + parent->source_length = max_end - min_start; + } + } +} + void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) { - this->dump_stack(); + //this->dump_stack(); parse_error_t err; va_list va; @@ -481,8 +545,27 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } +void parse_ll_t::reset(void) +{ + // add a new job_list node and then reset our symbol list to point at it + node_offset_t where = nodes.size(); + nodes.push_back(parse_node_t(symbol_job_list)); + + symbol_stack.clear(); + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token + this->fatal_errored = false; +} + + bool parse_ll_t::top_node_match_token(parse_token_t token) { + if (symbol_stack.empty()) + { + // This can come about with an unbalanced 'end' or 'else', which causes us to terminate the outermost job list. + this->fatal_errored = true; + return false; + } + PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); bool result = false; @@ -520,10 +603,23 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - PARSE_ASSERT(! symbol_stack.empty()); + bool consumed = false; + + // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. 
+ if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) + { + parse_node_t err_node(token.type); + err_node.source_start = token.source_start; + err_node.source_length = token.source_length; + nodes.push_back(err_node); + consumed = true; + } + while (! consumed && ! this->fatal_errored) { + PARSE_ASSERT(! symbol_stack.empty()); + if (top_node_match_token(token)) { if (logit) @@ -534,6 +630,10 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; } + // top_node_match_token may indicate an error if our stack is empty + if (this->fatal_errored) + break; + // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); @@ -548,6 +648,12 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Manipulate the symbol stack. // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); + + // If we end up with an empty stack, something bad happened, like an unbalanced end + if (symbol_stack.empty()) + { + this->parse_error(token, L"All symbols removed from symbol stack. 
Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str()); + } } } } @@ -556,6 +662,11 @@ parse_t::parse_t() : parser(new parse_ll_t()) { } +parse_t::~parse_t() +{ + delete parser; +} + static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) { parse_keyword_t result = parse_keyword_none; @@ -597,21 +708,20 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors) +bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { - tokenizer_t tok = tokenizer_t(str.c_str(), 0); + tok_flags_t tok_options = TOK_SQUASH_ERRORS; + if (parse_flags & parse_flag_include_comments) + tok_options |= TOK_SHOW_COMMENTS; + + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); int tok_start = tok_get_pos(&tok); size_t tok_extent = tok_get_extent(&tok); - - if (tok_type == TOK_ERROR) - { - fprintf(stderr, "Tokenizer error\n"); - break; - } + assert(tok_extent < 10000000); //paranoia parse_token_t token = parse_token_from_tokenizer_token(tok_type); token.tokenizer_type = tok_type; @@ -621,12 +731,31 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ this->parser->accept_token(token, str); if (this->parser->fatal_errored) - break; + { + if (parse_flags & parse_flag_continue_after_error) + { + /* Mark an error and then keep going */ + token.type = parse_special_type_parse_error; + token.keyword = parse_keyword_none; + this->parser->accept_token(token, str); + this->parser->reset(); + } + else + { + /* Bail out */ + break; + } + } } + // Teach each node where its source range is + 
this->parser->determine_node_ranges(); + +#if 0 wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); +#endif if (output != NULL) { @@ -642,3 +771,40 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ return ! this->parser->fatal_errored; } + +const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const +{ + const parse_node_t *result = NULL; + PARSE_ASSERT(which < parent.child_count); + node_offset_t child_offset = parent.child_offset(which); + if (child_offset < this->size()) + { + result = &this->at(child_offset); + } + + // If we are given an expected type, then the node must be null or that type + if (result != NULL) + { + assert(expected_type == token_type_invalid || expected_type == result->type); + } + + return result; +} + +static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) +{ + if (parent.type == type) result->push_back(&parent); + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = tree.get_child(parent, i); + assert(child != NULL); + find_nodes_recursive(tree, *child, type, result); + } +} + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const +{ + parse_node_list_t result; + find_nodes_recursive(*this, parent, type, &result); + return result; +} diff --git a/parse_tree.h b/parse_tree.h index c53864258..25b63a0ca 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -15,7 +15,7 @@ #include #define PARSE_ASSERT(a) assert(a) -#define PARSER_DIE() exit_without_destructors(-1) +#define PARSER_DIE() do { 
fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) class parse_node_t; class parse_node_tree_t; @@ -36,6 +36,18 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; +enum +{ + parse_flag_none = 0, + + /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */ + parse_flag_continue_after_error = 1 << 0, + + /* Include comment tokens */ + parse_flag_include_comments = 1 << 1 +}; +typedef unsigned int parse_tree_flags_t; + class parse_ll_t; class parse_t { @@ -43,7 +55,8 @@ class parse_t public: parse_t(); - bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); + ~parse_t(); + bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); }; enum parse_token_type_t @@ -80,6 +93,9 @@ enum parse_token_type_t symbol_argument_list_nonempty, symbol_argument_list, + symbol_argument, + symbol_redirection, + symbol_optional_background, // Terminal types @@ -90,6 +106,11 @@ enum parse_token_type_t parse_token_type_end, parse_token_type_terminate, + // Very special terminal types that don't appear in the production list + parse_special_type_parse_error, + parse_special_type_tokenizer_error, + parse_special_type_comment, + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -145,7 +166,7 @@ class parse_node_t wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0) { } @@ -154,10 +175,23 @@ class parse_node_t PARSE_ASSERT(which < child_count); return child_start + which; } + + bool has_source() const + { + return source_start != 
(size_t)(-1); + } }; class parse_node_tree_t : public std::vector { + public: + + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + + /* Find all the nodes of a given type underneath a given node */ + typedef std::vector parse_node_list_t; + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; @@ -214,7 +248,8 @@ class parse_node_tree_t : public std::vector arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list - argument_or_redirection = redirection | + argument_or_redirection = argument | redirection + argument = redirection = terminator = | From e58b73179f4727c79465c6f273aef377b9bb8bee Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 11 Aug 2013 00:35:00 -0700 Subject: [PATCH 025/177] More work on new parser --- builtin.cpp | 5 +- fish_tests.cpp | 192 ++++++++++++++++++++++++- highlight.cpp | 61 +++++--- parse_productions.cpp | 80 ++++++----- parse_tree.cpp | 320 +++++++++++++++++++++++++++--------------- parse_tree.h | 89 +++++++----- 6 files changed, 532 insertions(+), 215 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 4ae9e5b51..90fb099bb 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv) } else { - if (0) { + const wcstring dump = parse_dump_tree(parse_tree, src); + fprintf(stderr, "%ls", dump.c_str()); + if (0) + { parse_execution_context_t ctx(parse_tree, src); parse_execution_simulator_t sim; sim.context = &ctx; diff --git a/fish_tests.cpp b/fish_tests.cpp index dd16deb99..9d8f2b803 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1816,13 +1816,16 @@ static void test_new_parser_correctness(void) {L"if true ; end", true}, {L"if true; end ; 
end", false}, {L"if end; end ; end", false}, - {L"end", false} + {L"if end", false}, + {L"end", false}, + {L"for i i", false}, + {L"for i in a b c ; end", true} }; - + for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) { const parser_test_t *test = &parser_tests[i]; - + parse_node_tree_t parse_tree; parse_t parser; bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); @@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void) } } say(L"Parse tests complete"); +} +struct parser_fuzz_token_t +{ + parse_token_type_t token_type; + parse_keyword_t keyword; + + parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none) + { + } +}; + +static bool increment(std::vector &tokens) +{ + size_t i, end = tokens.size(); + for (i=0; i < end; i++) + { + bool wrapped = false; + + struct parser_fuzz_token_t &token = tokens[i]; + bool incremented_in_keyword = false; + if (token.token_type == parse_token_type_string) + { + // try incrementing the keyword + token.keyword++; + if (token.keyword <= LAST_KEYWORD) + { + incremented_in_keyword = true; + } + else + { + token.keyword = parse_keyword_none; + incremented_in_keyword = false; + } + } + + if (! incremented_in_keyword) + { + token.token_type++; + if (token.token_type > LAST_TERMINAL_TYPE) + { + token.token_type = FIRST_TERMINAL_TYPE; + wrapped = true; + } + } + + if (! 
wrapped) + { + break; + } + } + return i == end; +} + +static void test_new_parser_fuzzing(void) +{ + say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + double start = timef(); + // ensure nothing crashes + size_t max = 5; + for (size_t len=1; len <= max; len++) + { + fprintf(stderr, "%lu / %lu\n", len, max); + std::vector tokens(len); + do + { + parse_t parser; + parse_node_tree_t parse_tree; + parse_error_list_t errors; + for (size_t i=0; i < len; i++) + { + const parser_fuzz_token_t &token = tokens[i]; + parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors); + } + + // keep going until we wrap + } + while (! increment(tokens)); + } + double end = timef(); + say(L"All fuzzed in %f seconds!", end - start); } __attribute__((unused)) @@ -1863,6 +1946,104 @@ static void test_new_parser(void) } } +static void test_highlighting(void) +{ + say(L"Testing syntax highlighting"); + if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed"); + if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed"); + if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed"); + + // Here are the components of our source and the colors we expect those to be + struct highlight_component_t { + const wchar_t *txt; + int color; + }; + + const highlight_component_t components1[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components2[] = + { + {L"command", HIGHLIGHT_COMMAND}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components3[] = + { + {L"if command ls", HIGHLIGHT_COMMAND}, + {L"; ", HIGHLIGHT_END}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"; ", HIGHLIGHT_END}, + 
{L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR}, + {L"; ", HIGHLIGHT_END}, + {L"end", HIGHLIGHT_COMMAND}, + {NULL, -1} + }; + + const highlight_component_t *tests[] = {components1, components2, components3}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const highlight_component_t *components = tests[which]; + // Count how many we have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the text + wcstring text; + std::vector expected_colors; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L' '); + expected_colors.push_back(0); + } + text.append(components[i].txt); + + // hackish space handling + const size_t text_len = wcslen(components[i].txt); + for (size_t j=0; j < text_len; j++) + { + bool is_space = (components[i].txt[j] == L' '); + expected_colors.push_back(is_space ? 0 : components[i].color); + } + } + assert(expected_colors.size() == text.size()); + + std::vector colors(text.size()); + highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t()); + + if (expected_colors.size() != colors.size()) + { + err(L"Color vector has wrong size! 
Expected %lu, actual %lu", expected_colors.size(), colors.size()); + } + assert(expected_colors.size() == colors.size()); + for (size_t i=0; i < text.size(); i++) + { + if (expected_colors.at(i) != colors.at(i)) + { + const wcstring spaces(i, L' '); + err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + } + } + } + + system("rm -Rf /tmp/fish_highlight_test"); +} + /** Main test */ @@ -1884,9 +2065,10 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_new_parser_correctness(); + //test_new_parser_fuzzing(); + //test_new_parser_correctness(); + //test_highlighting(); //test_new_parser(); - return 0; test_format(); test_escape(); diff --git a/highlight.cpp b/highlight.cpp index 3c0838902..f2a7d6e0c 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); - if (1) { + if (0) + { highlight_shell_magic(buff, color, pos, error, vars); return; } @@ -1451,12 +1452,12 @@ static void color_node(const parse_node_t &node, int color, std::vector &co // Can only color nodes with valid source ranges if (! 
node.has_source()) return; - + // Fill the color array with our color in the corresponding range size_t source_end = node.source_start + node.source_length; assert(source_end >= node.source_start); assert(source_end <= color_array.size()); - + std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } @@ -1464,7 +1465,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c { const size_t buff_len = buffstr.size(); std::fill(colors, colors + buff_len, normal_status); - + enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; int bracket_count=0; for (size_t in_pos=0; in_pos < buff_len; in_pos++) @@ -1479,11 +1480,11 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR const size_t backslash_pos = in_pos; size_t fill_end = backslash_pos; - + // Move to the escaped character in_pos++; const wchar_t escaped_char = (in_pos < buff_len ? 
buffstr.at(in_pos) : L'\0'); - + if (escaped_char == L'\0') { fill_end = in_pos; @@ -1559,7 +1560,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c break; } } - + // Consume for (int i=0; i < chars && in_pos < buff_len; i++) { @@ -1572,11 +1573,11 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c //in_pos is now at the first character that could not be converted (or buff_len) assert(in_pos >= backslash_pos && in_pos <= buff_len); fill_end = in_pos; - + // It's an error if we exceeded the max value if (res > max_val) fill_color = HIGHLIGHT_ERROR; - + // Subtract one from in_pos, so that the increment in the loop will move to the next character in_pos--; } @@ -1746,7 +1747,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) { const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); - + wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) { @@ -1783,20 +1784,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); - + /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); - + /* Walk the node tree */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { const parse_node_t &node = *iter; - + switch (node.type) { - // Color direct string descendants, e.g. 'for' and 'in'. + // Color direct string descendants, e.g. 'for' and 'in'. 
case symbol_for_header: case symbol_while_header: case symbol_begin_header: @@ -1809,21 +1810,35 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_decorated_statement: color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); break; - + + case symbol_if_statement: + { + // Color the 'end' + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + + // Color arguments and redirections + const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list); + if (arguments != NULL) + { + color_arguments(buff, parse_tree, *arguments, color); + } + } + break; + case symbol_redirection: color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); break; - + case parse_token_type_background: case parse_token_type_end: color_node(node, HIGHLIGHT_END, color); break; - + case symbol_plain_statement: { // Color the command color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - + // Color arguments const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); if (arguments != NULL) @@ -1832,22 +1847,22 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } } break; - - + + case symbol_arguments_or_redirections_list: case symbol_argument_list: /* Nothing, these are handled by their parents */ break; - + case parse_special_type_parse_error: case parse_special_type_tokenizer_error: color_node(node, HIGHLIGHT_ERROR, color); break; - + case parse_special_type_comment: color_node(node, HIGHLIGHT_COMMENT, color); break; - + default: break; } diff --git a/parse_productions.cpp b/parse_productions.cpp index b5efa11ca..3165a2f0d 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -13,7 +13,7 @@ static bool production_is_valid(const production_options_t production_list, prod { if (which < 0 || which >= MAX_PRODUCTIONS) 
return false; - + bool nonempty_found = false; for (int i=which; i < MAX_PRODUCTIONS; i++) { @@ -249,8 +249,10 @@ RESOLVE(argument_list) { switch (token_type) { - case parse_token_type_string: return 1; - default: return 0; + case parse_token_type_string: + return 1; + default: + return 0; } } @@ -429,40 +431,40 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n { fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); } - + /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; switch (node_type) { - TEST(job_list) - TEST(job) - TEST(statement) - TEST(job_continuation) - TEST(boolean_statement) - TEST(block_statement) - TEST(if_statement) - TEST(if_clause) - TEST(else_clause) - TEST(else_continuation) - TEST(switch_statement) - TEST(decorated_statement) - TEST(case_item_list) - TEST(case_item) - TEST(argument_list_nonempty) - TEST(argument_list) - TEST(block_header) - TEST(for_header) - TEST(while_header) - TEST(begin_header) - TEST(function_header) - TEST(plain_statement) - TEST(arguments_or_redirections_list) - TEST(argument_or_redirection) - TEST(argument) - TEST(redirection) - TEST(optional_background) - + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list_nonempty) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + 
TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(argument) + TEST(redirection) + TEST(optional_background) + case parse_token_type_string: case parse_token_type_pipe: case parse_token_type_redirection: @@ -472,33 +474,33 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; - + case parse_special_type_parse_error: case parse_special_type_tokenizer_error: case parse_special_type_comment: fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; - - + + case token_type_invalid: fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); PARSER_DIE(); break; - + } PARSE_ASSERT(production_list != NULL); PARSE_ASSERT(resolver != NULL); - + const production_t *result = NULL; production_option_idx_t which = resolver(input_type, input_keyword, out_tag); - + if (log_it) { fprintf(stderr, "\tresolved to %u\n", (unsigned)which); } - + if (which == NO_PRODUCTION) { if (log_it) diff --git a/parse_tree.cpp b/parse_tree.cpp index bab295042..b64c6d9f3 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -4,6 +4,7 @@ using namespace parse_productions; +/** Returns a string description of this parse error */ wcstring parse_error_t::describe(const wcstring &src) const { wcstring result = text; @@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const return result; } +/** Returns a string description of the given token type */ wcstring token_type_description(parse_token_type_t type) { switch (type) @@ -121,7 +123,7 @@ wcstring token_type_description(parse_token_type_t type) return L"token_terminate"; case symbol_optional_background: return L"optional_background"; - + case parse_special_type_parse_error: return L"parse_error"; case parse_special_type_tokenizer_error: @@ -172,70 +174,68 @@ wcstring 
keyword_description(parse_keyword_t k) } } +/** Returns a string description of the given parse node */ wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); return result; } +/** A struct representing the token type passed to */ struct parse_token_t { enum parse_token_type_t type; // The type of the token as represented by the parser - enum token_type tokenizer_type; // The type of the token as represented by the tokenizer enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; - wcstring describe() const; + wcstring describe() const + { + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; + } }; -wcstring parse_token_t::describe(void) const +/* Convert from tokenizer_t's token type to a parse_token_t type */ +static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) { - wcstring result = token_type_description(type); - if (keyword != parse_keyword_none) - { - append_format(result, L" <%ls>", keyword_description(keyword).c_str()); - } - return result; -} - -// Convert from tokenizer_t's token type to our token -static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) -{ - parse_token_t result = {}; - result.tokenizer_type = tokenizer_token_type; + parse_token_type_t result = token_type_invalid; switch (tokenizer_token_type) { case TOK_STRING: - result.type = parse_token_type_string; + result = parse_token_type_string; break; case TOK_PIPE: - result.type = parse_token_type_pipe; + result = parse_token_type_pipe; break; case TOK_END: - result.type = parse_token_type_end; + result = parse_token_type_end; break; case TOK_BACKGROUND: - result.type = parse_token_type_background; + result = parse_token_type_background; break; - + case TOK_REDIRECT_OUT: case 
TOK_REDIRECT_APPEND: case TOK_REDIRECT_IN: case TOK_REDIRECT_FD: case TOK_REDIRECT_NOCLOB: - result.type = parse_token_type_redirection; + result = parse_token_type_redirection; break; - + case TOK_ERROR: - result.type = parse_special_type_tokenizer_error; + result = parse_special_type_tokenizer_error; break; - + case TOK_COMMENT: - result.type = parse_special_type_comment; + result = parse_special_type_comment; break; @@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } +/* Helper function for dump_tree */ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); @@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } } -__attribute__((unused)) -static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) +/* Gives a debugging textual description of a parse tree */ +wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; @@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) return result; } +/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */ struct parse_stack_element_t { enum parse_token_type_t type; @@ -309,7 +311,7 @@ struct parse_stack_element_t explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) { } - + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) { } @@ -323,40 +325,31 @@ struct parse_stack_element_t } return result; } - }; +/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. 
Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */ class parse_ll_t { - friend class parse_t; - - std::vector symbol_stack; // LL parser stack + /* Traditional symbol stack of the LL parser */ + std::vector symbol_stack; + + /* Parser output. This is a parse tree, but stored in an array. */ parse_node_tree_t nodes; + /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ bool fatal_errored; + + /* List of errors we have encountered */ parse_error_list_t errors; - // Constructor - parse_ll_t() : fatal_errored(false) - { - this->reset(); - } - - bool top_node_match_token(parse_token_t token); - - void accept_token(parse_token_t token, const wcstring &src); - - // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. - void reset(void); + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ + bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); void dump_stack(void) const; - - // Figure out the ranges of intermediate nodes - void determine_node_ranges(); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -413,33 +406,38 @@ class parse_ll_t if (! count) fprintf(stderr, "\t\n"); } - + // Add the children. 
Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) const size_t child_start = nodes.size(); size_t child_count = 0; for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { production_element_t elem = (*production)[i]; - if (production_element_is_valid(elem)) + if (!production_element_is_valid(elem)) + { + // All done, bail out + break; + } + else { // Generate the parse node. Note that this push_back may invalidate node. - parse_token_type_t child_type = production_element_type(elem); - nodes.push_back(parse_node_t(child_type)); - child_count++; + parse_token_type_t child_type = production_element_type(elem); + nodes.push_back(parse_node_t(child_type)); + child_count++; } } - + // Update the parent const size_t parent_node_idx = symbol_stack.back().node_idx; parse_node_t &parent_node = nodes.at(parent_node_idx); - + // Should have no children yet PARSE_ASSERT(parent_node.child_count == 0); // Tell the node about its children parent_node.child_start = child_start; parent_node.child_count = child_count; - + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. symbol_stack.pop_back(); symbol_stack.reserve(symbol_stack.size() + child_count); @@ -452,6 +450,36 @@ class parse_ll_t } } + public: + + /* Constructor */ + parse_ll_t() : fatal_errored(false) + { + this->symbol_stack.reserve(16); + this->nodes.reserve(64); + this->reset_symbols_and_nodes(); + } + + /* Input */ + void accept_token(parse_token_t token); + + /* Indicate if we hit a fatal error */ + bool has_fatal_error(void) const + { + return this->fatal_errored; + } + + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ + void reset_symbols(void); + + /* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. 
This is called from the constructor. */ + void reset_symbols_and_nodes(void); + + /* Once parsing is complete, determine the ranges of intermediate nodes */ + void determine_node_ranges(); + + /* Acquire output after parsing. This transfers directly from within self */ + void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors); }; void parse_ll_t::dump_stack(void) const @@ -495,11 +523,11 @@ void parse_ll_t::determine_node_ranges(void) while (idx--) { parse_node_t *parent = &nodes.at(idx); - + // Skip nodes that already have a source range. These are terminal nodes. if (parent->source_start != source_start_invalid) continue; - + // Ok, this node needs a source range. Get all of its children, and then set its range. size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge for (node_offset_t i=0; i < parent->child_count; i++) @@ -508,8 +536,9 @@ void parse_ll_t::determine_node_ranges(void) min_start = std::min(min_start, child.source_start); max_end = std::max(max_end, child.source_start + child.source_length); } - - if (min_start != source_start_invalid) { + + if (min_start != source_start_invalid) + { assert(max_end >= min_start); parent->source_start = min_start; parent->source_length = max_end - min_start; @@ -517,11 +546,27 @@ void parse_ll_t::determine_node_ranges(void) } } +void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors) +{ + if (output != NULL) + { + std::swap(*output, this->nodes); + } + this->nodes.clear(); + + if (errors != NULL) + { + std::swap(*errors, this->errors); + } + this->errors.clear(); + this->symbol_stack.clear(); +} + void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ //this->dump_stack(); parse_error_t err; - + va_list va; va_start(va, fmt); err.text = vformat_string(fmt, va); @@ -545,19 +590,42 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::reset(void) +void parse_ll_t::reset_symbols(void) { - // add a new job_list node and then reset our symbol list to point at it + /* Add a new job_list node, and then reset our symbol list to point at it */ node_offset_t where = nodes.size(); nodes.push_back(parse_node_t(symbol_job_list)); - + symbol_stack.clear(); symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token this->fatal_errored = false; } +/* Reset both symbols and nodes */ +void parse_ll_t::reset_symbols_and_nodes(void) +{ + nodes.clear(); + this->reset_symbols(); +} -bool parse_ll_t::top_node_match_token(parse_token_t token) +static bool type_is_terminal_type(parse_token_type_t type) +{ + switch (type) + { + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + return true; + + default: + return false; + } +} + +bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) { if (symbol_stack.empty()) { @@ -565,47 +633,64 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) this->fatal_errored = true; return false; } - + PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - bool result = false; + bool handled = false; parse_stack_element_t &stack_top = symbol_stack.back(); - if (stack_top.type == token.type) + if (type_is_terminal_type(stack_top.type)) { - // So far so good. See if we need a particular keyword. - if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword) + // The top of the stack is terminal. 
We are going to handle this (because we can't produce from a terminal type) + handled = true; + + // Now see if we actually matched + bool matched = false; + if (stack_top.type == token.type) + { + switch (stack_top.type) + { + case parse_token_type_string: + // We matched if the keywords match, or no keyword was required + matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword); + break; + + default: + // For other types, we only require that the types match + matched = true; + break; + } + } + + if (matched) { // Success. Tell the node that it matched this token parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; - - // We consumed this symbol - symbol_stack.pop_back(); - result = true; } - else if (token.type == parse_token_type_pipe) + else { - // Pipes are primitive - symbol_stack.pop_back(); - result = true; + // Failure + this->fatal_errored = true; } + + // We handled the token, so pop the symbol stack + symbol_stack.pop_back(); } - return result; + return handled; } -void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) +void parse_ll_t::accept_token(parse_token_t token) { bool logit = false; if (logit) { - const wcstring txt = wcstring(src, token.source_start, token.source_length); fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - + bool consumed = false; - + // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) { @@ -619,8 +704,8 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) while (! consumed && ! this->fatal_errored) { PARSE_ASSERT(! 
symbol_stack.empty()); - - if (top_node_match_token(token)) + + if (top_node_handle_terminal_types(token)) { if (logit) { @@ -629,11 +714,11 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - + // top_node_match_token may indicate an error if our stack is empty if (this->fatal_errored) break; - + // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); @@ -648,7 +733,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Manipulate the symbol stack. // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); - + // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { @@ -713,9 +798,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; - + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); - for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok)) + for (; tok_has_next(&tok) && ! 
this->parser->has_fatal_error(); tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); @@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n size_t tok_extent = tok_get_extent(&tok); assert(tok_extent < 10000000); //paranoia - parse_token_t token = parse_token_from_tokenizer_token(tok_type); - token.tokenizer_type = tok_type; + parse_token_t token; + token.type = parse_token_type_from_tokenizer_token(tok_type); token.source_start = (size_t)tok_start; token.source_length = tok_extent; token.keyword = keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token, str); - - if (this->parser->fatal_errored) + this->parser->accept_token(token); + + if (this->parser->has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { /* Mark an error and then keep going */ token.type = parse_special_type_parse_error; token.keyword = parse_keyword_none; - this->parser->accept_token(token, str); - this->parser->reset(); + this->parser->accept_token(token); + this->parser->reset_symbols(); } else { @@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); #endif - if (output != NULL) - { - output->swap(this->parser->nodes); - this->parser->nodes.clear(); - } + // Acquire the output from the parser + this->parser->acquire_output(output, errors); + + // Indicate if we had a fatal error + return ! this->parser->has_fatal_error(); +} - if (errors != NULL) - { - errors->swap(this->parser->errors); - this->parser->errors.clear(); - } +bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) +{ + // Only strings can have keywords. 
So if we have a keyword, the type must be a string + assert(keyword == parse_keyword_none || token_type == parse_token_type_string); - return ! this->parser->fatal_errored; + parse_token_t token; + token.type = token_type; + token.keyword = keyword; + token.source_start = -1; + token.source_length = 0; + + this->parser->accept_token(token); + + return ! this->parser->has_fatal_error(); +} + +void parse_t::clear() +{ + this->parser->reset_symbols_and_nodes(); } const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const @@ -781,13 +879,13 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod { result = &this->at(child_offset); } - + // If we are given an expected type, then the node must be null or that type if (result != NULL) { assert(expected_type == token_type_invalid || expected_type == result->type); } - + return result; } diff --git a/parse_tree.h b/parse_tree.h index 25b63a0ca..18e3cffa2 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -36,29 +36,6 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; -enum -{ - parse_flag_none = 0, - - /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ - parse_flag_continue_after_error = 1 << 0, - - /* Include comment tokens */ - parse_flag_include_comments = 1 << 1 -}; -typedef unsigned int parse_tree_flags_t; - -class parse_ll_t; -class parse_t -{ - parse_ll_t * const parser; - -public: - parse_t(); - ~parse_t(); - bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); -}; - enum parse_token_type_t { token_type_invalid, @@ -92,10 +69,10 @@ enum parse_token_type_t symbol_argument_list_nonempty, symbol_argument_list, - + symbol_argument, symbol_redirection, - + symbol_optional_background, // Terminal types @@ -105,12 +82,15 @@ enum parse_token_type_t parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + // Very special terminal types that don't appear in the production list parse_special_type_parse_error, parse_special_type_tokenizer_error, parse_special_type_comment, - + + FIRST_TERMINAL_TYPE = parse_token_type_string, + LAST_TERMINAL_TYPE = parse_token_type_terminate, + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -132,9 +112,46 @@ enum parse_keyword_t parse_keyword_or, parse_keyword_not, parse_keyword_command, - parse_keyword_builtin + parse_keyword_builtin, + + LAST_KEYWORD = parse_keyword_builtin }; + +enum +{ + parse_flag_none = 0, + + /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ + parse_flag_continue_after_error = 1 << 0, + + /* Include comment tokens */ + parse_flag_include_comments = 1 << 1 +}; +typedef unsigned int parse_tree_flags_t; + +class parse_ll_t; +class parse_t +{ + parse_ll_t * const parser; + +public: + parse_t(); + ~parse_t(); + + /* Parse a string */ + bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + + /* Parse a single token */ + bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); + + /* Reset, ready to parse something else */ + void clear(); + +}; + +wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); + wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); @@ -158,7 +175,7 @@ class parse_node_t /* Type-dependent data */ uint32_t tag; - + /* Which production was used */ uint8_t production_idx; @@ -175,7 +192,7 @@ class parse_node_t PARSE_ASSERT(which < child_count); return child_start + which; } - + bool has_source() const { return source_start != (size_t)(-1); @@ -184,11 +201,11 @@ class parse_node_t class parse_node_tree_t : public std::vector { - public: - +public: + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. 
*/ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; - + /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; @@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector # A job_list is a list of jobs, separated by semicolons or newlines job_list = | - job_list | job job_list + job_list # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation @@ -251,9 +268,9 @@ class parse_node_tree_t : public std::vector argument_or_redirection = argument | redirection argument = redirection = - + terminator = | - + optional_background = | */ From 14741518a7fc52f110dcd5ca71216b423520b789 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 6 Oct 2013 16:23:45 -0700 Subject: [PATCH 026/177] Command highlighting now works --- fish_tests.cpp | 17 +++-- highlight.cpp | 168 +++++++++++++++++++++++++++++++++++++----- highlight.h | 4 + parse_productions.cpp | 42 +++++------ parse_tree.cpp | 117 +++++++++++++++++++++++------ parse_tree.h | 25 ++++++- 6 files changed, 299 insertions(+), 74 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 010303191..894408591 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1922,11 +1922,10 @@ static void test_new_parser_fuzzing(void) { parse_t parser; parse_node_tree_t parse_tree; - parse_error_list_t errors; for (size_t i=0; i < len; i++) { const parser_fuzz_token_t &token = tokens[i]; - parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors); + parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL); } // keep going until we wrap @@ -1940,7 +1939,7 @@ static void 
test_new_parser_fuzzing(void) __attribute__((unused)) static void test_new_parser(void) { - say(L"Testing new parser!"); + say(L"Testing new parser"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; @@ -2050,7 +2049,7 @@ static void test_highlighting(void) if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); - err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + err(L"Wrong color at index %lu in text (expected %#x, actual %#x):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); } } } @@ -2079,10 +2078,12 @@ int main(int argc, char **argv) reader_init(); env_init(); - //test_new_parser_fuzzing(); - //test_new_parser_correctness(); - //test_highlighting(); - //test_new_parser(); + test_highlighting(); + return 0; + test_new_parser_fuzzing(); + test_new_parser_correctness(); + test_highlighting(); + test_new_parser(); test_format(); test_escape(); diff --git a/highlight.cpp b/highlight.cpp index dc221a2f2..9837d95cc 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -667,18 +667,19 @@ static void highlight_param(const wcstring &buffstr, std::vector &colors, w } } -static int has_expand_reserved(const wchar_t *str) +static bool has_expand_reserved(const wcstring &str) { - while (*str) + bool result = false; + for (size_t i=0; i < str.size(); i++) { - if (*str >= EXPAND_RESERVED && - *str <= EXPAND_RESERVED_END) + wchar_t wc = str.at(i); + if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END) { - return 1; + result = true; + break; } - str++; } - return 0; + return result; } /* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ @@ -712,7 +713,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command /* Command. 
First check that the command actually exists. */ wcstring local_cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! expanded || has_expand_reserved(cmd)) { /* We can't expand this cmd, ignore it */ } @@ -1027,7 +1028,7 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const */ cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! expanded || has_expand_reserved(cmd)) { color.at(tok_get_pos(&tok)) = HIGHLIGHT_ERROR; } @@ -1308,17 +1309,22 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); - -// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { - ASSERT_IS_BACKGROUND_THREAD(); - if (0) + if (1) { highlight_shell_magic(buff, color, pos, error, vars); - return; } + else + { + highlight_shell_classic(buff, color, pos, error, vars); + } +} + +// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + ASSERT_IS_BACKGROUND_THREAD(); const size_t length = buff.size(); assert(buff.size() == color.size()); @@ -1461,6 +1467,7 @@ static void color_node(const parse_node_t &node, int color, std::vector &co std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } +/* This function is a 
disaster badly in need of refactoring */ static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) { const size_t buff_len = buffstr.size(); @@ -1743,6 +1750,28 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c } } +// Indicates whether the source range of the given node forms a valid path in the given working_directory +static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) +{ + if (! node.has_source()) + return false; + + + /* Get the node source, unescape it, and then pass it to is_potential_path along with the working directory (as a one element list) */ + bool result = false; + wcstring token(src, node.source_start, node.source_length); + if (unescape_string(token, 1)) + { + /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */ + if (! token.empty() && token.at(0) == HOME_DIRECTORY) + token.at(0) = L'~'; + + const wcstring_list_t working_directory_list(1, working_directory); + result = is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE); + } + return result; +} + // Color all of the arguments of the given command static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) { @@ -1754,22 +1783,87 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); param.assign(src, child->source_start, child->source_length); - color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL); + color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); } } +/* Color all the children of the command with the given type */ static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int 
color, std::vector &color_array) { for (node_offset_t idx=0; idx < parent.child_count; idx++) { const parse_node_t *child = tree.get_child(parent, idx); - if (child != NULL && child->type == type && child->has_source()) + if (child != NULL && child->type == type) { color_node(*child, color, color_array); } } } +/* Color a possibly decorated command */ +static void color_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &cmd_node, enum parse_statement_decoration_t decoration, std::vector &color_array, const wcstring &working_directory, const env_vars_snapshot_t &vars) +{ + if (! cmd_node.has_source()) + return; + + /* Get the source of the command */ + wcstring cmd(src, cmd_node.source_start, cmd_node.source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (! expanded || has_expand_reserved(cmd)) + { + color_node(cmd_node, HIGHLIGHT_ERROR, color_array); + return; + } + + /* Determine which types we check, based on the decoration */ + bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; + if (decoration == parse_statement_decoration_command) + { + builtin_ok = false; + function_ok = false; + abbreviation_ok = false; + command_ok = true; + implicit_cd_ok = false; + } + else if (decoration == parse_statement_decoration_builtin) + { + builtin_ok = true; + function_ok = false; + abbreviation_ok = false; + command_ok = false; + implicit_cd_ok = false; + } + + /* Check them */ + bool is_valid = false; + + /* Builtins */ + if (! is_valid && builtin_ok) + is_valid = builtin_exists(cmd); + + /* Functions */ + if (! is_valid && function_ok) + is_valid = function_exists_no_autoload(cmd, vars); + + /* Abbreviations */ + if (! is_valid && abbreviation_ok) + is_valid = expand_abbreviation(cmd, NULL); + + /* Regular commands */ + if (! 
is_valid && command_ok) + is_valid = path_get_path(cmd, NULL, vars); + + /* Implicit cd */ + if (! is_valid && implicit_cd_ok) + is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); + + /* Color the node */ + int color = is_valid ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR; + color_node(cmd_node, color, color_array); +} + void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); @@ -1780,7 +1874,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t if (length == 0) return; - std::fill(color.begin(), color.end(), -1); + std::fill(color.begin(), color.end(), 0); /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); @@ -1790,6 +1884,11 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t parse_t parser; parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); +#if 0 + const wcstring dump = parse_dump_tree(parse_tree, buff); + fprintf(stderr, "%ls\n", dump.c_str()); +#endif + /* Walk the node tree */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { @@ -1837,7 +1936,12 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_plain_statement: { // Color the command - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + const parse_node_t *cmd = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd != NULL) + { + enum parse_statement_decoration_t decoration = static_cast(node.tag); + color_command(buff, parse_tree, *cmd, decoration, color, working_directory, vars); + } // Color arguments const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); @@ 
-1867,6 +1971,32 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t break; } } + + if (pos <= buff.size()) + { + /* If the cursor is over an argument, and that argument is a valid path, underline it */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + /* See if this node contains the cursor */ + if (node.type == symbol_argument && node.source_contains_location(pos)) + { + /* See if this is a valid path */ + if (node_is_potential_path(buff, node, working_directory)) + { + /* It is, underline it. */ + for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) + { + /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ + if (! (color.at(i) & HIGHLIGHT_ERROR)) + { + color.at(i) |= HIGHLIGHT_VALID_PATH; + } + } + } + } + } + } } /** diff --git a/highlight.h b/highlight.h index ea8557918..eb123258c 100644 --- a/highlight.h +++ b/highlight.h @@ -134,5 +134,9 @@ enum typedef unsigned int path_flags_t; bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path = NULL); +/* For testing */ +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); + #endif diff --git a/parse_productions.cpp b/parse_productions.cpp index 3165a2f0d..4876ba58e 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -30,7 +30,7 @@ static bool production_is_valid(const production_options_t production_list, prod #define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) #define 
RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } -#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) +#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) /* A job_list is a list of jobs, separated by semicolons or newlines */ @@ -167,20 +167,20 @@ RESOLVE(statement) PRODUCTIONS(if_statement) = { - {symbol_if_clause, symbol_else_clause, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) PRODUCTIONS(if_clause) = { - { PRODUCE_KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } + { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } }; RESOLVE_ONLY(if_clause) PRODUCTIONS(else_clause) = { { }, - { PRODUCE_KEYWORD(parse_keyword_else), symbol_else_continuation } + { KEYWORD(parse_keyword_else), symbol_else_continuation } }; RESOLVE(else_clause) { @@ -211,7 +211,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { PRODUCE_KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, PRODUCE_KEYWORD(parse_keyword_end)} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} }; RESOLVE_ONLY(switch_statement) @@ -230,7 +230,7 @@ RESOLVE(case_item_list) PRODUCTIONS(case_item) = { - {PRODUCE_KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} + {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} }; RESOLVE_ONLY(case_item) @@ -258,7 +258,7 @@ RESOLVE(argument_list) PRODUCTIONS(block_statement) = { - {symbol_block_header, parse_token_type_end, symbol_job_list, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + 
{symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) @@ -290,34 +290,34 @@ RESOLVE(block_header) PRODUCTIONS(for_header) = { - {PRODUCE_KEYWORD(parse_keyword_for), parse_token_type_string, PRODUCE_KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} + {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(for_header) PRODUCTIONS(while_header) = { - {PRODUCE_KEYWORD(parse_keyword_while), symbol_statement} + {KEYWORD(parse_keyword_while), symbol_statement} }; RESOLVE_ONLY(while_header) PRODUCTIONS(begin_header) = { - {PRODUCE_KEYWORD(parse_keyword_begin)} + {KEYWORD(parse_keyword_begin)} }; RESOLVE_ONLY(begin_header) PRODUCTIONS(function_header) = { - {PRODUCE_KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} + {KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} }; RESOLVE_ONLY(function_header) /* A boolean statement is AND or OR or NOT */ PRODUCTIONS(boolean_statement) = { - {PRODUCE_KEYWORD(parse_keyword_and), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_or), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_not), symbol_statement} + {KEYWORD(parse_keyword_and), symbol_statement}, + {KEYWORD(parse_keyword_or), symbol_statement}, + {KEYWORD(parse_keyword_not), symbol_statement} }; RESOLVE(boolean_statement) { @@ -336,19 +336,19 @@ RESOLVE(boolean_statement) PRODUCTIONS(decorated_statement) = { - {PRODUCE_KEYWORD(parse_keyword_command), symbol_plain_statement}, - {PRODUCE_KEYWORD(parse_keyword_builtin), symbol_plain_statement}, - {symbol_plain_statement} + {symbol_plain_statement}, + {KEYWORD(parse_keyword_command), symbol_plain_statement}, + {KEYWORD(parse_keyword_builtin), symbol_plain_statement}, }; RESOLVE(decorated_statement) { switch (token_keyword) { - case parse_keyword_command: - return 0; - 
case parse_keyword_builtin: - return 1; default: + return 0; + case parse_keyword_command: + return 1; + case parse_keyword_builtin: return 2; } } diff --git a/parse_tree.cpp b/parse_tree.cpp index b64c6d9f3..698297160 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -339,9 +339,12 @@ class parse_ll_t /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ bool fatal_errored; + /* Whether we should collect error messages or not */ + bool should_generate_error_messages; + /* List of errors we have encountered */ parse_error_list_t errors; - + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ bool top_node_handle_terminal_types(parse_token_t token); @@ -453,7 +456,7 @@ class parse_ll_t public: /* Constructor */ - parse_ll_t() : fatal_errored(false) + parse_ll_t() : fatal_errored(false), should_generate_error_messages(true) { this->symbol_stack.reserve(16); this->nodes.reserve(64); @@ -469,6 +472,12 @@ class parse_ll_t return this->fatal_errored; } + /* Indicate whether we want to generate error messages */ + void set_should_generate_error_messages(bool flag) + { + this->should_generate_error_messages = flag; + } + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ void reset_symbols(void); @@ -564,30 +573,36 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - - err.source_start = token.source_start; - err.source_length = token.source_length; - this->errors.push_back(err); this->fatal_errored = true; + if (this->should_generate_error_messages) + { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + } } void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { - wcstring desc = token_type_description(token.type); - parse_error_t error; - error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); - error.source_start = token.source_start; - error.source_start = token.source_length; - errors.push_back(error); fatal_errored = true; + if (this->should_generate_error_messages) + { + wcstring desc = token_type_description(token.type); + parse_error_t error; + error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + error.source_start = token.source_start; + error.source_start = token.source_length; + errors.push_back(error); + } } void parse_ll_t::reset_symbols(void) @@ -725,7 +740,14 @@ void parse_ll_t::accept_token(parse_token_t token) const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); if (production == NULL) { - this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + if (should_generate_error_messages) + { + this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + } + else + { + this->parse_error(token, NULL); + } // parse_error 
sets fatal_errored, which ends the loop } else @@ -737,7 +759,7 @@ void parse_ll_t::accept_token(parse_token_t token) // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { - this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str()); + this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); } } } @@ -793,11 +815,46 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } +// Set type-specific tags for nodes +// This is not in parse_ll_t because it knows about different node types +static void tag_nodes(const wcstring &src, parse_node_tree_t *tree) +{ + size_t count = tree->size(); + for (size_t i=0; i < count; i++) + { + const parse_node_t &node = tree->at(i); + switch (node.type) + { + case symbol_decorated_statement: + { + // Set a tag on the plain statement to indicate the decoration type + // The decoration types matches the production + bool is_decorated = (node.production_idx > 0); + + // Get the plain statement and set the tag equal to the production index we used + // This is an enum parse_statement_decoration_t + node_offset_t statement_idx = (is_decorated ? 
1 : 0); + parse_node_t *plain_statement = tree->get_child(node, statement_idx, symbol_plain_statement); + if (plain_statement != NULL) + { + plain_statement->tag = static_cast(node.production_idx); + } + } + break; + + default: + break; + } + } +} + bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; + + this->parser->set_should_generate_error_messages(errors != NULL); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok)) @@ -835,7 +892,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Teach each node where its source range is this->parser->determine_node_ranges(); - + + // Tag nodes + #if 0 wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); @@ -845,6 +904,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Acquire the output from the parser this->parser->acquire_output(output, errors); + // Set node tags + tag_nodes(str, output); + // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); } @@ -859,6 +921,9 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo token.keyword = keyword; token.source_start = -1; token.source_length = 0; + + bool wants_errors = (errors != NULL); + this->parser->set_should_generate_error_messages(wants_errors); this->parser->accept_token(token); @@ -889,6 +954,14 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } +/* Hackish non-const version of get_child */ +parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +{ + const parse_node_tree_t *const_this = this; + const parse_node_t *result = const_this->get_child(parent, which, expected_type); + return const_cast(result); +} + static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) { if (parent.type == type) result->push_back(&parent); diff --git a/parse_tree.h b/parse_tree.h index 18e3cffa2..f577a7def 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -124,7 +124,7 @@ enum /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ parse_flag_continue_after_error = 1 << 0, - + /* Include comment tokens */ parse_flag_include_comments = 1 << 1 }; @@ -155,7 +155,7 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); -/** Base class for nodes of a parse tree */ +/** Class for nodes of a parse tree */ class parse_node_t { public: @@ -193,24 +193,40 @@ class parse_node_t return child_start + which; } + /* Indicate if this node has a range of source code associated with it */ bool has_source() const { return source_start != (size_t)(-1); } + + /* Indicate if this node's source range contains a given location. The funny math makes this modulo-overflow safe, though overflow is not expected. */ + bool source_contains_location(size_t where) const + { + return this->has_source() && where >= source_start && where - source_start < source_length; + } }; +/* The parse tree itself */ class parse_node_tree_t : public std::vector { public: /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; +/* Statement decorations, stored in the tag of plain_statement. 
This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; /* Fish grammar: @@ -259,9 +275,10 @@ class parse_node_tree_t : public std::vector boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" +# The tag of a plain statement indicates which mode to use - decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list optional_background + decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement + plain_statement = arguments_or_redirections_list optional_background arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 20ccda69f4319cadbfb242f139e48a84699b503d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 7 Oct 2013 01:04:37 -0700 Subject: [PATCH 027/177] Command highlighting works --- highlight.cpp | 69 +++++++++++++++------------ parse_exec.cpp | 2 +- parse_productions.cpp | 9 +--- parse_tree.cpp | 105 +++++++++++++----------------------------- parse_tree.h | 38 +++++++++------ 5 files changed, 98 insertions(+), 125 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 9837d95cc..a8e8326e1 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1800,23 +1800,9 @@ static void color_children(const parse_node_tree_t &tree, const parse_node_t &pa } } -/* Color a possibly decorated command */ -static void color_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &cmd_node, enum parse_statement_decoration_t decoration, std::vector &color_array, const wcstring &working_directory, const env_vars_snapshot_t &vars) +/* Determine if a command is valid */ +static bool command_is_valid(const wcstring 
&cmd, enum parse_statement_decoration_t decoration, const wcstring &working_directory, const env_vars_snapshot_t &vars) { - if (! cmd_node.has_source()) - return; - - /* Get the source of the command */ - wcstring cmd(src, cmd_node.source_start, cmd_node.source_length); - - /* Try expanding it. If we cannot, it's an error. */ - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd)) - { - color_node(cmd_node, HIGHLIGHT_ERROR, color_array); - return; - } - /* Determine which types we check, based on the decoration */ bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; if (decoration == parse_statement_decoration_command) @@ -1859,9 +1845,8 @@ static void color_command(const wcstring &src, const parse_node_tree_t &tree, co if (! is_valid && implicit_cd_ok) is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); - /* Color the node */ - int color = is_valid ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR; - color_node(cmd_node, color, color_array); + /* Return what we got */ + return is_valid; } void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) @@ -1874,6 +1859,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t if (length == 0) return; + /* Start out at zero */ std::fill(color.begin(), color.end(), 0); /* Do something sucky and get the current working directory on this background thread. This should really be passed in. 
*/ @@ -1925,25 +1911,45 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t break; case symbol_redirection: + { color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); - break; + } + break; case parse_token_type_background: case parse_token_type_end: + { color_node(node, HIGHLIGHT_END, color); - break; + } + break; case symbol_plain_statement: { - // Color the command - const parse_node_t *cmd = parse_tree.get_child(node, 0, parse_token_type_string); - if (cmd != NULL) + // Get the decoration from the parent + enum parse_statement_decoration_t decoration = parse_statement_decoration_none; + const parse_node_t *decorated_statement = parse_tree.get_parent(node, symbol_decorated_statement); + if (decorated_statement != NULL) { - enum parse_statement_decoration_t decoration = static_cast(node.tag); - color_command(buff, parse_tree, *cmd, decoration, color, working_directory, vars); + decoration = static_cast(decorated_statement->production_idx); } - // Color arguments + /* Color the command */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + bool is_valid_cmd = false; + wcstring cmd(buff, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (expanded && ! has_expand_reserved(cmd)) + { + is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); + } + color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); + } + + /* Color arguments */ const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); if (arguments != NULL) { @@ -1978,8 +1984,13 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { const parse_node_t &node = *iter; - /* See if this node contains the cursor */ - if (node.type == symbol_argument && node.source_contains_location(pos)) + + /* Must be an argument with source */ + if (node.type != symbol_argument || ! node.has_source()) + continue; + + /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */ + if (pos >= node.source_start && pos - node.source_start <= node.source_length) { /* See if this is a valid path */ if (node_is_potential_path(buff, node, working_directory)) diff --git a/parse_exec.cpp b/parse_exec.cpp index 3f2074f4a..c424ad957 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -259,7 +259,7 @@ class parse_exec_t for (;;) { const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_argument_list || node.type == symbol_argument_list_nonempty); + PARSE_ASSERT(node.type == symbol_argument_list); if (node.type == symbol_argument_list) { // argument list, may be empty diff --git a/parse_productions.cpp b/parse_productions.cpp index 4876ba58e..0900977f7 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -234,16 +234,10 @@ PRODUCTIONS(case_item) = }; RESOLVE_ONLY(case_item) -PRODUCTIONS(argument_list_nonempty) = -{ - {parse_token_type_string, symbol_argument_list} -}; -RESOLVE_ONLY(argument_list_nonempty) - PRODUCTIONS(argument_list) = { {}, - {symbol_argument_list_nonempty} + {symbol_argument, symbol_argument_list} }; RESOLVE(argument_list) { @@ -451,7 +445,6 @@ const production_t 
*parse_productions::production_for_token(parse_token_type_t n TEST(decorated_statement) TEST(case_item_list) TEST(case_item) - TEST(argument_list_nonempty) TEST(argument_list) TEST(block_header) TEST(for_header) diff --git a/parse_tree.cpp b/parse_tree.cpp index 698297160..0a85a1d95 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -88,8 +88,6 @@ wcstring token_type_description(parse_token_type_t type) case symbol_case_item: return L"case_item"; - case symbol_argument_list_nonempty: - return L"argument_list_nonempty"; case symbol_argument_list: return L"argument_list"; @@ -369,24 +367,6 @@ class parse_ll_t return symbol_stack.back().type; } - void top_node_set_tag(uint32_t tag) - { - this->node_for_top_symbol().tag = tag; - } - - inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) - { - PARSE_ASSERT(tok->type != token_type_invalid); - tok->node_idx = nodes.size(); - nodes.push_back(parse_node_t(tok->type)); - nodes.at(parent_node_idx).child_count += 1; - } - - inline void symbol_stack_pop() - { - symbol_stack.pop_back(); - } - // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. inline void symbol_stack_pop_push_production(const production_t *production) { @@ -408,7 +388,9 @@ class parse_ll_t } if (! count) fprintf(stderr, "\t\n"); } - + + // Get the parent index. But we can't get the parent parse node yet, since it may be made invalid by adding children + const size_t parent_node_idx = symbol_stack.back().node_idx; // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) const size_t child_start = nodes.size(); @@ -425,13 +407,14 @@ class parse_ll_t { // Generate the parse node. Note that this push_back may invalidate node. 
parse_token_type_t child_type = production_element_type(elem); - nodes.push_back(parse_node_t(child_type)); + parse_node_t child = parse_node_t(child_type); + child.parent = parent_node_idx; + nodes.push_back(child); child_count++; } } // Update the parent - const size_t parent_node_idx = symbol_stack.back().node_idx; parse_node_t &parent_node = nodes.at(parent_node_idx); // Should have no children yet @@ -815,39 +798,6 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -// Set type-specific tags for nodes -// This is not in parse_ll_t because it knows about different node types -static void tag_nodes(const wcstring &src, parse_node_tree_t *tree) -{ - size_t count = tree->size(); - for (size_t i=0; i < count; i++) - { - const parse_node_t &node = tree->at(i); - switch (node.type) - { - case symbol_decorated_statement: - { - // Set a tag on the plain statement to indicate the decoration type - // The decoration types matches the production - bool is_decorated = (node.production_idx > 0); - - // Get the plain statement and set the tag equal to the production index we used - // This is an enum parse_statement_decoration_t - node_offset_t statement_idx = (is_decorated ? 1 : 0); - parse_node_t *plain_statement = tree->get_child(node, statement_idx, symbol_plain_statement); - if (plain_statement != NULL) - { - plain_statement->tag = static_cast(node.production_idx); - } - } - break; - - default: - break; - } - } -} - bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { tok_flags_t tok_options = TOK_SQUASH_ERRORS; @@ -904,9 +854,6 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Acquire the output from the parser this->parser->acquire_output(output, errors); - // Set node tags - tag_nodes(str, output); - // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); } @@ -938,28 +885,38 @@ void parse_t::clear() const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const { const parse_node_t *result = NULL; - PARSE_ASSERT(which < parent.child_count); - node_offset_t child_offset = parent.child_offset(which); - if (child_offset < this->size()) + + /* We may get nodes with no children if we had an imcomplete parse. Don't consider than an error */ + if (parent.child_count > 0) { - result = &this->at(child_offset); - } - - // If we are given an expected type, then the node must be null or that type - if (result != NULL) - { - assert(expected_type == token_type_invalid || expected_type == result->type); + PARSE_ASSERT(which < parent.child_count); + node_offset_t child_offset = parent.child_offset(which); + if (child_offset < this->size()) + { + result = &this->at(child_offset); + + /* If we are given an expected type, then the node must be null or that type */ + assert(expected_type == token_type_invalid || expected_type == result->type); + } } return result; } -/* Hackish non-const version of get_child */ -parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, parse_token_type_t expected_type) const { - const parse_node_tree_t *const_this = this; - const parse_node_t *result = const_this->get_child(parent, which, expected_type); - return const_cast(result); + const parse_node_t *result = NULL; + if (node.parent != NODE_OFFSET_INVALID) + { + PARSE_ASSERT(node.parent < this->size()); + const parse_node_t &parent = this->at(node.parent); + if (expected_type == token_type_invalid || expected_type == parent.type) + { + // The type matches (or no type was requested) + result = &parent; + } + } + return result; } static void find_nodes_recursive(const parse_node_tree_t &tree, const 
parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) diff --git a/parse_tree.h b/parse_tree.h index f577a7def..6fcbde0dc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -67,7 +67,6 @@ enum parse_token_type_t symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - symbol_argument_list_nonempty, symbol_argument_list, symbol_argument, @@ -168,6 +167,9 @@ class parse_node_t /* Length of our range in the source code */ size_t source_length; + + /* Parent */ + node_offset_t parent; /* Children */ node_offset_t child_start; @@ -183,7 +185,7 @@ class parse_node_t wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), tag(0) { } @@ -198,12 +200,6 @@ class parse_node_t { return source_start != (size_t)(-1); } - - /* Indicate if this node's source range contains a given location. The funny math makes this modulo-overflow safe, though overflow is not expected. */ - bool source_contains_location(size_t where) const - { - return this->has_source() && where >= source_start && where - source_start < source_length; - } }; /* The parse tree itself */ @@ -212,14 +208,20 @@ class parse_node_tree_t : public std::vector public: /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. 
*/ - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; - parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + + /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ + const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; + +/* Node type specific data, stored in the tag field */ + /* Statement decorations, stored in the tag of plain_statement. 
This matches the order of productions in decorated_statement */ enum parse_statement_decoration_t { @@ -228,6 +230,16 @@ enum parse_statement_decoration_t parse_statement_decoration_builtin }; +/* Argument flags as a bitmask, stored in the tag of argument */ +enum parse_argument_flags_t +{ + /* Indicates that this or a prior argument was --, so this should not be treated as an option */ + parse_argument_no_options = 1 << 0, + + /* Indicates that the argument is for a cd command */ + parse_argument_is_for_cd = 1 << 1 +}; + /* Fish grammar: # A job_list is a list of jobs, separated by semicolons or newlines @@ -260,9 +272,6 @@ enum parse_statement_decoration_t case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - argument_list_nonempty = argument_list - argument_list = | argument_list_nonempty - block_statement = block_header job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list @@ -280,6 +289,9 @@ enum parse_statement_decoration_t decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement plain_statement = arguments_or_redirections_list optional_background + argument_list = | argument argument_list + + arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection From 4f718e83b343cd2cf49c801968dd36cbce84a772 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 7 Oct 2013 03:56:09 -0700 Subject: [PATCH 028/177] Syntax highlighting now correctly handles cd --- fish_tests.cpp | 27 +++++++++++++++- highlight.cpp | 83 ++++++++++++++++++++++++++++++++++++-------------- parse_tree.cpp | 14 +++++++++ parse_tree.h | 3 ++ 4 files changed, 103 insertions(+), 24 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 894408591..bc631bf32 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2003,7 +2003,32 @@ 
static void test_highlighting(void) {NULL, -1} }; - const highlight_component_t *tests[] = {components1, components2, components3}; + /* Verify that cd shows errors for non-directories */ + const highlight_component_t components4[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {NULL, -1} + }; + + const highlight_component_t components5[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + const highlight_component_t components6[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"--help", HIGHLIGHT_PARAM}, + {L"-h", HIGHLIGHT_PARAM}, + {L"definitely_not_a_directory", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/highlight.cpp b/highlight.cpp index a8e8326e1..71dba3dcf 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1467,7 +1467,7 @@ static void color_node(const parse_node_t &node, int color, std::vector &co std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } -/* This function is a disaster badly in need of refactoring */ +/* This function is a disaster badly in need of refactoring. 
However, note that it does NOT do any I/O */ static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) { const size_t buff_len = buffstr.size(); @@ -1772,10 +1772,45 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -// Color all of the arguments of the given command -static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) +// Gets the expanded command from a plain statement node +static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) { - const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); + assert(plain_statement.type == symbol_plain_statement); + bool result = false; + + // Get the command + const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. */ + if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + /* Success, return the expanded string by reference */ + std::swap(cmd, *out_cmd); + result = true; + } + } + return result; +} + +// Color all of the arguments of the given command +static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) +{ + /* Hack: determine whether the parent is the cd command. 
*/ + bool cmd_is_cd = false; + const parse_node_t *parent = tree.get_parent(list_node, symbol_plain_statement); + if (parent != NULL) + { + wcstring cmd_str; + if (plain_statement_get_expanded_command(src, tree, *parent, &cmd_str)) + { + cmd_is_cd = (cmd_str == L"cd"); + } + } + + const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(list_node, symbol_argument); wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) @@ -1784,6 +1819,19 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, assert(child != NULL && child->type == symbol_argument); param.assign(src, child->source_start, child->source_length); color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); + + if (cmd_is_cd) + { + /* Mark this as an error if it's not 'help' and not a valid cd path */ + if (expand_one(param, EXPAND_SKIP_CMDSUBST)) + { + bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); + if (!is_help && ! is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE, NULL)) + { + color_node(*child, HIGHLIGHT_ERROR, color_array); + } + } + } } } @@ -1893,20 +1941,10 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_switch_statement: case symbol_boolean_statement: case symbol_decorated_statement: - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - break; - case symbol_if_statement: { // Color the 'end' color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - - // Color arguments and redirections - const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list); - if (arguments != NULL) - { - color_arguments(buff, parse_tree, *arguments, color); - } } break; @@ -1948,21 +1986,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); } - - /* Color arguments */ - const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); - if (arguments != NULL) - { - color_arguments(buff, parse_tree, *arguments, color); - } } break; case symbol_arguments_or_redirections_list: case symbol_argument_list: - /* Nothing, these are handled by their parents */ - break; + { + /* Only work on root lists, so that we don't re-color child lists */ + if (parse_tree.argument_list_is_root(node)) + { + color_arguments(buff, parse_tree, node, working_directory, color); + } + } + break; case parse_special_type_parse_error: case parse_special_type_tokenizer_error: diff --git a/parse_tree.cpp b/parse_tree.cpp index 0a85a1d95..5baef1c01 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -936,3 +936,17 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_n find_nodes_recursive(*this, parent, type, &result); return result; } + + +bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const +{ + bool result = true; + assert(node.type == symbol_argument_list || node.type == symbol_arguments_or_redirections_list); + const parse_node_t *parent = this->get_parent(node); + if (parent != NULL) + { + /* We have a parent - check to make sure it's not another list! 
*/ + result = parent->type != symbol_arguments_or_redirections_list && parent->type != symbol_argument_list; + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 6fcbde0dc..0355117fc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -217,6 +217,9 @@ class parse_node_tree_t : public std::vector /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ + bool argument_list_is_root(const parse_node_t &node) const; }; From c6eef166646b17e409dda043dee311ad074ff4c9 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 15:05:30 -0700 Subject: [PATCH 029/177] Autosuggestion adoption of new parser --- highlight.cpp | 208 ++++++++++++++----------------------------------- history.h | 1 + parse_tree.cpp | 42 ++++++++++ parse_tree.h | 14 +++- 4 files changed, 113 insertions(+), 152 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 71dba3dcf..5080367cd 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -329,6 +329,30 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d return result; } +/* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. */ +static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) +{ + assert(plain_statement.type == symbol_plain_statement); + bool result = false; + + // Get the command + const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. 
*/ + if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + /* Success, return the expanded string by reference */ + std::swap(cmd, *out_cmd); + result = true; + } + } + return result; +} + + rgb_color_t highlight_get_color(int highlight, bool is_background) { size_t idx=0; @@ -683,124 +707,33 @@ static bool has_expand_reserved(const wcstring &str) } /* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ -static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command, wcstring_list_t *out_arguments, int *out_last_arg_pos) +static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, const parse_node_t **out_last_arg) { - if (str.empty()) - return false; - - wcstring cmd; - wcstring_list_t args; - int arg_pos = -1; - - bool had_cmd = false; - tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + bool result = false; + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error, &parse_tree, NULL); + + /* Find the last statement */ + const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); + if (last_statement != NULL) { - int last_type = tok_last_type(&tok); - - switch (last_type) + if (plain_statement_get_expanded_command(buff, parse_tree, *last_statement, out_expanded_command)) { - case TOK_STRING: - { - if (had_cmd) - { - /* Parameter to the command. We store these escaped. */ - args.push_back(tok_last(&tok)); - arg_pos = tok_get_pos(&tok); - } - else - { - /* Command. First check that the command actually exists. */ - wcstring local_cmd = tok_last(&tok); - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! 
expanded || has_expand_reserved(cmd)) - { - /* We can't expand this cmd, ignore it */ - } - else - { - bool is_subcommand = false; - int mark = tok_get_pos(&tok); - - if (parser_keywords_is_subcommand(cmd)) - { - int sw; - tok_next(&tok); - - sw = parser_keywords_is_switch(tok_last(&tok)); - if (!parser_keywords_is_block(cmd) && - sw == ARG_SWITCH) - { - /* It's an argument to the subcommand itself */ - } - else - { - if (sw == ARG_SKIP) - mark = tok_get_pos(&tok); - is_subcommand = true; - } - tok_set_pos(&tok, mark); - } - - if (!is_subcommand) - { - /* It's really a command */ - had_cmd = true; - cmd = local_cmd; - } - } - - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - cmd.clear(); - args.clear(); - arg_pos = -1; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; - } + /* We got it */ + result = true; + + /* Find the last argument */ + *out_last_arg = parse_tree.find_last_node_of_type(symbol_plain_statement, last_statement); } } - - /* Remember our command if we have one */ - if (had_cmd) - { - if (out_command) out_command->swap(cmd); - if (out_arguments) out_arguments->swap(args); - if (out_last_arg_pos) *out_last_arg_pos = arg_pos; - } - return had_cmd; + return result; } - /* We have to return an escaped string here */ -bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &outSuggestion) +bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &out_suggestion) { if (str.empty()) return false; @@ -809,23 +742,20 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos 
= -1; - if (! autosuggest_parse_command(str, &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) - { + const parse_node_t *last_arg_node = NULL; + if (! autosuggest_parse_command(str, &parsed_command, &last_arg_node)) return false; - } bool result = false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) { /* We can possibly handle this specially */ - const wcstring escaped_dir = parsed_arguments.back(); + const wcstring escaped_dir = last_arg_node->get_source(str); wcstring suggested_path; /* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ result = true; - outSuggestion.clear(); + out_suggestion.clear(); /* Unescape the parameter */ wcstring unescaped_dir = escaped_dir; @@ -844,11 +774,11 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); /* Return it */ - outSuggestion = str; - outSuggestion.erase(parsed_last_arg_pos); - if (quote != L'\0') outSuggestion.push_back(quote); - outSuggestion.append(escaped_suggested_path); - if (quote != L'\0') outSuggestion.push_back(quote); + out_suggestion = str; + out_suggestion.erase(last_arg_node->source_start); + if (quote != L'\0') out_suggestion.push_back(quote); + out_suggestion.append(escaped_suggested_path); + if (quote != L'\0') out_suggestion.push_back(quote); } } else @@ -866,15 +796,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos = -1; - if (! autosuggest_parse_command(item.str(), &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) + const parse_node_t *last_arg_node = NULL; + if (! 
autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) { /* We can possibly handle this specially */ - wcstring dir = parsed_arguments.back(); + wcstring dir = last_arg_node->get_source(item.str()); if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { handled = true; @@ -1772,29 +1701,6 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -// Gets the expanded command from a plain statement node -static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) -{ - assert(plain_statement.type == symbol_plain_statement); - bool result = false; - - // Get the command - const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); - if (cmd_node != NULL && cmd_node->has_source()) - { - wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); - - /* Try expanding it. If we cannot, it's an error. 
*/ - if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) - { - /* Success, return the expanded string by reference */ - std::swap(cmd, *out_cmd); - result = true; - } - } - return result; -} - // Color all of the arguments of the given command static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) { diff --git a/history.h b/history.h index a19c88440..b9cfc85b0 100644 --- a/history.h +++ b/history.h @@ -61,6 +61,7 @@ class history_item_t { return contents; } + bool empty() const { return contents.empty(); diff --git a/parse_tree.cpp b/parse_tree.cpp index 5baef1c01..30ee6856b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -937,6 +937,48 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_n return result; } +/* Return true if the given node has the proposed ancestor as an ancestor (or is itself that ancestor) */ +static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t &node, const parse_node_t &proposed_ancestor) +{ + if (&node == &proposed_ancestor) + { + /* Found it */ + return true; + } + else if (node.parent == NODE_OFFSET_INVALID) + { + /* No more parents */ + return false; + } + else + { + /* Recurse to the parent */ + return node_has_ancestor(tree, tree.at(node.parent), proposed_ancestor); + } +} + +const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Find nodes of the given type in the tree, working backwards + size_t idx = this->size(); + while (idx--) + { + const parse_node_t &node = this->at(idx); + if (node.type == type) + { + // Types match. 
Check if it has the right parent + if (parent == NULL || node_has_ancestor(*this, node, *parent)) + { + // Success + result = &node; + break; + } + } + } + return result; +} + bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const { diff --git a/parse_tree.h b/parse_tree.h index 0355117fc..b2059914c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -200,6 +200,15 @@ class parse_node_t { return source_start != (size_t)(-1); } + + /* Gets source for the node, or the empty string if it has no source */ + wcstring get_source(const wcstring &str) const + { + if (! has_source()) + return wcstring(); + else + return wcstring(str, this->source_start, this->source_length); + } }; /* The parse tree itself */ @@ -218,6 +227,9 @@ class parse_node_tree_t : public std::vector typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ + const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; }; @@ -287,7 +299,7 @@ enum parse_argument_flags_t boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" -# The tag of a plain statement indicates which mode to use +# TODO: we should be able to construct plain_statements out of e.g. 
'command --help' or even just 'command' decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement plain_statement = arguments_or_redirections_list optional_background From 54d7c29221e066ed4150e53f3bcca90013e5d795 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 18:41:35 -0700 Subject: [PATCH 030/177] Syntax highlighting for command substitutions --- fish_tests.cpp | 14 +++- highlight.cpp | 210 +++++++++++++++++++++++++++++++++++++------------ parse_util.cpp | 38 ++++++++- parse_util.h | 19 +++++ 4 files changed, 229 insertions(+), 52 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index bc631bf32..6c77ec08a 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2026,9 +2026,21 @@ static void test_highlighting(void) {L"definitely_not_a_directory", HIGHLIGHT_ERROR}, {NULL, -1} }; + + // Command substitutions + const highlight_component_t components7[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"ls", HIGHLIGHT_COMMAND}, + {L"param2", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/highlight.cpp b/highlight.cpp index 5080367cd..28e32b7a1 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -36,6 +36,8 @@ #include "history.h" #include "parse_tree.h" +#define CURSOR_POSITION_INVALID ((size_t)(-1)) + /** Number of elements in the highlight_var array */ @@ -1382,25 +1384,11 @@ void highlight_shell_classic(const wcstring &buff, std::vector &color, size } } -static void color_node(const parse_node_t &node, int color, std::vector &color_array) -{ - // Can 
only color nodes with valid source ranges - if (! node.has_source()) - return; - - // Fill the color array with our color in the corresponding range - size_t source_end = node.source_start + node.source_length; - assert(source_end >= node.source_start); - assert(source_end <= color_array.size()); - - std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); -} - -/* This function is a disaster badly in need of refactoring. However, note that it does NOT do any I/O */ -static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) +/* This function is a disaster badly in need of refactoring. */ +static void color_argument_internal(const wcstring &buffstr, std::vector::iterator colors) { const size_t buff_len = buffstr.size(); - std::fill(colors, colors + buff_len, normal_status); + std::fill(colors, colors + buff_len, HIGHLIGHT_PARAM); enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; int bracket_count=0; @@ -1679,6 +1667,119 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c } } +/* Syntax highlighter helper */ +class highlighter_t +{ + /* The string we're highlighting. Note this is a reference memmber variable (to avoid copying)! We must not outlive this! */ + const wcstring &buff; + + /* Cursor position */ + const size_t cursor_pos; + + /* Environment variables. Again, a reference member variable! 
*/ + const env_vars_snapshot_t &vars; + + /* Working directory */ + const wcstring working_directory; + + /* The resulting colors */ + typedef std::vector color_array_t; + color_array_t color_array; + + /* The parse tree of the buff */ + parse_node_tree_t parse_tree; + + /* Color an argument */ + void color_argument(const parse_node_t &node); + + /* Color the arguments of the given node */ + void color_arguments(const parse_node_t &list_node); + + /* Color all the children of the command with the given type */ + void color_children(const parse_node_t &parent, parse_token_type_t type, int color); + + /* Colors the source range of a node with a given color */ + void color_node(const parse_node_t &node, int color); + + public: + + /* Constructor */ + highlighter_t(const wcstring &str, size_t pos, const env_vars_snapshot_t &ev, const wcstring &wd) : buff(str), cursor_pos(pos), vars(ev), working_directory(wd), color_array(str.size()) + { + /* Parse the tree */ + this->parse_tree.clear(); + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + } + + /* Perform highlighting, returning an array of colors */ + const color_array_t &highlight(); +}; + +void highlighter_t::color_node(const parse_node_t &node, int color) +{ + // Can only color nodes with valid source ranges + if (! node.has_source()) + return; + + // Fill the color array with our color in the corresponding range + size_t source_end = node.source_start + node.source_length; + assert(source_end >= node.source_start); + assert(source_end <= color_array.size()); + + std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); +} + +void highlighter_t::color_argument(const parse_node_t &node) +{ + if (! 
node.has_source()) + return; + + const wcstring arg_str = node.get_source(this->buff); + + /* Get an iterator to the colors associated with the argument */ + const size_t arg_start = node.source_start; + const color_array_t::iterator arg_colors = color_array.begin() + arg_start; + + /* Color this argument without concern for command substitutions */ + color_argument_internal(arg_str, arg_colors); + + /* Now do command substitutions */ + size_t cmdsub_cursor = 0, cmdsub_start = 0, cmdsub_end = 0; + wcstring cmdsub_contents; + while (parse_util_locate_cmdsubst_range(arg_str, &cmdsub_cursor, &cmdsub_contents, &cmdsub_start, &cmdsub_end, true /* accept incomplete */) > 0) + { + /* The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end */ + assert(cmdsub_end > cmdsub_start); + assert(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.size()); + + /* Found a command substitution. Compute the position of the start and end of the cmdsub contents, within our overall src. */ + const size_t arg_subcmd_start = arg_start + cmdsub_start, arg_subcmd_end = arg_start + cmdsub_end; + + /* Highlight the parens. The open paren must exist; the closed paren may not if it was incomplete. */ + assert(cmdsub_start < arg_str.size()); + this->color_array.at(arg_subcmd_start) = HIGHLIGHT_OPERATOR; + if (arg_subcmd_end < this->buff.size()) + this->color_array.at(arg_subcmd_end) = HIGHLIGHT_OPERATOR; + + /* Compute the cursor's position within the cmdsub. We must be past the open paren (hence >) but can be at the end of the string or closed paren (hence <=) */ + size_t cursor_subpos = CURSOR_POSITION_INVALID; + if (cursor_pos != CURSOR_POSITION_INVALID && cursor_pos > arg_subcmd_start && cursor_pos <= arg_subcmd_end) + { + /* The -1 because the cmdsub_contents does not include the open paren */ + cursor_subpos = cursor_pos - arg_subcmd_start - 1; + } + + /* Highlight it recursively. 
*/ + highlighter_t cmdsub_highlighter(cmdsub_contents, cursor_subpos, this->vars, this->working_directory); + const color_array_t &subcolors = cmdsub_highlighter.highlight(); + + /* Copy out the subcolors back into our array */ + assert(subcolors.size() == cmdsub_contents.size()); + std::copy(subcolors.begin(), subcolors.end(), this->color_array.begin() + arg_subcmd_start + 1); + } +} + // Indicates whether the source range of the given node forms a valid path in the given working_directory static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) { @@ -1702,39 +1803,39 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node } // Color all of the arguments of the given command -static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) +void highlighter_t::color_arguments(const parse_node_t &list_node) { - /* Hack: determine whether the parent is the cd command. 
*/ + /* Hack: determine whether the parent is the cd command, so we can show errors for non-directories */ bool cmd_is_cd = false; - const parse_node_t *parent = tree.get_parent(list_node, symbol_plain_statement); + const parse_node_t *parent = this->parse_tree.get_parent(list_node, symbol_plain_statement); if (parent != NULL) { wcstring cmd_str; - if (plain_statement_get_expanded_command(src, tree, *parent, &cmd_str)) + if (plain_statement_get_expanded_command(this->buff, this->parse_tree, *parent, &cmd_str)) { cmd_is_cd = (cmd_str == L"cd"); } } - const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(list_node, symbol_argument); + /* Find all the arguments of this list */ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); - wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) { const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); - param.assign(src, child->source_start, child->source_length); - color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); + this->color_argument(*child); if (cmd_is_cd) { /* Mark this as an error if it's not 'help' and not a valid cd path */ + wcstring param = child->get_source(this->buff); if (expand_one(param, EXPAND_SKIP_CMDSUBST)) { bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); if (!is_help && ! 
is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE, NULL)) { - color_node(*child, HIGHLIGHT_ERROR, color_array); + this->color_node(*child, HIGHLIGHT_ERROR); } } } @@ -1742,14 +1843,14 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, } /* Color all the children of the command with the given type */ -static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) +void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) { for (node_offset_t idx=0; idx < parent.child_count; idx++) { - const parse_node_t *child = tree.get_child(parent, idx); + const parse_node_t *child = this->parse_tree.get_child(parent, idx); if (child != NULL && child->type == type) { - color_node(*child, color, color_array); + this->color_node(*child, color); } } } @@ -1803,22 +1904,19 @@ static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoratio return is_valid; } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +const highlighter_t::color_array_t & highlighter_t::highlight() { ASSERT_IS_BACKGROUND_THREAD(); - + const size_t length = buff.size(); - assert(buff.size() == color.size()); - + assert(this->buff.size() == this->color_array.size()); + if (length == 0) - return; + return color_array; /* Start out at zero */ - std::fill(color.begin(), color.end(), 0); - - /* Do something sucky and get the current working directory on this background thread. This should really be passed in. 
*/ - const wcstring working_directory = env_get_pwd_slash(); - + std::fill(this->color_array.begin(), this->color_array.end(), 0); + /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; @@ -1850,20 +1948,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_if_statement: { // Color the 'end' - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); } break; case symbol_redirection: { - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); + this->color_children(node, parse_token_type_string, HIGHLIGHT_REDIRECTION); } break; case parse_token_type_background: case parse_token_type_end: { - color_node(node, HIGHLIGHT_END, color); + this->color_node(node, HIGHLIGHT_END); } break; @@ -1890,7 +1988,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t { is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); } - color_node(*cmd_node, is_valid_cmd ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); + this->color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR); } } break; @@ -1902,18 +2000,18 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t /* Only work on root lists, so that we don't re-color child lists */ if (parse_tree.argument_list_is_root(node)) { - color_arguments(buff, parse_tree, node, working_directory, color); + this->color_arguments(node); } } break; case parse_special_type_parse_error: case parse_special_type_tokenizer_error: - color_node(node, HIGHLIGHT_ERROR, color); + this->color_node(node, HIGHLIGHT_ERROR); break; case parse_special_type_comment: - color_node(node, HIGHLIGHT_COMMENT, color); + this->color_node(node, HIGHLIGHT_COMMENT); break; default: @@ -1921,7 +2019,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } } - if (pos <= buff.size()) + if (this->cursor_pos <= this->buff.size()) { /* If the cursor is over an argument, and that argument is a valid path, underline it */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) @@ -1933,7 +2031,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t continue; /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */ - if (pos >= node.source_start && pos - node.source_start <= node.source_length) + if (this->cursor_pos >= node.source_start && this->cursor_pos - node.source_start <= node.source_length) { /* See if this is a valid path */ if (node_is_potential_path(buff, node, working_directory)) @@ -1942,15 +2040,27 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) { /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ - if (! (color.at(i) & HIGHLIGHT_ERROR)) + if (! 
(this->color_array.at(i) & HIGHLIGHT_ERROR)) { - color.at(i) |= HIGHLIGHT_VALID_PATH; + this->color_array.at(i) |= HIGHLIGHT_VALID_PATH; } } } } } } + + return color_array; +} + +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ + const wcstring working_directory = env_get_pwd_slash(); + + /* Highlight it! */ + highlighter_t highlighter(buff, pos, vars, working_directory); + color = highlighter.highlight(); } /** diff --git a/parse_util.cpp b/parse_util.cpp index 5b807059e..6427e249f 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -164,7 +164,7 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end CHECK(in, 0); - for (pos = (wchar_t *)in; *pos; pos++) + for (pos = const_cast(in); *pos; pos++) { if (prev != '\\') { @@ -240,6 +240,42 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end return 1; } +int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, bool accept_incomplete) +{ + /* Clear the return values */ + out_contents->clear(); + *out_start = 0; + *out_end = str.size(); + + /* Nothing to do if the offset is at or past the end of the string. 
*/ + if (*inout_cursor_offset >= str.size()) + return 0; + + /* Defer to the wonky version */ + const wchar_t * const buff = str.c_str(); + const wchar_t * const valid_range_start = buff + *inout_cursor_offset, *valid_range_end = buff + str.size(); + wchar_t *cmdsub_begin = NULL, *cmdsub_end = NULL; + int ret = parse_util_locate_cmdsubst(valid_range_start, &cmdsub_begin, &cmdsub_end, accept_incomplete); + if (ret > 0) + { + /* The command substitutions must not be NULL and must be in the valid pointer range, and the end must be bigger than the beginning */ + assert(cmdsub_begin != NULL && cmdsub_begin >= valid_range_start && cmdsub_begin <= valid_range_end); + assert(cmdsub_end != NULL && cmdsub_end > cmdsub_begin && cmdsub_end >= valid_range_start && cmdsub_end <= valid_range_end); + + /* Assign the substring to the out_contents */ + const wchar_t *interior_begin = cmdsub_begin + 1; + out_contents->assign(interior_begin, cmdsub_end - interior_begin); + + /* Return the start and end */ + *out_start = cmdsub_begin - buff; + *out_end = cmdsub_end - buff; + + /* Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though overflow is not likely */ + *inout_cursor_offset = 1 + *out_end; + } + return ret; +} + void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a, const wchar_t **b) { const wchar_t * const cursor = buff + cursor_pos; diff --git a/parse_util.h b/parse_util.h index 24147e180..76b33450e 100644 --- a/parse_util.h +++ b/parse_util.h @@ -27,6 +27,25 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **end, bool accept_incomplete); +/** + Alternative API. Iterate over command substitutions. + + \param str the string to search for subshells + \param inout_cursor_offset On input, the location to begin the search. On output, either the end of the string, or just after the closed-paren. 
+ \param out_contents On output, the contents of the command substitution + \param out_start On output, the offset of the start of the command substitution (open paren) + \param out_end On output, the offset of the end of the command substitution (close paren), or the end of the string if it was incomplete + \param accept_incomplete whether to permit missing closing parenthesis + \return -1 on syntax error, 0 if no subshells exist and 1 on sucess +*/ + +int parse_util_locate_cmdsubst_range(const wcstring &str, + size_t *inout_cursor_offset, + wcstring *out_contents, + size_t *out_start, + size_t *out_end, + bool accept_incomplete); + /** Find the beginning and end of the command substitution under the cursor. If no subshell is found, the entire string is returned. If From a51bd03a5c86d5532063610cc185f3d377daa4e1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 18:48:01 -0700 Subject: [PATCH 031/177] Remove parse_exec stuff --- Makefile.in | 2 +- builtin.cpp | 114 ------- fish.xcodeproj/project.pbxproj | 8 - parse_exec.cpp | 593 --------------------------------- parse_exec.h | 173 ---------- 5 files changed, 1 insertion(+), 889 deletions(-) delete mode 100644 parse_exec.cpp delete mode 100644 parse_exec.h diff --git a/Makefile.in b/Makefile.in index 07dec023c..23e24b389 100644 --- a/Makefile.in +++ b/Makefile.in @@ -100,7 +100,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o parse_tree.o parse_productions.o parse_exec.o + builtin_test.o parse_tree.o parse_productions.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/builtin.cpp b/builtin.cpp index 06c9a9b89..b84d78e73 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -65,7 +65,6 @@ #include "path.h" #include "history.h" 
#include "parse_tree.h" -#include "parse_exec.h" /** The default prompt for the read command @@ -3956,104 +3955,6 @@ static int builtin_history(parser_t &parser, wchar_t **argv) #pragma mark Simulator -struct parse_execution_simulator_t : public parse_execution_visitor_t -{ - wcstring_list_t result; - - wcstring &back() - { - assert(! result.empty()); - return result.back(); - } - - void append_src(node_offset_t idx) - { - wcstring tmp; - context->get_source(idx, &tmp); - back().append(tmp); - } - - void append(const wchar_t *s) - { - back().append(s); - } - - bool enter_job_list(void) - { - return true; - } - - bool enter_job(void) - { - result.resize(result.size() + 1); - return true; - } - - void visit_statement(void) - { - } - - virtual void visit_boolean_statement(void) - { - } - - virtual void enter_if_clause(const exec_if_clause_t &statement) - { - } - - virtual void exit_if_clause(const exec_if_clause_t &statement) - { - append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); - } - - void visit_basic_statement(const exec_basic_statement_t &statement) - { - wcstring &line = this->back(); - if (! 
line.empty()) - { - line.append(L" "); - } - switch (statement.decoration) - { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; - - default: - break; - } - - line.append(L"cmd:"); - this->append_src(statement.command_idx); - for (size_t i=0; i < statement.arguments().size(); i++) - { - const exec_argument_t &arg = statement.arguments().at(i); - append(L" "); - append(L"arg:"); - append_src(arg.parse_node_idx); - } - } - - void visit_function(const exec_function_header_t &function) - { - wcstring &line = this->back(); - line.append(L"define function: "); - wcstring tmp; - context->get_source(function.name_idx, &tmp); - line.append(tmp); - } - - void exit_job_list(void) - { - } -}; - - - int builtin_parse(parser_t &parser, wchar_t **argv) { struct sigaction act; @@ -4090,21 +3991,6 @@ int builtin_parse(parser_t &parser, wchar_t **argv) { const wcstring dump = parse_dump_tree(parse_tree, src); fprintf(stderr, "%ls", dump.c_str()); - if (0) - { - parse_execution_context_t ctx(parse_tree, src); - parse_execution_simulator_t sim; - sim.context = &ctx; - while (ctx.visit_next_node(&sim)) - { - } - stdout_buffer.append(L"Simulating execution:\n"); - for (size_t i=0; i < sim.result.size(); i++) - { - stdout_buffer.append(sim.result.at(i)); - stdout_buffer.push_back(L'\n'); - } - } } } return STATUS_BUILTIN_OK; diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 3b85e4bca..04d0c7cb7 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -112,13 +112,11 @@ D08A32B817B446A300F3A533 /* signal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855C13B3ACEE0099B651 /* signal.cpp */; }; D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; - D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8C15983CFA008E62BD /* libncurses.dylib */; }; D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8A15983CDF008E62BD /* libiconv.dylib */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; - D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -497,8 +495,6 @@ D0B6B0FE14E88BA400AD6C10 /* color.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = color.cpp; sourceTree = ""; }; D0B6B0FF14E88BA400AD6C10 /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; D0C4FD9415A7D7EE00212EF1 /* config.fish */ = {isa = PBXFileReference; lastKnownFileType = text; name = config.fish; path = etc/config.fish; sourceTree = ""; }; - D0C52F331765281F00BFAB82 /* parse_exec.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_exec.cpp; sourceTree = ""; }; - D0C52F341765281F00BFAB82 /* parse_exec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_exec.h; sourceTree = ""; }; D0C52F351765284C00BFAB82 /* parse_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_tree.cpp; sourceTree = ""; }; D0C52F361765284C00BFAB82 /* parse_tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree.h; sourceTree = ""; }; D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = autoload.cpp; sourceTree = ""; }; @@ -665,8 +661,6 @@ D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, - D0C52F341765281F00BFAB82 /* parse_exec.h */, - D0C52F331765281F00BFAB82 /* parse_exec.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1130,7 +1124,6 @@ files = ( D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */, D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */, - D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */, D08A32A717B446A300F3A533 /* autoload.cpp in Sources */, D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */, D08A32A917B446A300F3A533 /* color.cpp in Sources */, @@ -1255,7 +1248,6 @@ D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, - D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/parse_exec.cpp b/parse_exec.cpp deleted file 
mode 100644 index c424ad957..000000000 --- a/parse_exec.cpp +++ /dev/null @@ -1,593 +0,0 @@ -#include "parse_exec.h" -#include - -struct exec_node_t -{ - node_offset_t parse_node_idx; - node_offset_t body_parse_node_idx; - bool visited; - - explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) - { - } - - explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) - { - } -}; - -exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) -{ - -} - - -class parse_exec_t -{ - parse_node_tree_t parse_tree; - wcstring src; - - /* The stack of nodes as we execute them */ - std::vector exec_nodes; - - /* The stack of commands being built */ - std::vector assembling_statements; - - /* Current visitor (very transient) */ - struct parse_execution_visitor_t * visitor; - - const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const - { - return parse_tree.at(parent.child_offset(which)); - } - - void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) - { - PARSE_ASSERT(! 
exec_nodes.empty()); - // Figure out the offset of the children - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - node_offset_t child_node_idx = parse_node.child_start; - - // Remove the top node - exec_nodes.pop_back(); - - // Append the given children, backwards - const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; - for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) - { - node_offset_t idx = idxs[q]; - if (idx != (node_offset_t)(-1)) - { - PARSE_ASSERT(idx < parse_node.child_count); - exec_nodes.push_back(exec_node_t(child_node_idx + idx)); - } - } - - } - - void push(node_offset_t global_idx) - { - exec_nodes.push_back(exec_node_t(global_idx)); - } - - void push(const exec_node_t &node) - { - exec_nodes.push_back(node); - } - - - void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) - { - PARSE_ASSERT(! exec_nodes.empty()); - if (child_count == 0) - { - // No children, just remove the top node - exec_nodes.pop_back(); - } - else - { - // Figure out the offset of the children - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - PARSE_ASSERT(child_idx < parse_node.child_count); - node_offset_t child_node_idx = parse_node.child_start + child_idx; - - // Remove the top node - exec_nodes.pop_back(); - - // Append the given children, backwards - node_offset_t cursor = child_count; - while (cursor--) - { - exec_nodes.push_back(exec_node_t(child_node_idx + cursor)); - } - } - } - - void pop() - { - PARSE_ASSERT(! 
exec_nodes.empty()); - exec_nodes.pop_back(); - } - - void pop_push_all() - { - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - pop_push(0, parse_node.child_count); - } - - void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(output != NULL); - PARSE_ASSERT(node.type == symbol_argument_or_redirection); - PARSE_ASSERT(node.child_count == 1); - node_offset_t child_idx = node.child_offset(0); - const parse_node_t &child = parse_tree.at(child_idx); - switch (child.type) - { - case parse_token_type_string: - // Argument - { - exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = child_idx; - output->arguments.push_back(arg); - } - break; - - case parse_token_type_redirection: - // Redirection - { - exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = child_idx; - output->redirections.push_back(redirect); - } - break; - - default: - PARSER_DIE(); - break; - } - } - - void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const - { - node_offset_t idx = start_idx; - for (;;) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list); - PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); - if (node.child_count == 0) - { - // No more children - break; - } - else - { - // Skip to next child - assemble_1_argument_or_redirection(node.child_offset(0), output); - idx = node.child_offset(1); - } - } - } - - void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_plain_statement); - PARSE_ASSERT(node.child_count == 2); - exec_basic_statement_t statement; - statement.set_decoration(decoration); - statement.command_idx = 
node.child_offset(0); - assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); - visitor->visit_basic_statement(statement); - } - - void assemble_block_statement(node_offset_t parse_node_idx) - { - - const parse_node_t &node = parse_tree.at(parse_node_idx); - PARSE_ASSERT(node.type == symbol_block_statement); - PARSE_ASSERT(node.child_count == 5); - - // Fetch arguments and redirections. These ought to be evaluated before the job list - exec_block_statement_t statement; - assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); - - // Generic visit - visitor->enter_block_statement(statement); - - // Dig into the header to discover the type - const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); - PARSE_ASSERT(header_parent.type == symbol_block_header); - PARSE_ASSERT(header_parent.child_count == 1); - const node_offset_t header_idx = header_parent.child_offset(0); - - // Fetch body (job list) - node_offset_t body_idx = node.child_offset(2); - PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); - - pop(); - push(exec_node_t(header_idx, body_idx)); - } - - /* which: 0 -> if, 1 -> else if, 2 -> else */ - void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) - { - if (which == 0) - { - PARSE_ASSERT(node.type == symbol_if_clause); - PARSE_ASSERT(node.child_count == 4); - } - else if (which == 2) - { - PARSE_ASSERT(node.type == symbol_else_continuation); - PARSE_ASSERT(node.child_count == 2); - } - - struct exec_if_clause_t clause; - if (which == 0) - { - clause.body = node.child_offset(3); - } - else - { - clause.body = node.child_offset(1); - } - if (! 
exec_node.visited) - { - visitor->enter_if_clause(clause); - exec_node.visited = true; - if (which == 0) - { - push(node.child_offset(1)); - } - } - else - { - visitor->exit_if_clause(clause); - pop(); - } - } - - void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const - { - node_offset_t idx = start_idx; - for (;;) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_argument_list); - if (node.type == symbol_argument_list) - { - // argument list, may be empty - PARSE_ASSERT(node.child_count == 0 || node.child_count == 1); - if (node.child_count == 0) - { - break; - } - else - { - idx = node.child_offset(0); - } - } - else - { - // nonempty argument list - PARSE_ASSERT(node.child_count == 2); - output->push_back(exec_argument_t(node.child_offset(0))); - idx = node.child_offset(1); - } - } - } - - void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) - { - const parse_node_t &node = parse_tree.at(node_idx); - PARSE_ASSERT(node.type == symbol_case_item); - - // add a new case - size_t len = statement->cases.size(); - statement->cases.resize(len + 1); - exec_switch_case_t &new_case = statement->cases.back(); - - // assemble it - new_case.body = node.child_offset(3); - assemble_arguments(node.child_offset(1), &new_case.arguments); - - - } - - void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) - { - const parse_node_t &node = parse_tree.at(node_idx); - PARSE_ASSERT(node.type == symbol_case_item_list); - PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); - if (node.child_count == 2) - { - assemble_1_case_item(statement, node.child_offset(0)); - assemble_case_item_list(statement, node.child_offset(1)); - } - } - - void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) - { - PARSE_ASSERT(parse_node.type == symbol_switch_statement); - exec_switch_statement_t statement; - - 
statement.argument.parse_node_idx = parse_node.child_offset(1); - assemble_case_item_list(&statement, parse_node.child_offset(3)); - - visitor->visit_switch_statement(statement); - - // pop off the switch - pop(); - } - - void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) - { - PARSE_ASSERT(header.type == symbol_function_header); - PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); - PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); - exec_function_header_t function_info; - function_info.name_idx = header.child_offset(1); - function_info.body_idx = exec_node.body_parse_node_idx; - assemble_arguments(header.child_offset(2), &function_info.arguments); - visitor->visit_function(function_info); - - // Always pop - pop(); - } - - - void enter_parse_node(size_t idx); - void run_top_node(void); - -public: - - void get_node_string(node_offset_t idx, wcstring *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.source_start <= src.size()); - PARSE_ASSERT(node.source_start + node.source_length <= src.size()); - output->assign(src, node.source_start, node.source_length); - } - - bool visit_next_node(parse_execution_visitor_t *v); - - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) - { - if (! parse_tree.empty()) - { - exec_nodes.push_back(exec_node_t(0)); - } - } -}; - -void parse_exec_t::run_top_node() -{ - PARSE_ASSERT(! 
exec_nodes.empty()); - exec_node_t &exec_node = exec_nodes.back(); - const node_offset_t parse_node_idx = exec_node.parse_node_idx; - const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); - bool log = true; - - if (log) - { - wcstring tmp; - tmp.append(exec_nodes.size(), L' '); - tmp.append(parse_node.describe()); - printf("%ls\n", tmp.c_str()); - } - - switch (parse_node.type) - { - case symbol_job_list: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // No more jobs, done - visitor->exit_job_list(); - pop(); - } - else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) - { - // Empty job, so just skip it - pop_push(1, 1); - } - else - { - // Normal job - visitor->enter_job_list(); - pop_push(0, 2); - } - break; - - case symbol_job: - { - PARSE_ASSERT(parse_node.child_count == 2); - visitor->enter_job(); - pop_push_all(); - break; - } - - case symbol_job_continuation: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); - if (parse_node.child_count == 0) - { - // All done with this job - visitor->exit_job(); - pop(); - } - else - { - // Skip the pipe - pop_push(1, 2); - } - break; - - case symbol_statement: - { - PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); - break; - } - - case symbol_block_statement: - { - PARSE_ASSERT(parse_node.child_count == 5); - assemble_block_statement(parse_node_idx); - break; - } - - case symbol_block_header: - { - PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); - break; - } - - case symbol_function_header: - { - PARSE_ASSERT(parse_node.child_count == 3); - assemble_function_header(exec_node, parse_node); - break; - } - - case symbol_if_statement: - { - PARSE_ASSERT(parse_node.child_count == 4); - pop_push(0, 2); - break; - } - - case symbol_if_clause: - { - PARSE_ASSERT(parse_node.child_count == 4); - assemble_if_else_clause(exec_node, parse_node, 0); - pop(); - break; - } 
- - case symbol_else_clause: - { - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // No else - pop(); - } - else - { - // We have an else - pop_push(1); - } - break; - } - - case symbol_else_continuation: - { - // Figure out if this is an else if or a terminating else - PARSE_ASSERT(parse_node.child_count == 2); - const parse_node_t &first_child = get_child(parse_node, 1); - PARSE_ASSERT(first_child.type == symbol_if_clause || first_child.type == parse_token_type_end); - if (first_child.type == symbol_if_clause) - { - pop_push_all(); - } - else - { - // else - assemble_if_else_clause(exec_node, parse_node, 2); - pop(); - } - break; - } - - case symbol_switch_statement: - { - assemble_switch_statement(exec_node, parse_node); - break; - } - - case symbol_decorated_statement: - { - PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - - node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); - parse_keyword_t decoration = static_cast(parse_node.tag); - assemble_command_for_plain_statement(plain_statement_idx, decoration); - pop(); - break; - } - - // The following symbols should be handled by their parents, i.e. never pushed on our stack - case symbol_case_item_list: - case symbol_plain_statement: - case symbol_arguments_or_redirections_list: - case symbol_argument_or_redirection: - fprintf(stderr, "Unexpected token type %ls at index %ld. 
This should have been handled by the parent.\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); - PARSER_DIE(); - break; - - case parse_token_type_end: - PARSE_ASSERT(parse_node.child_count == 0); - pop(); - break; - - default: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); - PARSER_DIE(); - break; - - } -} - -bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) -{ - PARSE_ASSERT(v != NULL); - PARSE_ASSERT(visitor == NULL); - if (exec_nodes.empty()) - { - return false; - } - - visitor = v; - run_top_node(); - visitor = NULL; - return true; -} - -void parse_exec_t::enter_parse_node(size_t idx) -{ - PARSE_ASSERT(idx < parse_tree.size()); - exec_node_t exec(idx); - exec_nodes.push_back(exec); -} - - -parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) -{ - ctx = new parse_exec_t(n, s); -} - -parse_execution_context_t::~parse_execution_context_t() -{ - delete ctx; -} - -bool parse_execution_context_t::visit_next_node(parse_execution_visitor_t *visitor) -{ - return ctx->visit_next_node(visitor); -} - -void parse_execution_context_t::get_source(node_offset_t idx, wcstring *result) const -{ - return ctx->get_node_string(idx, result); -} - - - - diff --git a/parse_exec.h b/parse_exec.h deleted file mode 100644 index 1eea99ab1..000000000 --- a/parse_exec.h +++ /dev/null @@ -1,173 +0,0 @@ -/**\file parse_exec.h - - Programmatic execution of a parse tree -*/ - -#ifndef FISH_PARSE_EXEC_H -#define FISH_PARSE_EXEC_H - -#include "parse_tree.h" - -struct parse_execution_visitor_t; -class parse_exec_t; -class parse_execution_context_t -{ - parse_exec_t *ctx; //owned - -public: - parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); - ~parse_execution_context_t(); - - bool visit_next_node(parse_execution_visitor_t *visitor); - - // Gets the source for a node at a given index - void 
get_source(node_offset_t idx, wcstring *result) const; -}; - - -struct exec_argument_t -{ - node_offset_t parse_node_idx; - exec_argument_t(node_offset_t p) : parse_node_idx(p) - { - } - exec_argument_t() - { - } -}; -typedef std::vector exec_argument_list_t; - -struct exec_redirection_t -{ - node_offset_t parse_node_idx; -}; -typedef std::vector exec_redirection_list_t; - -struct exec_arguments_and_redirections_t -{ - exec_argument_list_t arguments; - exec_redirection_list_t redirections; -}; - -struct exec_basic_statement_t -{ - // Node containing the command - node_offset_t command_idx; - - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - // Decoration - enum - { - decoration_plain, - decoration_command, - decoration_builtin - } decoration; - - exec_basic_statement_t(); - - void set_decoration(uint32_t k) - { - PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); - switch (k) - { - case parse_keyword_none: - decoration = decoration_plain; - break; - case parse_keyword_command: - decoration = decoration_command; - break; - case parse_keyword_builtin: - decoration = decoration_builtin; - break; - default: - PARSER_DIE(); - break; - } - } - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } -}; - -struct exec_function_header_t -{ - // Node containing the function name - node_offset_t name_idx; - - // Node containing the function body - node_offset_t body_idx; - - // Arguments - exec_argument_list_t arguments; -}; - -struct exec_block_statement_t -{ - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; -}; - -struct exec_if_clause_t -{ - // Node containing the body of the if statement - node_offset_t body; -}; - -struct exec_switch_case_t -{ - exec_argument_list_t arguments; - node_offset_t body; -}; - 
-struct exec_switch_statement_t -{ - exec_argument_t argument; - std::vector cases; -}; - -struct parse_execution_visitor_t -{ - node_offset_t node_idx; - parse_execution_context_t *context; - - parse_execution_visitor_t() : node_idx(0), context(NULL) - { - } - - virtual bool enter_job_list(void) - { - return true; - } - virtual bool enter_job(void) - { - return true; - } - virtual void visit_statement(void) { } - virtual void visit_function(const exec_function_header_t &function) { } - virtual bool enter_block_statement(const exec_block_statement_t &statement) - { - return true; - } - - virtual void enter_if_clause(const exec_if_clause_t &statement) { } - virtual void exit_if_clause(const exec_if_clause_t &statement) { } - - virtual void visit_switch_statement(const exec_switch_statement_t &header) { } - - - virtual void visit_boolean_statement(void) { } - virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } - virtual void exit_job(void) { } - virtual void exit_job_list(void) { } -}; - -#endif From 7b86b2e05a011e37bf11bba2675ef5db684bca24 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 02:03:50 -0700 Subject: [PATCH 032/177] Adoption of new parser in abbreviations --- fish_tests.cpp | 20 +++++-- highlight.cpp | 40 ++++++------- parse_productions.cpp | 10 ++-- parse_productions.h | 2 +- parse_tree.cpp | 29 ++++++--- parse_tree.h | 42 ++++++------- reader.cpp | 134 ++++++++++++------------------------------ 7 files changed, 115 insertions(+), 162 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 6c77ec08a..99ed6cd34 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -61,7 +61,6 @@ #include "signal.h" #include "highlight.h" #include "parse_tree.h" -#include "parse_exec.h" #include "parse_util.h" /** @@ -769,6 +768,11 @@ static void test_abbreviations(void) expanded = reader_expand_abbreviation_in_command(L"of gc", wcslen(L"of gc"), &result); if (expanded) err(L"gc incorrectly expanded on line %ld", 
(long)__LINE__); + /* others should not be */ + expanded = reader_expand_abbreviation_in_command(L"command gc", wcslen(L"command gc"), &result); + if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__); + + env_pop(); } @@ -1916,12 +1920,16 @@ static void test_new_parser_fuzzing(void) size_t max = 5; for (size_t len=1; len <= max; len++) { - fprintf(stderr, "%lu / %lu\n", len, max); + fprintf(stderr, "%lu / %lu...", len, max); std::vector tokens(len); + size_t count = 0; + parse_t parser; + parse_node_tree_t parse_tree; do { - parse_t parser; - parse_node_tree_t parse_tree; + parser.clear(); + parse_tree.clear(); + count++; for (size_t i=0; i < len; i++) { const parser_fuzz_token_t &token = tokens[i]; @@ -1931,6 +1939,7 @@ static void test_new_parser_fuzzing(void) // keep going until we wrap } while (! increment(tokens)); + fprintf(stderr, "done (%lu)\n", count); } double end = timef(); say(L"All fuzzed in %f seconds!", end - start); @@ -2108,7 +2117,7 @@ int main(int argc, char **argv) say(L"Testing low-level functionality"); set_main_thread(); setup_fork_guards(); - //proc_init(); + //proc_init(); //disabling this prevents catching SIGINT event_init(); function_init(); builtin_init(); @@ -2116,7 +2125,6 @@ int main(int argc, char **argv) env_init(); test_highlighting(); - return 0; test_new_parser_fuzzing(); test_new_parser_correctness(); test_highlighting(); diff --git a/highlight.cpp b/highlight.cpp index 28e32b7a1..8fe9989b9 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -332,7 +332,7 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d } /* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. 
*/ -static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) +bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) { assert(plain_statement.type == symbol_plain_statement); bool result = false; @@ -708,15 +708,15 @@ static bool has_expand_reserved(const wcstring &str) return result; } -/* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ -static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, const parse_node_t **out_last_arg) +/* Parse a command line. Return by reference the last command, and the last argument to that command (as a copied node), if any. This is used by autosuggestions */ +static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, parse_node_t *out_last_arg) { bool result = false; /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; - parser.parse(buff, parse_flag_continue_after_error, &parse_tree, NULL); + parser.parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -727,8 +727,12 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* We got it */ result = true; - /* Find the last argument */ - *out_last_arg = parse_tree.find_last_node_of_type(symbol_plain_statement, last_statement); + /* Find the last argument. If we don't get one, return an invalid node. 
*/ + const parse_node_t *last_arg = parse_tree.find_last_node_of_type(symbol_argument, last_statement); + if (last_arg != NULL) + { + *out_last_arg = *last_arg; + } } } return result; @@ -739,20 +743,20 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di { if (str.empty()) return false; - + ASSERT_IS_BACKGROUND_THREAD(); /* Parse the string */ wcstring parsed_command; - const parse_node_t *last_arg_node = NULL; + parse_node_t last_arg_node(token_type_invalid); if (! autosuggest_parse_command(str, &parsed_command, &last_arg_node)) return false; bool result = false; - if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - const wcstring escaped_dir = last_arg_node->get_source(str); + const wcstring escaped_dir = last_arg_node.get_source(str); wcstring suggested_path; /* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ @@ -771,13 +775,12 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di path_flags_t path_flags = (quote == L'\0') ? PATH_EXPAND_TILDE : 0; if (unescaped && is_potential_cd_path(unescaped_dir, working_directory, path_flags, &suggested_path)) { - /* Note: this looks really wrong for strings that have an "unescapable" character in them, e.g. 
a \t, because parse_util_escape_string_with_quote will insert that character */ wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); /* Return it */ out_suggestion = str; - out_suggestion.erase(last_arg_node->source_start); + out_suggestion.erase(last_arg_node.source_start); if (quote != L'\0') out_suggestion.push_back(quote); out_suggestion.append(escaped_suggested_path); if (quote != L'\0') out_suggestion.push_back(quote); @@ -798,14 +801,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio /* Parse the string */ wcstring parsed_command; - const parse_node_t *last_arg_node = NULL; + parse_node_t last_arg_node(token_type_invalid); if (! autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; - if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - wcstring dir = last_arg_node->get_source(item.str()); + wcstring dir = last_arg_node.get_source(item.str()); if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { handled = true; @@ -1968,12 +1971,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() case symbol_plain_statement: { // Get the decoration from the parent - enum parse_statement_decoration_t decoration = parse_statement_decoration_none; - const parse_node_t *decorated_statement = parse_tree.get_parent(node, symbol_decorated_statement); - if (decorated_statement != NULL) - { - decoration = static_cast(decorated_statement->production_idx); - } + enum parse_statement_decoration_t decoration = parse_tree.decoration_for_plain_statement(node); /* Color the command */ const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); diff --git a/parse_productions.cpp b/parse_productions.cpp index 0900977f7..38d57ebab 100644 --- a/parse_productions.cpp +++ 
b/parse_productions.cpp @@ -27,8 +27,8 @@ static bool production_is_valid(const production_options_t production_list, prod } #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -418,7 +418,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -428,7 +428,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword) = 
NULL; switch (node_type) { TEST(job_list) @@ -486,7 +486,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword, out_tag); + production_option_idx_t which = resolver(input_type, input_keyword); if (log_it) { diff --git a/parse_productions.h b/parse_productions.h index a0d43f629..7e132d0c4 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -63,7 +63,7 @@ inline bool production_element_is_valid(production_element_t elem) } /* Fetch a production */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag, wcstring *out_error_text); +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 30ee6856b..900513f50 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -720,7 +720,7 @@ void parse_ll_t::accept_token(parse_token_t token) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) @@ -804,6 +804,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; + if (parse_flags & parse_flag_accept_incomplete_tokens) 
+ tok_options |= TOK_ACCEPT_UNFINISHED; + this->parser->set_should_generate_error_messages(errors != NULL); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); @@ -845,14 +848,14 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Tag nodes -#if 0 - wcstring result = dump_tree(this->parser->nodes, str); - fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); -#endif - // Acquire the output from the parser this->parser->acquire_output(output, errors); + +#if 0 + //wcstring result = dump_tree(this->parser->nodes, str); + //fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", output->size(), sizeof(parse_node_t), output->size() * sizeof(parse_node_t)); +#endif // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); @@ -992,3 +995,15 @@ bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const } return result; } + +enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statement(const parse_node_t &node) const +{ + assert(node.type == symbol_plain_statement); + enum parse_statement_decoration_t decoration = parse_statement_decoration_none; + const parse_node_t *decorated_statement = this->get_parent(node, symbol_decorated_statement); + if (decorated_statement != NULL) + { + decoration = static_cast(decorated_statement->production_idx); + } + return decoration; +} diff --git a/parse_tree.h b/parse_tree.h index b2059914c..945d550c4 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -125,7 +125,10 @@ enum parse_flag_continue_after_error = 1 << 0, /* Include comment tokens */ - parse_flag_include_comments = 1 << 1 + parse_flag_include_comments = 1 << 1, + + /* Indicate that the tokenizer should accept incomplete tokens */ + parse_flag_accept_incomplete_tokens = 1 << 2 }; typedef unsigned int parse_tree_flags_t; @@ -175,9 +178,6 @@ class parse_node_t node_offset_t child_start; node_offset_t child_count; - /* Type-dependent data */ - uint32_t tag; - /* Which production was used */ uint8_t production_idx; @@ -185,7 +185,7 @@ class parse_node_t wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0) { } @@ -211,6 +211,15 @@ class parse_node_t } }; +/* Statement decorations. 
This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; + + /* The parse tree itself */ class parse_node_tree_t : public std::vector { @@ -232,27 +241,10 @@ class parse_node_tree_t : public std::vector /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; -}; - - -/* Node type specific data, stored in the tag field */ - -/* Statement decorations, stored in the tag of plain_statement. This matches the order of productions in decorated_statement */ -enum parse_statement_decoration_t -{ - parse_statement_decoration_none, - parse_statement_decoration_command, - parse_statement_decoration_builtin -}; - -/* Argument flags as a bitmask, stored in the tag of argument */ -enum parse_argument_flags_t -{ - /* Indicates that this or a prior argument was --, so this should not be treated as an option */ - parse_argument_no_options = 1 << 0, - /* Indicates that the argument is for a cd command */ - parse_argument_is_for_cd = 1 << 1 + /* Utilities */ + enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + }; /* Fish grammar: diff --git a/reader.cpp b/reader.cpp index 228fa9183..0f022c279 100644 --- a/reader.cpp +++ b/reader.cpp @@ -99,6 +99,7 @@ commence. 
#include "path.h" #include "parse_util.h" #include "parser_keywords.h" +#include "parse_tree.h" /** Maximum length of prefix string when printing completion @@ -659,117 +660,56 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso const size_t subcmd_offset = cmdsub_begin - buff; const wcstring subcmd = wcstring(cmdsub_begin, cmdsub_end - cmdsub_begin); - const wchar_t *subcmd_cstr = subcmd.c_str(); - - /* Get the token containing the cursor */ - const wchar_t *subcmd_tok_begin = NULL, *subcmd_tok_end = NULL; - assert(cursor_pos >= subcmd_offset); - size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; - parse_util_token_extent(subcmd_cstr, subcmd_cursor_pos, &subcmd_tok_begin, &subcmd_tok_end, NULL, NULL); - - /* Compute the offset of the token before the cursor within the subcmd */ - assert(subcmd_tok_begin >= subcmd_cstr); - assert(subcmd_tok_end >= subcmd_tok_begin); - const size_t subcmd_tok_begin_offset = subcmd_tok_begin - subcmd_cstr; - const size_t subcmd_tok_length = subcmd_tok_end - subcmd_tok_begin; - - /* Now parse the subcmd, looking for commands */ - bool had_cmd = false, previous_token_is_cmd = false; - tokenizer_t tok(subcmd_cstr, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; + + /* Parse this subcmd */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + + /* Look for plain statements where the cursor is at the end of the command */ + const parse_node_t *matching_cmd_node = NULL; + const size_t len = parse_tree.size(); + for (size_t i=0; i < len; i++) { - size_t tok_pos = static_cast(tok_get_pos(&tok)); - if (tok_pos > subcmd_tok_begin_offset) + const parse_node_t &node = parse_tree.at(i); + + /* Only interested in plain statements with source */ + if (node.type != symbol_plain_statement || ! 
node.has_source()) + continue; + + /* Skip decorated statements */ + if (parse_tree.decoration_for_plain_statement(node) != parse_statement_decoration_none) + continue; + + /* Get the command node. Skip it if we can't or it has no source */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node == NULL || ! cmd_node->has_source()) + continue; + + /* Now see if its source range contains our cursor, including at the end */ + if (subcmd_cursor_pos >= cmd_node->source_start && subcmd_cursor_pos <= cmd_node->source_start + cmd_node->source_length) { - /* We've passed the token we're interested in */ + /* Success! */ + matching_cmd_node = cmd_node; break; } - - int last_type = tok_last_type(&tok); - - switch (last_type) - { - case TOK_STRING: - { - if (had_cmd) - { - /* Parameter to the command. */ - } - else - { - const wcstring potential_cmd = tok_last(&tok); - if (parser_keywords_is_subcommand(potential_cmd)) - { - if (potential_cmd == L"command" || potential_cmd == L"builtin") - { - /* 'command' and 'builtin' defeat abbreviation expansion. Skip this command. */ - had_cmd = true; - } - else - { - /* Other subcommand. Pretend it doesn't exist so that we can expand the following command */ - had_cmd = false; - } - } - else - { - /* It's a normal command */ - had_cmd = true; - if (tok_pos == subcmd_tok_begin_offset) - { - /* This is the token we care about! 
*/ - previous_token_is_cmd = true; - } - } - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; - } - } } - + + /* Now if we found a command node, expand it */ bool result = false; - if (previous_token_is_cmd) + if (matching_cmd_node != NULL) { - /* The token is a command. Try expanding it as an abbreviation. */ - const wcstring token = wcstring(subcmd, subcmd_tok_begin_offset, subcmd_tok_length); + assert(matching_cmd_node->type == parse_token_type_string); + const wcstring token = matching_cmd_node->get_source(subcmd); wcstring abbreviation; if (expand_abbreviation(token, &abbreviation)) { /* There was an abbreviation! Replace the token in the full command. Maintain the relative position of the cursor. 
*/ if (output != NULL) { - size_t cmd_tok_begin_offset = subcmd_tok_begin_offset + subcmd_offset; output->assign(cmdline); - output->replace(cmd_tok_begin_offset, subcmd_tok_length, abbreviation); + output->replace(subcmd_offset + matching_cmd_node->source_start, matching_cmd_node->source_length, abbreviation); } result = true; } From e763345f25f1ddc6b4f149da12fe947a9c2c8a71 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 03:45:58 -0700 Subject: [PATCH 033/177] Reduce child_count in node structure to 8 bits --- parse_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_tree.h b/parse_tree.h index 945d550c4..b83e47abc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -176,7 +176,7 @@ class parse_node_t /* Children */ node_offset_t child_start; - node_offset_t child_count; + uint8_t child_count; /* Which production was used */ uint8_t production_idx; From 58447c147f20d55555ed4035e3add1ccafec2998 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 15:57:10 -0700 Subject: [PATCH 034/177] Make the new parser LL(2). Support for correct handling of e.g. 'command --help' --- fish_tests.cpp | 80 +++++++++++++++++++++++++++ highlight.cpp | 8 +-- parse_productions.cpp | 44 +++++++++++++-- parse_productions.h | 4 +- parse_tree.cpp | 125 +++++++++++++++++++++++++++++------------- parse_tree.h | 11 +++- 6 files changed, 219 insertions(+), 53 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 99ed6cd34..40a8d7db4 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1945,6 +1945,85 @@ static void test_new_parser_fuzzing(void) say(L"All fuzzed in %f seconds!", end - start); } +// Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns true if successful. 
+static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, enum parse_statement_decoration_t *out_deco) +{ + out_cmd->clear(); + out_joined_args->clear(); + *out_deco = parse_statement_decoration_none; + + bool result = false; + parse_node_tree_t tree; + parse_t parser; + if (parser.parse(src, parse_flag_none, &tree, NULL)) + { + /* Get the statement. Should only have one */ + const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); + if (stmt_nodes.size() != 1) + { + say(L"Unexpected number of statements (%lu) found in '%ls'", stmt_nodes.size(), src.c_str()); + return false; + } + const parse_node_t &stmt = *stmt_nodes.at(0); + + /* Return its decoration */ + *out_deco = tree.decoration_for_plain_statement(stmt); + + /* Return its command */ + tree.command_for_plain_statement(stmt, src, out_cmd); + + /* Return arguments separated by spaces */ + const parse_node_tree_t::parse_node_list_t arg_nodes = tree.find_nodes(stmt, symbol_argument); + for (size_t i=0; i < arg_nodes.size(); i++) + { + if (i > 0) out_joined_args->push_back(L' '); + out_joined_args->append(arg_nodes.at(i)->get_source(src)); + } + result = true; + } + return result; +} + +/* Test the LL2 (two token lookahead) nature of the parser by exercising the special builtin and command handling. 
In particular, 'command foo' should be a decorated statement 'foo' but 'command --help' should be an undecorated statement 'command' with argument '--help', and NOT attempt to run a command called '--help' */ +static void test_new_parser_ll2(void) +{ + say(L"Testing parser two-token lookahead"); + + const struct + { + wcstring src; + wcstring cmd; + wcstring args; + enum parse_statement_decoration_t deco; + } tests[] = + { + {L"echo hello", L"echo", L"hello", parse_statement_decoration_none}, + {L"command echo hello", L"echo", L"hello", parse_statement_decoration_command}, + {L"command command hello", L"command", L"hello", parse_statement_decoration_command}, + {L"builtin command hello", L"command", L"hello", parse_statement_decoration_builtin}, + {L"command --help", L"command", L"--help", parse_statement_decoration_none}, + {L"command -h", L"command", L"-h", parse_statement_decoration_none}, + {L"command", L"command", L"", parse_statement_decoration_none}, + {L"function", L"function", L"", parse_statement_decoration_none}, + {L"function --help", L"function", L"--help", parse_statement_decoration_none} + }; + + for (size_t i=0; i < sizeof tests / sizeof *tests; i++) + { + wcstring cmd, args; + enum parse_statement_decoration_t deco = parse_statement_decoration_none; + bool success = test_1_parse_ll2(tests[i].src, &cmd, &args, &deco); + if (! 
success) + err(L"Parse of '%ls' failed on line %ld", tests[i].cmd.c_str(), (long)__LINE__); + if (cmd != tests[i].cmd) + err(L"When parsing '%ls', expected command '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].cmd.c_str(), cmd.c_str(), (long)__LINE__); + if (args != tests[i].args) + err(L"When parsing '%ls', expected args '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].args.c_str(), args.c_str(), (long)__LINE__); + if (deco != tests[i].deco) + err(L"When parsing '%ls', expected decoration %d but got %d on line %ld", tests[i].src.c_str(), (int)tests[i].deco, (int)deco, (long)__LINE__); + } +} + __attribute__((unused)) static void test_new_parser(void) { @@ -2125,6 +2204,7 @@ int main(int argc, char **argv) env_init(); test_highlighting(); + test_new_parser_ll2(); test_new_parser_fuzzing(); test_new_parser_correctness(); test_highlighting(); diff --git a/highlight.cpp b/highlight.cpp index 8fe9989b9..ffd5953c6 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -337,12 +337,10 @@ bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_ assert(plain_statement.type == symbol_plain_statement); bool result = false; - // Get the command - const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); - if (cmd_node != NULL && cmd_node->has_source()) + /* Get the command */ + wcstring cmd; + if (tree.command_for_plain_statement(plain_statement, src, &cmd)) { - wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); - /* Try expanding it. If we cannot, it's an error. */ if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { diff --git a/parse_productions.cpp b/parse_productions.cpp index 38d57ebab..90e4a99b8 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -8,7 +8,7 @@ static bool production_is_empty(const production_t production) return production[0] == token_type_invalid; } -// Empty productions are allowed but must be first. 
Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it +/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */ static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) { if (which < 0 || which >= MAX_PRODUCTIONS) @@ -26,9 +26,24 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } +/* Helper function indicates whether a token (typically second token) means 'help'. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. + + if naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. +*/ +static inline bool token_means_help(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) +{ + if (keyword == parse_keyword_dash_h || keyword == parse_keyword_dashdash_help) + return true; + + if (naked_invocation_invokes_help && type != parse_token_type_string) + return true; + + return false; +} + #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, 
parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -115,6 +130,17 @@ PRODUCTIONS(statement) = }; RESOLVE(statement) { + // Go to decorated statements if the subsequent token looks like '--help' + // If we are 'begin', then we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end. + if (token_type == parse_token_type_string) + { + bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); + if (token_means_help(token_type2, token_keyword2, naked_invocation_invokes_help)) + { + return 4; //decorated statement + } + } + switch (token_type) { case parse_token_type_string: @@ -149,6 +175,8 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: + case parse_keyword_dash_h: + case parse_keyword_dashdash_help: return 4; } break; @@ -336,6 +364,10 @@ PRODUCTIONS(decorated_statement) = }; RESOLVE(decorated_statement) { + /* If this is e.g. 
'command --help' then the command is 'command' and not a decoration */ + if (token_means_help(token_type2, token_keyword2, true /* naked_invocation_is_help */)) + return 0; + switch (token_keyword) { default: @@ -418,7 +450,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -428,7 +460,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword) = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) = NULL; switch (node_type) { TEST(job_list) @@ -486,7 +518,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword); + production_option_idx_t which = resolver(input_type, input_keyword, input_type2, input_keyword2); if (log_it) { diff --git a/parse_productions.h b/parse_productions.h index 7e132d0c4..298be0b1c 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -62,8 +62,8 @@ inline bool 
production_element_is_valid(production_element_t elem) return elem != token_type_invalid; } -/* Fetch a production */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, wcstring *out_error_text); +/* Fetch a production. We are passed two input tokens. The first input token is guaranteed to not be invalid; the second token may be invalid if there's no more tokens. */ +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_idx, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 900513f50..2066b8246 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -199,7 +199,7 @@ struct parse_token_t }; /* Convert from tokenizer_t's token type to a parse_token_t type */ -static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) +static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) { parse_token_type_t result = token_type_invalid; switch (tokenizer_token_type) @@ -447,7 +447,7 @@ class parse_ll_t } /* Input */ - void accept_token(parse_token_t token); + void accept_tokens(parse_token_t token1, parse_token_t token2); /* Indicate if we hit a fatal error */ bool has_fatal_error(void) const @@ -678,23 +678,23 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) return handled; } -void parse_ll_t::accept_token(parse_token_t token) +void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { bool logit = false; if (logit) { - fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); + fprintf(stderr, "Accept token %ls\n", token1.describe().c_str()); } - PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + PARSE_ASSERT(token1.type >= 
FIRST_PARSE_TOKEN_TYPE); bool consumed = false; // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. - if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) + if (token1.type == parse_special_type_parse_error || token1.type == parse_special_type_tokenizer_error || token1.type == parse_special_type_comment) { - parse_node_t err_node(token.type); - err_node.source_start = token.source_start; - err_node.source_length = token.source_length; + parse_node_t err_node(token1.type); + err_node.source_start = token1.source_start; + err_node.source_length = token1.source_length; nodes.push_back(err_node); consumed = true; } @@ -703,11 +703,11 @@ void parse_ll_t::accept_token(parse_token_t token) { PARSE_ASSERT(! symbol_stack.empty()); - if (top_node_handle_terminal_types(token)) + if (top_node_handle_terminal_types(token1)) { if (logit) { - fprintf(stderr, "Consumed token %ls\n", token.describe().c_str()); + fprintf(stderr, "Consumed token %ls\n", token1.describe().c_str()); } consumed = true; break; @@ -720,16 +720,16 @@ void parse_ll_t::accept_token(parse_token_t token) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token1.type, token1.keyword, token2.type, token2.keyword, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) { - this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + this->parse_error(token1, L"Unable to produce a '%ls' from input '%ls'", 
stack_elem.describe().c_str(), token1.describe().c_str()); } else { - this->parse_error(token, NULL); + this->parse_error(token1, NULL); } // parse_error sets fatal_errored, which ends the loop } @@ -742,7 +742,7 @@ void parse_ll_t::accept_token(parse_token_t token) // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { - this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); + this->parse_error(token1, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); } } } @@ -783,7 +783,9 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"or", parse_keyword_or}, {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, - {L"builtin", parse_keyword_builtin} + {L"builtin", parse_keyword_builtin}, + {L"-h", parse_keyword_dash_h}, + {L"--help", parse_keyword_dashdash_help} }; for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) @@ -798,8 +800,38 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } +/* Placeholder invalid token */ +static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, -1, -1}; + +/* Return a new parse token, advancing the tokenizer */ +static inline parse_token_t next_parse_token(tokenizer_t *tok) +{ + if (! 
tok_has_next(tok)) + { + return kInvalidToken; + } + + token_type tok_type = static_cast(tok_last_type(tok)); + int tok_start = tok_get_pos(tok); + size_t tok_extent = tok_get_extent(tok); + assert(tok_extent < 10000000); //paranoia + const wchar_t *tok_txt = tok_last(tok); + + parse_token_t result; + result.type = parse_token_type_from_tokenizer_token(tok_type); + result.source_start = (size_t)tok_start; + result.source_length = tok_extent; + result.keyword = keyword_for_token(tok_type, tok_txt); + + tok_next(tok); + return result; +} + bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { + this->parser->set_should_generate_error_messages(errors != NULL); + + /* Construct the tokenizer */ tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; @@ -807,32 +839,29 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n if (parse_flags & parse_flag_accept_incomplete_tokens) tok_options |= TOK_ACCEPT_UNFINISHED; - this->parser->set_should_generate_error_messages(errors != NULL); - tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); - for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok)) + + /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */ + parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)}; + + /* Go until the most recently added token is invalid. Note this may mean we don't process anything if there were no tokens. 
*/ + while (queue[1].type != token_type_invalid) { - token_type tok_type = static_cast(tok_last_type(&tok)); - const wchar_t *tok_txt = tok_last(&tok); - int tok_start = tok_get_pos(&tok); - size_t tok_extent = tok_get_extent(&tok); - assert(tok_extent < 10000000); //paranoia - - parse_token_t token; - token.type = parse_token_type_from_tokenizer_token(tok_type); - token.source_start = (size_t)tok_start; - token.source_length = tok_extent; - token.keyword = keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token); - + /* Push a new token onto the queue */ + queue[0] = queue[1]; + queue[1] = next_parse_token(&tok); + + /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ + this->parser->accept_tokens(queue[0], queue[1]); + + /* Handle errors */ if (this->parser->has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { - /* Mark an error and then keep going */ - token.type = parse_special_type_parse_error; - token.keyword = parse_keyword_none; - this->parser->accept_token(token); + /* Mark a special error token, and then keep going */ + const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, -1, -1}; + this->parser->accept_tokens(token, kInvalidToken); this->parser->reset_symbols(); } else @@ -843,11 +872,10 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n } } + // Teach each node where its source range is this->parser->determine_node_ranges(); - // Tag nodes - // Acquire the output from the parser this->parser->acquire_output(output, errors); @@ -863,6 +891,8 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) { + const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; + // Only strings can have keywords. 
So if we have a keyword, the type must be a string assert(keyword == parse_keyword_none || token_type == parse_token_type_string); @@ -875,7 +905,7 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo bool wants_errors = (errors != NULL); this->parser->set_should_generate_error_messages(wants_errors); - this->parser->accept_token(token); + this->parser->accept_tokens(token, invalid_token); return ! this->parser->has_fatal_error(); } @@ -1007,3 +1037,20 @@ enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statem } return decoration; } + +bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const +{ + bool result = false; + assert(node.type == symbol_plain_statement); + const parse_node_t *cmd_node = this->get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + out_cmd->assign(src, cmd_node->source_start, cmd_node->source_length); + result = true; + } + else + { + out_cmd->clear(); + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index b83e47abc..941ddd4e2 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -112,8 +112,12 @@ enum parse_keyword_t parse_keyword_not, parse_keyword_command, parse_keyword_builtin, + + /* The following are not really keywords but are necessary for e.g. "command --help" to work */ + parse_keyword_dash_h, + parse_keyword_dashdash_help, - LAST_KEYWORD = parse_keyword_builtin + LAST_KEYWORD = parse_keyword_dashdash_help }; @@ -243,7 +247,12 @@ class parse_node_tree_t : public std::vector bool argument_list_is_root(const parse_node_t &node) const; /* Utilities */ + + /* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */ enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + + /* Given a plain statement, get the command by reference (from the child node). 
Returns true if successful. Clears the command on failure. */ + bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; }; From 5490f54d005dcc9b35409315aefc698203b81ff1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 01:17:55 -0700 Subject: [PATCH 035/177] Make parse_t::parse a static method so we don't have to create lots of useless parse_t objects --- builtin.cpp | 3 +-- fish_tests.cpp | 9 +++------ parse_tree.cpp | 8 +++++++- parse_tree.h | 6 ++++-- reader.cpp | 3 +-- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index b84d78e73..6e669e697 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3976,8 +3976,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; parse_error_list_t errors; - parse_t parser; - bool success = parser.parse(src, parse_flag_none, &parse_tree, &errors, true); + bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true); if (! success) { stdout_buffer.append(L"Parsing failed:\n"); diff --git a/fish_tests.cpp b/fish_tests.cpp index 40a8d7db4..0cd7dbf08 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1845,8 +1845,7 @@ static void test_new_parser_correctness(void) const parser_test_t *test = &parser_tests[i]; parse_node_tree_t parse_tree; - parse_t parser; - bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); + bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL); say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); if (success && ! 
test->ok) { @@ -1954,8 +1953,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o bool result = false; parse_node_tree_t tree; - parse_t parser; - if (parser.parse(src, parse_flag_none, &tree, NULL)) + if (parse_t::parse(src, parse_flag_none, &tree, NULL)) { /* Get the statement. Should only have one */ const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); @@ -2030,8 +2028,7 @@ static void test_new_parser(void) say(L"Testing new parser"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; - parse_t parser; - bool success = parser.parse(src, parse_flag_none, &parse_tree, NULL); + bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); if (! success) { say(L"Parsing failed"); diff --git a/parse_tree.cpp b/parse_tree.cpp index 2066b8246..a1acdb13b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -827,7 +827,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) return result; } -bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { this->parser->set_should_generate_error_messages(errors != NULL); @@ -889,6 +889,12 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n return ! 
this->parser->has_fatal_error(); } +bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +{ + parse_t parse; + return parse.parse_internal(str, flags, output, errors, log_it); +} + bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) { const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; diff --git a/parse_tree.h b/parse_tree.h index 941ddd4e2..f6e913c25 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -141,12 +141,14 @@ class parse_t { parse_ll_t * const parser; + bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + public: parse_t(); ~parse_t(); - /* Parse a string */ - bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + /* Parse a string all at once */ + static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); /* Parse a single token */ bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); diff --git a/reader.cpp b/reader.cpp index 0f022c279..b59f8086c 100644 --- a/reader.cpp +++ b/reader.cpp @@ -664,8 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso /* Parse this subcmd */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Look for plain statements where the cursor is at the end of the command */ const parse_node_t *matching_cmd_node = NULL; From 
77e358a001eea9a36e0c466b0af253d992c370bd Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 01:46:22 -0700 Subject: [PATCH 036/177] Support for parsing e.g. 'command --' as a plain statement, instead of executing the command '--'. --- fish_tests.cpp | 2 ++ parse_productions.cpp | 33 ++++++++++++++++++++++++--------- parse_tree.cpp | 2 ++ parse_tree.h | 2 ++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 0cd7dbf08..3884e470e 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2002,6 +2002,8 @@ static void test_new_parser_ll2(void) {L"command --help", L"command", L"--help", parse_statement_decoration_none}, {L"command -h", L"command", L"-h", parse_statement_decoration_none}, {L"command", L"command", L"", parse_statement_decoration_none}, + {L"command -", L"command", L"-", parse_statement_decoration_none}, + {L"command --", L"command", L"--", parse_statement_decoration_none}, {L"function", L"function", L"", parse_statement_decoration_none}, {L"function --help", L"function", L"--help", parse_statement_decoration_none} }; diff --git a/parse_productions.cpp b/parse_productions.cpp index 90e4a99b8..bb415e603 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -26,19 +26,32 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } -/* Helper function indicates whether a token (typically second token) means 'help'. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. +/* Helper function indicates whether a token (typically second token) causes the preceding token to be treated as a command instead of giving it a special role. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. 
if naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. */ -static inline bool token_means_help(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) +static inline bool token_implies_previous_keyword_is_command(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) { - if (keyword == parse_keyword_dash_h || keyword == parse_keyword_dashdash_help) - return true; + bool result = false; + switch (keyword) + { + case parse_keyword_dash: + case parse_keyword_dashdash: + case parse_keyword_dash_h: + case parse_keyword_dashdash_help: + result = true; + break; + + default: + break; + } - if (naked_invocation_invokes_help && type != parse_token_type_string) - return true; + if (! result) + { + result = naked_invocation_invokes_help && type != parse_token_type_string; + } - return false; + return result; } #define PRODUCTIONS(sym) static const production_options_t productions_##sym @@ -135,7 +148,7 @@ RESOLVE(statement) if (token_type == parse_token_type_string) { bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); - if (token_means_help(token_type2, token_keyword2, naked_invocation_invokes_help)) + if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, naked_invocation_invokes_help)) { return 4; //decorated statement } @@ -175,6 +188,8 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: + case parse_keyword_dash: + case parse_keyword_dashdash: case parse_keyword_dash_h: case parse_keyword_dashdash_help: return 4; @@ -365,7 +380,7 @@ PRODUCTIONS(decorated_statement) = RESOLVE(decorated_statement) { /* If this is e.g. 
'command --help' then the command is 'command' and not a decoration */ - if (token_means_help(token_type2, token_keyword2, true /* naked_invocation_is_help */)) + if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, true /* naked_invocation_is_help */)) return 0; switch (token_keyword) diff --git a/parse_tree.cpp b/parse_tree.cpp index a1acdb13b..793715ecb 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -784,6 +784,8 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, {L"builtin", parse_keyword_builtin}, + {L"-", parse_keyword_dash}, + {L"--", parse_keyword_dashdash}, {L"-h", parse_keyword_dash_h}, {L"--help", parse_keyword_dashdash_help} }; diff --git a/parse_tree.h b/parse_tree.h index f6e913c25..c46eb9116 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -114,6 +114,8 @@ enum parse_keyword_t parse_keyword_builtin, /* The following are not really keywords but are necessary for e.g. "command --help" to work */ + parse_keyword_dash, + parse_keyword_dashdash, parse_keyword_dash_h, parse_keyword_dashdash_help, From ddec870d252c0ae84fd80f8b4b75d91a97e99395 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 02:46:49 -0700 Subject: [PATCH 037/177] Rework decision process for whether to interpret keywords as structural or as commands (for LL parser). 
Will allow 'builtin --' to parse as a plain statement, instead of a decorated statement '--' --- fish_tests.cpp | 1 + parse_productions.cpp | 103 +++++++++++++++++------------------------- parse_productions.h | 3 +- parse_tree.cpp | 39 +++++++--------- parse_tree.h | 19 +++++--- 5 files changed, 72 insertions(+), 93 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 3884e470e..f91f9d3b7 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2004,6 +2004,7 @@ static void test_new_parser_ll2(void) {L"command", L"command", L"", parse_statement_decoration_none}, {L"command -", L"command", L"-", parse_statement_decoration_none}, {L"command --", L"command", L"--", parse_statement_decoration_none}, + {L"builtin --names", L"builtin", L"--names", parse_statement_decoration_none}, {L"function", L"function", L"", parse_statement_decoration_none}, {L"function --help", L"function", L"--help", parse_statement_decoration_none} }; diff --git a/parse_productions.cpp b/parse_productions.cpp index bb415e603..528ca3cea 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -26,37 +26,9 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } -/* Helper function indicates whether a token (typically second token) causes the preceding token to be treated as a command instead of giving it a special role. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. - - if naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. 
-*/ -static inline bool token_implies_previous_keyword_is_command(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) -{ - bool result = false; - switch (keyword) - { - case parse_keyword_dash: - case parse_keyword_dashdash: - case parse_keyword_dash_h: - case parse_keyword_dashdash_help: - result = true; - break; - - default: - break; - } - - if (! result) - { - result = naked_invocation_invokes_help && type != parse_token_type_string; - } - - return result; -} - #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -71,11 +43,11 @@ PRODUCTIONS(job_list) = RESOLVE(job_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: // 'end' is special - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_end: case parse_keyword_else: @@ -120,7 +92,7 @@ PRODUCTIONS(job_continuation) = }; RESOLVE(job_continuation) { - switch (token_type) + switch (token1.type) { case parse_token_type_pipe: // Pipe, continuation @@ -143,21 +115,29 @@ PRODUCTIONS(statement) = }; RESOLVE(statement) { - // Go to decorated statements if the subsequent token looks like '--help' + // Go to decorated statements if the subsequent token looks like '--' // If we are 'begin', then 
we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end. - if (token_type == parse_token_type_string) + if (token1.type == parse_token_type_string) { - bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); - if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, naked_invocation_invokes_help)) + // If the next token looks like an option (starts with a dash), then parse it as a decorated statement + if (token2.has_dash_prefix) { - return 4; //decorated statement + return 4; } + + // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if". + bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); + if (naked_invocation_invokes_help && token2.type != parse_token_type_string) + { + return 4; + } + } - switch (token_type) + switch (token1.type) { case parse_token_type_string: - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_and: case parse_keyword_or: @@ -188,10 +168,6 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: - case parse_keyword_dash: - case parse_keyword_dashdash: - case parse_keyword_dash_h: - case parse_keyword_dashdash_help: return 4; } break; @@ -227,7 +203,7 @@ PRODUCTIONS(else_clause) = }; RESOLVE(else_clause) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_else: return 1; @@ -243,7 +219,7 @@ PRODUCTIONS(else_continuation) = }; RESOLVE(else_continuation) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_if: return 0; @@ -266,8 +242,8 @@ PRODUCTIONS(case_item_list) = }; RESOLVE(case_item_list) { - if (token_keyword == parse_keyword_case) return 1; - else if (token_type == parse_token_type_end) return 2; //empty line + if (token1.keyword == 
parse_keyword_case) return 1; + else if (token1.type == parse_token_type_end) return 2; //empty line else return 0; } @@ -284,7 +260,7 @@ PRODUCTIONS(argument_list) = }; RESOLVE(argument_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: return 1; @@ -308,7 +284,7 @@ PRODUCTIONS(block_header) = }; RESOLVE(block_header) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_else: return NO_PRODUCTION; @@ -358,7 +334,7 @@ PRODUCTIONS(boolean_statement) = }; RESOLVE(boolean_statement) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_and: return 0; @@ -379,11 +355,13 @@ PRODUCTIONS(decorated_statement) = }; RESOLVE(decorated_statement) { - /* If this is e.g. 'command --help' then the command is 'command' and not a decoration */ - if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, true /* naked_invocation_is_help */)) + /* If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the second token is not a string, then this is a naked 'command' and we should execute it as undecorated. 
*/ + if (token2.type != parse_token_type_string || token2.has_dash_prefix) + { return 0; + } - switch (token_keyword) + switch (token1.keyword) { default: return 0; @@ -407,7 +385,7 @@ PRODUCTIONS(arguments_or_redirections_list) = }; RESOLVE(arguments_or_redirections_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: @@ -424,7 +402,7 @@ PRODUCTIONS(argument_or_redirection) = }; RESOLVE(argument_or_redirection) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: return 0; @@ -455,7 +433,7 @@ PRODUCTIONS(optional_background) = RESOLVE(optional_background) { - switch (token_type) + switch (token1.type) { case parse_token_type_background: return 1; @@ -465,17 +443,17 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_which_production, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) { - fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); + fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str()); } /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t 
(*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) = NULL; + production_option_idx_t (*resolver)(const parse_token_t &input1, const parse_token_t &input2) = NULL; switch (node_type) { TEST(job_list) @@ -533,7 +511,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword, input_type2, input_keyword2); + production_option_idx_t which = resolver(input1, input2); if (log_it) { @@ -545,7 +523,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n { if (log_it) { - fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__); } result = NULL; } @@ -557,3 +535,4 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n *out_which_production = which; return result; } + diff --git a/parse_productions.h b/parse_productions.h index 298be0b1c..18894ca78 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -14,7 +14,6 @@ namespace parse_productions #define MAX_PRODUCTIONS 5 #define MAX_SYMBOLS_PER_PRODUCTION 5 - typedef uint32_t production_tag_t; /* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. */ @@ -63,7 +62,7 @@ inline bool production_element_is_valid(production_element_t elem) } /* Fetch a production. We are passed two input tokens. 
The first input token is guaranteed to not be invalid; the second token may be invalid if there's no more tokens. */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_idx, wcstring *out_error_text); +const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 793715ecb..207458ef6 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -179,24 +179,18 @@ wcstring parse_node_t::describe(void) const return result; } -/** A struct representing the token type passed to */ -struct parse_token_t -{ - enum parse_token_type_t type; // The type of the token as represented by the parser - enum parse_keyword_t keyword; // Any keyword represented by this parser - size_t source_start; - size_t source_length; - wcstring describe() const +/** Returns a string description of the given parse token */ +wcstring parse_token_t::describe() const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) { - wcstring result = token_type_description(type); - if (keyword != parse_keyword_none) - { - append_format(result, L" <%ls>", keyword_description(keyword).c_str()); - } - return result; + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); } -}; + return result; +} + /* Convert from tokenizer_t's token type to a parse_token_t type */ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) @@ -720,7 +714,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = 
nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token1.type, token1.keyword, token2.type, token2.keyword, &node.production_idx, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token1, token2, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) @@ -783,11 +777,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"or", parse_keyword_or}, {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, - {L"builtin", parse_keyword_builtin}, - {L"-", parse_keyword_dash}, - {L"--", parse_keyword_dashdash}, - {L"-h", parse_keyword_dash_h}, - {L"--help", parse_keyword_dashdash_help} + {L"builtin", parse_keyword_builtin} }; for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) @@ -803,7 +793,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) } /* Placeholder invalid token */ -static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, -1, -1}; +static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1}; /* Return a new parse token, advancing the tokenizer */ static inline parse_token_t next_parse_token(tokenizer_t *tok) @@ -820,10 +810,13 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) const wchar_t *tok_txt = tok_last(tok); parse_token_t result; + + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature.
*/ result.type = parse_token_type_from_tokenizer_token(tok_type); + result.keyword = keyword_for_token(tok_type, tok_txt); + result.has_dash_prefix = (tok_txt[0] == L'-'); result.source_start = (size_t)tok_start; result.source_length = tok_extent; - result.keyword = keyword_for_token(tok_type, tok_txt); tok_next(tok); return result; diff --git a/parse_tree.h b/parse_tree.h index c46eb9116..c1bcbab96 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -113,13 +113,20 @@ enum parse_keyword_t parse_keyword_command, parse_keyword_builtin, - /* The following are not really keywords but are necessary for e.g. "command --help" to work */ - parse_keyword_dash, - parse_keyword_dashdash, - parse_keyword_dash_h, - parse_keyword_dashdash_help, + LAST_KEYWORD = parse_keyword_builtin +}; - LAST_KEYWORD = parse_keyword_dashdash_help + +/** A struct representing the token type that we use internally */ +struct parse_token_t +{ + enum parse_token_type_t type; // The type of the token as represented by the parser + enum parse_keyword_t keyword; // Any keyword represented by this token + bool has_dash_prefix; // Hackish: whether the source contains a dash prefix + size_t source_start; + size_t source_length; + + wcstring describe() const; }; From b60db798669faeaf87ccd4a8611b4c2b77ff0990 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 11:32:34 -0700 Subject: [PATCH 038/177] Modify fish_tests to support specifying the tests to run via arguments --- fish_tests.cpp | 90 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index f91f9d3b7..bb13bd2ee 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -63,6 +63,26 @@ #include "parse_tree.h" #include "parse_util.h" +static const char * const * s_arguments; + +/* Indicate if we should test the given function. 
Either we test everything (no arguments given) or we run only tests that have a prefix in s_arguments */ +static bool should_test_function(const char *func_name) +{ + /* No args, test everything */ + if (! s_arguments || ! s_arguments[0]) + return true; + + for (size_t i=0; s_arguments[i] != NULL; i++) + { + if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + { + /* Prefix match */ + return true; + } + } + return false; +} + /** The number of tests to run */ @@ -1100,6 +1120,12 @@ static void test_complete(void) assert(completions.size() == 2); assert(completions.at(0).completion == L"$Foo1"); assert(completions.at(1).completion == L"$Bar1"); + + completions.clear(); + complete(L"echo (/bin/ech", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"o"); + complete_set_variable_names(NULL); @@ -2192,6 +2218,7 @@ int main(int argc, char **argv) configure_thread_assertions_for_testing(); program_name=L"(ignore)"; + s_arguments = (argc > 0) ? argv + 1 : argv; /* Skip argv[0] (the program name) so it is not treated as a test-name filter */ say(L"Testing low-level functionality"); set_main_thread(); @@ -2203,39 +2230,38 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_highlighting(); - test_new_parser_ll2(); - test_new_parser_fuzzing(); - test_new_parser_correctness(); - test_highlighting(); - test_new_parser(); + if (should_test_function("highlighting")) test_highlighting(); + if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); + if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); + if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); + if (should_test_function("new_parser")) test_new_parser(); - test_format(); - test_escape(); - test_convert(); - test_convert_nulls(); - test_tok(); - test_fork(); - test_parser(); - test_utils(); - test_escape_sequences(); - test_lru(); - test_expand(); - test_fuzzy_match(); - test_abbreviations(); - test_test(); - test_path(); - test_word_motion(); - test_is_potential_path();
- test_colors(); - test_complete(); - test_completion_insertions(); - test_autosuggestion_combining(); - test_autosuggest_suggest_special(); - history_tests_t::test_history(); - history_tests_t::test_history_merge(); - history_tests_t::test_history_races(); - history_tests_t::test_history_formats(); + if (should_test_function("format")) test_format(); + if (should_test_function("escape")) test_escape(); + if (should_test_function("convert")) test_convert(); + if (should_test_function("convert_nulls")) test_convert_nulls(); + if (should_test_function("tok")) test_tok(); + if (should_test_function("fork")) test_fork(); + if (should_test_function("parser")) test_parser(); + if (should_test_function("utils")) test_utils(); + if (should_test_function("escape_sequences")) test_escape_sequences(); + if (should_test_function("lru")) test_lru(); + if (should_test_function("expand")) test_expand(); + if (should_test_function("fuzzy_match")) test_fuzzy_match(); + if (should_test_function("abbreviations")) test_abbreviations(); + if (should_test_function("test")) test_test(); + if (should_test_function("path")) test_path(); + if (should_test_function("word_motion")) test_word_motion(); + if (should_test_function("is_potential_path")) test_is_potential_path(); + if (should_test_function("colors")) test_colors(); + if (should_test_function("complete")) test_complete(); + if (should_test_function("completion_insertions")) test_completion_insertions(); + if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining(); + if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special(); + if (should_test_function("history")) history_tests_t::test_history(); + if (should_test_function("history_merge")) history_tests_t::test_history_merge(); + if (should_test_function("history_races")) history_tests_t::test_history_races(); + if (should_test_function("history_formats")) history_tests_t::test_history_formats(); 
//history_tests_t::test_history_speed(); say(L"Encountered %d errors in low-level tests", err_count); From cbd8a27a6d1ec705032486851203f8c4d1b4f56f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 12:04:31 -0700 Subject: [PATCH 039/177] Beef up completion tests --- fish_tests.cpp | 34 ++++++++++++++++++++++++++++++++-- function.cpp | 8 +++++++- function.h | 2 +- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index bb13bd2ee..b52b612d1 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1122,12 +1122,42 @@ static void test_complete(void) assert(completions.at(1).completion == L"$Bar1"); completions.clear(); - complete(L"echo (/bin/ech", completions, COMPLETION_REQUEST_DEFAULT); + complete(L"echo (/bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); assert(completions.size() == 1); - assert(completions.at(0).completion == L"o"); + assert(completions.at(0).completion == L"r"); + completions.clear(); + complete(L"echo (ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + completions.clear(); + complete(L"echo (command ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + /* Add a function and test completing it in various ways */ + struct function_data_t func_data; + func_data.name = L"scuttlebutt"; + func_data.definition = L"echo gongoozle"; + function_add(func_data, parser_t::principal_parser()); + /* Complete a function name */ + completions.clear(); + complete(L"echo (scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"t"); + /* But not with the command prefix */ + completions.clear(); + complete(L"echo (command scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + + /* Not with the builtin prefix */ + 
completions.clear(); + complete(L"echo (builtin scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + complete_set_variable_names(NULL); } diff --git a/function.cpp b/function.cpp index 08fb85560..fef342720 100644 --- a/function.cpp +++ b/function.cpp @@ -192,7 +192,13 @@ void function_add(const function_data_t &data, const parser_t &parser) /* Create and store a new function */ const wchar_t *filename = reader_current_filename(); - int def_offset = parser.line_number_of_character_at_offset(parser.current_block->tok_pos) - 1; + + int def_offset = -1; + if (parser.current_block != NULL) + { + def_offset = parser.line_number_of_character_at_offset(parser.current_block->tok_pos); + } + const function_map_t::value_type new_pair(data.name, function_info_t(data, filename, def_offset, is_autoload)); loaded_functions.insert(new_pair); diff --git a/function.h b/function.h index 2f8dfc36c..fd9455706 100644 --- a/function.h +++ b/function.h @@ -39,7 +39,7 @@ struct function_data_t /** Function definition */ - wchar_t *definition; + const wchar_t *definition; /** List of all event handlers for this function */ From ddf98661e4f16f75bb3deea26ec0c1e3bc651263 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 18:17:03 -0700 Subject: [PATCH 040/177] Adopt new parser in tab completions --- complete.cpp | 320 +++++++++++++++++++------------------------------ highlight.cpp | 9 +- parse_tree.cpp | 28 +++++ parse_tree.h | 9 ++ reader.cpp | 3 + 5 files changed, 169 insertions(+), 200 deletions(-) diff --git a/complete.cpp b/complete.cpp index 8df02b35a..e25042059 100644 --- a/complete.cpp +++ b/complete.cpp @@ -44,6 +44,7 @@ #include "parser_keywords.h" #include "wutil.h" #include "path.h" +#include "parse_tree.h" /* Completion description strings, mostly for different types of files, such as sockets, block devices, etc. 
@@ -1363,7 +1364,9 @@ struct local_options_t bool completer_t::complete_param(const wcstring &scmd_orig, const wcstring &spopt, const wcstring &sstr, bool use_switches) { - const wchar_t * const cmd_orig = scmd_orig.c_str(), * const popt = spopt.c_str(), * const str = sstr.c_str(); + const wchar_t * const cmd_orig = scmd_orig.c_str(); + const wchar_t * const popt = spopt.c_str(); + const wchar_t * const str = sstr.c_str(); bool use_common=1, use_files=1; @@ -1790,231 +1793,160 @@ bool completer_t::try_complete_user(const wcstring &str) return res; } -void complete(const wcstring &cmd, std::vector &comps, completion_request_flags_t flags, wcstring_list_t *commands_to_load) +void complete(const wcstring &cmd_with_subcmds, std::vector &comps, completion_request_flags_t flags, wcstring_list_t *commands_to_load) { + /* Determine the innermost subcommand */ + const wchar_t *cmdsubst_begin, *cmdsubst_end; + parse_util_cmdsubst_extent(cmd_with_subcmds.c_str(), cmd_with_subcmds.size(), &cmdsubst_begin, &cmdsubst_end); + assert(cmdsubst_begin != NULL && cmdsubst_end != NULL && cmdsubst_end >= cmdsubst_begin); + const wcstring cmd = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin); + /* Make our completer */ completer_t completer(cmd, flags); - const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; - wcstring current_token, prev_token; wcstring current_command; - int on_command=0; - size_t pos; + const size_t pos = cmd.size(); bool done=false; - int use_command = 1; - int use_function = 1; - int use_builtin = 1; - int had_ddash = 0; + bool use_command = 1; + bool use_function = 1; + bool use_builtin = 1; // debug( 1, L"Complete '%ls'", cmd ); - size_t cursor_pos = cmd.size(); - const wchar_t *cmd_cstr = cmd.c_str(); - parse_util_cmdsubst_extent(cmd_cstr, cursor_pos, &cmdsubst_begin, &cmdsubst_end); - parse_util_token_extent(cmd_cstr, cursor_pos, &tok_begin, &tok_end, &prev_begin, &prev_end); - - if (!cmdsubst_begin) - done=1; - + 
const wchar_t *tok_begin = NULL, *prev_begin = NULL, *prev_end = NULL; + parse_util_token_extent(cmd_cstr, cmd.size(), &tok_begin, NULL, &prev_begin, &prev_end); /** If we are completing a variable name or a tilde expansion user name, we do that and return. No need for any other completions. */ + + const wcstring current_token = tok_begin; if (!done) { - wcstring tmp = tok_begin; - done = completer.try_complete_variable(tmp) || completer.try_complete_user(tmp); + done = completer.try_complete_variable(current_token) || completer.try_complete_user(current_token); } if (!done) { - pos = cursor_pos-(cmdsubst_begin-cmd_cstr); - - const wcstring buff = wcstring(cmdsubst_begin, cmdsubst_end-cmdsubst_begin); - - int had_cmd=0; - int end_loop=0; - - tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - while (tok_has_next(&tok) && !end_loop) + //const size_t prev_token_len = (prev_begin ? prev_end - prev_begin : 0); + //const wcstring prev_token(prev_begin, prev_token_len); + + parse_node_tree_t tree; + parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); + + /* Find the plain statement that contains the position */ + const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL); + if (plain_statement != NULL) { - switch (tok_last_type(&tok)) + assert(plain_statement->has_source() && plain_statement->type == symbol_plain_statement); + + /* Get the command node */ + const parse_node_t *cmd_node = tree.get_child(*plain_statement, 0, parse_token_type_string); + + /* Get the actual command string */ + if (cmd_node != NULL) + current_command = cmd_node->get_source(cmd); + + /* Check the decoration */ + switch (tree.decoration_for_plain_statement(*plain_statement)) { - - case TOK_STRING: + case parse_statement_decoration_none: + use_command = true; + use_function = false; + use_builtin = false; + break; + + case parse_statement_decoration_command: + 
use_command = true; + use_function = false; + use_builtin = false; + break; + + case parse_statement_decoration_builtin: + use_command = false; + use_function = false; + use_builtin = true; + break; + } + + if (cmd_node && cmd_node->location_in_or_at_end_of_source_range(pos)) + { + /* Complete command filename */ + completer.complete_cmd(current_token, use_function, use_builtin, use_command); + } + else + { + /* Get all the arguments */ + const parse_node_tree_t::parse_node_list_t all_arguments = tree.find_nodes(*plain_statement, symbol_argument); + + /* See whether we are in an argument. We may also be in a redirection, or nothing at all. */ + size_t matching_arg_index = -1; + for (size_t i=0; i < all_arguments.size(); i++) { - - const wcstring ncmd = tok_last(&tok); - int is_ddash = (ncmd == L"--") && ((tok_get_pos(&tok)+2) < (long)pos); - - if (!had_cmd) + const parse_node_t *node = all_arguments.at(i); + if (node->location_in_or_at_end_of_source_range(pos)) { - - if (parser_keywords_is_subcommand(ncmd)) - { - if (ncmd == L"builtin") - { - use_function = 0; - use_command = 0; - use_builtin = 1; - } - else if (ncmd == L"command") - { - use_command = 1; - use_function = 0; - use_builtin = 0; - } - break; - } - - - if (!is_ddash || - ((use_command && use_function && use_builtin))) - { - current_command = ncmd; - - size_t token_end = tok_get_pos(&tok) + ncmd.size(); - - on_command = (pos <= token_end); - had_cmd=1; - } - + matching_arg_index = i; + break; } - else - { - if (is_ddash) - { - had_ddash = 1; - } - } - - break; - } - - case TOK_END: - case TOK_PIPE: - case TOK_BACKGROUND: - { - had_cmd=0; - had_ddash = 0; - use_command = 1; - use_function = 1; - use_builtin = 1; - break; - } - - case TOK_ERROR: - { - end_loop=1; - break; } - default: + bool had_ddash = false; + wcstring current_argument, previous_argument; + if (matching_arg_index != (size_t)(-1)) { - break; + /* Get the current argument and the previous argument, if we have one */ + current_argument = 
all_arguments.at(matching_arg_index)->get_source(cmd); + + if (matching_arg_index > 0) + previous_argument = all_arguments.at(matching_arg_index - 1)->get_source(cmd); + + /* Check to see if we have a preceding double-dash */ + for (size_t i=0; i < matching_arg_index; i++) + { + if (all_arguments.at(i)->get_source(cmd) == L"--") + { + had_ddash = true; + break; + } + } } + + bool do_file = false; + + wcstring current_command_unescape = current_command; + wcstring previous_argument_unescape = previous_argument; + wcstring current_argument_unescape = current_argument; + + if (unescape_string(current_command_unescape, 0) && + unescape_string(previous_argument_unescape, 0) && + unescape_string(current_argument_unescape, UNESCAPE_INCOMPLETE)) + { + do_file = completer.complete_param(current_command_unescape, + previous_argument_unescape, + current_argument_unescape, + !had_ddash); + } + + /* If we have found no command specific completions at all, fall back to using file completions. */ + if (completer.empty()) + do_file = true; + + /* But if we are planning on loading commands, don't do file completions. 
+ See https://github.com/fish-shell/fish-shell/issues/378 */ + if (commands_to_load != NULL && completer.has_commands_to_load()) + do_file = false; + + /* And if we're autosuggesting, and the token is empty, don't do file suggestions */ + if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_argument_unescape.empty()) + do_file = false; + + /* This function wants the unescaped string */ + completer.complete_param_expand(current_token, do_file); } - - if (tok_get_pos(&tok) >= (long)pos) - { - end_loop=1; - } - - tok_next(&tok); - - } - - /* - Get the string to complete - */ - - current_token.assign(tok_begin, cursor_pos-(tok_begin-cmd_cstr)); - - if (prev_begin) - { - prev_token.assign(prev_begin, prev_end - prev_begin); - } - else - { - prev_token.clear(); - } - -// debug( 0, L"on_command: %d, %ls %ls\n", on_command, current_command, current_token ); - - /* - Check if we are using the 'command' or 'builtin' builtins - _and_ we are writing a switch instead of a command. In that - case, complete using the builtins completions, not using a - subcommand. 
- */ - - if ((on_command || current_token == L"--") && - string_prefixes_string(L"-", current_token) && - !(use_command && use_function && use_builtin)) - { - if (use_command == 0) - current_command = L"builtin"; - else - current_command = L"command"; - - had_cmd = 1; - on_command = 0; - } - - /* - Use command completions if in between commands - */ - if (!had_cmd) - { - on_command=1; - } - - - if (on_command) - { - /* Complete command filename */ - completer.complete_cmd(current_token, use_function, use_builtin, use_command); - } - else - { - bool do_file = false; - - wcstring current_command_unescape = current_command; - wcstring prev_token_unescape = prev_token; - wcstring current_token_unescape = current_token; - - if (unescape_string(current_command_unescape, 0) && - unescape_string(prev_token_unescape, 0) && - unescape_string(current_token_unescape, UNESCAPE_INCOMPLETE)) - { - do_file = completer.complete_param(current_command_unescape, - prev_token_unescape, - current_token_unescape, - !had_ddash); - } - - /* If we have found no command specific completions at - all, fall back to using file completions. - */ - if (completer.empty()) - do_file = true; - - /* But if we are planning on loading commands, don't do file completions. 
- See https://github.com/fish-shell/fish-shell/issues/378 */ - if (commands_to_load != NULL && completer.has_commands_to_load()) - do_file = false; - - /* And if we're autosuggesting, and the token is empty, don't do file suggestions */ - if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_token_unescape.empty()) - do_file = false; - - /* - This function wants the unescaped string - */ - completer.complete_param_expand(current_token, do_file); } } diff --git a/highlight.cpp b/highlight.cpp index ffd5953c6..c4ad7d92e 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -713,8 +713,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -1709,8 +1708,7 @@ class highlighter_t { /* Parse the tree */ this->parse_tree.clear(); - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); } /* Perform highlighting, returning an array of colors */ @@ -1920,8 +1918,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); #if 0 const wcstring dump = parse_dump_tree(parse_tree, buff); diff --git a/parse_tree.cpp b/parse_tree.cpp index 207458ef6..87e2b3dc0 
100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1013,6 +1013,34 @@ const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t return result; } +const parse_node_t *parse_node_tree_t::find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Find the first node of the given type that contains the source location, searching forwards + const size_t len = this->size(); + for (size_t idx=0; idx < len; idx++) + { + const parse_node_t &node = this->at(idx); + + /* Types must match */ + if (node.type != type) + continue; + + /* Must contain source location */ + if (! node.location_in_or_at_end_of_source_range(source_loc)) + continue; + + /* If a parent is given, it must be an ancestor */ + if (parent != NULL && ! node_has_ancestor(*this, node, *parent)) + continue; + + /* Found it */ + result = &node; + break; + } + return result; +} + bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const { diff --git a/parse_tree.h b/parse_tree.h index c1bcbab96..62ffb622a 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -224,6 +224,12 @@ class parse_node_t else return wcstring(str, this->source_start, this->source_length); } + + /* Returns whether the given location is within the source range or at its end */ + bool location_in_or_at_end_of_source_range(size_t loc) const + { + return has_source() && source_start <= loc && loc - source_start <= source_length; + } }; /* Statement decorations. This matches the order of productions in decorated_statement */ @@ -254,6 +260,9 @@ class parse_node_tree_t : public std::vector /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type.
*/ const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; + /* Finds a node containing the given source location */ + const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; diff --git a/reader.cpp b/reader.cpp index b59f8086c..6c1e4a52d 100644 --- a/reader.cpp +++ b/reader.cpp @@ -3187,6 +3187,9 @@ const wchar_t *reader_readline(void) /* Figure out the extent of the token within the command substitution. Note we pass cmdsub_begin here, not buff */ const wchar_t *token_begin, *token_end; parse_util_token_extent(cmdsub_begin, data->buff_pos - (cmdsub_begin-buff), &token_begin, &token_end, 0, 0); + + /* Hack: the token may extend past the end of the command substitution, e.g. in (echo foo) the last token is 'foo)'. Don't let that happen. */ + if (token_end > cmdsub_end) token_end = cmdsub_end; /* Figure out how many steps to get from the current position to the end of the current token. */ size_t end_of_token_offset = token_end - buff; From 41e562ebc22d3e4dfc444e14b579236b82e08b1f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 13:26:52 -0700 Subject: [PATCH 041/177] Clean up redirection parsing in the tokenizer. --- tokenizer.cpp | 166 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 60 deletions(-) diff --git a/tokenizer.cpp b/tokenizer.cpp index 1ef0bf5dc..8a6fe58a8 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -50,7 +50,7 @@ segments. /** Error string for when trying to pipe from fd 0 */ -#define PIPE_ERROR _( L"Can not use fd 0 as pipe output" ) +#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" ) /** Characters that separate tokens. 
They are ordered by frequency of occurrence to increase parsing speed. @@ -435,66 +435,92 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } -/** - Read a FD redirection. +/* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. + + Also returns by reference the redirection mode, and the fd to redirection. */ -static void read_redirect(tokenizer_t *tok, int fd) +static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { + bool errored = false; + int fd = 0; enum token_type redirection_mode = TOK_NONE; - if ((*tok->buff == L'>') || - (*tok->buff == L'^')) + size_t idx = 0; + + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + for (; iswdigit(buff[idx]); idx++) { - tok->buff++; - if (*tok->buff == *(tok->buff-1)) + int digit = buff[idx] - L'0'; + fd = fd * 10 + digit; + } + + if (idx == 0) + { + /* We did not find a leading digit, so there's no explicit fd. 
Infer it from the type */ + switch (buff[idx]) { - tok->buff++; - redirection_mode = TOK_REDIRECT_APPEND; - } - else - { - redirection_mode = TOK_REDIRECT_OUT; - } - - if (*tok->buff == L'|') - { - if (fd == 0) - { - TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); - return; - } - tok->buff++; - tok->last_token = to_string(fd); - tok->last_type = TOK_PIPE; - return; + case L'>': fd = STDOUT_FILENO; break; + case L'<': fd = STDIN_FILENO; break; + case L'^': fd = STDERR_FILENO; break; + default: errored = true; break; } } - else if (*tok->buff == L'<') + + /* Either way we should have ended on the redirection character itself like '>' */ + wchar_t redirect_char = buff[idx++]; //note increment of idx + if (redirect_char == L'>' || redirect_char == L'^') + { + redirection_mode = TOK_REDIRECT_OUT; + if (buff[idx] == redirect_char) + { + /* Doubled up like ^^ or >>. That means append */ + redirection_mode = TOK_REDIRECT_APPEND; + idx++; + } + } + else if (redirect_char == L'<') { - tok->buff++; redirection_mode = TOK_REDIRECT_IN; } else { - TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + /* Something else */ + errored = true; } - - tok->last_token = to_string(fd); - - if (*tok->buff == L'&') + + /* Optional characters like & or ?, or the pipe char | */ + wchar_t opt_char = buff[idx]; + if (opt_char == L'&') { - tok->buff++; - tok->last_type = TOK_REDIRECT_FD; + redirection_mode = TOK_REDIRECT_FD; + idx++; } - else if (*tok->buff == L'?') + else if (opt_char == L'?') { - tok->buff++; - tok->last_type = TOK_REDIRECT_NOCLOB; + redirection_mode = TOK_REDIRECT_NOCLOB; + idx++; } - else + else if (opt_char == L'|') { - tok->last_type = redirection_mode; + /* So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets handled elsewhere. 
*/ + redirection_mode = TOK_PIPE; + idx++; } + + /* Don't return valid-looking stuff on error */ + if (errored) + { + idx = 0; + redirection_mode = TOK_NONE; + } + + /* Return stuff */ + if (out_redirection_mode != NULL) + *out_redirection_mode = redirection_mode; + if (out_fd != NULL) + *out_fd = fd; + + return idx; } wchar_t tok_last_quote(tokenizer_t *tok) @@ -606,36 +632,56 @@ void tok_next(tokenizer_t *tok) break; case L'>': - read_redirect(tok, 1); - return; case L'<': - read_redirect(tok, 0); - return; case L'^': - read_redirect(tok, 2); - return; + { + /* There's some duplication with the code in the default case below. The key difference here is that we must never parse these as a string; a failed redirection is an error! */ + enum token_type mode = TOK_NONE; + int fd = -1; + size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + if (consumed == 0) + { + TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); + } + } + break; default: { + /* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */ + size_t consumed = 0; + enum token_type mode = TOK_NONE; + int fd = -1; if (iswdigit(*tok->buff)) + consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + + if (consumed > 0) { - const wchar_t *orig = tok->buff; - int fd = 0; - while (iswdigit(*tok->buff)) - fd = (fd*10) + (*(tok->buff++) - L'0'); - - switch (*(tok->buff)) + /* It looks like a redirection or a pipe. But we don't support piping fd 0. 
*/ + if (mode == TOK_PIPE && fd == 0) { - case L'^': - case L'>': - case L'<': - read_redirect(tok, fd); - return; + TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); } - tok->buff = orig; } - read_string(tok); + else + { + /* Not a redirection or pipe, so just a string */ + read_string(tok); + } } + break; } From ce7c681462a34a0bb04b23b35dfdf7337a4e4dab Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 16:46:02 -0700 Subject: [PATCH 042/177] Fix for bogus completions for function names --- complete.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/complete.cpp b/complete.cpp index e25042059..a9fe01ad5 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1855,8 +1855,8 @@ void complete(const wcstring &cmd_with_subcmds, std::vector &comps { case parse_statement_decoration_none: use_command = true; - use_function = false; - use_builtin = false; + use_function = true; + use_builtin = true; break; case parse_statement_decoration_command: From ce857b077048ced8326e21161385d2bd823c4e05 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 16:58:40 -0700 Subject: [PATCH 043/177] Syntax highlighting for file redirections --- fish_tests.cpp | 81 +++++++++++++++++++--- highlight.cpp | 152 ++++++++++++++++++++++++++++++++++++++++-- parse_productions.cpp | 4 +- parse_tree.cpp | 32 ++++++++- parse_tree.h | 7 +- tokenizer.cpp | 29 ++++++-- tokenizer.h | 3 + 7 files changed, 277 insertions(+), 31 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index b52b612d1..e013743ec 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -396,6 +396,18 @@ static void test_tok() } } } + + /* Test redirection_type_for_string */ + if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>&3") != TOK_REDIRECT_FD) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>|") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); } static int test_fork_helper(void *unused) @@ -2182,9 +2194,59 @@ static void test_highlighting(void) {L")", HIGHLIGHT_OPERATOR}, {NULL, -1} }; + + // Redirections substitutions + const highlight_component_t components8[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + + /* Input redirection */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/bin/echo", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to a valid fd */ + {L"1>&2", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to an invalid fd */ + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"LOL", HIGHLIGHT_ERROR}, + + /* Just a param, not a redirection */ + {L"/tmp/blah", HIGHLIGHT_PARAM}, + + /* Input redirection from directory */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/tmp/", HIGHLIGHT_ERROR}, + + /* Output redirection to an invalid path */ + {L"3>", 
HIGHLIGHT_REDIRECTION}, + {L"/not/a/valid/path/nope", HIGHLIGHT_ERROR}, + + /* Output redirection to directory */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/tmp/nope/", HIGHLIGHT_ERROR}, + + + /* Redirections to overflow fd */ + {L"99999999999999999999>&2", HIGHLIGHT_ERROR}, + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"99999999999999999999", HIGHLIGHT_ERROR}, + + /* Output redirection containing a command substitution */ + {L"4>", HIGHLIGHT_REDIRECTION}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/somewhere", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + + /* Just another param */ + {L"param2", HIGHLIGHT_PARAM}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; @@ -2206,14 +2268,7 @@ static void test_highlighting(void) expected_colors.push_back(0); } text.append(components[i].txt); - - // hackish space handling - const size_t text_len = wcslen(components[i].txt); - for (size_t j=0; j < text_len; j++) - { - bool is_space = (components[i].txt[j] == L' '); - expected_colors.push_back(is_space ? 0 : components[i].color); - } + expected_colors.resize(text.size(), components[i].color); } assert(expected_colors.size() == text.size()); @@ -2227,6 +2282,10 @@ static void test_highlighting(void) assert(expected_colors.size() == colors.size()); for (size_t i=0; i < text.size(); i++) { + // Hackish space handling. We don't care about the colors in spaces. 
+ if (text.at(i) == L' ') + continue; + if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); @@ -2248,7 +2307,7 @@ int main(int argc, char **argv) configure_thread_assertions_for_testing(); program_name=L"(ignore)"; - s_arguments = argv; + s_arguments = argv + 1; say(L"Testing low-level functionality"); set_main_thread(); @@ -2262,7 +2321,7 @@ int main(int argc, char **argv) if (should_test_function("highlighting")) test_highlighting(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); - if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); + //if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser")) test_new_parser(); diff --git a/highlight.cpp b/highlight.cpp index c4ad7d92e..3acaf4968 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "fallback.h" #include "util.h" @@ -1692,9 +1693,15 @@ class highlighter_t /* Color an argument */ void color_argument(const parse_node_t &node); + /* Color a redirection */ + void color_redirection(const parse_node_t &node); + /* Color the arguments of the given node */ void color_arguments(const parse_node_t &list_node); + /* Color the redirections of the given node */ + void color_redirections(const parse_node_t &list_node); + /* Color all the children of the command with the given type */ void color_children(const parse_node_t &parent, parse_token_type_t type, int color); @@ -1729,6 +1736,7 @@ void highlighter_t::color_node(const parse_node_t &node, int color) std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); } +/* node does not necessarily have type symbol_argument here */ void highlighter_t::color_argument(const parse_node_t &node) { if (! 
node.has_source()) @@ -1819,7 +1827,7 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) /* Find all the arguments of this list */ const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); - for (node_offset_t i=0; i < nodes.size(); i++) + for (size_t i=0; i < nodes.size(); i++) { const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); @@ -1841,6 +1849,141 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) } } +void highlighter_t::color_redirection(const parse_node_t &redirection_node) +{ + assert(redirection_node.type == symbol_redirection); + if (! redirection_node.has_source()) + return; + + const parse_node_t *redirection_primitive = this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->parse_tree.get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL) + { + wcstring target; + const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, &target); + + /* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */ + this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? HIGHLIGHT_ERROR : HIGHLIGHT_REDIRECTION); + + /* Check if the argument contains a command substitution. If so, highlight it as a param even though it's a command redirection, and don't try to do any other validation. */ + if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0) + { + if (redirection_target != NULL) + this->color_argument(*redirection_target); + } + else + { + /* No command substitution, so we can highlight the target file or fd. For example, disallow redirections into a non-existent directory */ + bool target_is_valid = true; + + if (! 
expand_one(target, EXPAND_SKIP_CMDSUBST)) + { + /* Could not be expanded */ + target_is_valid = false; + } + else + { + /* Ok, we successfully expanded our target. Now verify that it works with this redirection. We will probably need it as a path (but not in the case of fd redirections */ + const wcstring target_path = apply_working_directory(target, this->working_directory); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + /* target should be an fd. It must be all digits, and must not overflow. fish_wcstoi returns INT_MAX on overflow; we could instead check errno to disambiguiate this from a real INT_MAX fd, but instead we just disallow that. */ + const wchar_t *target_cstr = target.c_str(); + wchar_t *end = NULL; + int fd = fish_wcstoi(target_cstr, &end, 10); + + /* The iswdigit check ensures there's no leading whitespace, the *end check ensures the entire string was consumed, and the numeric checks ensure the fd is at least zero and there was no overflow */ + target_is_valid = (iswdigit(target_cstr[0]) && *end == L'\0' && fd >= 0 && fd < INT_MAX); + } + break; + + case TOK_REDIRECT_IN: + { + /* Input redirections must have a readable non-directory */ + struct stat buf = {}; + target_is_valid = ! waccess(target_path, R_OK) && ! wstat(target_path, &buf) && ! S_ISDIR(buf.st_mode); + } + break; + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_NOCLOB: + { + /* Test whether the file exists, and whether it's writable (possibly after creating it). access() returns failure if the file does not exist. */ + bool file_exists = false, file_is_writable = false; + int err = 0; + + struct stat buf = {}; + if (wstat(target_path, &buf) < 0) + { + err = errno; + } + + if (string_suffixes_string(L"/", target)) + { + /* Redirections to things that are directories is definitely not allowed */ + file_exists = false; + file_is_writable = false; + } + else if (err == 0) + { + /* No err. 
We can write to it if it's not a directory and we have permission */ + file_exists = true; + file_is_writable = ! S_ISDIR(buf.st_mode) && ! waccess(target_path, W_OK); + } + else if (err == ENOENT) + { + /* File does not exist. Check if its parent directory is writable. */ + wcstring parent = wdirname(target_path); + + /* Ensure that the parent ends with the path separator. This will ensure that we get an error if the parent directory is not really a directory. */ + if (! string_suffixes_string(L"/", parent)) + parent.push_back(L'/'); + + /* Now the file is considered writable if the parent directory is writable */ + file_exists = false; + file_is_writable = (0 == waccess(parent, W_OK)); + } + else + { + /* Other errors we treat as not writable. This includes things like ENOTDIR. */ + file_exists = false; + file_is_writable = false; + } + + /* NOCLOB means that we must not overwrite files that exist */ + target_is_valid = file_is_writable && ! (file_exists && redirect_type == TOK_REDIRECT_NOCLOB); + } + break; + + default: + /* We should not get here, since the node was marked as a redirection, but treat it as an error for paranoia */ + target_is_valid = false; + break; + } + } + + if (redirection_target != NULL) + { + this->color_node(*redirection_target, target_is_valid ? 
HIGHLIGHT_REDIRECTION : HIGHLIGHT_ERROR); + } + } + } +} + +// Color all of the redirections of the given command +void highlighter_t::color_redirections(const parse_node_t &list_node) +{ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_redirection); + for (size_t i=0; i < nodes.size(); i++) + { + this->color_redirection(*nodes.at(i)); + } +} + /* Color all the children of the command with the given type */ void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) { @@ -1950,12 +2093,6 @@ const highlighter_t::color_array_t & highlighter_t::highlight() } break; - case symbol_redirection: - { - this->color_children(node, parse_token_type_string, HIGHLIGHT_REDIRECTION); - } - break; - case parse_token_type_background: case parse_token_type_end: { @@ -1994,6 +2131,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() if (parse_tree.argument_list_is_root(node)) { this->color_arguments(node); + this->color_redirections(node); } } break; diff --git a/parse_productions.cpp b/parse_productions.cpp index 528ca3cea..227955453 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -398,7 +398,7 @@ RESOLVE(arguments_or_redirections_list) PRODUCTIONS(argument_or_redirection) = { {symbol_argument}, - {parse_token_type_redirection} + {symbol_redirection} }; RESOLVE(argument_or_redirection) { @@ -421,7 +421,7 @@ RESOLVE_ONLY(argument) PRODUCTIONS(redirection) = { - {parse_token_type_redirection} + {parse_token_type_redirection, parse_token_type_string} }; RESOLVE_ONLY(redirection) diff --git a/parse_tree.cpp b/parse_tree.cpp index 87e2b3dc0..97421dab1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -519,8 +519,11 @@ void parse_ll_t::determine_node_ranges(void) for (node_offset_t i=0; i < parent->child_count; i++) { const parse_node_t &child = nodes.at(parent->child_offset(i)); - min_start = std::min(min_start, child.source_start); - max_end = std::max(max_end, 
child.source_start + child.source_length); + if (child.has_source()) + { + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } } if (min_start != source_start_invalid) @@ -691,6 +694,10 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) err_node.source_length = token1.source_length; nodes.push_back(err_node); consumed = true; + + /* tokenizer errors are fatal */ + if (token1.type == parse_special_type_tokenizer_error) + this->fatal_errored = true; } while (! consumed && ! this->fatal_errored) @@ -811,7 +818,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) parse_token_t result; - /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard ant it even starts to look like a feature. */ + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */ result.type = parse_token_type_from_tokenizer_token(tok_type); result.keyword = keyword_for_token(tok_type, tok_txt); result.has_dash_prefix = (tok_txt[0] == L'-'); @@ -906,6 +913,7 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo bool wants_errors = (errors != NULL); this->parser->set_should_generate_error_messages(wants_errors); + /* Passing invalid_token here is totally wrong. This code is only used in testing however. 
*/ this->parser->accept_tokens(token, invalid_token); return ! this->parser->has_fatal_error(); @@ -1083,3 +1091,21 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co } return result; } + +enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const +{ + assert(redirection_node.type == symbol_redirection); + enum token_type result = TOK_NONE; + const parse_node_t *redirection_primitive = this->get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL && redirection_primitive->has_source()) + { + result = redirection_type_for_string(redirection_primitive->get_source(src)); + } + if (out_target != NULL) + { + *out_target = redirection_target ? redirection_target->get_source(src) : L""; + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 62ffb622a..79cae8ccb 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -273,7 +273,9 @@ class parse_node_tree_t : public std::vector /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. 
*/ bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; - + + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; }; /* Fish grammar: @@ -332,7 +334,8 @@ class parse_node_tree_t : public std::vector argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection argument = - redirection = + + redirection = terminator = | diff --git a/tokenizer.cpp b/tokenizer.cpp index 8a6fe58a8..2416ce9d6 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -435,9 +435,11 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } + + /* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. - Also returns by reference the redirection mode, and the fd to redirection. + Also returns by reference the redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1. */ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { @@ -447,13 +449,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * size_t idx = 0; - /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character. Watch out for overflow. 
*/ + long long big_fd = 0; for (; iswdigit(buff[idx]); idx++) { - int digit = buff[idx] - L'0'; - fd = fd * 10 + digit; + /* Note that it's important we consume all the digits here, even if it overflows. */ + if (big_fd <= INT_MAX) + big_fd = big_fd * 10 + (buff[idx] - L'0'); } + fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); + if (idx == 0) { /* We did not find a leading digit, so there's no explicit fd. Infer it from the type */ @@ -523,6 +529,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * return idx; } +enum token_type redirection_type_for_string(const wcstring &str) +{ + enum token_type mode = TOK_NONE; + int fd = 0; + read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Redirections only, no pipes */ + if (mode == TOK_PIPE || fd < 0) + mode = TOK_NONE; + return mode; +} + wchar_t tok_last_quote(tokenizer_t *tok) { CHECK(tok, 0); @@ -639,7 +656,7 @@ void tok_next(tokenizer_t *tok) enum token_type mode = TOK_NONE; int fd = -1; size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); - if (consumed == 0) + if (consumed == 0 || fd < 0) { TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); } @@ -663,7 +680,7 @@ void tok_next(tokenizer_t *tok) if (consumed > 0) { - /* It looks like a redirection or a pipe. But we don't support piping fd 0. */ + /* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer error. 
*/ if (mode == TOK_PIPE && fd == 0) { TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); diff --git a/tokenizer.h b/tokenizer.h index dec206a58..8e130f0e7 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -187,6 +187,9 @@ const wchar_t *tok_get_desc(int type); */ int tok_get_error(tokenizer_t *tok); +/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid */ +enum token_type redirection_type_for_string(const wcstring &str); + enum move_word_style_t { move_word_style_punctuation, //stop at punctuation From e8ba3c2f4de8c60808e9c919cc3e947d15136e21 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Mon, 14 Oct 2013 09:12:45 +0200 Subject: [PATCH 044/177] Fix compilation errors under Clang. --- parse_productions.h | 1 + parse_tree.h | 1 + 2 files changed, 2 insertions(+) diff --git a/parse_productions.h b/parse_productions.h index 18894ca78..e6c003ab2 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -7,6 +7,7 @@ #define FISH_PARSE_TREE_CONSTRUCTION_H #include "parse_tree.h" +#include namespace parse_productions { diff --git a/parse_tree.h b/parse_tree.h index 79cae8ccb..8621cea8f 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -13,6 +13,7 @@ #include "common.h" #include "tokenizer.h" #include +#include #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) From 22d22f6aa883a6f48e9df0cd55254faa2cfc425e Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Mon, 14 Oct 2013 11:45:29 +0200 Subject: [PATCH 045/177] Remove undefined behavior from parse_error(). Having function that takes arbitrary number of arguments without actually reading them is undefined behavior, as it could cause stack to be in the corrupted state. Now arguments after token are parsed, even if they aren't needed. 
See also: http://asciinema.org/a/5904 --- parse_tree.cpp | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 97421dab1..3e0c52566 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -341,6 +341,7 @@ class parse_ll_t bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); + void parse_error(parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -551,19 +552,25 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e this->symbol_stack.clear(); } -void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +void parse_ll_t::parse_error(parse_token_t token) { this->fatal_errored = true; +} + +void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +{ + parse_error(token); + + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + if (this->should_generate_error_messages) { - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - err.source_start = token.source_start; err.source_length = token.source_length; this->errors.push_back(err); @@ -730,7 +737,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } else { - this->parse_error(token1, NULL); + this->parse_error(token1); } // parse_error sets fatal_errored, which ends the loop } From b06e7983733578d783bc585e7b0fc95f401b2e8d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 16 Oct 2013 01:17:27 -0700 Subject: [PATCH 046/177] Revert "Remove undefined behavior from parse_error()." Per my understanding this is not undefined behavior. 
No ABI depends on the called function reading variadic arguments, nor does any standard require it. So if this is crashing something else must be going on. This reverts commit 22d22f6aa883a6f48e9df0cd55254faa2cfc425e. --- parse_tree.cpp | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 3e0c52566..97421dab1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -341,7 +341,6 @@ class parse_ll_t bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); - void parse_error(parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -552,25 +551,19 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e this->symbol_stack.clear(); } -void parse_ll_t::parse_error(parse_token_t token) -{ - this->fatal_errored = true; -} - void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ - parse_error(token); - - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - + this->fatal_errored = true; if (this->should_generate_error_messages) { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + err.source_start = token.source_start; err.source_length = token.source_length; this->errors.push_back(err); @@ -737,7 +730,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } else { - this->parse_error(token1); + this->parse_error(token1, NULL); } // parse_error sets fatal_errored, which ends the loop } From c018bfdb4d04c2d64998f79e726d1699c05397c6 Mon Sep 17 00:00:00 2001 From: Siteshwar Vashisht Date: Sat, 17 Aug 2013 00:02:58 +0530 Subject: [PATCH 047/177] Initial work to add support for angularjs --- share/tools/web_config/angular.js | 14847 ++++++++++++++++ share/tools/web_config/index.html | 851 +- share/tools/web_config/partials/colors.html | 39 + .../tools/web_config/partials/functions.html | 17 + share/tools/web_config/partials/history.html | 13 + share/tools/web_config/partials/prompt.html | 25 + .../tools/web_config/partials/variables.html | 9 + share/tools/web_config/webconfig.css | 364 + share/tools/web_config/webconfig.js | 156 + share/tools/web_config/webconfig.py | 8 +- 10 files changed, 15488 insertions(+), 841 deletions(-) create mode 100644 share/tools/web_config/angular.js create mode 100644 share/tools/web_config/partials/colors.html create mode 100644 share/tools/web_config/partials/functions.html create mode 100644 share/tools/web_config/partials/history.html create mode 100644 share/tools/web_config/partials/prompt.html create mode 100644 share/tools/web_config/partials/variables.html create mode 100644 share/tools/web_config/webconfig.css create mode 100644 share/tools/web_config/webconfig.js diff --git a/share/tools/web_config/angular.js 
b/share/tools/web_config/angular.js new file mode 100644 index 000000000..a860c8594 --- /dev/null +++ b/share/tools/web_config/angular.js @@ -0,0 +1,14847 @@ +/** + * @license AngularJS v1.0.7 + * (c) 2010-2012 Google, Inc. http://angularjs.org + * License: MIT + */ +(function(window, document, undefined) { +'use strict'; + +//////////////////////////////////// + +/** + * @ngdoc function + * @name angular.lowercase + * @function + * + * @description Converts the specified string to lowercase. + * @param {string} string String to be converted to lowercase. + * @returns {string} Lowercased string. + */ +var lowercase = function(string){return isString(string) ? string.toLowerCase() : string;}; + + +/** + * @ngdoc function + * @name angular.uppercase + * @function + * + * @description Converts the specified string to uppercase. + * @param {string} string String to be converted to uppercase. + * @returns {string} Uppercased string. + */ +var uppercase = function(string){return isString(string) ? string.toUpperCase() : string;}; + + +var manualLowercase = function(s) { + return isString(s) + ? s.replace(/[A-Z]/g, function(ch) {return String.fromCharCode(ch.charCodeAt(0) | 32);}) + : s; +}; +var manualUppercase = function(s) { + return isString(s) + ? s.replace(/[a-z]/g, function(ch) {return String.fromCharCode(ch.charCodeAt(0) & ~32);}) + : s; +}; + + +// String#toLowerCase and String#toUpperCase don't produce correct results in browsers with Turkish +// locale, for this reason we need to detect this case and redefine lowercase/uppercase methods +// with correct but slower alternatives. +if ('i' !== 'I'.toLowerCase()) { + lowercase = manualLowercase; + uppercase = manualUppercase; +} + + +var /** holds major version number for IE or NaN for real browsers */ + msie = int((/msie (\d+)/.exec(lowercase(navigator.userAgent)) || [])[1]), + jqLite, // delay binding since jQuery could be loaded after us. 
+ jQuery, // delay binding + slice = [].slice, + push = [].push, + toString = Object.prototype.toString, + + /** @name angular */ + angular = window.angular || (window.angular = {}), + angularModule, + nodeName_, + uid = ['0', '0', '0']; + + +/** + * @private + * @param {*} obj + * @return {boolean} Returns true if `obj` is an array or array-like object (NodeList, Arguments, ...) + */ +function isArrayLike(obj) { + if (!obj || (typeof obj.length !== 'number')) return false; + + // We have on object which has length property. Should we treat it as array? + if (typeof obj.hasOwnProperty != 'function' && + typeof obj.constructor != 'function') { + // This is here for IE8: it is a bogus object treat it as array; + return true; + } else { + return obj instanceof JQLite || // JQLite + (jQuery && obj instanceof jQuery) || // jQuery + toString.call(obj) !== '[object Object]' || // some browser native object + typeof obj.callee === 'function'; // arguments (on IE8 looks like regular obj) + } +} + + +/** + * @ngdoc function + * @name angular.forEach + * @function + * + * @description + * Invokes the `iterator` function once for each item in `obj` collection, which can be either an + * object or an array. The `iterator` function is invoked with `iterator(value, key)`, where `value` + * is the value of an object property or an array element and `key` is the object property key or + * array element index. Specifying a `context` for the function is optional. + * + * Note: this function was previously known as `angular.foreach`. + * +
+     var values = {name: 'misko', gender: 'male'};
+     var log = [];
+     angular.forEach(values, function(value, key){
+       this.push(key + ': ' + value);
+     }, log);
+     expect(log).toEqual(['name: misko', 'gender:male']);
+   
+ * + * @param {Object|Array} obj Object to iterate over. + * @param {Function} iterator Iterator function. + * @param {Object=} context Object to become context (`this`) for the iterator function. + * @returns {Object|Array} Reference to `obj`. + */ +function forEach(obj, iterator, context) { + var key; + if (obj) { + if (isFunction(obj)){ + for (key in obj) { + if (key != 'prototype' && key != 'length' && key != 'name' && obj.hasOwnProperty(key)) { + iterator.call(context, obj[key], key); + } + } + } else if (obj.forEach && obj.forEach !== forEach) { + obj.forEach(iterator, context); + } else if (isArrayLike(obj)) { + for (key = 0; key < obj.length; key++) + iterator.call(context, obj[key], key); + } else { + for (key in obj) { + if (obj.hasOwnProperty(key)) { + iterator.call(context, obj[key], key); + } + } + } + } + return obj; +} + +function sortedKeys(obj) { + var keys = []; + for (var key in obj) { + if (obj.hasOwnProperty(key)) { + keys.push(key); + } + } + return keys.sort(); +} + +function forEachSorted(obj, iterator, context) { + var keys = sortedKeys(obj); + for ( var i = 0; i < keys.length; i++) { + iterator.call(context, obj[keys[i]], keys[i]); + } + return keys; +} + + +/** + * when using forEach the params are value, key, but it is often useful to have key, value. + * @param {function(string, *)} iteratorFn + * @returns {function(*, string)} + */ +function reverseParams(iteratorFn) { + return function(value, key) { iteratorFn(key, value) }; +} + +/** + * A consistent way of creating unique IDs in angular. The ID is a sequence of alpha numeric + * characters such as '012ABC'. The reason why we are not using simply a number counter is that + * the number string gets longer over time, and it can also overflow, where as the nextId + * will grow much slower, it is a string, and it will never overflow. 
+ * + * @returns an unique alpha-numeric string + */ +function nextUid() { + var index = uid.length; + var digit; + + while(index) { + index--; + digit = uid[index].charCodeAt(0); + if (digit == 57 /*'9'*/) { + uid[index] = 'A'; + return uid.join(''); + } + if (digit == 90 /*'Z'*/) { + uid[index] = '0'; + } else { + uid[index] = String.fromCharCode(digit + 1); + return uid.join(''); + } + } + uid.unshift('0'); + return uid.join(''); +} + + +/** + * Set or clear the hashkey for an object. + * @param obj object + * @param h the hashkey (!truthy to delete the hashkey) + */ +function setHashKey(obj, h) { + if (h) { + obj.$$hashKey = h; + } + else { + delete obj.$$hashKey; + } +} + +/** + * @ngdoc function + * @name angular.extend + * @function + * + * @description + * Extends the destination object `dst` by copying all of the properties from the `src` object(s) + * to `dst`. You can specify multiple `src` objects. + * + * @param {Object} dst Destination object. + * @param {...Object} src Source object(s). + * @returns {Object} Reference to `dst`. + */ +function extend(dst) { + var h = dst.$$hashKey; + forEach(arguments, function(obj){ + if (obj !== dst) { + forEach(obj, function(value, key){ + dst[key] = value; + }); + } + }); + + setHashKey(dst,h); + return dst; +} + +function int(str) { + return parseInt(str, 10); +} + + +function inherit(parent, extra) { + return extend(new (extend(function() {}, {prototype:parent}))(), extra); +} + + +/** + * @ngdoc function + * @name angular.noop + * @function + * + * @description + * A function that performs no operations. This function can be useful when writing code in the + * functional style. +
+     function foo(callback) {
+       var result = calculateResult();
+       (callback || angular.noop)(result);
+     }
+   
+ */ +function noop() {} +noop.$inject = []; + + +/** + * @ngdoc function + * @name angular.identity + * @function + * + * @description + * A function that returns its first argument. This function is useful when writing code in the + * functional style. + * +
+     function transformer(transformationFn, value) {
+       return (transformationFn || identity)(value);
+     };
+   
+ */ +function identity($) {return $;} +identity.$inject = []; + + +function valueFn(value) {return function() {return value;};} + +/** + * @ngdoc function + * @name angular.isUndefined + * @function + * + * @description + * Determines if a reference is undefined. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is undefined. + */ +function isUndefined(value){return typeof value == 'undefined';} + + +/** + * @ngdoc function + * @name angular.isDefined + * @function + * + * @description + * Determines if a reference is defined. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is defined. + */ +function isDefined(value){return typeof value != 'undefined';} + + +/** + * @ngdoc function + * @name angular.isObject + * @function + * + * @description + * Determines if a reference is an `Object`. Unlike `typeof` in JavaScript, `null`s are not + * considered to be objects. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is an `Object` but not `null`. + */ +function isObject(value){return value != null && typeof value == 'object';} + + +/** + * @ngdoc function + * @name angular.isString + * @function + * + * @description + * Determines if a reference is a `String`. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is a `String`. + */ +function isString(value){return typeof value == 'string';} + + +/** + * @ngdoc function + * @name angular.isNumber + * @function + * + * @description + * Determines if a reference is a `Number`. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is a `Number`. + */ +function isNumber(value){return typeof value == 'number';} + + +/** + * @ngdoc function + * @name angular.isDate + * @function + * + * @description + * Determines if a value is a date. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is a `Date`. 
+ */ +function isDate(value){ + return toString.apply(value) == '[object Date]'; +} + + +/** + * @ngdoc function + * @name angular.isArray + * @function + * + * @description + * Determines if a reference is an `Array`. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is an `Array`. + */ +function isArray(value) { + return toString.apply(value) == '[object Array]'; +} + + +/** + * @ngdoc function + * @name angular.isFunction + * @function + * + * @description + * Determines if a reference is a `Function`. + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is a `Function`. + */ +function isFunction(value){return typeof value == 'function';} + + +/** + * Checks if `obj` is a window object. + * + * @private + * @param {*} obj Object to check + * @returns {boolean} True if `obj` is a window obj. + */ +function isWindow(obj) { + return obj && obj.document && obj.location && obj.alert && obj.setInterval; +} + + +function isScope(obj) { + return obj && obj.$evalAsync && obj.$watch; +} + + +function isFile(obj) { + return toString.apply(obj) === '[object File]'; +} + + +function isBoolean(value) { + return typeof value == 'boolean'; +} + + +function trim(value) { + return isString(value) ? value.replace(/^\s*/, '').replace(/\s*$/, '') : value; +} + +/** + * @ngdoc function + * @name angular.isElement + * @function + * + * @description + * Determines if a reference is a DOM element (or wrapped jQuery element). + * + * @param {*} value Reference to check. + * @returns {boolean} True if `value` is a DOM element (or wrapped jQuery element). + */ +function isElement(node) { + return node && + (node.nodeName // we are a direct element + || (node.bind && node.find)); // we have a bind and find method part of jQuery API +} + +/** + * @param str 'key1,key2,...' 
+ * @returns {object} in the form of {key1:true, key2:true, ...} + */ +function makeMap(str){ + var obj = {}, items = str.split(","), i; + for ( i = 0; i < items.length; i++ ) + obj[ items[i] ] = true; + return obj; +} + + +if (msie < 9) { + nodeName_ = function(element) { + element = element.nodeName ? element : element[0]; + return (element.scopeName && element.scopeName != 'HTML') + ? uppercase(element.scopeName + ':' + element.nodeName) : element.nodeName; + }; +} else { + nodeName_ = function(element) { + return element.nodeName ? element.nodeName : element[0].nodeName; + }; +} + + +function map(obj, iterator, context) { + var results = []; + forEach(obj, function(value, index, list) { + results.push(iterator.call(context, value, index, list)); + }); + return results; +} + + +/** + * @description + * Determines the number of elements in an array, the number of properties an object has, or + * the length of a string. + * + * Note: This function is used to augment the Object type in Angular expressions. See + * {@link angular.Object} for more information about Angular arrays. + * + * @param {Object|Array|string} obj Object, array, or string to inspect. + * @param {boolean} [ownPropsOnly=false] Count only "own" properties in an object + * @returns {number} The size of `obj` or `0` if `obj` is neither an object nor an array. 
+ */ +function size(obj, ownPropsOnly) { + var size = 0, key; + + if (isArray(obj) || isString(obj)) { + return obj.length; + } else if (isObject(obj)){ + for (key in obj) + if (!ownPropsOnly || obj.hasOwnProperty(key)) + size++; + } + + return size; +} + + +function includes(array, obj) { + return indexOf(array, obj) != -1; +} + +function indexOf(array, obj) { + if (array.indexOf) return array.indexOf(obj); + + for ( var i = 0; i < array.length; i++) { + if (obj === array[i]) return i; + } + return -1; +} + +function arrayRemove(array, value) { + var index = indexOf(array, value); + if (index >=0) + array.splice(index, 1); + return value; +} + +function isLeafNode (node) { + if (node) { + switch (node.nodeName) { + case "OPTION": + case "PRE": + case "TITLE": + return true; + } + } + return false; +} + +/** + * @ngdoc function + * @name angular.copy + * @function + * + * @description + * Creates a deep copy of `source`, which should be an object or an array. + * + * * If no destination is supplied, a copy of the object or array is created. + * * If a destination is provided, all of its elements (for array) or properties (for objects) + * are deleted and then all elements/properties from the source are copied to it. + * * If `source` is not an object or array, `source` is returned. + * + * Note: this function is used to augment the Object type in Angular expressions. See + * {@link ng.$filter} for more information about Angular arrays. + * + * @param {*} source The source that will be used to make a copy. + * Can be any type, including primitives, `null`, and `undefined`. + * @param {(Object|Array)=} destination Destination into which the source is copied. If + * provided, must be of the same type as `source`. + * @returns {*} The copy or updated `destination`, if `destination` was specified. 
+ */ +function copy(source, destination){ + if (isWindow(source) || isScope(source)) throw Error("Can't copy Window or Scope"); + if (!destination) { + destination = source; + if (source) { + if (isArray(source)) { + destination = copy(source, []); + } else if (isDate(source)) { + destination = new Date(source.getTime()); + } else if (isObject(source)) { + destination = copy(source, {}); + } + } + } else { + if (source === destination) throw Error("Can't copy equivalent objects or arrays"); + if (isArray(source)) { + destination.length = 0; + for ( var i = 0; i < source.length; i++) { + destination.push(copy(source[i])); + } + } else { + var h = destination.$$hashKey; + forEach(destination, function(value, key){ + delete destination[key]; + }); + for ( var key in source) { + destination[key] = copy(source[key]); + } + setHashKey(destination,h); + } + } + return destination; +} + +/** + * Create a shallow copy of an object + */ +function shallowCopy(src, dst) { + dst = dst || {}; + + for(var key in src) { + if (src.hasOwnProperty(key) && key.substr(0, 2) !== '$$') { + dst[key] = src[key]; + } + } + + return dst; +} + + +/** + * @ngdoc function + * @name angular.equals + * @function + * + * @description + * Determines if two objects or two values are equivalent. Supports value types, arrays and + * objects. + * + * Two objects or values are considered equivalent if at least one of the following is true: + * + * * Both objects or values pass `===` comparison. + * * Both objects or values are of the same type and all of their properties pass `===` comparison. + * * Both values are NaN. (In JavasScript, NaN == NaN => false. But we consider two NaN as equal) + * + * During a property comparision, properties of `function` type and properties with names + * that begin with `$` are ignored. + * + * Scope and DOMWindow objects are being compared only by identify (`===`). + * + * @param {*} o1 Object or value to compare. + * @param {*} o2 Object or value to compare. 
+ * @returns {boolean} True if arguments are equal. + */ +function equals(o1, o2) { + if (o1 === o2) return true; + if (o1 === null || o2 === null) return false; + if (o1 !== o1 && o2 !== o2) return true; // NaN === NaN + var t1 = typeof o1, t2 = typeof o2, length, key, keySet; + if (t1 == t2) { + if (t1 == 'object') { + if (isArray(o1)) { + if ((length = o1.length) == o2.length) { + for(key=0; key 2 ? sliceArgs(arguments, 2) : []; + if (isFunction(fn) && !(fn instanceof RegExp)) { + return curryArgs.length + ? function() { + return arguments.length + ? fn.apply(self, curryArgs.concat(slice.call(arguments, 0))) + : fn.apply(self, curryArgs); + } + : function() { + return arguments.length + ? fn.apply(self, arguments) + : fn.call(self); + }; + } else { + // in IE, native methods are not functions so they cannot be bound (note: they don't need to be) + return fn; + } +} + + +function toJsonReplacer(key, value) { + var val = value; + + if (/^\$+/.test(key)) { + val = undefined; + } else if (isWindow(value)) { + val = '$WINDOW'; + } else if (value && document === value) { + val = '$DOCUMENT'; + } else if (isScope(value)) { + val = '$SCOPE'; + } + + return val; +} + + +/** + * @ngdoc function + * @name angular.toJson + * @function + * + * @description + * Serializes input into a JSON-formatted string. + * + * @param {Object|Array|Date|string|number} obj Input to be serialized into JSON. + * @param {boolean=} pretty If set to true, the JSON output will contain newlines and whitespace. + * @returns {string} Jsonified string representing `obj`. + */ +function toJson(obj, pretty) { + return JSON.stringify(obj, toJsonReplacer, pretty ? ' ' : null); +} + + +/** + * @ngdoc function + * @name angular.fromJson + * @function + * + * @description + * Deserializes a JSON string. + * + * @param {string} json JSON string to deserialize. + * @returns {Object|Array|Date|string|number} Deserialized thingy. + */ +function fromJson(json) { + return isString(json) + ? 
JSON.parse(json) + : json; +} + + +function toBoolean(value) { + if (value && value.length !== 0) { + var v = lowercase("" + value); + value = !(v == 'f' || v == '0' || v == 'false' || v == 'no' || v == 'n' || v == '[]'); + } else { + value = false; + } + return value; +} + +/** + * @returns {string} Returns the string representation of the element. + */ +function startingTag(element) { + element = jqLite(element).clone(); + try { + // turns out IE does not let you set .html() on elements which + // are not allowed to have children. So we just ignore it. + element.html(''); + } catch(e) {} + // As Per DOM Standards + var TEXT_NODE = 3; + var elemHtml = jqLite('
').append(element).html(); + try { + return element[0].nodeType === TEXT_NODE ? lowercase(elemHtml) : + elemHtml. + match(/^(<[^>]+>)/)[1]. + replace(/^<([\w\-]+)/, function(match, nodeName) { return '<' + lowercase(nodeName); }); + } catch(e) { + return lowercase(elemHtml); + } + +} + + +///////////////////////////////////////////////// + +/** + * Parses an escaped url query string into key-value pairs. + * @returns Object.<(string|boolean)> + */ +function parseKeyValue(/**string*/keyValue) { + var obj = {}, key_value, key; + forEach((keyValue || "").split('&'), function(keyValue){ + if (keyValue) { + key_value = keyValue.split('='); + key = decodeURIComponent(key_value[0]); + obj[key] = isDefined(key_value[1]) ? decodeURIComponent(key_value[1]) : true; + } + }); + return obj; +} + +function toKeyValue(obj) { + var parts = []; + forEach(obj, function(value, key) { + parts.push(encodeUriQuery(key, true) + (value === true ? '' : '=' + encodeUriQuery(value, true))); + }); + return parts.length ? parts.join('&') : ''; +} + + +/** + * We need our custom method because encodeURIComponent is too agressive and doesn't follow + * http://www.ietf.org/rfc/rfc3986.txt with regards to the character set (pchar) allowed in path + * segments: + * segment = *pchar + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * pct-encoded = "%" HEXDIG HEXDIG + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +function encodeUriSegment(val) { + return encodeUriQuery(val, true). + replace(/%26/gi, '&'). + replace(/%3D/gi, '='). + replace(/%2B/gi, '+'); +} + + +/** + * This method is intended for encoding *key* or *value* parts of query component. We need a custom + * method becuase encodeURIComponent is too agressive and encodes stuff that doesn't have to be + * encoded per http://tools.ietf.org/html/rfc3986: + * query = *( pchar / "/" / "?" 
) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * pct-encoded = "%" HEXDIG HEXDIG + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +function encodeUriQuery(val, pctEncodeSpaces) { + return encodeURIComponent(val). + replace(/%40/gi, '@'). + replace(/%3A/gi, ':'). + replace(/%24/g, '$'). + replace(/%2C/gi, ','). + replace(/%20/g, (pctEncodeSpaces ? '%20' : '+')); +} + + +/** + * @ngdoc directive + * @name ng.directive:ngApp + * + * @element ANY + * @param {angular.Module} ngApp an optional application + * {@link angular.module module} name to load. + * + * @description + * + * Use this directive to auto-bootstrap an application. Only + * one directive can be used per HTML document. The directive + * designates the root of the application and is typically placed + * at the root of the page. + * + * In the example below if the `ngApp` directive would not be placed + * on the `html` element then the document would not be compiled + * and the `{{ 1+2 }}` would not be resolved to `3`. + * + * `ngApp` is the easiest way to bootstrap an application. + * + + + I can add: 1 + 2 = {{ 1+2 }} + + + * + */ +function angularInit(element, bootstrap) { + var elements = [element], + appElement, + module, + names = ['ng:app', 'ng-app', 'x-ng-app', 'data-ng-app'], + NG_APP_CLASS_REGEXP = /\sng[:\-]app(:\s*([\w\d_]+);?)?\s/; + + function append(element) { + element && elements.push(element); + } + + forEach(names, function(name) { + names[name] = true; + append(document.getElementById(name)); + name = name.replace(':', '\\:'); + if (element.querySelectorAll) { + forEach(element.querySelectorAll('.' + name), append); + forEach(element.querySelectorAll('.' 
+ name + '\\:'), append); + forEach(element.querySelectorAll('[' + name + ']'), append); + } + }); + + forEach(elements, function(element) { + if (!appElement) { + var className = ' ' + element.className + ' '; + var match = NG_APP_CLASS_REGEXP.exec(className); + if (match) { + appElement = element; + module = (match[2] || '').replace(/\s+/g, ','); + } else { + forEach(element.attributes, function(attr) { + if (!appElement && names[attr.name]) { + appElement = element; + module = attr.value; + } + }); + } + } + }); + if (appElement) { + bootstrap(appElement, module ? [module] : []); + } +} + +/** + * @ngdoc function + * @name angular.bootstrap + * @description + * Use this function to manually start up angular application. + * + * See: {@link guide/bootstrap Bootstrap} + * + * @param {Element} element DOM element which is the root of angular application. + * @param {Array=} modules an array of module declarations. See: {@link angular.module modules} + * @returns {AUTO.$injector} Returns the newly created injector for this app. 
+ */ +function bootstrap(element, modules) { + var resumeBootstrapInternal = function() { + element = jqLite(element); + modules = modules || []; + modules.unshift(['$provide', function($provide) { + $provide.value('$rootElement', element); + }]); + modules.unshift('ng'); + var injector = createInjector(modules); + injector.invoke(['$rootScope', '$rootElement', '$compile', '$injector', + function(scope, element, compile, injector) { + scope.$apply(function() { + element.data('$injector', injector); + compile(element)(scope); + }); + }] + ); + return injector; + }; + + var NG_DEFER_BOOTSTRAP = /^NG_DEFER_BOOTSTRAP!/; + + if (window && !NG_DEFER_BOOTSTRAP.test(window.name)) { + return resumeBootstrapInternal(); + } + + window.name = window.name.replace(NG_DEFER_BOOTSTRAP, ''); + angular.resumeBootstrap = function(extraModules) { + forEach(extraModules, function(module) { + modules.push(module); + }); + resumeBootstrapInternal(); + }; +} + +var SNAKE_CASE_REGEXP = /[A-Z]/g; +function snake_case(name, separator){ + separator = separator || '_'; + return name.replace(SNAKE_CASE_REGEXP, function(letter, pos) { + return (pos ? separator : '') + letter.toLowerCase(); + }); +} + +function bindJQuery() { + // bind to jQuery if present; + jQuery = window.jQuery; + // reset to jQuery or default to us. + if (jQuery) { + jqLite = jQuery; + extend(jQuery.fn, { + scope: JQLitePrototype.scope, + controller: JQLitePrototype.controller, + injector: JQLitePrototype.injector, + inheritedData: JQLitePrototype.inheritedData + }); + JQLitePatchJQueryRemove('remove', true); + JQLitePatchJQueryRemove('empty'); + JQLitePatchJQueryRemove('html'); + } else { + jqLite = JQLite; + } + angular.element = jqLite; +} + +/** + * throw error if the argument is falsy. 
+ */ +function assertArg(arg, name, reason) { + if (!arg) { + throw new Error("Argument '" + (name || '?') + "' is " + (reason || "required")); + } + return arg; +} + +function assertArgFn(arg, name, acceptArrayAnnotation) { + if (acceptArrayAnnotation && isArray(arg)) { + arg = arg[arg.length - 1]; + } + + assertArg(isFunction(arg), name, 'not a function, got ' + + (arg && typeof arg == 'object' ? arg.constructor.name || 'Object' : typeof arg)); + return arg; +} + +/** + * @ngdoc interface + * @name angular.Module + * @description + * + * Interface for configuring angular {@link angular.module modules}. + */ + +function setupModuleLoader(window) { + + function ensure(obj, name, factory) { + return obj[name] || (obj[name] = factory()); + } + + return ensure(ensure(window, 'angular', Object), 'module', function() { + /** @type {Object.} */ + var modules = {}; + + /** + * @ngdoc function + * @name angular.module + * @description + * + * The `angular.module` is a global place for creating and registering Angular modules. All + * modules (angular core or 3rd party) that should be available to an application must be + * registered using this mechanism. + * + * + * # Module + * + * A module is a collocation of services, directives, filters, and configuration information. Module + * is used to configure the {@link AUTO.$injector $injector}. + * + *
+     * // Create a new module
+     * var myModule = angular.module('myModule', []);
+     *
+     * // register a new service
+     * myModule.value('appName', 'MyCoolApp');
+     *
+     * // configure existing services inside initialization blocks.
+     * myModule.config(function($locationProvider) {
+     *   // Configure existing providers
+     *   $locationProvider.hashPrefix('!');
+     * });
+     * 
+ * + * Then you can create an injector and load your modules like this: + * + *
+     * var injector = angular.injector(['ng', 'MyModule'])
+     * 
+ * + * However it's more likely that you'll just use + * {@link ng.directive:ngApp ngApp} or + * {@link angular.bootstrap} to simplify this process for you. + * + * @param {!string} name The name of the module to create or retrieve. + * @param {Array.=} requires If specified then new module is being created. If unspecified then the + * the module is being retrieved for further configuration. + * @param {Function} configFn Optional configuration function for the module. Same as + * {@link angular.Module#config Module#config()}. + * @returns {module} new module with the {@link angular.Module} api. + */ + return function module(name, requires, configFn) { + if (requires && modules.hasOwnProperty(name)) { + modules[name] = null; + } + return ensure(modules, name, function() { + if (!requires) { + throw Error('No module: ' + name); + } + + /** @type {!Array.>} */ + var invokeQueue = []; + + /** @type {!Array.} */ + var runBlocks = []; + + var config = invokeLater('$injector', 'invoke'); + + /** @type {angular.Module} */ + var moduleInstance = { + // Private state + _invokeQueue: invokeQueue, + _runBlocks: runBlocks, + + /** + * @ngdoc property + * @name angular.Module#requires + * @propertyOf angular.Module + * @returns {Array.} List of module names which must be loaded before this module. + * @description + * Holds the list of modules which the injector will load before the current module is loaded. + */ + requires: requires, + + /** + * @ngdoc property + * @name angular.Module#name + * @propertyOf angular.Module + * @returns {string} Name of the module. + * @description + */ + name: name, + + + /** + * @ngdoc method + * @name angular.Module#provider + * @methodOf angular.Module + * @param {string} name service name + * @param {Function} providerType Construction function for creating new instance of the service. + * @description + * See {@link AUTO.$provide#provider $provide.provider()}. 
+ */ + provider: invokeLater('$provide', 'provider'), + + /** + * @ngdoc method + * @name angular.Module#factory + * @methodOf angular.Module + * @param {string} name service name + * @param {Function} providerFunction Function for creating new instance of the service. + * @description + * See {@link AUTO.$provide#factory $provide.factory()}. + */ + factory: invokeLater('$provide', 'factory'), + + /** + * @ngdoc method + * @name angular.Module#service + * @methodOf angular.Module + * @param {string} name service name + * @param {Function} constructor A constructor function that will be instantiated. + * @description + * See {@link AUTO.$provide#service $provide.service()}. + */ + service: invokeLater('$provide', 'service'), + + /** + * @ngdoc method + * @name angular.Module#value + * @methodOf angular.Module + * @param {string} name service name + * @param {*} object Service instance object. + * @description + * See {@link AUTO.$provide#value $provide.value()}. + */ + value: invokeLater('$provide', 'value'), + + /** + * @ngdoc method + * @name angular.Module#constant + * @methodOf angular.Module + * @param {string} name constant name + * @param {*} object Constant value. + * @description + * Because the constant are fixed, they get applied before other provide methods. + * See {@link AUTO.$provide#constant $provide.constant()}. + */ + constant: invokeLater('$provide', 'constant', 'unshift'), + + /** + * @ngdoc method + * @name angular.Module#filter + * @methodOf angular.Module + * @param {string} name Filter name. + * @param {Function} filterFactory Factory function for creating new instance of filter. + * @description + * See {@link ng.$filterProvider#register $filterProvider.register()}. + */ + filter: invokeLater('$filterProvider', 'register'), + + /** + * @ngdoc method + * @name angular.Module#controller + * @methodOf angular.Module + * @param {string} name Controller name. + * @param {Function} constructor Controller constructor function. 
+ * @description + * See {@link ng.$controllerProvider#register $controllerProvider.register()}. + */ + controller: invokeLater('$controllerProvider', 'register'), + + /** + * @ngdoc method + * @name angular.Module#directive + * @methodOf angular.Module + * @param {string} name directive name + * @param {Function} directiveFactory Factory function for creating new instance of + * directives. + * @description + * See {@link ng.$compileProvider#directive $compileProvider.directive()}. + */ + directive: invokeLater('$compileProvider', 'directive'), + + /** + * @ngdoc method + * @name angular.Module#config + * @methodOf angular.Module + * @param {Function} configFn Execute this function on module load. Useful for service + * configuration. + * @description + * Use this method to register work which needs to be performed on module loading. + */ + config: config, + + /** + * @ngdoc method + * @name angular.Module#run + * @methodOf angular.Module + * @param {Function} initializationFn Execute this function after injector creation. + * Useful for application initialization. + * @description + * Use this method to register work which should be performed when the injector is done + * loading all modules. + */ + run: function(block) { + runBlocks.push(block); + return this; + } + }; + + if (configFn) { + config(configFn); + } + + return moduleInstance; + + /** + * @param {string} provider + * @param {string} method + * @param {String=} insertMethod + * @returns {angular.Module} + */ + function invokeLater(provider, method, insertMethod) { + return function() { + invokeQueue[insertMethod || 'push']([provider, method, arguments]); + return moduleInstance; + } + } + }); + }; + }); + +} + +/** + * @ngdoc property + * @name angular.version + * @description + * An object that contains information about the current AngularJS version. This object has the + * following properties: + * + * - `full` – `{string}` – Full version string, such as "0.9.18". 
+ * - `major` – `{number}` – Major version number, such as "0". + * - `minor` – `{number}` – Minor version number, such as "9". + * - `dot` – `{number}` – Dot version number, such as "18". + * - `codeName` – `{string}` – Code name of the release, such as "jiggling-armfat". + */ +var version = { + full: '1.0.7', // all of these placeholder strings will be replaced by grunt's + major: 1, // package task + minor: 0, + dot: 7, + codeName: 'monochromatic-rainbow' +}; + + +function publishExternalAPI(angular){ + extend(angular, { + 'bootstrap': bootstrap, + 'copy': copy, + 'extend': extend, + 'equals': equals, + 'element': jqLite, + 'forEach': forEach, + 'injector': createInjector, + 'noop':noop, + 'bind':bind, + 'toJson': toJson, + 'fromJson': fromJson, + 'identity':identity, + 'isUndefined': isUndefined, + 'isDefined': isDefined, + 'isString': isString, + 'isFunction': isFunction, + 'isObject': isObject, + 'isNumber': isNumber, + 'isElement': isElement, + 'isArray': isArray, + 'version': version, + 'isDate': isDate, + 'lowercase': lowercase, + 'uppercase': uppercase, + 'callbacks': {counter: 0} + }); + + angularModule = setupModuleLoader(window); + try { + angularModule('ngLocale'); + } catch (e) { + angularModule('ngLocale', []).provider('$locale', $LocaleProvider); + } + + angularModule('ng', ['ngLocale'], ['$provide', + function ngModule($provide) { + $provide.provider('$compile', $CompileProvider). 
+ directive({ + a: htmlAnchorDirective, + input: inputDirective, + textarea: inputDirective, + form: formDirective, + script: scriptDirective, + select: selectDirective, + style: styleDirective, + option: optionDirective, + ngBind: ngBindDirective, + ngBindHtmlUnsafe: ngBindHtmlUnsafeDirective, + ngBindTemplate: ngBindTemplateDirective, + ngClass: ngClassDirective, + ngClassEven: ngClassEvenDirective, + ngClassOdd: ngClassOddDirective, + ngCsp: ngCspDirective, + ngCloak: ngCloakDirective, + ngController: ngControllerDirective, + ngForm: ngFormDirective, + ngHide: ngHideDirective, + ngInclude: ngIncludeDirective, + ngInit: ngInitDirective, + ngNonBindable: ngNonBindableDirective, + ngPluralize: ngPluralizeDirective, + ngRepeat: ngRepeatDirective, + ngShow: ngShowDirective, + ngSubmit: ngSubmitDirective, + ngStyle: ngStyleDirective, + ngSwitch: ngSwitchDirective, + ngSwitchWhen: ngSwitchWhenDirective, + ngSwitchDefault: ngSwitchDefaultDirective, + ngOptions: ngOptionsDirective, + ngView: ngViewDirective, + ngTransclude: ngTranscludeDirective, + ngModel: ngModelDirective, + ngList: ngListDirective, + ngChange: ngChangeDirective, + required: requiredDirective, + ngRequired: requiredDirective, + ngValue: ngValueDirective + }). + directive(ngAttributeAliasDirectives). 
+ directive(ngEventDirectives); + $provide.provider({ + $anchorScroll: $AnchorScrollProvider, + $browser: $BrowserProvider, + $cacheFactory: $CacheFactoryProvider, + $controller: $ControllerProvider, + $document: $DocumentProvider, + $exceptionHandler: $ExceptionHandlerProvider, + $filter: $FilterProvider, + $interpolate: $InterpolateProvider, + $http: $HttpProvider, + $httpBackend: $HttpBackendProvider, + $location: $LocationProvider, + $log: $LogProvider, + $parse: $ParseProvider, + $route: $RouteProvider, + $routeParams: $RouteParamsProvider, + $rootScope: $RootScopeProvider, + $q: $QProvider, + $sniffer: $SnifferProvider, + $templateCache: $TemplateCacheProvider, + $timeout: $TimeoutProvider, + $window: $WindowProvider + }); + } + ]); +} + +////////////////////////////////// +//JQLite +////////////////////////////////// + +/** + * @ngdoc function + * @name angular.element + * @function + * + * @description + * Wraps a raw DOM element or HTML string as a [jQuery](http://jquery.com) element. + * `angular.element` can be either an alias for [jQuery](http://api.jquery.com/jQuery/) function, if + * jQuery is available, or a function that wraps the element or string in Angular's jQuery lite + * implementation (commonly referred to as jqLite). + * + * Real jQuery always takes precedence over jqLite, provided it was loaded before `DOMContentLoaded` + * event fired. + * + * jqLite is a tiny, API-compatible subset of jQuery that allows + * Angular to manipulate the DOM. jqLite implements only the most commonly needed functionality + * within a very small footprint, so only a subset of the jQuery API - methods, arguments and + * invocation styles - are supported. + * + * Note: All element references in Angular are always wrapped with jQuery or jqLite; they are never + * raw DOM references. 
+ * + * ## Angular's jQuery lite provides the following methods: + * + * - [addClass()](http://api.jquery.com/addClass/) + * - [after()](http://api.jquery.com/after/) + * - [append()](http://api.jquery.com/append/) + * - [attr()](http://api.jquery.com/attr/) + * - [bind()](http://api.jquery.com/bind/) - Does not support namespaces + * - [children()](http://api.jquery.com/children/) - Does not support selectors + * - [clone()](http://api.jquery.com/clone/) + * - [contents()](http://api.jquery.com/contents/) + * - [css()](http://api.jquery.com/css/) + * - [data()](http://api.jquery.com/data/) + * - [eq()](http://api.jquery.com/eq/) + * - [find()](http://api.jquery.com/find/) - Limited to lookups by tag name + * - [hasClass()](http://api.jquery.com/hasClass/) + * - [html()](http://api.jquery.com/html/) + * - [next()](http://api.jquery.com/next/) - Does not support selectors + * - [parent()](http://api.jquery.com/parent/) - Does not support selectors + * - [prepend()](http://api.jquery.com/prepend/) + * - [prop()](http://api.jquery.com/prop/) + * - [ready()](http://api.jquery.com/ready/) + * - [remove()](http://api.jquery.com/remove/) + * - [removeAttr()](http://api.jquery.com/removeAttr/) + * - [removeClass()](http://api.jquery.com/removeClass/) + * - [removeData()](http://api.jquery.com/removeData/) + * - [replaceWith()](http://api.jquery.com/replaceWith/) + * - [text()](http://api.jquery.com/text/) + * - [toggleClass()](http://api.jquery.com/toggleClass/) + * - [triggerHandler()](http://api.jquery.com/triggerHandler/) - Doesn't pass native event objects to handlers. + * - [unbind()](http://api.jquery.com/unbind/) - Does not support namespaces + * - [val()](http://api.jquery.com/val/) + * - [wrap()](http://api.jquery.com/wrap/) + * + * ## In addition to the above, Angular provides additional methods to both jQuery and jQuery lite: + * + * - `controller(name)` - retrieves the controller of the current element or its parent. 
By default + * retrieves controller associated with the `ngController` directive. If `name` is provided as + * camelCase directive name, then the controller for this directive will be retrieved (e.g. + * `'ngModel'`). + * - `injector()` - retrieves the injector of the current element or its parent. + * - `scope()` - retrieves the {@link api/ng.$rootScope.Scope scope} of the current + * element or its parent. + * - `inheritedData()` - same as `data()`, but walks up the DOM until a value is found or the top + * parent element is reached. + * + * @param {string|DOMElement} element HTML string or DOMElement to be wrapped into jQuery. + * @returns {Object} jQuery object. + */ + +var jqCache = JQLite.cache = {}, + jqName = JQLite.expando = 'ng-' + new Date().getTime(), + jqId = 1, + addEventListenerFn = (window.document.addEventListener + ? function(element, type, fn) {element.addEventListener(type, fn, false);} + : function(element, type, fn) {element.attachEvent('on' + type, fn);}), + removeEventListenerFn = (window.document.removeEventListener + ? function(element, type, fn) {element.removeEventListener(type, fn, false); } + : function(element, type, fn) {element.detachEvent('on' + type, fn); }); + +function jqNextId() { return ++jqId; } + + +var SPECIAL_CHARS_REGEXP = /([\:\-\_]+(.))/g; +var MOZ_HACK_REGEXP = /^moz([A-Z])/; + +/** + * Converts snake_case to camelCase. + * Also there is special case for Moz prefix starting with upper case letter. + * @param name Name to normalize + */ +function camelCase(name) { + return name. + replace(SPECIAL_CHARS_REGEXP, function(_, separator, letter, offset) { + return offset ? letter.toUpperCase() : letter; + }). + replace(MOZ_HACK_REGEXP, 'Moz$1'); +} + +///////////////////////////////////////////// +// jQuery mutation patch +// +// In conjunction with bindJQuery intercepts all jQuery's DOM destruction apis and fires a +// $destroy event on all DOM nodes being removed. 
//
/////////////////////////////////////////////

/**
 * Replaces `jQuery.fn[name]` with a wrapper that calls `triggerHandler('$destroy')` on the
 * affected nodes before delegating to the original jQuery method.
 *
 * The wrapper walks the receiver set breadth-first: every element's children are queued and
 * visited in turn. The original method is kept on the wrapper as `$original`, so patching the
 * same method twice still delegates to the real jQuery implementation.
 *
 * @param {string} name Name of the `jQuery.fn` method to patch.
 * @param {boolean} dispatchThis When falsy, the first visited element does not receive
 *   `$destroy`; every element visited after it does.
 */
function JQLitePatchJQueryRemove(name, dispatchThis) {
  var originalJqFn = jQuery.fn[name];
  originalJqFn = originalJqFn.$original || originalJqFn;
  removePatch.$original = originalJqFn;
  jQuery.fn[name] = removePatch;

  function removePatch() {
    var list = [this],
        fireEvent = dispatchThis,
        set, setIndex, setLength,
        element, childIndex, childLength, children;

    while(list.length) {
      set = list.shift();
      for(setIndex = 0, setLength = set.length; setIndex < setLength; setIndex++) {
        element = jqLite(set[setIndex]);
        if (fireEvent) {
          element.triggerHandler('$destroy');
        } else {
          // skip only the first element, then fire for everything that follows
          fireEvent = !fireEvent;
        }
        for(childIndex = 0, childLength = (children = element.children()).length;
            childIndex < childLength;
            childIndex++) {
          list.push(jQuery(children[childIndex]));
        }
      }
    }
    return originalJqFn.apply(this, arguments);
  }
}

/////////////////////////////////////////////
/**
 * jqLite constructor / factory.
 *
 * - A JQLite instance is returned unchanged.
 * - Called without `new`, it creates a new instance; a string that does not start with `<`
 *   is rejected, because selectors are not supported.
 * - An HTML string is parsed through a temporary `div`; any other value is added as node(s).
 *
 * @param {string|DOMElement|Array|JQLite} element HTML string, node, node list or wrapper.
 * @returns {JQLite} the wrapper when called as a plain function
 * @throws {Error} 'selectors not implemented' for non-HTML strings
 */
function JQLite(element) {
  if (element instanceof JQLite) {
    return element;
  }
  if (!(this instanceof JQLite)) {
    if (isString(element) && element.charAt(0) != '<') {
      throw Error('selectors not implemented');
    }
    return new JQLite(element);
  }

  if (isString(element)) {
    var div = document.createElement('div');
    // Read about the NoScope elements here:
    // http://msdn.microsoft.com/en-us/library/ms533897(VS.85).aspx
    // The markup is prefixed with a throw-away placeholder div, which is removed right below.
    div.innerHTML = '<div>&#160;</div>' + element; // IE insanity to make NoScope elements work!
    div.removeChild(div.firstChild); // remove the superfluous div
    JQLiteAddNodes(this, div.childNodes);
    this.remove(); // detach the elements from the temporary DOM div.
  } else {
    JQLiteAddNodes(this, element);
  }
}

/**
 * Deep-clones a DOM node.
 * @param {Node} element node to clone
 * @returns {Node} result of `element.cloneNode(true)`
 */
function JQLiteClone(element) {
  return element.cloneNode(true);
}

/**
 * Removes the jqLite store of `element` and, recursively, of all of its child nodes.
 * @param {Node} element root of the subtree to clean up
 */
function JQLiteDealoc(element){
  JQLiteRemoveData(element);
  for ( var i = 0, children = element.childNodes || []; i < children.length; i++) {
    JQLiteDealoc(children[i]);
  }
}

/**
 * Unregisters event listeners from `element`. Does nothing when the element has no handle.
 *
 * - no `type`: the listener for every stored event type is removed and its entry deleted;
 * - `type` only: the listener for that type is removed and its entry deleted;
 * - `type` and `fn`: only `fn` is removed from that type's handler list.
 *
 * @param {Node} element element to unbind from
 * @param {string=} type event type
 * @param {function=} fn handler to remove
 */
function JQLiteUnbind(element, type, fn) {
  var events = JQLiteExpandoStore(element, 'events'),
      handle = JQLiteExpandoStore(element, 'handle');

  if (!handle) return; //no listeners registered

  if (isUndefined(type)) {
    forEach(events, function(eventHandler, type) {
      removeEventListenerFn(element, type, eventHandler);
      delete events[type];
    });
  } else {
    if (isUndefined(fn)) {
      removeEventListenerFn(element, type, events[type]);
      delete events[type];
    } else {
      arrayRemove(events[type], fn);
    }
  }
}

/**
 * Drops the whole store of `element` from `jqCache`. If the store has a handle, `$destroy`
 * handlers (when present) are invoked through it and all listeners are unbound first.
 * @param {Node} element element whose store is removed
 */
function JQLiteRemoveData(element) {
  var expandoId = element[jqName],
      expandoStore = jqCache[expandoId];

  if (expandoStore) {
    if (expandoStore.handle) {
      expandoStore.events.$destroy && expandoStore.handle({}, '$destroy');
      JQLiteUnbind(element);
    }
    delete jqCache[expandoId];
    element[jqName] = undefined; // ie does not allow deletion of attributes on elements.
  }
}

/**
 * Reads or writes one slot of the element's store in `jqCache`.
 *
 * With a defined `value` the slot is written, creating the store and assigning a new id to
 * the element if needed. Otherwise the slot is read; a missing store yields `undefined`.
 *
 * @param {Node} element element owning the store
 * @param {string} key slot name (this file uses 'events', 'handle' and 'data')
 * @param {*=} value value to store
 * @returns {*} the stored value on read; nothing on write
 */
function JQLiteExpandoStore(element, key, value) {
  var expandoId = element[jqName],
      expandoStore = jqCache[expandoId || -1];

  if (isDefined(value)) {
    if (!expandoStore) {
      element[jqName] = expandoId = jqNextId();
      expandoStore = jqCache[expandoId] = {};
    }
    expandoStore[key] = value;
  } else {
    return expandoStore && expandoStore[key];
  }
}

/**
 * Implementation of `data()`.
 *
 * - `(element, key, value)` stores `value` under `key`;
 * - `(element, key)` with a non-object key reads that key without creating the data object;
 * - `(element, object)` copies the object's properties into the data object;
 * - `(element)` returns the whole data object, creating it if missing.
 *
 * @param {Node} element element owning the data
 * @param {(string|Object)=} key data key, or an object of key/value pairs
 * @param {*=} value value to store
 * @returns {*} the value or data object on reads; nothing on writes
 */
function JQLiteData(element, key, value) {
  var data = JQLiteExpandoStore(element, 'data'),
      isSetter = isDefined(value),
      keyDefined = !isSetter && isDefined(key),
      isSimpleGetter = keyDefined && !isObject(key);

  if (!data && !isSimpleGetter) {
    JQLiteExpandoStore(element, 'data', data = {});
  }

  if (isSetter) {
    data[key] = value;
  } else {
    if (keyDefined) {
      if (isSimpleGetter) {
        // don't create data in this case.
        return data && data[key];
      } else {
        extend(data, key);
      }
    } else {
      return data;
    }
  }
}

/**
 * Tests whether `selector` appears as a whole, space-delimited class name on `element`.
 * Newlines and tabs in `className` count as spaces.
 * @param {Element} element element to test
 * @param {string} selector class name
 * @returns {boolean}
 */
function JQLiteHasClass(element, selector) {
  return ((" " + element.className + " ").replace(/[\n\t]/g, " ").
      indexOf( " " + selector + " " ) > -1);
}

/**
 * Removes each class of the space-separated list `cssClasses` from `element.className`.
 * @param {Element} element element to modify
 * @param {string} cssClasses space-separated class names; a falsy value is a no-op
 */
function JQLiteRemoveClass(element, cssClasses) {
  if (cssClasses) {
    forEach(cssClasses.split(' '), function(cssClass) {
      element.className = trim(
        (" " + element.className + " ")
        .replace(/[\n\t]/g, " ")
        .replace(" " + trim(cssClass) + " ", " ")
      );
    });
  }
}

/**
 * Adds each class of the space-separated list `cssClasses` that `element` does not have yet.
 * @param {Element} element element to modify
 * @param {string} cssClasses space-separated class names; a falsy value is a no-op
 */
function JQLiteAddClass(element, cssClasses) {
  if (cssClasses) {
    forEach(cssClasses.split(' '), function(cssClass) {
      if (!JQLiteHasClass(element, cssClass)) {
        element.className = trim(element.className + ' ' + trim(cssClass));
      }
    });
  }
}

/**
 * Pushes node(s) onto `root`. A value that has a `length`, no `nodeName` and is not a window
 * is treated as a list of nodes; anything else is pushed as a single node.
 * @param {JQLite} root collection to extend (its `push` is used)
 * @param {*} elements node or list of nodes; a falsy value is a no-op
 */
function JQLiteAddNodes(root, elements) {
  if (elements) {
    elements = (!elements.nodeName && isDefined(elements.length) && !isWindow(elements))
      ? elements
      : [ elements ];
    for(var i=0; i < elements.length; i++) {
      root.push(elements[i]);
    }
  }
}

/**
 * Looks up inherited data under the key `'$' + name + 'Controller'`.
 * @param {Node} element starting element
 * @param {string=} name directive name; defaults to 'ngController'
 * @returns {*} the value found, or undefined
 */
function JQLiteController(element, name) {
  return JQLiteInheritedData(element, '$' + (name || 'ngController' ) + 'Controller');
}

/**
 * Walks from `element` up through its parents and returns the first data value stored under
 * `name`.
 *
 * Any stored value other than `undefined` ends the search, so falsy values such as `0`,
 * `false` or `''` are found too (a plain truthiness test would skip them and keep walking up).
 *
 * @param {Node|JQLite} element starting element
 * @param {string} name data key
 * @param {*=} value unused by callers; serves as a local
 * @returns {*} the first defined value found, or undefined when no ancestor has one
 */
function JQLiteInheritedData(element, name, value) {
  element = jqLite(element);

  // if element is the document object work with the html element instead
  // this makes $(document).scope() possible
  if(element[0].nodeType == 9) {
    element = element.find('html');
  }

  while (element.length) {
    if ((value = element.data(name)) !== undefined) return value;
    element = element.parent();
  }
}

//////////////////////////////////////////
// Functions which are declared directly.
//////////////////////////////////////////
var JQLitePrototype = JQLite.prototype = {
  /**
   * Runs `fn` once, on whichever of `DOMContentLoaded` (bound on this collection) or
   * the window `load` event fires first.
   */
  ready: function(fn) {
    var fired = false;

    function trigger() {
      if (fired) return;
      fired = true;
      fn();
    }

    this.bind('DOMContentLoaded', trigger); // works for modern browsers and IE9
    // we can not use jqLite since we are not done loading and jQuery could be loaded later.
    JQLite(window).bind('load', trigger); // fallback to window.onload for others
  },
  /** Returns the string forms of all items, comma-separated and wrapped in brackets. */
  toString: function() {
    var value = [];
    forEach(this, function(e){ value.push('' + e);});
    return '[' + value.join(', ') + ']';
  },

  /** Wraps the item at `index`; a negative index counts from the end of the collection. */
  eq: function(index) {
      return (index >= 0) ? jqLite(this[index]) : jqLite(this[this.length + index]);
  },

  length: 0,
  push: push,
  sort: [].sort,
  splice: [].splice
};

//////////////////////////////////////////
// Functions iterating getter/setters.
// these functions return self on setter and
// value on get.
+////////////////////////////////////////// +var BOOLEAN_ATTR = {}; +forEach('multiple,selected,checked,disabled,readOnly,required'.split(','), function(value) { + BOOLEAN_ATTR[lowercase(value)] = value; +}); +var BOOLEAN_ELEMENTS = {}; +forEach('input,select,option,textarea,button,form'.split(','), function(value) { + BOOLEAN_ELEMENTS[uppercase(value)] = true; +}); + +function getBooleanAttrName(element, name) { + // check dom last since we will most likely fail on name + var booleanAttr = BOOLEAN_ATTR[name.toLowerCase()]; + + // booleanAttr is here twice to minimize DOM access + return booleanAttr && BOOLEAN_ELEMENTS[element.nodeName] && booleanAttr; +} + +forEach({ + data: JQLiteData, + inheritedData: JQLiteInheritedData, + + scope: function(element) { + return JQLiteInheritedData(element, '$scope'); + }, + + controller: JQLiteController , + + injector: function(element) { + return JQLiteInheritedData(element, '$injector'); + }, + + removeAttr: function(element,name) { + element.removeAttribute(name); + }, + + hasClass: JQLiteHasClass, + + css: function(element, name, value) { + name = camelCase(name); + + if (isDefined(value)) { + element.style[name] = value; + } else { + var val; + + if (msie <= 8) { + // this is some IE specific weirdness that jQuery 1.6.4 does not sure why + val = element.currentStyle && element.currentStyle[name]; + if (val === '') val = 'auto'; + } + + val = val || element.style[name]; + + if (msie <= 8) { + // jquery weirdness :-/ + val = (val === '') ? undefined : val; + } + + return val; + } + }, + + attr: function(element, name, value){ + var lowercasedName = lowercase(name); + if (BOOLEAN_ATTR[lowercasedName]) { + if (isDefined(value)) { + if (!!value) { + element[name] = true; + element.setAttribute(name, lowercasedName); + } else { + element[name] = false; + element.removeAttribute(lowercasedName); + } + } else { + return (element[name] || + (element.attributes.getNamedItem(name)|| noop).specified) + ? 
lowercasedName + : undefined; + } + } else if (isDefined(value)) { + element.setAttribute(name, value); + } else if (element.getAttribute) { + // the extra argument "2" is to get the right thing for a.href in IE, see jQuery code + // some elements (e.g. Document) don't have get attribute, so return undefined + var ret = element.getAttribute(name, 2); + // normalize non-existing attributes to undefined (as jQuery) + return ret === null ? undefined : ret; + } + }, + + prop: function(element, name, value) { + if (isDefined(value)) { + element[name] = value; + } else { + return element[name]; + } + }, + + text: extend((msie < 9) + ? function(element, value) { + if (element.nodeType == 1 /** Element */) { + if (isUndefined(value)) + return element.innerText; + element.innerText = value; + } else { + if (isUndefined(value)) + return element.nodeValue; + element.nodeValue = value; + } + } + : function(element, value) { + if (isUndefined(value)) { + return element.textContent; + } + element.textContent = value; + }, {$dv:''}), + + val: function(element, value) { + if (isUndefined(value)) { + return element.value; + } + element.value = value; + }, + + html: function(element, value) { + if (isUndefined(value)) { + return element.innerHTML; + } + for (var i = 0, childNodes = element.childNodes; i < childNodes.length; i++) { + JQLiteDealoc(childNodes[i]); + } + element.innerHTML = value; + } +}, function(fn, name){ + /** + * Properties: writes return selection, reads return first value + */ + JQLite.prototype[name] = function(arg1, arg2) { + var i, key; + + // JQLiteHasClass has only two arguments, but is a getter-only fn, so we need to special-case it + // in a way that survives minification. + if (((fn.length == 2 && (fn !== JQLiteHasClass && fn !== JQLiteController)) ? 
arg1 : arg2) === undefined) { + if (isObject(arg1)) { + + // we are a write, but the object properties are the key/values + for(i=0; i < this.length; i++) { + if (fn === JQLiteData) { + // data() takes the whole object in jQuery + fn(this[i], arg1); + } else { + for (key in arg1) { + fn(this[i], key, arg1[key]); + } + } + } + // return self for chaining + return this; + } else { + // we are a read, so read the first child. + if (this.length) + return fn(this[0], arg1, arg2); + } + } else { + // we are a write, so apply to all children + for(i=0; i < this.length; i++) { + fn(this[i], arg1, arg2); + } + // return self for chaining + return this; + } + return fn.$dv; + }; +}); + +function createEventHandler(element, events) { + var eventHandler = function (event, type) { + if (!event.preventDefault) { + event.preventDefault = function() { + event.returnValue = false; //ie + }; + } + + if (!event.stopPropagation) { + event.stopPropagation = function() { + event.cancelBubble = true; //ie + }; + } + + if (!event.target) { + event.target = event.srcElement || document; + } + + if (isUndefined(event.defaultPrevented)) { + var prevent = event.preventDefault; + event.preventDefault = function() { + event.defaultPrevented = true; + prevent.call(event); + }; + event.defaultPrevented = false; + } + + event.isDefaultPrevented = function() { + return event.defaultPrevented; + }; + + forEach(events[type || event.type], function(fn) { + fn.call(element, event); + }); + + // Remove monkey-patched methods (IE), + // as they would cause memory leaks in IE8. + if (msie <= 8) { + // IE7/8 does not allow to delete property on native object + event.preventDefault = null; + event.stopPropagation = null; + event.isDefaultPrevented = null; + } else { + // It shouldn't affect normal browsers (native methods are defined on prototype). 
+ delete event.preventDefault; + delete event.stopPropagation; + delete event.isDefaultPrevented; + } + }; + eventHandler.elem = element; + return eventHandler; +} + +////////////////////////////////////////// +// Functions iterating traversal. +// These functions chain results into a single +// selector. +////////////////////////////////////////// +forEach({ + removeData: JQLiteRemoveData, + + dealoc: JQLiteDealoc, + + bind: function bindFn(element, type, fn){ + var events = JQLiteExpandoStore(element, 'events'), + handle = JQLiteExpandoStore(element, 'handle'); + + if (!events) JQLiteExpandoStore(element, 'events', events = {}); + if (!handle) JQLiteExpandoStore(element, 'handle', handle = createEventHandler(element, events)); + + forEach(type.split(' '), function(type){ + var eventFns = events[type]; + + if (!eventFns) { + if (type == 'mouseenter' || type == 'mouseleave') { + var contains = document.body.contains || document.body.compareDocumentPosition ? + function( a, b ) { + var adown = a.nodeType === 9 ? a.documentElement : a, + bup = b && b.parentNode; + return a === bup || !!( bup && bup.nodeType === 1 && ( + adown.contains ? + adown.contains( bup ) : + a.compareDocumentPosition && a.compareDocumentPosition( bup ) & 16 + )); + } : + function( a, b ) { + if ( b ) { + while ( (b = b.parentNode) ) { + if ( b === a ) { + return true; + } + } + } + return false; + }; + + events[type] = []; + + // Refer to jQuery's implementation of mouseenter & mouseleave + // Read about mouseenter and mouseleave: + // http://www.quirksmode.org/js/events_mouse.html#link8 + var eventmap = { mouseleave : "mouseout", mouseenter : "mouseover"} + bindFn(element, eventmap[type], function(event) { + var ret, target = this, related = event.relatedTarget; + // For mousenter/leave call the handler if related is outside the target. 
+ // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || (related !== target && !contains(target, related)) ){ + handle(event, type); + } + + }); + + } else { + addEventListenerFn(element, type, handle); + events[type] = []; + } + eventFns = events[type] + } + eventFns.push(fn); + }); + }, + + unbind: JQLiteUnbind, + + replaceWith: function(element, replaceNode) { + var index, parent = element.parentNode; + JQLiteDealoc(element); + forEach(new JQLite(replaceNode), function(node){ + if (index) { + parent.insertBefore(node, index.nextSibling); + } else { + parent.replaceChild(node, element); + } + index = node; + }); + }, + + children: function(element) { + var children = []; + forEach(element.childNodes, function(element){ + if (element.nodeType === 1) + children.push(element); + }); + return children; + }, + + contents: function(element) { + return element.childNodes || []; + }, + + append: function(element, node) { + forEach(new JQLite(node), function(child){ + if (element.nodeType === 1) + element.appendChild(child); + }); + }, + + prepend: function(element, node) { + if (element.nodeType === 1) { + var index = element.firstChild; + forEach(new JQLite(node), function(child){ + if (index) { + element.insertBefore(child, index); + } else { + element.appendChild(child); + index = child; + } + }); + } + }, + + wrap: function(element, wrapNode) { + wrapNode = jqLite(wrapNode)[0]; + var parent = element.parentNode; + if (parent) { + parent.replaceChild(wrapNode, element); + } + wrapNode.appendChild(element); + }, + + remove: function(element) { + JQLiteDealoc(element); + var parent = element.parentNode; + if (parent) parent.removeChild(element); + }, + + after: function(element, newElement) { + var index = element, parent = element.parentNode; + forEach(new JQLite(newElement), function(node){ + parent.insertBefore(node, index.nextSibling); + index = node; + }); + }, + + addClass: JQLiteAddClass, + removeClass: JQLiteRemoveClass, + + 
toggleClass: function(element, selector, condition) { + if (isUndefined(condition)) { + condition = !JQLiteHasClass(element, selector); + } + (condition ? JQLiteAddClass : JQLiteRemoveClass)(element, selector); + }, + + parent: function(element) { + var parent = element.parentNode; + return parent && parent.nodeType !== 11 ? parent : null; + }, + + next: function(element) { + if (element.nextElementSibling) { + return element.nextElementSibling; + } + + // IE8 doesn't have nextElementSibling + var elm = element.nextSibling; + while (elm != null && elm.nodeType !== 1) { + elm = elm.nextSibling; + } + return elm; + }, + + find: function(element, selector) { + return element.getElementsByTagName(selector); + }, + + clone: JQLiteClone, + + triggerHandler: function(element, eventName) { + var eventFns = (JQLiteExpandoStore(element, 'events') || {})[eventName]; + + forEach(eventFns, function(fn) { + fn.call(element, null); + }); + } +}, function(fn, name){ + /** + * chaining functions + */ + JQLite.prototype[name] = function(arg1, arg2) { + var value; + for(var i=0; i < this.length; i++) { + if (value == undefined) { + value = fn(this[i], arg1, arg2); + if (value !== undefined) { + // any function which returns a value needs to be wrapped + value = jqLite(value); + } + } else { + JQLiteAddNodes(value, fn(this[i], arg1, arg2)); + } + } + return value == undefined ? this : value; + }; +}); + +/** + * Computes a hash of an 'obj'. + * Hash of a: + * string is string + * number is number as string + * object is either result of calling $$hashKey function on the object or uniquely generated id, + * that is also assigned to the $$hashKey property of the object. + * + * @param obj + * @returns {string} hash string such that the same input will have the same hash string. + * The resulting string key is in 'type:hashKey' format. 
+ */ +function hashKey(obj) { + var objType = typeof obj, + key; + + if (objType == 'object' && obj !== null) { + if (typeof (key = obj.$$hashKey) == 'function') { + // must invoke on object to keep the right this + key = obj.$$hashKey(); + } else if (key === undefined) { + key = obj.$$hashKey = nextUid(); + } + } else { + key = obj; + } + + return objType + ':' + key; +} + +/** + * HashMap which can use objects as keys + */ +function HashMap(array){ + forEach(array, this.put, this); +} +HashMap.prototype = { + /** + * Store key value pair + * @param key key to store can be any type + * @param value value to store can be any type + */ + put: function(key, value) { + this[hashKey(key)] = value; + }, + + /** + * @param key + * @returns the value for the key + */ + get: function(key) { + return this[hashKey(key)]; + }, + + /** + * Remove the key/value pair + * @param key + */ + remove: function(key) { + var value = this[key = hashKey(key)]; + delete this[key]; + return value; + } +}; + +/** + * A map where multiple values can be added to the same key such that they form a queue. 
+ * @returns {HashQueueMap} + */ +function HashQueueMap() {} +HashQueueMap.prototype = { + /** + * Same as array push, but using an array as the value for the hash + */ + push: function(key, value) { + var array = this[key = hashKey(key)]; + if (!array) { + this[key] = [value]; + } else { + array.push(value); + } + }, + + /** + * Same as array shift, but using an array as the value for the hash + */ + shift: function(key) { + var array = this[key = hashKey(key)]; + if (array) { + if (array.length == 1) { + delete this[key]; + return array[0]; + } else { + return array.shift(); + } + } + }, + + /** + * return the first item without deleting it + */ + peek: function(key) { + var array = this[hashKey(key)]; + if (array) { + return array[0]; + } + } +}; + +/** + * @ngdoc function + * @name angular.injector + * @function + * + * @description + * Creates an injector function that can be used for retrieving services as well as for + * dependency injection (see {@link guide/di dependency injection}). + * + + * @param {Array.} modules A list of module functions or their aliases. See + * {@link angular.module}. The `ng` module must be explicitly added. + * @returns {function()} Injector function. See {@link AUTO.$injector $injector}. + * + * @example + * Typical usage + *
+ *   // create an injector
+ *   var $injector = angular.injector(['ng']);
+ *
+ *   // use the injector to kick off your application
+ *   // use the type inference to auto inject arguments, or use implicit injection
+ *   $injector.invoke(function($rootScope, $compile, $document){
+ *     $compile($document)($rootScope);
+ *     $rootScope.$digest();
+ *   });
+ * 
+ */ + + +/** + * @ngdoc overview + * @name AUTO + * @description + * + * Implicit module which gets automatically added to each {@link AUTO.$injector $injector}. + */ + +var FN_ARGS = /^function\s*[^\(]*\(\s*([^\)]*)\)/m; +var FN_ARG_SPLIT = /,/; +var FN_ARG = /^\s*(_?)(\S+?)\1\s*$/; +var STRIP_COMMENTS = /((\/\/.*$)|(\/\*[\s\S]*?\*\/))/mg; +function annotate(fn) { + var $inject, + fnText, + argDecl, + last; + + if (typeof fn == 'function') { + if (!($inject = fn.$inject)) { + $inject = []; + fnText = fn.toString().replace(STRIP_COMMENTS, ''); + argDecl = fnText.match(FN_ARGS); + forEach(argDecl[1].split(FN_ARG_SPLIT), function(arg){ + arg.replace(FN_ARG, function(all, underscore, name){ + $inject.push(name); + }); + }); + fn.$inject = $inject; + } + } else if (isArray(fn)) { + last = fn.length - 1; + assertArgFn(fn[last], 'fn'); + $inject = fn.slice(0, last); + } else { + assertArgFn(fn, 'fn', true); + } + return $inject; +} + +/////////////////////////////////////// + +/** + * @ngdoc object + * @name AUTO.$injector + * @function + * + * @description + * + * `$injector` is used to retrieve object instances as defined by + * {@link AUTO.$provide provider}, instantiate types, invoke methods, + * and load modules. + * + * The following always holds true: + * + *
+ *   var $injector = angular.injector();
+ *   expect($injector.get('$injector')).toBe($injector);
+ *   expect($injector.invoke(function($injector){
+ *     return $injector;
+ *   })).toBe($injector);
+ * 
+ * + * # Injection Function Annotation + * + * JavaScript does not have annotations, and annotations are needed for dependency injection. The + * following are all valid ways of annotating function with injection arguments and are equivalent. + * + *
+ *   // inferred (only works if code not minified/obfuscated)
+ *   $injector.invoke(function(serviceA){});
+ *
+ *   // annotated
+ *   function explicit(serviceA) {};
+ *   explicit.$inject = ['serviceA'];
+ *   $injector.invoke(explicit);
+ *
+ *   // inline
+ *   $injector.invoke(['serviceA', function(serviceA){}]);
+ * 
+ * + * ## Inference + * + * In JavaScript calling `toString()` on a function returns the function definition. The definition can then be + * parsed and the function arguments can be extracted. *NOTE:* This does not work with minification, and obfuscation + * tools since these tools change the argument names. + * + * ## `$inject` Annotation + * By adding a `$inject` property onto a function the injection parameters can be specified. + * + * ## Inline + * As an array of injection names, where the last item in the array is the function to call. + */ + +/** + * @ngdoc method + * @name AUTO.$injector#get + * @methodOf AUTO.$injector + * + * @description + * Return an instance of the service. + * + * @param {string} name The name of the instance to retrieve. + * @return {*} The instance. + */ + +/** + * @ngdoc method + * @name AUTO.$injector#invoke + * @methodOf AUTO.$injector + * + * @description + * Invoke the method and supply the method arguments from the `$injector`. + * + * @param {!function} fn The function to invoke. The function arguments come from the function annotation. + * @param {Object=} self The `this` for the invoked method. + * @param {Object=} locals Optional object. If present then any argument names are read from this object first, before + * the `$injector` is consulted. + * @returns {*} the value returned by the invoked `fn` function. + */ + +/** + * @ngdoc method + * @name AUTO.$injector#instantiate + * @methodOf AUTO.$injector + * @description + * Create a new instance of JS type. The method takes a constructor function invokes the new operator and supplies + * all of the arguments to the constructor function as specified by the constructor annotation. + * + * @param {function} Type Annotated constructor function. + * @param {Object=} locals Optional object. If present then any argument names are read from this object first, before + * the `$injector` is consulted. + * @returns {Object} new instance of `Type`.
+ */ + +/** + * @ngdoc method + * @name AUTO.$injector#annotate + * @methodOf AUTO.$injector + * + * @description + * Returns an array of service names which the function is requesting for injection. This API is used by the injector + * to determine which services need to be injected into the function when the function is invoked. There are three + * ways in which the function can be annotated with the needed dependencies. + * + * # Argument names + * + * The simplest form is to extract the dependencies from the arguments of the function. This is done by converting + * the function into a string using `toString()` method and extracting the argument names. + *
+ *   // Given
+ *   function MyController($scope, $route) {
+ *     // ...
+ *   }
+ *
+ *   // Then
+ *   expect(injector.annotate(MyController)).toEqual(['$scope', '$route']);
+ * 
+ * + * This method does not work with code minification / obfuscation. For this reason the following annotation strategies + * are supported. + * + * # The `$inject` property + * + * If a function has an `$inject` property and its value is an array of strings, then the strings represent names of + * services to be injected into the function. + *
+ *   // Given
+ *   var MyController = function(obfuscatedScope, obfuscatedRoute) {
+ *     // ...
+ *   }
+ *   // Define function dependencies
+ *   MyController.$inject = ['$scope', '$route'];
+ *
+ *   // Then
+ *   expect(injector.annotate(MyController)).toEqual(['$scope', '$route']);
+ * 
+ * + * # The array notation + * + * It is often desirable to inline Injected functions and that's when setting the `$inject` property is very + * inconvenient. In these situations using the array notation to specify the dependencies in a way that survives + * minification is a better choice: + * + *
+ *   // We wish to write this (not minification / obfuscation safe)
+ *   injector.invoke(function($compile, $rootScope) {
+ *     // ...
+ *   });
+ *
+ *   // With the `$inject` property we are forced to break the inlining
+ *   var tmpFn = function(obfuscatedCompile, obfuscatedRootScope) {
+ *     // ...
+ *   };
+ *   tmpFn.$inject = ['$compile', '$rootScope'];
+ *   injector.invoke(tmpFn);
+ *
+ *   // To better support inline function the inline annotation is supported
+ *   injector.invoke(['$compile', '$rootScope', function(obfCompile, obfRootScope) {
+ *     // ...
+ *   }]);
+ *
+ *   // Therefore
+ *   expect(injector.annotate(
+ *      ['$compile', '$rootScope', function(obfus_$compile, obfus_$rootScope) {}])
+ *    ).toEqual(['$compile', '$rootScope']);
+ * 
+ * + * @param {function|Array.<string|Function>} fn Function for which dependent service names need to be retrieved as described + * above. + * + * @returns {Array.<string>} The names of the services which the function requires. + */ + + + + +/** + * @ngdoc object + * @name AUTO.$provide + * + * @description + * + * Use `$provide` to register new providers with the `$injector`. The providers are the factories for the instance. + * The providers share the same name as the instance they create with `Provider` suffixed to them. + * + * A provider is an object with a `$get()` method. The injector calls the `$get` method to create a new instance of + * a service. The Provider can have additional methods which would allow for configuration of the provider. + * + *
+ *   function GreetProvider() {
+ *     var salutation = 'Hello';
+ *
+ *     this.salutation = function(text) {
+ *       salutation = text;
+ *     };
+ *
+ *     this.$get = function() {
+ *       return function (name) {
+ *         return salutation + ' ' + name + '!';
+ *       };
+ *     };
+ *   }
+ *
+ *   describe('Greeter', function(){
+ *
+ *     beforeEach(module(function($provide) {
+ *       $provide.provider('greet', GreetProvider);
+ *     }));
+ *
+ *     it('should greet', inject(function(greet) {
+ *       expect(greet('angular')).toEqual('Hello angular!');
+ *     }));
+ *
+ *     it('should allow configuration of salutation', function() {
+ *       module(function(greetProvider) {
+ *         greetProvider.salutation('Ahoj');
+ *       });
+ *       inject(function(greet) {
+ *         expect(greet('angular')).toEqual('Ahoj angular!');
+ *       });
+ *     });
+ *   });
+ * 
+ */ + +/** + * @ngdoc method + * @name AUTO.$provide#provider + * @methodOf AUTO.$provide + * @description + * + * Register a provider for a service. The providers can be retrieved and can have additional configuration methods. + * + * @param {string} name The name of the instance. NOTE: the provider will be available under `name + 'Provider'` key. + * @param {(Object|function())} provider If the provider is: + * + * - `Object`: then it should have a `$get` method. The `$get` method will be invoked using + * {@link AUTO.$injector#invoke $injector.invoke()} when an instance needs to be created. + * - `Constructor`: a new instance of the provider will be created using + * {@link AUTO.$injector#instantiate $injector.instantiate()}, then treated as `object`. + * + * @returns {Object} registered provider instance + */ + +/** + * @ngdoc method + * @name AUTO.$provide#factory + * @methodOf AUTO.$provide + * @description + * + * A short hand for configuring services if only `$get` method is required. + * + * @param {string} name The name of the instance. + * @param {function()} $getFn The $getFn for the instance creation. Internally this is a short hand for + * `$provide.provider(name, {$get: $getFn})`. + * @returns {Object} registered provider instance + */ + + +/** + * @ngdoc method + * @name AUTO.$provide#service + * @methodOf AUTO.$provide + * @description + * + * A short hand for registering service of given class. + * + * @param {string} name The name of the instance. + * @param {Function} constructor A class (constructor function) that will be instantiated. + * @returns {Object} registered provider instance + */ + + +/** + * @ngdoc method + * @name AUTO.$provide#value + * @methodOf AUTO.$provide + * @description + * + * A short hand for configuring services if the `$get` method is a constant. + * + * @param {string} name The name of the instance. + * @param {*} value The value. 
+ * @returns {Object} registered provider instance + */ + + +/** + * @ngdoc method + * @name AUTO.$provide#constant + * @methodOf AUTO.$provide + * @description + * + * A constant value, but unlike {@link AUTO.$provide#value value} it can be injected + * into configuration function (other modules) and it is not interceptable by + * {@link AUTO.$provide#decorator decorator}. + * + * @param {string} name The name of the constant. + * @param {*} value The constant value. + * @returns {Object} registered instance + */ + + +/** + * @ngdoc method + * @name AUTO.$provide#decorator + * @methodOf AUTO.$provide + * @description + * + * Decoration of service, allows the decorator to intercept the service instance creation. The + * returned instance may be the original instance, or a new instance which delegates to the + * original instance. + * + * @param {string} name The name of the service to decorate. + * @param {function()} decorator This function will be invoked when the service needs to be + * instantiated. The function is called using the {@link AUTO.$injector#invoke + * injector.invoke} method and is therefore fully injectable. Local injection arguments: + * + * * `$delegate` - The original service instance, which can be monkey patched, configured, + * decorated or delegated to. 
+ */
+
+
+/**
+ * Creates the injector for the given list of modules.
+ *
+ * Two internal injectors are kept:
+ *  - the provider injector, backed by `providerCache`, which holds `$provide` and every
+ *    registered provider under `<name>Provider`;
+ *  - the instance injector, backed by `instanceCache`, which creates a service on first use by
+ *    invoking the `$get` method of the matching provider.
+ *
+ * @param {Array} modulesToLoad Modules to load: module names, functions or annotated arrays.
+ * @returns {Object} the instance injector (`invoke`, `instantiate`, `get`, `annotate`).
+ */
+function createInjector(modulesToLoad) {
+  var INSTANTIATING = {},
+      providerSuffix = 'Provider',
+      path = [],
+      loadedModules = new HashMap(),
+      providerCache = {
+        $provide: {
+            provider: supportObject(provider),
+            factory: supportObject(factory),
+            service: supportObject(service),
+            value: supportObject(value),
+            constant: supportObject(constant),
+            decorator: decorator
+          }
+      },
+      providerInjector = createInternalInjector(providerCache, function() {
+        throw Error("Unknown provider: " + path.join(' <- '));
+      }),
+      instanceCache = {},
+      instanceInjector = (instanceCache.$injector =
+          createInternalInjector(instanceCache, function(servicename) {
+            var provider = providerInjector.get(servicename + providerSuffix);
+            return instanceInjector.invoke(provider.$get, provider);
+          }));
+
+
+  // loadModules returns the collected run blocks; execute them once everything is configured
+  forEach(loadModules(modulesToLoad), function(fn) { instanceInjector.invoke(fn || noop); });
+
+  return instanceInjector;
+
+  ////////////////////////////////////
+  // $provider
+  ////////////////////////////////////
+
+  /**
+   * Wraps a `(key, value)` registration function so that it also accepts an object map,
+   * in which case `delegate` is called once per entry.
+   */
+  function supportObject(delegate) {
+    return function(key, value) {
+      if (isObject(key)) {
+        forEach(key, reverseParams(delegate));
+      } else {
+        return delegate(key, value);
+      }
+    };
+  }
+
+  /**
+   * Registers a provider. A function or annotated array is instantiated through the provider
+   * injector first. Throws if the resulting provider has no `$get`.
+   */
+  function provider(name, provider_) {
+    if (isFunction(provider_) || isArray(provider_)) {
+      provider_ = providerInjector.instantiate(provider_);
+    }
+    if (!provider_.$get) {
+      throw Error('Provider ' + name + ' must define $get factory method.');
+    }
+    return providerCache[name + providerSuffix] = provider_;
+  }
+
+  function factory(name, factoryFn) { return provider(name, { $get: factoryFn }); }
+
+  function service(name, constructor) {
+    return factory(name, ['$injector', function($injector) {
+      return $injector.instantiate(constructor);
+    }]);
+  }
+
+  function value(name, value) { return factory(name, valueFn(value)); }
+
+  // constants are stored in both caches so they are available to providers and to instances
+  function constant(name, value) {
+    providerCache[name] = value;
+    instanceCache[name] = value;
+  }
+
+  /**
+   * Replaces the `$get` of an already registered provider with one that creates the original
+   * instance and hands it to `decorFn` as the `$delegate` local.
+   */
+  function decorator(serviceName, decorFn) {
+    var origProvider = providerInjector.get(serviceName + providerSuffix),
+        orig$get = origProvider.$get;
+
+    origProvider.$get = function() {
+      var origInstance = instanceInjector.invoke(orig$get, origProvider);
+      return instanceInjector.invoke(decorFn, null, {$delegate: origInstance});
+    };
+  }
+
+  ////////////////////////////////////
+  // Module Loading
+  ////////////////////////////////////
+
+  /**
+   * Loads each module at most once (tracked in `loadedModules`) and returns the run blocks that
+   * were collected. Errors raised while loading get ' from <module>' appended to their message.
+   */
+  function loadModules(modulesToLoad){
+    var runBlocks = [];
+    forEach(modulesToLoad, function(module) {
+      if (loadedModules.get(module)) return;
+      loadedModules.put(module, true);
+      if (isString(module)) {
+        var moduleFn = angularModule(module);
+        runBlocks = runBlocks.concat(loadModules(moduleFn.requires)).concat(moduleFn._runBlocks);
+
+        try {
+          for(var invokeQueue = moduleFn._invokeQueue, i = 0, ii = invokeQueue.length; i < ii; i++) {
+            var invokeArgs = invokeQueue[i],
+                provider = invokeArgs[0] == '$injector'
+                    ? providerInjector
+                    : providerInjector.get(invokeArgs[0]);
+
+            provider[invokeArgs[1]].apply(provider, invokeArgs[2]);
+          }
+        } catch (e) {
+          if (e.message) e.message += ' from ' + module;
+          throw e;
+        }
+      } else if (isFunction(module)) {
+        try {
+          runBlocks.push(providerInjector.invoke(module));
+        } catch (e) {
+          if (e.message) e.message += ' from ' + module;
+          throw e;
+        }
+      } else if (isArray(module)) {
+        try {
+          runBlocks.push(providerInjector.invoke(module));
+        } catch (e) {
+          if (e.message) e.message += ' from ' + String(module[module.length - 1]);
+          throw e;
+        }
+      } else {
+        assertArgFn(module, 'module');
+      }
+    });
+    return runBlocks;
+  }
+
+  ////////////////////////////////////
+  // internal Injector
+  ////////////////////////////////////
+
+  /**
+   * Builds an injector on top of `cache`; `factory(serviceName)` is called on a cache miss.
+   */
+  function createInternalInjector(cache, factory) {
+
+    /**
+     * Returns the cached service or creates it through `factory`. While a service is being
+     * created its cache slot holds INSTANTIATING, which is how circular dependencies are detected.
+     */
+    function getService(serviceName) {
+      if (typeof serviceName !== 'string') {
+        throw Error('Service name expected');
+      }
+      if (cache.hasOwnProperty(serviceName)) {
+        if (cache[serviceName] === INSTANTIATING) {
+          throw Error('Circular dependency: ' + path.join(' <- '));
+        }
+        return cache[serviceName];
+      } else {
+        try {
+          path.unshift(serviceName);
+          cache[serviceName] = INSTANTIATING;
+          return cache[serviceName] = factory(serviceName);
+        } catch (err) {
+          // the factory failed: drop the marker, otherwise the next lookup of this service
+          // would be misreported as a circular dependency
+          if (cache[serviceName] === INSTANTIATING) {
+            delete cache[serviceName];
+          }
+          throw err;
+        } finally {
+          path.shift();
+        }
+      }
+    }
+
+    /**
+     * Calls `fn` with its annotated dependencies; each one is taken from `locals` when present
+     * there, otherwise resolved with `getService`.
+     */
+    function invoke(fn, self, locals){
+      var args = [],
+          $inject = annotate(fn),
+          length, i,
+          key;
+
+      for(i = 0, length = $inject.length; i < length; i++) {
+        key = $inject[i];
+        args.push(
+          locals && locals.hasOwnProperty(key)
+          ? locals[key]
+          : getService(key)
+        );
+      }
+      if (!fn.$inject) {
+        // this means that we must be an array.
+        fn = fn[length];
+      }
+
+
+      // Performance optimization: http://jsperf.com/apply-vs-call-vs-invoke
+      switch (self ? -1 : args.length) {
+        case  0: return fn();
+        case  1: return fn(args[0]);
+        case  2: return fn(args[0], args[1]);
+        case  3: return fn(args[0], args[1], args[2]);
+        case  4: return fn(args[0], args[1], args[2], args[3]);
+        case  5: return fn(args[0], args[1], args[2], args[3], args[4]);
+        case  6: return fn(args[0], args[1], args[2], args[3], args[4], args[5]);
+        case  7: return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6]);
+        case  8: return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]);
+        case  9: return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8]);
+        case 10: return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8], args[9]);
+        default: return fn.apply(self, args);
+      }
+    }
+
+    /**
+     * Creates an object whose prototype is `Type.prototype` and invokes `Type` on it. If the
+     * invocation returns an object, that object is returned instead of the created instance.
+     */
+    function instantiate(Type, locals) {
+      var Constructor = function() {},
+          instance, returnedValue;
+
+      // Check if Type is annotated and use just the given function at n-1 as parameter
+      // e.g. someModule.factory('greeter', ['$window', function(renamed$window) {}]);
+      Constructor.prototype = (isArray(Type) ? Type[Type.length - 1] : Type).prototype;
+      instance = new Constructor();
+      returnedValue = invoke(Type, instance, locals);
+
+      return isObject(returnedValue) ? returnedValue : instance;
+    }
+
+    return {
+      invoke: invoke,
+      instantiate: instantiate,
+      get: getService,
+      annotate: annotate
+    };
+  }
+}
+
+/**
+ * @ngdoc function
+ * @name ng.$anchorScroll
+ * @requires $window
+ * @requires $location
+ * @requires $rootScope
+ *
+ * @description
+ * When called, it checks the current value of `$location.hash()` and scrolls to the related
+ * element, according to the rules specified in
+ * {@link http://dev.w3.org/html5/spec/Overview.html#the-indicated-part-of-the-document Html5 spec}.
+ *
+ * It also watches `$location.hash()` and scrolls whenever it changes to match any anchor.
+ * This can be disabled by calling `$anchorScrollProvider.disableAutoScrolling()`.
+ */
+function $AnchorScrollProvider() {
+
+  var autoScrollingEnabled = true;
+
+  this.disableAutoScrolling = function() {
+    autoScrollingEnabled = false;
+  };
+
+  this.$get = ['$window', '$location', '$rootScope', function($window, $location, $rootScope) {
+    var document = $window.document;
+
+    // helper function to get first anchor from a NodeList
+    // can't use filter.filter, as it accepts only instances of Array
+    // and IE can't convert NodeList to an array using [].slice
+    // TODO(vojta): use filter if we change it to accept lists as well
+    function getFirstAnchor(list) {
+      var result = null;
+      forEach(list, function(element) {
+        if (!result && lowercase(element.nodeName) === 'a') result = element;
+      });
+      return result;
+    }
+
+    function scroll() {
+      var hash = $location.hash(), elm;
+
+      // empty hash, scroll to the top of the page
+      if (!hash) $window.scrollTo(0, 0);
+
+      // element with given id
+      else if ((elm = document.getElementById(hash))) elm.scrollIntoView();
+
+      // first anchor with given name :-D
+      else if ((elm = getFirstAnchor(document.getElementsByName(hash)))) elm.scrollIntoView();
+
+      // no element and hash == 'top', scroll to the top of the page
+      else if (hash === 'top') $window.scrollTo(0, 0);
+    }
+
+    // does not scroll when user clicks on anchor link that is currently on
+    // (no url change, no $location.hash() change), browser native does scroll
+    if (autoScrollingEnabled) {
+      $rootScope.$watch(function autoScrollWatch() {return $location.hash();},
+        function autoScrollWatchAction() {
+          $rootScope.$evalAsync(scroll);
+        });
+    }
+
+    return scroll;
+  }];
+}
+
+/**
+ * ! This is a private undocumented service !
+ *
+ * @name ng.$browser
+ * @requires $log
+ * @description
+ * This object has two goals:
+ *
+ * - hide all the global state in the browser caused by the window object
+ * - abstract away all the browser specific features and inconsistencies
+ *
+ * For tests we provide {@link ngMock.$browser mock implementation} of the `$browser`
+ * service, which can be used for convenient testing of the application without the interaction with
+ * the real browser apis.
+ */
+/**
+ * @param {object} window The global window object.
+ * @param {object} document jQuery wrapped document.
+ * @param {object} $log console.log or an object with the same interface.
+ * @param {object} $sniffer $sniffer service
+ */
+function Browser(window, document, $log, $sniffer) {
+  var self = this,
+      rawDocument = document[0],
+      location = window.location,
+      history = window.history,
+      setTimeout = window.setTimeout,
+      clearTimeout = window.clearTimeout,
+      pendingDeferIds = {};
+
+  self.isMock = false;
+
+  var outstandingRequestCount = 0;
+  var outstandingRequestCallbacks = [];
+
+  // TODO(vojta): remove this temporary api
+  self.$$completeOutstandingRequest = completeOutstandingRequest;
+  self.$$incOutstandingRequestCount = function() { outstandingRequestCount++; };
+
+  /**
+   * Executes the `fn` function (supports currying) and decrements the `outstandingRequestCount`
+   * counter. If the counter reaches 0, all the `outstandingRequestCallbacks` are executed.
+   */
+  function completeOutstandingRequest(fn) {
+    try {
+      fn.apply(null, sliceArgs(arguments, 1));
+    } finally {
+      outstandingRequestCount--;
+      if (outstandingRequestCount === 0) {
+        while(outstandingRequestCallbacks.length) {
+          try {
+            outstandingRequestCallbacks.pop()();
+          } catch (e) {
+            $log.error(e);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * @private
+   * Note: this method is used only by scenario runner
+   * TODO(vojta): prefix this method with $$ ?
+   * @param {function()} callback Function that will be called when no outstanding request
+   */
+  self.notifyWhenNoOutstandingRequests = function(callback) {
+    // force browser to execute all pollFns - this is needed so that cookies and other pollers fire
+    // at some deterministic time in respect to the test runner's actions. Leaving things up to the
+    // regular poller would result in flaky tests.
+    forEach(pollFns, function(pollFn){ pollFn(); });
+
+    if (outstandingRequestCount === 0) {
+      callback();
+    } else {
+      outstandingRequestCallbacks.push(callback);
+    }
+  };
+
+  //////////////////////////////////////////////////////////////
+  // Poll Watcher API
+  //////////////////////////////////////////////////////////////
+  var pollFns = [],
+      pollTimeout;
+
+  /**
+   * @name ng.$browser#addPollFn
+   * @methodOf ng.$browser
+   *
+   * @param {function()} fn Poll function to add
+   *
+   * @description
+   * Adds a function to the list of functions that poller periodically executes,
+   * and starts polling if not started yet.
+   *
+   * @returns {function()} the added function
+   */
+  self.addPollFn = function(fn) {
+    if (isUndefined(pollTimeout)) startPoller(100, setTimeout);
+    pollFns.push(fn);
+    return fn;
+  };
+
+  /**
+   * @param {number} interval How often should browser call poll functions (ms)
+   * @param {function()} setTimeout Reference to a real or fake `setTimeout` function.
+   *
+   * @description
+   * Configures the poller to run in the specified intervals, using the specified
+   * setTimeout fn and kicks it off.
+   */
+  function startPoller(interval, setTimeout) {
+    (function check() {
+      forEach(pollFns, function(pollFn){ pollFn(); });
+      pollTimeout = setTimeout(check, interval);
+    })();
+  }
+
+  //////////////////////////////////////////////////////////////
+  // URL API
+  //////////////////////////////////////////////////////////////
+
+  var lastBrowserUrl = location.href,
+      baseElement = document.find('base');
+
+  /**
+   * @name ng.$browser#url
+   * @methodOf ng.$browser
+   *
+   * @description
+   * GETTER:
+   * Without any argument, this method just returns current value of location.href.
+   *
+   * SETTER:
+   * With at least one argument, this method sets url to new value.
+   * If html5 history api supported, pushState/replaceState is used, otherwise
+   * location.href/location.replace is used.
+   * Returns its own instance to allow chaining (also when the url is already the current one,
+   * in which case nothing is written).
+   *
+   * NOTE: this api is intended for use only by the $location service. Please use the
+   * {@link ng.$location $location service} to change url.
+   *
+   * @param {string} url New url (when used as setter)
+   * @param {boolean=} replace Should new url replace current history record ?
+   */
+  self.url = function(url, replace) {
+    // setter
+    if (url) {
+      // nothing to write, but still honour the documented chaining contract
+      if (lastBrowserUrl == url) return self;
+      lastBrowserUrl = url;
+      if ($sniffer.history) {
+        if (replace) history.replaceState(null, '', url);
+        else {
+          history.pushState(null, '', url);
+          // Crazy Opera Bug: http://my.opera.com/community/forums/topic.dml?id=1185462
+          baseElement.attr('href', baseElement.attr('href'));
+        }
+      } else {
+        if (replace) location.replace(url);
+        else location.href = url;
+      }
+      return self;
+    // getter
+    } else {
+      // the replacement is a workaround for https://bugzilla.mozilla.org/show_bug.cgi?id=407172
+      return location.href.replace(/%27/g,"'");
+    }
+  };
+
+  var urlChangeListeners = [],
+      urlChangeInit = false;
+
+  // notifies the listeners, but only when the url differs from the last one seen or set here
+  function fireUrlChange() {
+    if (lastBrowserUrl == self.url()) return;
+
+    lastBrowserUrl = self.url();
+    forEach(urlChangeListeners, function(listener) {
+      listener(self.url());
+    });
+  }
+
+  /**
+   * @name ng.$browser#onUrlChange
+   * @methodOf ng.$browser
+   * @TODO(vojta): refactor to use node's syntax for events
+   *
+   * @description
+   * Register callback function that will be called, when url changes.
+   *
+   * It's only called when the url is changed by outside of angular:
+   * - user types different url into address bar
+   * - user clicks on history (forward/back) button
+   * - user clicks on a link
+   *
+   * It's not called when url is changed by $browser.url() method
+   *
+   * The listener gets called with new url as parameter.
+   *
+   * NOTE: this api is intended for use only by the $location service. Please use the
+   * {@link ng.$location $location service} to monitor url changes in angular apps.
+   *
+   * @param {function(string)} listener Listener function to be called when url changes.
+   * @return {function(string)} Returns the registered listener fn - handy if the fn is anonymous.
+   */
+  self.onUrlChange = function(callback) {
+    if (!urlChangeInit) {
+      // We listen on both (hashchange/popstate) when available, as some browsers (e.g. Opera)
+      // don't fire popstate when user change the address bar and don't fire hashchange when url
+      // changed by push/replaceState
+
+      // html5 history api - popstate event
+      if ($sniffer.history) jqLite(window).bind('popstate', fireUrlChange);
+      // hashchange event
+      if ($sniffer.hashchange) jqLite(window).bind('hashchange', fireUrlChange);
+      // polling
+      else self.addPollFn(fireUrlChange);
+
+      urlChangeInit = true;
+    }
+
+    urlChangeListeners.push(callback);
+    return callback;
+  };
+
+  //////////////////////////////////////////////////////////////
+  // Misc API
+  //////////////////////////////////////////////////////////////
+
+  /**
+   * Returns the `href` of the document's `base` element with the leading
+   * `http(s)://host` part removed (always relative - without domain).
+   *
+   * @returns {string} the relative base href, or '' when there is none
+   */
+  self.baseHref = function() {
+    var href = baseElement.attr('href');
+    return href ? href.replace(/^https?\:\/\/[^\/]*/, '') : '';
+  };
+
+  //////////////////////////////////////////////////////////////
+  // Cookies API
+  //////////////////////////////////////////////////////////////
+  var lastCookies = {};
+  var lastCookieString = '';
+  var cookiePath = self.baseHref();
+
+  /**
+   * @name ng.$browser#cookies
+   * @methodOf ng.$browser
+   *
+   * @param {string=} name Cookie name
+   * @param {string=} value Cookie value
+   *
+   * @description
+   * The cookies method provides a 'private' low level access to browser cookies.
+   * It is not meant to be used directly, use the $cookie service instead.
+   *
+   * The return values vary depending on the arguments that the method was called with as follows:
+   * <ul>
+   *   <li>cookies() -> hash of all cookies, this is NOT a copy of the internal state, so do not modify it</li>
+   *   <li>cookies(name, value) -> set name to value, if value is undefined delete the cookie</li>
+   *   <li>cookies(name) -> the same as (name, undefined) == DELETES (no one calls it right now that way)</li>
+   * </ul>
+   *
+   * @returns {Object} Hash of all cookies (if called without any parameter)
+   */
+  self.cookies = function(name, value) {
+    var cookieLength, cookieArray, cookie, i, index;
+
+    if (name) {
+      if (value === undefined) {
+        // deleting = writing an already expired cookie
+        rawDocument.cookie = escape(name) + "=;path=" + cookiePath + ";expires=Thu, 01 Jan 1970 00:00:00 GMT";
+      } else {
+        if (isString(value)) {
+          cookieLength = (rawDocument.cookie = escape(name) + '=' + escape(value) + ';path=' + cookiePath).length + 1;
+
+          // per http://www.ietf.org/rfc/rfc2109.txt browser must allow at minimum:
+          // - 300 cookies
+          // - 20 cookies per unique domain
+          // - 4096 bytes per cookie
+          if (cookieLength > 4096) {
+            $log.warn("Cookie '"+ name +"' possibly not set or overflowed because it was too large ("+
+              cookieLength + " > 4096 bytes)!");
+          }
+        }
+      }
+    } else {
+      // re-parse only when the raw cookie string has changed since the last read
+      if (rawDocument.cookie !== lastCookieString) {
+        lastCookieString = rawDocument.cookie;
+        cookieArray = lastCookieString.split("; ");
+        lastCookies = {};
+
+        for (i = 0; i < cookieArray.length; i++) {
+          cookie = cookieArray[i];
+          index = cookie.indexOf('=');
+          if (index > 0) { //ignore nameless cookies
+            var name = unescape(cookie.substring(0, index));
+            // the first value that is seen for a cookie is the most
+            // specific one.  values for the same cookie name that
+            // follow are for less specific paths.
+            if (lastCookies[name] === undefined) {
+              lastCookies[name] = unescape(cookie.substring(index + 1));
+            }
+          }
+        }
+      }
+      return lastCookies;
+    }
+  };
+
+
+  /**
+   * @name ng.$browser#defer
+   * @methodOf ng.$browser
+   * @param {function()} fn A function whose execution should be deferred.
+   * @param {number=} [delay=0] number of milliseconds to defer the function execution.
+   * @returns {*} DeferId that can be used to cancel the task via `$browser.defer.cancel()`.
+   *
+   * @description
+   * Executes a fn asynchronously via `setTimeout(fn, delay)`.
+   *
+   * Unlike when calling `setTimeout` directly, in test this function is mocked and instead of using
+   * `setTimeout` in tests, the fns are queued in an array, which can be programmatically flushed
+   * via `$browser.defer.flush()`.
+   *
+   */
+  self.defer = function(fn, delay) {
+    var timeoutId;
+    outstandingRequestCount++;
+    timeoutId = setTimeout(function() {
+      delete pendingDeferIds[timeoutId];
+      completeOutstandingRequest(fn);
+    }, delay || 0);
+    pendingDeferIds[timeoutId] = true;
+    return timeoutId;
+  };
+
+
+  /**
+   * @name ng.$browser#defer.cancel
+   * @methodOf ng.$browser.defer
+   *
+   * @description
+   * Cancels a deferred task identified with `deferId`.
+   *
+   * @param {*} deferId Token returned by the `$browser.defer` function.
+   * @returns {boolean} Returns `true` if the task hasn't executed yet and was successfully canceled.
+   */
+  self.defer.cancel = function(deferId) {
+    if (pendingDeferIds[deferId]) {
+      delete pendingDeferIds[deferId];
+      clearTimeout(deferId);
+      // balances the increment done in defer(); noop because the task itself must not run
+      completeOutstandingRequest(noop);
+      return true;
+    }
+    return false;
+  };
+
+}
+
+function $BrowserProvider(){
+  this.$get = ['$window', '$log', '$sniffer', '$document',
+      function( $window,   $log,   $sniffer,   $document){
+        return new Browser($window, $document, $log, $sniffer);
+      }];
+}
+
+/**
+ * @ngdoc object
+ * @name ng.$cacheFactory
+ *
+ * @description
+ * Factory that constructs cache objects.
+ *
+ *
+ * @param {string} cacheId Name or id of the newly created cache.
+ * @param {object=} options Options object that specifies the cache behavior. Properties:
+ *
+ *   - `{number=}` `capacity` — turns the cache into LRU cache.
+ *
+ * @returns {object} Newly created cache object with the following set of methods:
+ *
+ * - `{object}` `info()` — Returns id, size, and options of cache.
+ * - `{void}` `put({string} key, {*} value)` — Puts a new key-value pair into the cache.
+ * - `{{*}}` `get({string} key)` — Returns cached value for `key` or undefined for cache miss.
+ * - `{void}` `remove({string} key)` — Removes a key-value pair from the cache.
+ * - `{void}` `removeAll()` — Removes all cached values.
+ * - `{void}` `destroy()` — Removes references to this cache from $cacheFactory.
+ *
+ */
+function $CacheFactoryProvider() {
+
+  this.$get = function() {
+    // all caches created so far, keyed by cacheId
+    var caches = {};
+
+    /**
+     * Creates and registers a new cache. Throws if `cacheId` is already taken.
+     *
+     * Entries are kept in `data`; their recency is tracked by a doubly linked list of
+     * `lruHash` entries running from `staleEnd` (least recently used) to `freshEnd`.
+     */
+    function cacheFactory(cacheId, options) {
+      if (cacheId in caches) {
+        throw Error('cacheId ' + cacheId + ' taken');
+      }
+
+      var size = 0,
+          stats = extend({}, options, {id: cacheId}),
+          data = {},
+          capacity = (options && options.capacity) || Number.MAX_VALUE,
+          lruHash = {},
+          freshEnd = null,
+          staleEnd = null;
+
+      return caches[cacheId] = {
+
+        // Stores `value` under `key` and marks it as most recently used. An undefined value is
+        // ignored. When the capacity is exceeded, the least recently used entry is evicted.
+        put: function(key, value) {
+          // bail out before touching the LRU list: an entry without data would later be
+          // removed/evicted and decrement `size` for a value that was never counted
+          if (isUndefined(value)) return;
+
+          var lruEntry = lruHash[key] || (lruHash[key] = {key: key});
+
+          refresh(lruEntry);
+
+          if (!(key in data)) size++;
+          data[key] = value;
+
+          if (size > capacity) {
+            this.remove(staleEnd.key);
+          }
+        },
+
+
+        // Returns the value stored under `key` (undefined on a miss) and marks it as fresh.
+        get: function(key) {
+          var lruEntry = lruHash[key];
+
+          if (!lruEntry) return;
+
+          refresh(lruEntry);
+
+          return data[key];
+        },
+
+
+        // Removes `key` from the cache and unlinks its entry from the LRU list.
+        remove: function(key) {
+          var lruEntry = lruHash[key];
+
+          if (!lruEntry) return;
+
+          if (lruEntry == freshEnd) freshEnd = lruEntry.p;
+          if (lruEntry == staleEnd) staleEnd = lruEntry.n;
+          link(lruEntry.n,lruEntry.p);
+
+          delete lruHash[key];
+          delete data[key];
+          size--;
+        },
+
+
+        // Drops every entry but keeps the cache registered.
+        removeAll: function() {
+          data = {};
+          size = 0;
+          lruHash = {};
+          freshEnd = staleEnd = null;
+        },
+
+
+        // Releases the internal state and unregisters the cache from `caches`.
+        destroy: function() {
+          data = null;
+          stats = null;
+          lruHash = null;
+          delete caches[cacheId];
+        },
+
+
+        // Returns a copy of the options and id, extended with the current `size`.
+        info: function() {
+          return extend({}, stats, {size: size});
+        }
+      };
+
+
+      /**
+       * makes the `entry` the freshEnd of the LRU linked list
+       */
+      function refresh(entry) {
+        if (entry != freshEnd) {
+          if (!staleEnd) {
+            staleEnd = entry;
+          } else if (staleEnd == entry) {
+            staleEnd = entry.n;
+          }
+
+          link(entry.n, entry.p);
+          link(entry, freshEnd);
+          freshEnd = entry;
+          freshEnd.n = null;
+        }
+      }
+
+
+      /**
+       * bidirectionally links two entries of the LRU linked list
+       */
+      function link(nextEntry, prevEntry) {
+        if (nextEntry != prevEntry) {
+          if (nextEntry) nextEntry.p = prevEntry; //p stands for previous, 'prev' didn't minify
+          if (prevEntry) prevEntry.n = nextEntry; //n stands for next, 'next' didn't minify
+        }
+      }
+    }
+
+
+    // Returns a map of cacheId -> info() for every registered cache.
+    cacheFactory.info = function() {
+      var info = {};
+      forEach(caches, function(cache, cacheId) {
+        info[cacheId] = cache.info();
+      });
+      return info;
+    };
+
+
+    // Returns the cache registered under `cacheId`, or undefined if there is none.
+    cacheFactory.get = function(cacheId) {
+      return caches[cacheId];
+    };
+
+
+    return cacheFactory;
+  };
+}
+
+/**
+ * @ngdoc object
+ * @name ng.$templateCache
+ *
+ * @description
+ * Cache used for storing html templates.
+ *
+ * See {@link ng.$cacheFactory $cacheFactory}.
+ *
+ */
+function $TemplateCacheProvider() {
+  this.$get = ['$cacheFactory', function($cacheFactory) {
+    return $cacheFactory('templates');
+  }];
+}
+
+/* ! VARIABLE/FUNCTION NAMING CONVENTIONS THAT APPLY TO THIS FILE!
+ *
+ * DOM-related variables:
+ *
+ * - "node" - DOM Node
+ * - "element" - DOM Element or Node
+ * - "$node" or "$element" - jqLite-wrapped node or element
+ *
+ *
+ * Compiler related stuff:
+ *
+ * - "linkFn" - linking fn of a single directive
+ * - "nodeLinkFn" - function that aggregates all linking fns for a particular node
+ * - "childLinkFn" -  function that aggregates all linking fns for child nodes of a particular node
+ * - "compositeLinkFn" - function that aggregates all linking fns for a compilation root (nodeList)
+ */
+
+
+var NON_ASSIGNABLE_MODEL_EXPRESSION = 'Non-assignable model expression: ';
+
+
+/**
+ * @ngdoc function
+ * @name ng.$compile
+ * @function
+ *
+ * @description
+ * Compiles a piece of HTML string or DOM into a template and produces a template function, which
+ * can then be used to link {@link ng.$rootScope.Scope scope} and the template together.
+ * + * The compilation is a process of walking the DOM tree and trying to match DOM elements to + * {@link ng.$compileProvider#directive directives}. For each match it + * executes corresponding template function and collects the + * instance functions into a single template function which is then returned. + * + * The template function can then be used once to produce the view or as it is the case with + * {@link ng.directive:ngRepeat repeater} many-times, in which + * case each call results in a view that is a DOM clone of the original template. + * + + + +
+
+
+
+
+
+ + it('should auto compile', function() { + expect(element('div[compile]').text()).toBe('Hello Angular'); + input('html').enter('{{name}}!'); + expect(element('div[compile]').text()).toBe('Angular!'); + }); + +
+
+ *
+ *
+ * @param {string|DOMElement} element Element or HTML string to compile into a template function.
+ * @param {function(angular.Scope[, cloneAttachFn])} transclude function available to directives.
+ * @param {number} maxPriority only apply directives lower than the given priority (only affects the
+ *                 root element(s), not their children)
+ * @returns {function(scope[, cloneAttachFn])} a link function which is used to bind template
+ * (a DOM element/tree) to a scope. Where:
+ *
+ *  * `scope` - A {@link ng.$rootScope.Scope Scope} to bind to.
+ *  * `cloneAttachFn` - If `cloneAttachFn` is provided, then the link function will clone the
+ *               `template` and call the `cloneAttachFn` function allowing the caller to attach the
+ *               cloned elements to the DOM document at the appropriate place. The `cloneAttachFn` is
+ *               called as: <br> `cloneAttachFn(clonedElement, scope)` where:
+ *
+ *      * `clonedElement` - is a clone of the original `element` passed into the compiler.
+ *      * `scope` - is the current scope with which the linking function is working.
+ *
+ * Calling the linking function returns the element of the template. It is either the original element
+ * passed in, or the clone of the element if the `cloneAttachFn` is provided.
+ *
+ * After linking the view is not updated until after a call to $digest which typically is done by
+ * Angular automatically.
+ *
+ * If you need access to the bound view, there are two ways to do it:
+ *
+ * - If you are not asking the linking function to clone the template, create the DOM element(s)
+ *   before you send them to the compiler and keep this reference around.
+ *   <pre>
+ *     var element = $compile('<p>{{total}}</p>')(scope);
+ *   </pre>
+ *
+ * - If, on the other hand, you need the element to be cloned, the view reference from the original
+ *   example would not point to the clone, but rather to the original template that was cloned. In
+ *   this case, you can access the clone via the cloneAttachFn:
+ *   <pre>
+ *     var templateHTML = angular.element('<p>{{total}}</p>'),
+ *         scope = ....;
+ *
+ *     var clonedElement = $compile(templateHTML)(scope, function(clonedElement, scope) {
+ *       //attach the clone to DOM document at the right place
+ *     });
+ *
+ *     //now we have reference to the cloned DOM via `clonedElement`
+ *   </pre>
+ * + * + * For information on how the compiler works, see the + * {@link guide/compiler Angular HTML Compiler} section of the Developer Guide. + */ + + +/** + * @ngdoc service + * @name ng.$compileProvider + * @function + * + * @description + */ +$CompileProvider.$inject = ['$provide']; +function $CompileProvider($provide) { + var hasDirectives = {}, + Suffix = 'Directive', + COMMENT_DIRECTIVE_REGEXP = /^\s*directive\:\s*([\d\w\-_]+)\s+(.*)$/, + CLASS_DIRECTIVE_REGEXP = /(([\d\w\-_]+)(?:\:([^;]+))?;?)/, + MULTI_ROOT_TEMPLATE_ERROR = 'Template must have exactly one root element. was: ', + urlSanitizationWhitelist = /^\s*(https?|ftp|mailto|file):/; + + + /** + * @ngdoc function + * @name ng.$compileProvider#directive + * @methodOf ng.$compileProvider + * @function + * + * @description + * Register a new directives with the compiler. + * + * @param {string} name Name of the directive in camel-case. (ie ngBind which will match as + * ng-bind). + * @param {function} directiveFactory An injectable directive factroy function. See {@link guide/directive} for more + * info. + * @returns {ng.$compileProvider} Self for chaining. 
+ */ + this.directive = function registerDirective(name, directiveFactory) { + if (isString(name)) { + assertArg(directiveFactory, 'directive'); + if (!hasDirectives.hasOwnProperty(name)) { + hasDirectives[name] = []; + $provide.factory(name + Suffix, ['$injector', '$exceptionHandler', + function($injector, $exceptionHandler) { + var directives = []; + forEach(hasDirectives[name], function(directiveFactory) { + try { + var directive = $injector.invoke(directiveFactory); + if (isFunction(directive)) { + directive = { compile: valueFn(directive) }; + } else if (!directive.compile && directive.link) { + directive.compile = valueFn(directive.link); + } + directive.priority = directive.priority || 0; + directive.name = directive.name || name; + directive.require = directive.require || (directive.controller && directive.name); + directive.restrict = directive.restrict || 'A'; + directives.push(directive); + } catch (e) { + $exceptionHandler(e); + } + }); + return directives; + }]); + } + hasDirectives[name].push(directiveFactory); + } else { + forEach(name, reverseParams(registerDirective)); + } + return this; + }; + + + /** + * @ngdoc function + * @name ng.$compileProvider#urlSanitizationWhitelist + * @methodOf ng.$compileProvider + * @function + * + * @description + * Retrieves or overrides the default regular expression that is used for whitelisting of safe + * urls during a[href] sanitization. + * + * The sanitization is a security measure aimed at prevent XSS attacks via html links. + * + * Any url about to be assigned to a[href] via data-binding is first normalized and turned into an + * absolute url. Afterwards the url is matched against the `urlSanitizationWhitelist` regular + * expression. If a match is found the original url is written into the dom. Otherwise the + * absolute url is prefixed with `'unsafe:'` string and only then it is written into the DOM. + * + * @param {RegExp=} regexp New regexp to whitelist urls with. 
+ * @returns {RegExp|ng.$compileProvider} Current RegExp if called without value or self for + * chaining otherwise. + */ + this.urlSanitizationWhitelist = function(regexp) { + if (isDefined(regexp)) { + urlSanitizationWhitelist = regexp; + return this; + } + return urlSanitizationWhitelist; + }; + + + this.$get = [ + '$injector', '$interpolate', '$exceptionHandler', '$http', '$templateCache', '$parse', + '$controller', '$rootScope', '$document', + function($injector, $interpolate, $exceptionHandler, $http, $templateCache, $parse, + $controller, $rootScope, $document) { + + var Attributes = function(element, attr) { + this.$$element = element; + this.$attr = attr || {}; + }; + + Attributes.prototype = { + $normalize: directiveNormalize, + + + /** + * Set a normalized attribute on the element in a way such that all directives + * can share the attribute. This function properly handles boolean attributes. + * @param {string} key Normalized key. (ie ngAttribute) + * @param {string|boolean} value The value to set. If `null` attribute will be deleted. + * @param {boolean=} writeAttr If false, does not write the value to DOM element attribute. + * Defaults to true. + * @param {string=} attrName Optional none normalized name. Defaults to key. 
+ */ + $set: function(key, value, writeAttr, attrName) { + var booleanKey = getBooleanAttrName(this.$$element[0], key), + $$observers = this.$$observers, + normalizedVal; + + if (booleanKey) { + this.$$element.prop(key, value); + attrName = booleanKey; + } + + this[key] = value; + + // translate normalized key to actual key + if (attrName) { + this.$attr[key] = attrName; + } else { + attrName = this.$attr[key]; + if (!attrName) { + this.$attr[key] = attrName = snake_case(key, '-'); + } + } + + + // sanitize a[href] values + if (nodeName_(this.$$element[0]) === 'A' && key === 'href') { + urlSanitizationNode.setAttribute('href', value); + + // href property always returns normalized absolute url, so we can match against that + normalizedVal = urlSanitizationNode.href; + if (!normalizedVal.match(urlSanitizationWhitelist)) { + this[key] = value = 'unsafe:' + normalizedVal; + } + } + + + if (writeAttr !== false) { + if (value === null || value === undefined) { + this.$$element.removeAttr(attrName); + } else { + this.$$element.attr(attrName, value); + } + } + + // fire observers + $$observers && forEach($$observers[key], function(fn) { + try { + fn(value); + } catch (e) { + $exceptionHandler(e); + } + }); + }, + + + /** + * Observe an interpolated attribute. + * The observer will never be called, if given attribute is not interpolated. + * + * @param {string} key Normalized key. (ie ngAttribute) . + * @param {function(*)} fn Function that will be called whenever the attribute value changes. + * @returns {function(*)} the `fn` Function passed in. 
+ */ + $observe: function(key, fn) { + var attrs = this, + $$observers = (attrs.$$observers || (attrs.$$observers = {})), + listeners = ($$observers[key] || ($$observers[key] = [])); + + listeners.push(fn); + $rootScope.$evalAsync(function() { + if (!listeners.$$inter) { + // no one registered attribute interpolation function, so lets call it manually + fn(attrs[key]); + } + }); + return fn; + } + }; + + var urlSanitizationNode = $document[0].createElement('a'), + startSymbol = $interpolate.startSymbol(), + endSymbol = $interpolate.endSymbol(), + denormalizeTemplate = (startSymbol == '{{' || endSymbol == '}}') + ? identity + : function denormalizeTemplate(template) { + return template.replace(/\{\{/g, startSymbol).replace(/}}/g, endSymbol); + }; + + + return compile; + + //================================ + + function compile($compileNodes, transcludeFn, maxPriority) { + if (!($compileNodes instanceof jqLite)) { + // jquery always rewraps, whereas we need to preserve the original selector so that we can modify it. + $compileNodes = jqLite($compileNodes); + } + // We can not compile top level text elements since text nodes can be merged and we will + // not be able to attach scope data to them, so we will wrap them in + forEach($compileNodes, function(node, index){ + if (node.nodeType == 3 /* text node */ && node.nodeValue.match(/\S+/) /* non-empty */ ) { + $compileNodes[index] = jqLite(node).wrap('').parent()[0]; + } + }); + var compositeLinkFn = compileNodes($compileNodes, transcludeFn, $compileNodes, maxPriority); + return function publicLinkFn(scope, cloneConnectFn){ + assertArg(scope, 'scope'); + // important!!: we must call our jqLite.clone() since the jQuery one is trying to be smart + // and sometimes changes the structure of the DOM. + var $linkNode = cloneConnectFn + ? JQLitePrototype.clone.call($compileNodes) // IMPORTANT!!! + : $compileNodes; + + // Attach scope only to non-text nodes. 
+ for(var i = 0, ii = $linkNode.length; i + addDirective(directives, + directiveNormalize(nodeName_(node).toLowerCase()), 'E', maxPriority); + + // iterate over the attributes + for (var attr, name, nName, value, nAttrs = node.attributes, + j = 0, jj = nAttrs && nAttrs.length; j < jj; j++) { + attr = nAttrs[j]; + if (attr.specified) { + name = attr.name; + nName = directiveNormalize(name.toLowerCase()); + attrsMap[nName] = name; + attrs[nName] = value = trim((msie && name == 'href') + ? decodeURIComponent(node.getAttribute(name, 2)) + : attr.value); + if (getBooleanAttrName(node, nName)) { + attrs[nName] = true; // presence means true + } + addAttrInterpolateDirective(node, directives, value, nName); + addDirective(directives, nName, 'A', maxPriority); + } + } + + // use class as directive + className = node.className; + if (isString(className) && className !== '') { + while (match = CLASS_DIRECTIVE_REGEXP.exec(className)) { + nName = directiveNormalize(match[2]); + if (addDirective(directives, nName, 'C', maxPriority)) { + attrs[nName] = trim(match[3]); + } + className = className.substr(match.index + match[0].length); + } + } + break; + case 3: /* Text Node */ + addTextInterpolateDirective(directives, node.nodeValue); + break; + case 8: /* Comment */ + try { + match = COMMENT_DIRECTIVE_REGEXP.exec(node.nodeValue); + if (match) { + nName = directiveNormalize(match[1]); + if (addDirective(directives, nName, 'M', maxPriority)) { + attrs[nName] = trim(match[2]); + } + } + } catch (e) { + // turns out that under some circumstances IE9 throws errors when one attempts to read comment's node value. + // Just ignore it and continue. (Can't seem to reproduce in test case.) + } + break; + } + + directives.sort(byPriority); + return directives; + } + + + /** + * Once the directives have been collected, their compile functions are executed. 
This method + * is responsible for inlining directive templates as well as terminating the application + * of the directives if the terminal directive has been reached. + * + * @param {Array} directives Array of collected directives to execute their compile function. + * this needs to be pre-sorted by priority order. + * @param {Node} compileNode The raw DOM node to apply the compile functions to + * @param {Object} templateAttrs The shared attribute function + * @param {function(angular.Scope[, cloneAttachFn]} transcludeFn A linking function, where the + * scope argument is auto-generated to the new child of the transcluded parent scope. + * @param {JQLite} jqCollection If we are working on the root of the compile tree then this + * argument has the root jqLite array so that we can replace nodes on it. + * @returns linkFn + */ + function applyDirectivesToNode(directives, compileNode, templateAttrs, transcludeFn, jqCollection) { + var terminalPriority = -Number.MAX_VALUE, + preLinkFns = [], + postLinkFns = [], + newScopeDirective = null, + newIsolateScopeDirective = null, + templateDirective = null, + $compileNode = templateAttrs.$$element = jqLite(compileNode), + directive, + directiveName, + $template, + transcludeDirective, + childTranscludeFn = transcludeFn, + controllerDirectives, + linkFn, + directiveValue; + + // executes all directives on the current element + for(var i = 0, ii = directives.length; i < ii; i++) { + directive = directives[i]; + $template = undefined; + + if (terminalPriority > directive.priority) { + break; // prevent further processing of directives + } + + if (directiveValue = directive.scope) { + assertNoDuplicate('isolated scope', newIsolateScopeDirective, directive, $compileNode); + if (isObject(directiveValue)) { + safeAddClass($compileNode, 'ng-isolate-scope'); + newIsolateScopeDirective = directive; + } + safeAddClass($compileNode, 'ng-scope'); + newScopeDirective = newScopeDirective || directive; + } + + directiveName = 
directive.name; + + if (directiveValue = directive.controller) { + controllerDirectives = controllerDirectives || {}; + assertNoDuplicate("'" + directiveName + "' controller", + controllerDirectives[directiveName], directive, $compileNode); + controllerDirectives[directiveName] = directive; + } + + if (directiveValue = directive.transclude) { + assertNoDuplicate('transclusion', transcludeDirective, directive, $compileNode); + transcludeDirective = directive; + terminalPriority = directive.priority; + if (directiveValue == 'element') { + $template = jqLite(compileNode); + $compileNode = templateAttrs.$$element = + jqLite(document.createComment(' ' + directiveName + ': ' + templateAttrs[directiveName] + ' ')); + compileNode = $compileNode[0]; + replaceWith(jqCollection, jqLite($template[0]), compileNode); + childTranscludeFn = compile($template, transcludeFn, terminalPriority); + } else { + $template = jqLite(JQLiteClone(compileNode)).contents(); + $compileNode.html(''); // clear contents + childTranscludeFn = compile($template, transcludeFn); + } + } + + if ((directiveValue = directive.template)) { + assertNoDuplicate('template', templateDirective, directive, $compileNode); + templateDirective = directive; + directiveValue = denormalizeTemplate(directiveValue); + + if (directive.replace) { + $template = jqLite('
' + + trim(directiveValue) + + '
').contents(); + compileNode = $template[0]; + + if ($template.length != 1 || compileNode.nodeType !== 1) { + throw new Error(MULTI_ROOT_TEMPLATE_ERROR + directiveValue); + } + + replaceWith(jqCollection, $compileNode, compileNode); + + var newTemplateAttrs = {$attr: {}}; + + // combine directives from the original node and from the template: + // - take the array of directives for this element + // - split it into two parts, those that were already applied and those that weren't + // - collect directives from the template, add them to the second group and sort them + // - append the second group with new directives to the first group + directives = directives.concat( + collectDirectives( + compileNode, + directives.splice(i + 1, directives.length - (i + 1)), + newTemplateAttrs + ) + ); + mergeTemplateAttributes(templateAttrs, newTemplateAttrs); + + ii = directives.length; + } else { + $compileNode.html(directiveValue); + } + } + + if (directive.templateUrl) { + assertNoDuplicate('template', templateDirective, directive, $compileNode); + templateDirective = directive; + nodeLinkFn = compileTemplateUrl(directives.splice(i, directives.length - i), + nodeLinkFn, $compileNode, templateAttrs, jqCollection, directive.replace, + childTranscludeFn); + ii = directives.length; + } else if (directive.compile) { + try { + linkFn = directive.compile($compileNode, templateAttrs, childTranscludeFn); + if (isFunction(linkFn)) { + addLinkFns(null, linkFn); + } else if (linkFn) { + addLinkFns(linkFn.pre, linkFn.post); + } + } catch (e) { + $exceptionHandler(e, startingTag($compileNode)); + } + } + + if (directive.terminal) { + nodeLinkFn.terminal = true; + terminalPriority = Math.max(terminalPriority, directive.priority); + } + + } + + nodeLinkFn.scope = newScopeDirective && newScopeDirective.scope; + nodeLinkFn.transclude = transcludeDirective && childTranscludeFn; + + // might be normal or delayed nodeLinkFn depending on if templateUrl is present + return nodeLinkFn; + + 
//////////////////// + + function addLinkFns(pre, post) { + if (pre) { + pre.require = directive.require; + preLinkFns.push(pre); + } + if (post) { + post.require = directive.require; + postLinkFns.push(post); + } + } + + + function getControllers(require, $element) { + var value, retrievalMethod = 'data', optional = false; + if (isString(require)) { + while((value = require.charAt(0)) == '^' || value == '?') { + require = require.substr(1); + if (value == '^') { + retrievalMethod = 'inheritedData'; + } + optional = optional || value == '?'; + } + value = $element[retrievalMethod]('$' + require + 'Controller'); + if (!value && !optional) { + throw Error("No controller: " + require); + } + return value; + } else if (isArray(require)) { + value = []; + forEach(require, function(require) { + value.push(getControllers(require, $element)); + }); + } + return value; + } + + + function nodeLinkFn(childLinkFn, scope, linkNode, $rootElement, boundTranscludeFn) { + var attrs, $element, i, ii, linkFn, controller; + + if (compileNode === linkNode) { + attrs = templateAttrs; + } else { + attrs = shallowCopy(templateAttrs, new Attributes(jqLite(linkNode), templateAttrs.$attr)); + } + $element = attrs.$$element; + + if (newIsolateScopeDirective) { + var LOCAL_REGEXP = /^\s*([@=&])\s*(\w*)\s*$/; + + var parentScope = scope.$parent || scope; + + forEach(newIsolateScopeDirective.scope, function(definiton, scopeName) { + var match = definiton.match(LOCAL_REGEXP) || [], + attrName = match[2]|| scopeName, + mode = match[1], // @, =, or & + lastValue, + parentGet, parentSet; + + scope.$$isolateBindings[scopeName] = mode + attrName; + + switch (mode) { + + case '@': { + attrs.$observe(attrName, function(value) { + scope[scopeName] = value; + }); + attrs.$$observers[attrName].$$scope = parentScope; + break; + } + + case '=': { + parentGet = $parse(attrs[attrName]); + parentSet = parentGet.assign || function() { + // reset the change, or we will throw this exception on every $digest + 
lastValue = scope[scopeName] = parentGet(parentScope); + throw Error(NON_ASSIGNABLE_MODEL_EXPRESSION + attrs[attrName] + + ' (directive: ' + newIsolateScopeDirective.name + ')'); + }; + lastValue = scope[scopeName] = parentGet(parentScope); + scope.$watch(function parentValueWatch() { + var parentValue = parentGet(parentScope); + + if (parentValue !== scope[scopeName]) { + // we are out of sync and need to copy + if (parentValue !== lastValue) { + // parent changed and it has precedence + lastValue = scope[scopeName] = parentValue; + } else { + // if the parent can be assigned then do so + parentSet(parentScope, parentValue = lastValue = scope[scopeName]); + } + } + return parentValue; + }); + break; + } + + case '&': { + parentGet = $parse(attrs[attrName]); + scope[scopeName] = function(locals) { + return parentGet(parentScope, locals); + }; + break; + } + + default: { + throw Error('Invalid isolate scope definition for directive ' + + newIsolateScopeDirective.name + ': ' + definiton); + } + } + }); + } + + if (controllerDirectives) { + forEach(controllerDirectives, function(directive) { + var locals = { + $scope: scope, + $element: $element, + $attrs: attrs, + $transclude: boundTranscludeFn + }; + + controller = directive.controller; + if (controller == '@') { + controller = attrs[directive.name]; + } + + $element.data( + '$' + directive.name + 'Controller', + $controller(controller, locals)); + }); + } + + // PRELINKING + for(i = 0, ii = preLinkFns.length; i < ii; i++) { + try { + linkFn = preLinkFns[i]; + linkFn(scope, $element, attrs, + linkFn.require && getControllers(linkFn.require, $element)); + } catch (e) { + $exceptionHandler(e, startingTag($element)); + } + } + + // RECURSION + childLinkFn && childLinkFn(scope, linkNode.childNodes, undefined, boundTranscludeFn); + + // POSTLINKING + for(i = 0, ii = postLinkFns.length; i < ii; i++) { + try { + linkFn = postLinkFns[i]; + linkFn(scope, $element, attrs, + linkFn.require && getControllers(linkFn.require, 
$element)); + } catch (e) { + $exceptionHandler(e, startingTag($element)); + } + } + } + } + + + /** + * looks up the directive and decorates it with exception handling and proper parameters. We + * call this the boundDirective. + * + * @param {string} name name of the directive to look up. + * @param {string} location The directive must be found in specific format. + * String containing any of theses characters: + * + * * `E`: element name + * * `A': attribute + * * `C`: class + * * `M`: comment + * @returns true if directive was added. + */ + function addDirective(tDirectives, name, location, maxPriority) { + var match = false; + if (hasDirectives.hasOwnProperty(name)) { + for(var directive, directives = $injector.get(name + Suffix), + i = 0, ii = directives.length; i directive.priority) && + directive.restrict.indexOf(location) != -1) { + tDirectives.push(directive); + match = true; + } + } catch(e) { $exceptionHandler(e); } + } + } + return match; + } + + + /** + * When the element is replaced with HTML template then the new attributes + * on the template need to be merged with the existing attributes in the DOM. + * The desired effect is to have both of the attributes present. + * + * @param {object} dst destination attributes (original DOM) + * @param {object} src source attributes (from the directive template) + */ + function mergeTemplateAttributes(dst, src) { + var srcAttr = src.$attr, + dstAttr = dst.$attr, + $element = dst.$$element; + + // reapply the old attributes to the new element + forEach(dst, function(value, key) { + if (key.charAt(0) != '$') { + if (src[key]) { + value += (key === 'style' ? ';' : ' ') + src[key]; + } + dst.$set(key, value, true, srcAttr[key]); + } + }); + + // copy the new attributes on the old attrs object + forEach(src, function(value, key) { + if (key == 'class') { + safeAddClass($element, value); + dst['class'] = (dst['class'] ? 
dst['class'] + ' ' : '') + value; + } else if (key == 'style') { + $element.attr('style', $element.attr('style') + ';' + value); + } else if (key.charAt(0) != '$' && !dst.hasOwnProperty(key)) { + dst[key] = value; + dstAttr[key] = srcAttr[key]; + } + }); + } + + + function compileTemplateUrl(directives, beforeTemplateNodeLinkFn, $compileNode, tAttrs, + $rootElement, replace, childTranscludeFn) { + var linkQueue = [], + afterTemplateNodeLinkFn, + afterTemplateChildLinkFn, + beforeTemplateCompileNode = $compileNode[0], + origAsyncDirective = directives.shift(), + // The fact that we have to copy and patch the directive seems wrong! + derivedSyncDirective = extend({}, origAsyncDirective, { + controller: null, templateUrl: null, transclude: null, scope: null + }); + + $compileNode.html(''); + + $http.get(origAsyncDirective.templateUrl, {cache: $templateCache}). + success(function(content) { + var compileNode, tempTemplateAttrs, $template; + + content = denormalizeTemplate(content); + + if (replace) { + $template = jqLite('
' + trim(content) + '
').contents(); + compileNode = $template[0]; + + if ($template.length != 1 || compileNode.nodeType !== 1) { + throw new Error(MULTI_ROOT_TEMPLATE_ERROR + content); + } + + tempTemplateAttrs = {$attr: {}}; + replaceWith($rootElement, $compileNode, compileNode); + collectDirectives(compileNode, directives, tempTemplateAttrs); + mergeTemplateAttributes(tAttrs, tempTemplateAttrs); + } else { + compileNode = beforeTemplateCompileNode; + $compileNode.html(content); + } + + directives.unshift(derivedSyncDirective); + afterTemplateNodeLinkFn = applyDirectivesToNode(directives, compileNode, tAttrs, childTranscludeFn); + afterTemplateChildLinkFn = compileNodes($compileNode[0].childNodes, childTranscludeFn); + + + while(linkQueue.length) { + var controller = linkQueue.pop(), + linkRootElement = linkQueue.pop(), + beforeTemplateLinkNode = linkQueue.pop(), + scope = linkQueue.pop(), + linkNode = compileNode; + + if (beforeTemplateLinkNode !== beforeTemplateCompileNode) { + // it was cloned therefore we have to clone as well. + linkNode = JQLiteClone(compileNode); + replaceWith(linkRootElement, jqLite(beforeTemplateLinkNode), linkNode); + } + + afterTemplateNodeLinkFn(function() { + beforeTemplateNodeLinkFn(afterTemplateChildLinkFn, scope, linkNode, $rootElement, controller); + }, scope, linkNode, $rootElement, controller); + } + linkQueue = null; + }). + error(function(response, code, headers, config) { + throw Error('Failed to load template: ' + config.url); + }); + + return function delayedNodeLinkFn(ignoreChildLinkFn, scope, node, rootElement, controller) { + if (linkQueue) { + linkQueue.push(scope); + linkQueue.push(node); + linkQueue.push(rootElement); + linkQueue.push(controller); + } else { + afterTemplateNodeLinkFn(function() { + beforeTemplateNodeLinkFn(afterTemplateChildLinkFn, scope, node, rootElement, controller); + }, scope, node, rootElement, controller); + } + }; + } + + + /** + * Sorting function for bound directives. 
+ */ + function byPriority(a, b) { + return b.priority - a.priority; + } + + + function assertNoDuplicate(what, previousDirective, directive, element) { + if (previousDirective) { + throw Error('Multiple directives [' + previousDirective.name + ', ' + + directive.name + '] asking for ' + what + ' on: ' + startingTag(element)); + } + } + + + function addTextInterpolateDirective(directives, text) { + var interpolateFn = $interpolate(text, true); + if (interpolateFn) { + directives.push({ + priority: 0, + compile: valueFn(function textInterpolateLinkFn(scope, node) { + var parent = node.parent(), + bindings = parent.data('$binding') || []; + bindings.push(interpolateFn); + safeAddClass(parent.data('$binding', bindings), 'ng-binding'); + scope.$watch(interpolateFn, function interpolateFnWatchAction(value) { + node[0].nodeValue = value; + }); + }) + }); + } + } + + + function addAttrInterpolateDirective(node, directives, value, name) { + var interpolateFn = $interpolate(value, true); + + // no interpolation found -> ignore + if (!interpolateFn) return; + + + directives.push({ + priority: 100, + compile: valueFn(function attrInterpolateLinkFn(scope, element, attr) { + var $$observers = (attr.$$observers || (attr.$$observers = {})); + + if (name === 'class') { + // we need to interpolate classes again, in the case the element was replaced + // and therefore the two class attrs got merged - we want to interpolate the result + interpolateFn = $interpolate(attr[name], true); + } + + attr[name] = undefined; + ($$observers[name] || ($$observers[name] = [])).$$inter = true; + (attr.$$observers && attr.$$observers[name].$$scope || scope). + $watch(interpolateFn, function interpolateFnWatchAction(value) { + attr.$set(name, value); + }); + }) + }); + } + + + /** + * This is a special jqLite.replaceWith, which can replace items which + * have no parents, provided that the containing jqLite collection is provided. + * + * @param {JqLite=} $rootElement The root of the compile tree. 
Used so that we can replace nodes + * in the root of the tree. + * @param {JqLite} $element The jqLite element which we are going to replace. We keep the shell, + * but replace its DOM node reference. + * @param {Node} newNode The new DOM node. + */ + function replaceWith($rootElement, $element, newNode) { + var oldNode = $element[0], + parent = oldNode.parentNode, + i, ii; + + if ($rootElement) { + for(i = 0, ii = $rootElement.length; i < ii; i++) { + if ($rootElement[i] == oldNode) { + $rootElement[i] = newNode; + break; + } + } + } + + if (parent) { + parent.replaceChild(newNode, oldNode); + } + + newNode[jqLite.expando] = oldNode[jqLite.expando]; + $element[0] = newNode; + } + }]; +} + +var PREFIX_REGEXP = /^(x[\:\-_]|data[\:\-_])/i; +/** + * Converts all accepted directives format into proper directive name. + * All of these will become 'myDirective': + * my:DiRective + * my-directive + * x-my-directive + * data-my:directive + * + * Also there is special case for Moz prefix starting with upper case letter. + * @param name Name to normalize + */ +function directiveNormalize(name) { + return camelCase(name.replace(PREFIX_REGEXP, '')); +} + +/** + * @ngdoc object + * @name ng.$compile.directive.Attributes + * @description + * + * A shared object between directive compile / linking functions which contains normalized DOM element + * attributes. The the values reflect current binding state `{{ }}`. The normalization is needed + * since all of these are treated as equivalent in Angular: + * + * + */ + +/** + * @ngdoc property + * @name ng.$compile.directive.Attributes#$attr + * @propertyOf ng.$compile.directive.Attributes + * @returns {object} A map of DOM element attribute names to the normalized name. This is + * needed to do reverse lookup from normalized name back to actual name. 
+ */ + + +/** + * @ngdoc function + * @name ng.$compile.directive.Attributes#$set + * @methodOf ng.$compile.directive.Attributes + * @function + * + * @description + * Set DOM element attribute value. + * + * + * @param {string} name Normalized element attribute name of the property to modify. The name is + * revers translated using the {@link ng.$compile.directive.Attributes#$attr $attr} + * property to the original name. + * @param {string} value Value to set the attribute to. + */ + + + +/** + * Closure compiler type information + */ + +function nodesetLinkingFn( + /* angular.Scope */ scope, + /* NodeList */ nodeList, + /* Element */ rootElement, + /* function(Function) */ boundTranscludeFn +){} + +function directiveLinkingFn( + /* nodesetLinkingFn */ nodesetLinkingFn, + /* angular.Scope */ scope, + /* Node */ node, + /* Element */ rootElement, + /* function(Function) */ boundTranscludeFn +){} + +/** + * @ngdoc object + * @name ng.$controllerProvider + * @description + * The {@link ng.$controller $controller service} is used by Angular to create new + * controllers. + * + * This provider allows controller registration via the + * {@link ng.$controllerProvider#register register} method. + */ +function $ControllerProvider() { + var controllers = {}; + + + /** + * @ngdoc function + * @name ng.$controllerProvider#register + * @methodOf ng.$controllerProvider + * @param {string} name Controller name + * @param {Function|Array} constructor Controller constructor fn (optionally decorated with DI + * annotations in the array notation). 
+ */ + this.register = function(name, constructor) { + if (isObject(name)) { + extend(controllers, name) + } else { + controllers[name] = constructor; + } + }; + + + this.$get = ['$injector', '$window', function($injector, $window) { + + /** + * @ngdoc function + * @name ng.$controller + * @requires $injector + * + * @param {Function|string} constructor If called with a function then it's considered to be the + * controller constructor function. Otherwise it's considered to be a string which is used + * to retrieve the controller constructor using the following steps: + * + * * check if a controller with given name is registered via `$controllerProvider` + * * check if evaluating the string on the current scope returns a constructor + * * check `window[constructor]` on the global `window` object + * + * @param {Object} locals Injection locals for Controller. + * @return {Object} Instance of given controller. + * + * @description + * `$controller` service is responsible for instantiating controllers. + * + * It's just a simple call to {@link AUTO.$injector $injector}, but extracted into + * a service, so that one can override this service with {@link https://gist.github.com/1649788 + * BC version}. + */ + return function(constructor, locals) { + if(isString(constructor)) { + var name = constructor; + constructor = controllers.hasOwnProperty(name) + ? controllers[name] + : getter(locals.$scope, name, true) || getter($window, name, true); + + assertArgFn(constructor, name, true); + } + + return $injector.instantiate(constructor, locals); + }; + }]; +} + +/** + * @ngdoc object + * @name ng.$document + * @requires $window + * + * @description + * A {@link angular.element jQuery (lite)}-wrapped reference to the browser's `window.document` + * element. 
+ */ +function $DocumentProvider(){ + this.$get = ['$window', function(window){ + return jqLite(window.document); + }]; +} + +/** + * @ngdoc function + * @name ng.$exceptionHandler + * @requires $log + * + * @description + * Any uncaught exception in angular expressions is delegated to this service. + * The default implementation simply delegates to `$log.error` which logs it into + * the browser console. + * + * In unit tests, if `angular-mocks.js` is loaded, this service is overridden by + * {@link ngMock.$exceptionHandler mock $exceptionHandler} which aids in testing. + * + * @param {Error} exception Exception associated with the error. + * @param {string=} cause optional information about the context in which + * the error was thrown. + * + */ +function $ExceptionHandlerProvider() { + this.$get = ['$log', function($log) { + return function(exception, cause) { + $log.error.apply($log, arguments); + }; + }]; +} + +/** + * @ngdoc object + * @name ng.$interpolateProvider + * @function + * + * @description + * + * Used for configuring the interpolation markup. Defaults to `{{` and `}}`. + */ +function $InterpolateProvider() { + var startSymbol = '{{'; + var endSymbol = '}}'; + + /** + * @ngdoc method + * @name ng.$interpolateProvider#startSymbol + * @methodOf ng.$interpolateProvider + * @description + * Symbol to denote start of expression in the interpolated string. Defaults to `{{`. + * + * @param {string=} value new value to set the starting symbol to. + * @returns {string|self} Returns the symbol when used as getter and self if used as setter. + */ + this.startSymbol = function(value){ + if (value) { + startSymbol = value; + return this; + } else { + return startSymbol; + } + }; + + /** + * @ngdoc method + * @name ng.$interpolateProvider#endSymbol + * @methodOf ng.$interpolateProvider + * @description + * Symbol to denote the end of expression in the interpolated string. Defaults to `}}`. + * + * @param {string=} value new value to set the ending symbol to. 
+ * @returns {string|self} Returns the symbol when used as getter and self if used as setter. + */ + this.endSymbol = function(value){ + if (value) { + endSymbol = value; + return this; + } else { + return endSymbol; + } + }; + + + this.$get = ['$parse', function($parse) { + var startSymbolLength = startSymbol.length, + endSymbolLength = endSymbol.length; + + /** + * @ngdoc function + * @name ng.$interpolate + * @function + * + * @requires $parse + * + * @description + * + * Compiles a string with markup into an interpolation function. This service is used by the + * HTML {@link ng.$compile $compile} service for data binding. See + * {@link ng.$interpolateProvider $interpolateProvider} for configuring the + * interpolation markup. + * + * +
+         var $interpolate = ...; // injected
+         var exp = $interpolate('Hello {{name}}!');
+         expect(exp({name:'Angular'}).toEqual('Hello Angular!');
+       
+ * + * + * @param {string} text The text with markup to interpolate. + * @param {boolean=} mustHaveExpression if set to true then the interpolation string must have + * embedded expression in order to return an interpolation function. Strings with no + * embedded expression will return null for the interpolation function. + * @returns {function(context)} an interpolation function which is used to compute the interpolated + * string. The function has these parameters: + * + * * `context`: an object against which any expressions embedded in the strings are evaluated + * against. + * + */ + function $interpolate(text, mustHaveExpression) { + var startIndex, + endIndex, + index = 0, + parts = [], + length = text.length, + hasInterpolation = false, + fn, + exp, + concat = []; + + while(index < length) { + if ( ((startIndex = text.indexOf(startSymbol, index)) != -1) && + ((endIndex = text.indexOf(endSymbol, startIndex + startSymbolLength)) != -1) ) { + (index != startIndex) && parts.push(text.substring(index, startIndex)); + parts.push(fn = $parse(exp = text.substring(startIndex + startSymbolLength, endIndex))); + fn.exp = exp; + index = endIndex + endSymbolLength; + hasInterpolation = true; + } else { + // we did not find anything, so we have to add the remainder to the parts array + (index != length) && parts.push(text.substring(index)); + index = length; + } + } + + if (!(length = parts.length)) { + // we added, nothing, must have been an empty string. 
+ parts.push(''); + length = 1; + } + + if (!mustHaveExpression || hasInterpolation) { + concat.length = length; + fn = function(context) { + for(var i = 0, ii = length, part; i html5 url + } else { + return composeProtocolHostPort(match.protocol, match.host, match.port) + + pathPrefixFromBase(basePath) + match.hash.substr(hashPrefix.length); + } +} + + +function convertToHashbangUrl(url, basePath, hashPrefix) { + var match = matchUrl(url); + + // already hashbang url + if (decodeURIComponent(match.path) == basePath && !isUndefined(match.hash) && + match.hash.indexOf(hashPrefix) === 0) { + return url; + // convert html5 url -> hashbang url + } else { + var search = match.search && '?' + match.search || '', + hash = match.hash && '#' + match.hash || '', + pathPrefix = pathPrefixFromBase(basePath), + path = match.path.substr(pathPrefix.length); + + if (match.path.indexOf(pathPrefix) !== 0) { + throw Error('Invalid url "' + url + '", missing path prefix "' + pathPrefix + '" !'); + } + + return composeProtocolHostPort(match.protocol, match.host, match.port) + basePath + + '#' + hashPrefix + path + search + hash; + } +} + + +/** + * LocationUrl represents an url + * This object is exposed as $location service when HTML5 mode is enabled and supported + * + * @constructor + * @param {string} url HTML5 url + * @param {string} pathPrefix + */ +function LocationUrl(url, pathPrefix, appBaseUrl) { + pathPrefix = pathPrefix || ''; + + /** + * Parse given html5 (regular) url string into properties + * @param {string} newAbsoluteUrl HTML5 url + * @private + */ + this.$$parse = function(newAbsoluteUrl) { + var match = matchUrl(newAbsoluteUrl, this); + + if (match.path.indexOf(pathPrefix) !== 0) { + throw Error('Invalid url "' + newAbsoluteUrl + '", missing path prefix "' + pathPrefix + '" !'); + } + + this.$$path = decodeURIComponent(match.path.substr(pathPrefix.length)); + this.$$search = parseKeyValue(match.search); + this.$$hash = match.hash && decodeURIComponent(match.hash) || 
''; + + this.$$compose(); + }; + + /** + * Compose url and update `absUrl` property + * @private + */ + this.$$compose = function() { + var search = toKeyValue(this.$$search), + hash = this.$$hash ? '#' + encodeUriSegment(this.$$hash) : ''; + + this.$$url = encodePath(this.$$path) + (search ? '?' + search : '') + hash; + this.$$absUrl = composeProtocolHostPort(this.$$protocol, this.$$host, this.$$port) + + pathPrefix + this.$$url; + }; + + + this.$$rewriteAppUrl = function(absoluteLinkUrl) { + if(absoluteLinkUrl.indexOf(appBaseUrl) == 0) { + return absoluteLinkUrl; + } + } + + + this.$$parse(url); +} + + +/** + * LocationHashbangUrl represents url + * This object is exposed as $location service when html5 history api is disabled or not supported + * + * @constructor + * @param {string} url Legacy url + * @param {string} hashPrefix Prefix for hash part (containing path and search) + */ +function LocationHashbangUrl(url, hashPrefix, appBaseUrl) { + var basePath; + + /** + * Parse given hashbang url into properties + * @param {string} url Hashbang url + * @private + */ + this.$$parse = function(url) { + var match = matchUrl(url, this); + + + if (match.hash && match.hash.indexOf(hashPrefix) !== 0) { + throw Error('Invalid url "' + url + '", missing hash prefix "' + hashPrefix + '" !'); + } + + basePath = match.path + (match.search ? '?' + match.search : ''); + match = HASH_MATCH.exec((match.hash || '').substr(hashPrefix.length)); + if (match[1]) { + this.$$path = (match[1].charAt(0) == '/' ? '' : '/') + decodeURIComponent(match[1]); + } else { + this.$$path = ''; + } + + this.$$search = parseKeyValue(match[3]); + this.$$hash = match[5] && decodeURIComponent(match[5]) || ''; + + this.$$compose(); + }; + + /** + * Compose hashbang url and update `absUrl` property + * @private + */ + this.$$compose = function() { + var search = toKeyValue(this.$$search), + hash = this.$$hash ? '#' + encodeUriSegment(this.$$hash) : ''; + + this.$$url = encodePath(this.$$path) + (search ? 
'?' + search : '') + hash; + this.$$absUrl = composeProtocolHostPort(this.$$protocol, this.$$host, this.$$port) + + basePath + (this.$$url ? '#' + hashPrefix + this.$$url : ''); + }; + + this.$$rewriteAppUrl = function(absoluteLinkUrl) { + if(absoluteLinkUrl.indexOf(appBaseUrl) == 0) { + return absoluteLinkUrl; + } + } + + + this.$$parse(url); +} + + +LocationUrl.prototype = { + + /** + * Has any change been replacing ? + * @private + */ + $$replace: false, + + /** + * @ngdoc method + * @name ng.$location#absUrl + * @methodOf ng.$location + * + * @description + * This method is getter only. + * + * Return full url representation with all segments encoded according to rules specified in + * {@link http://www.ietf.org/rfc/rfc3986.txt RFC 3986}. + * + * @return {string} full url + */ + absUrl: locationGetter('$$absUrl'), + + /** + * @ngdoc method + * @name ng.$location#url + * @methodOf ng.$location + * + * @description + * This method is getter / setter. + * + * Return url (e.g. `/path?a=b#hash`) when called without any parameter. + * + * Change path, search and hash, when called with parameter and return `$location`. + * + * @param {string=} url New url without base prefix (e.g. `/path?a=b#hash`) + * @return {string} url + */ + url: function(url, replace) { + if (isUndefined(url)) + return this.$$url; + + var match = PATH_MATCH.exec(url); + if (match[1]) this.path(decodeURIComponent(match[1])); + if (match[2] || match[1]) this.search(match[3] || ''); + this.hash(match[5] || '', replace); + + return this; + }, + + /** + * @ngdoc method + * @name ng.$location#protocol + * @methodOf ng.$location + * + * @description + * This method is getter only. + * + * Return protocol of current url. + * + * @return {string} protocol of current url + */ + protocol: locationGetter('$$protocol'), + + /** + * @ngdoc method + * @name ng.$location#host + * @methodOf ng.$location + * + * @description + * This method is getter only. + * + * Return host of current url. 
+ * + * @return {string} host of current url. + */ + host: locationGetter('$$host'), + + /** + * @ngdoc method + * @name ng.$location#port + * @methodOf ng.$location + * + * @description + * This method is getter only. + * + * Return port of current url. + * + * @return {Number} port + */ + port: locationGetter('$$port'), + + /** + * @ngdoc method + * @name ng.$location#path + * @methodOf ng.$location + * + * @description + * This method is getter / setter. + * + * Return path of current url when called without any parameter. + * + * Change path when called with parameter and return `$location`. + * + * Note: Path should always begin with forward slash (/), this method will add the forward slash + * if it is missing. + * + * @param {string=} path New path + * @return {string} path + */ + path: locationGetterSetter('$$path', function(path) { + return path.charAt(0) == '/' ? path : '/' + path; + }), + + /** + * @ngdoc method + * @name ng.$location#search + * @methodOf ng.$location + * + * @description + * This method is getter / setter. + * + * Return search part (as object) of current url when called without any parameter. + * + * Change search part when called with parameter and return `$location`. + * + * @param {string|object=} search New search params - string or hash object + * @param {string=} paramValue If `search` is a string, then `paramValue` will override only a + * single search parameter. If the value is `null`, the parameter will be deleted. + * + * @return {string} search + */ + search: function(search, paramValue) { + if (isUndefined(search)) + return this.$$search; + + if (isDefined(paramValue)) { + if (paramValue === null) { + delete this.$$search[search]; + } else { + this.$$search[search] = paramValue; + } + } else { + this.$$search = isString(search) ? 
parseKeyValue(search) : search; + } + + this.$$compose(); + return this; + }, + + /** + * @ngdoc method + * @name ng.$location#hash + * @methodOf ng.$location + * + * @description + * This method is getter / setter. + * + * Return hash fragment when called without any parameter. + * + * Change hash fragment when called with parameter and return `$location`. + * + * @param {string=} hash New hash fragment + * @return {string} hash + */ + hash: locationGetterSetter('$$hash', identity), + + /** + * @ngdoc method + * @name ng.$location#replace + * @methodOf ng.$location + * + * @description + * If called, all changes to $location during current `$digest` will be replacing current history + * record, instead of adding new one. + */ + replace: function() { + this.$$replace = true; + return this; + } +}; + +LocationHashbangUrl.prototype = inherit(LocationUrl.prototype); + +function LocationHashbangInHtml5Url(url, hashPrefix, appBaseUrl, baseExtra) { + LocationHashbangUrl.apply(this, arguments); + + + this.$$rewriteAppUrl = function(absoluteLinkUrl) { + if (absoluteLinkUrl.indexOf(appBaseUrl) == 0) { + return appBaseUrl + baseExtra + '#' + hashPrefix + absoluteLinkUrl.substr(appBaseUrl.length); + } + } +} + +LocationHashbangInHtml5Url.prototype = inherit(LocationHashbangUrl.prototype); + +function locationGetter(property) { + return function() { + return this[property]; + }; +} + + +function locationGetterSetter(property, preprocess) { + return function(value) { + if (isUndefined(value)) + return this[property]; + + this[property] = preprocess(value); + this.$$compose(); + + return this; + }; +} + + +/** + * @ngdoc object + * @name ng.$location + * + * @requires $browser + * @requires $sniffer + * @requires $rootElement + * + * @description + * The $location service parses the URL in the browser address bar (based on the + * {@link https://developer.mozilla.org/en/window.location window.location}) and makes the URL + * available to your application. 
Changes to the URL in the address bar are reflected into
+ * $location service and changes to $location are reflected into the browser address bar.
+ *
+ * **The $location service:**
+ *
+ * - Exposes the current URL in the browser address bar, so you can
+ *   - Watch and observe the URL.
+ *   - Change the URL.
+ * - Synchronizes the URL with the browser when the user
+ *   - Changes the address bar.
+ *   - Clicks the back or forward button (or clicks a History link).
+ *   - Clicks on a link.
+ * - Represents the URL object as a set of methods (protocol, host, port, path, search, hash).
+ *
+ * For more information see {@link guide/dev_guide.services.$location Developer Guide: Angular
+ * Services: Using $location}
+ */
+
+/**
+ * @ngdoc object
+ * @name ng.$locationProvider
+ * @description
+ * Use the `$locationProvider` to configure how the application deep linking paths are stored.
+ *
+ * The provider holds two settings (`hashPrefix`, default `''`, and `html5Mode`, default `false`)
+ * and its `$get` factory picks the `$location` implementation from them:
+ *
+ * - `html5Mode` on and `$sniffer.history` truthy: `LocationUrl`
+ * - `html5Mode` on and `$sniffer.history` falsy: `LocationHashbangInHtml5Url`
+ * - `html5Mode` off: `LocationHashbangUrl`
+ */
+function $LocationProvider(){
+  var hashPrefix = '',
+      html5Mode = false;
+
+  /**
+   * @ngdoc property
+   * @name ng.$locationProvider#hashPrefix
+   * @methodOf ng.$locationProvider
+   * @description
+   * Getter / setter for the hash prefix. Acts as a setter whenever `prefix` is defined.
+   * @param {string=} prefix Prefix for hash part (containing path and search)
+   * @returns {*} current value if used as getter or itself (chaining) if used as setter
+   */
+  this.hashPrefix = function(prefix) {
+    if (isDefined(prefix)) {
+      hashPrefix = prefix;
+      return this;
+    } else {
+      return hashPrefix;
+    }
+  };
+
+  /**
+   * @ngdoc property
+   * @name ng.$locationProvider#html5Mode
+   * @methodOf ng.$locationProvider
+   * @description
+   * Getter / setter for the html5 mode flag. Acts as a setter whenever `mode` is defined.
+   * @param {boolean=} mode Use HTML5 strategy if available.
+   * @returns {*} current value if used as getter or itself (chaining) if used as setter
+   */
+  this.html5Mode = function(mode) {
+    if (isDefined(mode)) {
+      html5Mode = mode;
+      return this;
+    } else {
+      return html5Mode;
+    }
+  };
+
+  // Factory for the $location service: builds the location object from the browser's current
+  // url, then wires it to link clicks, browser url changes and the scope digest.
+  this.$get = ['$rootScope', '$browser', '$sniffer', '$rootElement',
+      function( $rootScope, $browser, $sniffer, $rootElement) {
+    var $location,
+        basePath,
+        pathPrefix,
+        initUrl = $browser.url(),
+        initUrlParts = matchUrl(initUrl),
+        appBaseUrl;
+
+    if (html5Mode) {
+      // the application base is derived from the document's base href (defaulting to '/')
+      basePath = $browser.baseHref() || '/';
+      pathPrefix = pathPrefixFromBase(basePath);
+      appBaseUrl =
+          composeProtocolHostPort(initUrlParts.protocol, initUrlParts.host, initUrlParts.port) +
+          pathPrefix + '/';
+
+      if ($sniffer.history) {
+        $location = new LocationUrl(
+          convertToHtml5Url(initUrl, basePath, hashPrefix),
+          pathPrefix, appBaseUrl);
+      } else {
+        // html5 mode requested but $sniffer reports no history support: use the hashbang variant
+        $location = new LocationHashbangInHtml5Url(
+          convertToHashbangUrl(initUrl, basePath, hashPrefix),
+          hashPrefix, appBaseUrl, basePath.substr(pathPrefix.length + 1));
+      }
+    } else {
+      // hashbang mode: the application base is the initial url up to and including '#<prefix>/'
+      appBaseUrl =
+          composeProtocolHostPort(initUrlParts.protocol, initUrlParts.host, initUrlParts.port) +
+          (initUrlParts.path || '') +
+          (initUrlParts.search ? ('?' + initUrlParts.search) : '') +
+          '#' + hashPrefix + '/';
+
+      $location = new LocationHashbangUrl(initUrl, hashPrefix, appBaseUrl);
+    }
+
+    // Intercept clicks inside the root element and route in-app links through $location.
+    $rootElement.bind('click', function(event) {
+      // TODO(vojta): rewrite link when opening in new tab/window (in legacy browser)
+      // currently we open nice url link and redirect then
+
+      // leave modified clicks alone (which == 2 is presumably the middle button -- confirm)
+      if (event.ctrlKey || event.metaKey || event.which == 2) return;
+
+      var elm = jqLite(event.target);
+
+      // traverse the DOM up to find first A tag
+      while (lowercase(elm[0].nodeName) !== 'a') {
+        // ignore rewriting if no A tag (reached root element, or no parent - removed from document)
+        if (elm[0] === $rootElement[0] || !(elm = elm.parent())[0]) return;
+      }
+
+      // $$rewriteAppUrl returns undefined for urls that do not start with the app base url
+      var absHref = elm.prop('href'),
+          rewrittenUrl = $location.$$rewriteAppUrl(absHref);
+
+      if (absHref && !elm.attr('target') && rewrittenUrl) {
+        // update location manually
+        $location.$$parse(rewrittenUrl);
+        $rootScope.$apply();
+        event.preventDefault();
+        // hack to work around FF6 bug 684208 when scenario runner clicks on links
+        window.angular['ff-684208-preventDefault'] = true;
+      }
+    });
+
+
+    // rewrite hashbang url <> html5 url
+    if ($location.absUrl() != initUrl) {
+      $browser.url($location.absUrl(), true);
+    }
+
+    // update $location when $browser url changes
+    $browser.onUrlChange(function(newUrl) {
+      if ($location.absUrl() != newUrl) {
+        // a listener that prevents the default vetoes the change: put the old url back
+        if ($rootScope.$broadcast('$locationChangeStart', newUrl, $location.absUrl()).defaultPrevented) {
+          $browser.url($location.absUrl());
+          return;
+        }
+        $rootScope.$evalAsync(function() {
+          var oldUrl = $location.absUrl();
+
+          $location.$$parse(newUrl);
+          afterLocationChange(oldUrl);
+        });
+        // only start a digest when one is not already running
+        if (!$rootScope.$$phase) $rootScope.$digest();
+      }
+    });
+
+    // update browser
+    // changeCounter is the watched value: it is bumped on the first run and whenever the
+    // browser url and $location disagree.
+    var changeCounter = 0;
+    $rootScope.$watch(function $locationWatch() {
+      var oldUrl = $browser.url();
+      // captured now because $$replace is reset below, before the async callback runs
+      var currentReplace = $location.$$replace;
+
+      if (!changeCounter || oldUrl != $location.absUrl()) {
+        changeCounter++;
+        $rootScope.$evalAsync(function() {
+          if ($rootScope.$broadcast('$locationChangeStart', $location.absUrl(), oldUrl).
+              defaultPrevented) {
+            // change vetoed: roll $location back to what the browser shows
+            $location.$$parse(oldUrl);
+          } else {
+            $browser.url($location.absUrl(), currentReplace);
+            afterLocationChange(oldUrl);
+          }
+        });
+      }
+      $location.$$replace = false;
+
+      return changeCounter;
+    });
+
+    return $location;
+
+    // Broadcasts $locationChangeSuccess with the new absolute url and the previous one.
+    function afterLocationChange(oldUrl) {
+      $rootScope.$broadcast('$locationChangeSuccess', $location.absUrl(), oldUrl);
+    }
+}];
+}
+
+/**
+ * @ngdoc object
+ * @name ng.$log
+ * @requires $window
+ *
+ * @description
+ * Simple service for logging. Default implementation writes the message
+ * into the browser's console (if present).
+ *
+ * The main purpose of this service is to simplify debugging and troubleshooting.
+ *
+ * @example
+
+
+ function LogCtrl($scope, $log) {
+ $scope.$log = $log;
+ $scope.message = 'Hello World!';
+ }
+
+
+
+

Reload this page with open console, enter text and hit the log button...

+ Message: + + + + + +
+
+
+ */ + +function $LogProvider(){ + this.$get = ['$window', function($window){ + return { + /** + * @ngdoc method + * @name ng.$log#log + * @methodOf ng.$log + * + * @description + * Write a log message + */ + log: consoleLog('log'), + + /** + * @ngdoc method + * @name ng.$log#warn + * @methodOf ng.$log + * + * @description + * Write a warning message + */ + warn: consoleLog('warn'), + + /** + * @ngdoc method + * @name ng.$log#info + * @methodOf ng.$log + * + * @description + * Write an information message + */ + info: consoleLog('info'), + + /** + * @ngdoc method + * @name ng.$log#error + * @methodOf ng.$log + * + * @description + * Write an error message + */ + error: consoleLog('error') + }; + + function formatError(arg) { + if (arg instanceof Error) { + if (arg.stack) { + arg = (arg.message && arg.stack.indexOf(arg.message) === -1) + ? 'Error: ' + arg.message + '\n' + arg.stack + : arg.stack; + } else if (arg.sourceURL) { + arg = arg.message + '\n' + arg.sourceURL + ':' + arg.line; + } + } + return arg; + } + + function consoleLog(type) { + var console = $window.console || {}, + logFn = console[type] || console.log || noop; + + if (logFn.apply) { + return function() { + var args = []; + forEach(arguments, function(arg) { + args.push(formatError(arg)); + }); + return logFn.apply(console, args); + }; + } + + // we are IE which either doesn't have window.console => this is noop and we do nothing, + // or we are IE where console.log doesn't have apply so we log at least first 2 args + return function(arg1, arg2) { + logFn(arg1, arg2); + } + } + }]; +} + +var OPERATORS = { + 'null':function(){return null;}, + 'true':function(){return true;}, + 'false':function(){return false;}, + undefined:noop, + '+':function(self, locals, a,b){ + a=a(self, locals); b=b(self, locals); + if (isDefined(a)) { + if (isDefined(b)) { + return a + b; + } + return a; + } + return isDefined(b)?b:undefined;}, + '-':function(self, locals, a,b){a=a(self, locals); b=b(self, locals); return 
(isDefined(a)?a:0)-(isDefined(b)?b:0);}, + '*':function(self, locals, a,b){return a(self, locals)*b(self, locals);}, + '/':function(self, locals, a,b){return a(self, locals)/b(self, locals);}, + '%':function(self, locals, a,b){return a(self, locals)%b(self, locals);}, + '^':function(self, locals, a,b){return a(self, locals)^b(self, locals);}, + '=':noop, + '==':function(self, locals, a,b){return a(self, locals)==b(self, locals);}, + '!=':function(self, locals, a,b){return a(self, locals)!=b(self, locals);}, + '<':function(self, locals, a,b){return a(self, locals)':function(self, locals, a,b){return a(self, locals)>b(self, locals);}, + '<=':function(self, locals, a,b){return a(self, locals)<=b(self, locals);}, + '>=':function(self, locals, a,b){return a(self, locals)>=b(self, locals);}, + '&&':function(self, locals, a,b){return a(self, locals)&&b(self, locals);}, + '||':function(self, locals, a,b){return a(self, locals)||b(self, locals);}, + '&':function(self, locals, a,b){return a(self, locals)&b(self, locals);}, +// '|':function(self, locals, a,b){return a|b;}, + '|':function(self, locals, a,b){return b(self, locals)(self, locals, a(self, locals));}, + '!':function(self, locals, a){return !a(self, locals);} +}; +var ESCAPE = {"n":"\n", "f":"\f", "r":"\r", "t":"\t", "v":"\v", "'":"'", '"':'"'}; + +function lex(text, csp){ + var tokens = [], + token, + index = 0, + json = [], + ch, + lastCh = ':'; // can start regexp + + while (index < text.length) { + ch = text.charAt(index); + if (is('"\'')) { + readString(ch); + } else if (isNumber(ch) || is('.') && isNumber(peek())) { + readNumber(); + } else if (isIdent(ch)) { + readIdent(); + // identifiers can only be if the preceding char was a { or , + if (was('{,') && json[0]=='{' && + (token=tokens[tokens.length-1])) { + token.json = token.text.indexOf('.') == -1; + } + } else if (is('(){}[].,;:')) { + tokens.push({ + index:index, + text:ch, + json:(was(':[,') && is('{[')) || is('}]:,') + }); + if (is('{[')) 
json.unshift(ch); + if (is('}]')) json.shift(); + index++; + } else if (isWhitespace(ch)) { + index++; + continue; + } else { + var ch2 = ch + peek(), + fn = OPERATORS[ch], + fn2 = OPERATORS[ch2]; + if (fn2) { + tokens.push({index:index, text:ch2, fn:fn2}); + index += 2; + } else if (fn) { + tokens.push({index:index, text:ch, fn:fn, json: was('[,:') && is('+-')}); + index += 1; + } else { + throwError("Unexpected next character ", index, index+1); + } + } + lastCh = ch; + } + return tokens; + + function is(chars) { + return chars.indexOf(ch) != -1; + } + + function was(chars) { + return chars.indexOf(lastCh) != -1; + } + + function peek() { + return index + 1 < text.length ? text.charAt(index + 1) : false; + } + function isNumber(ch) { + return '0' <= ch && ch <= '9'; + } + function isWhitespace(ch) { + return ch == ' ' || ch == '\r' || ch == '\t' || + ch == '\n' || ch == '\v' || ch == '\u00A0'; // IE treats non-breaking space as \u00A0 + } + function isIdent(ch) { + return 'a' <= ch && ch <= 'z' || + 'A' <= ch && ch <= 'Z' || + '_' == ch || ch == '$'; + } + function isExpOperator(ch) { + return ch == '-' || ch == '+' || isNumber(ch); + } + + function throwError(error, start, end) { + end = end || index; + throw Error("Lexer Error: " + error + " at column" + + (isDefined(start) + ? "s " + start + "-" + index + " [" + text.substring(start, end) + "]" + : " " + end) + + " in expression [" + text + "]."); + } + + function readNumber() { + var number = ""; + var start = index; + while (index < text.length) { + var ch = lowercase(text.charAt(index)); + if (ch == '.' 
|| isNumber(ch)) { + number += ch; + } else { + var peekCh = peek(); + if (ch == 'e' && isExpOperator(peekCh)) { + number += ch; + } else if (isExpOperator(ch) && + peekCh && isNumber(peekCh) && + number.charAt(number.length - 1) == 'e') { + number += ch; + } else if (isExpOperator(ch) && + (!peekCh || !isNumber(peekCh)) && + number.charAt(number.length - 1) == 'e') { + throwError('Invalid exponent'); + } else { + break; + } + } + index++; + } + number = 1 * number; + tokens.push({index:start, text:number, json:true, + fn:function() {return number;}}); + } + function readIdent() { + var ident = "", + start = index, + lastDot, peekIndex, methodName, ch; + + while (index < text.length) { + ch = text.charAt(index); + if (ch == '.' || isIdent(ch) || isNumber(ch)) { + if (ch == '.') lastDot = index; + ident += ch; + } else { + break; + } + index++; + } + + //check if this is not a method invocation and if it is back out to last dot + if (lastDot) { + peekIndex = index; + while(peekIndex < text.length) { + ch = text.charAt(peekIndex); + if (ch == '(') { + methodName = ident.substr(lastDot - start + 1); + ident = ident.substr(0, lastDot - start); + index = peekIndex; + break; + } + if(isWhitespace(ch)) { + peekIndex++; + } else { + break; + } + } + } + + + var token = { + index:start, + text:ident + }; + + if (OPERATORS.hasOwnProperty(ident)) { + token.fn = token.json = OPERATORS[ident]; + } else { + var getter = getterFn(ident, csp); + token.fn = extend(function(self, locals) { + return (getter(self, locals)); + }, { + assign: function(self, value) { + return setter(self, ident, value); + } + }); + } + + tokens.push(token); + + if (methodName) { + tokens.push({ + index:lastDot, + text: '.', + json: false + }); + tokens.push({ + index: lastDot + 1, + text: methodName, + json: false + }); + } + } + + function readString(quote) { + var start = index; + index++; + var string = ""; + var rawString = quote; + var escape = false; + while (index < text.length) { + var ch = 
text.charAt(index); + rawString += ch; + if (escape) { + if (ch == 'u') { + var hex = text.substring(index + 1, index + 5); + if (!hex.match(/[\da-f]{4}/i)) + throwError( "Invalid unicode escape [\\u" + hex + "]"); + index += 4; + string += String.fromCharCode(parseInt(hex, 16)); + } else { + var rep = ESCAPE[ch]; + if (rep) { + string += rep; + } else { + string += ch; + } + } + escape = false; + } else if (ch == '\\') { + escape = true; + } else if (ch == quote) { + index++; + tokens.push({ + index:start, + text:rawString, + string:string, + json:true, + fn:function() { return string; } + }); + return; + } else { + string += ch; + } + index++; + } + throwError("Unterminated quote", start); + } +} + +///////////////////////////////////////// + +function parser(text, json, $filter, csp){ + var ZERO = valueFn(0), + value, + tokens = lex(text, csp), + assignment = _assignment, + functionCall = _functionCall, + fieldAccess = _fieldAccess, + objectIndex = _objectIndex, + filterChain = _filterChain; + + if(json){ + // The extra level of aliasing is here, just in case the lexer misses something, so that + // we prevent any accidental execution in JSON. 
+ assignment = logicalOR; + functionCall = + fieldAccess = + objectIndex = + filterChain = + function() { throwError("is not valid json", {text:text, index:0}); }; + value = primary(); + } else { + value = statements(); + } + if (tokens.length !== 0) { + throwError("is an unexpected token", tokens[0]); + } + return value; + + /////////////////////////////////// + function throwError(msg, token) { + throw Error("Syntax Error: Token '" + token.text + + "' " + msg + " at column " + + (token.index + 1) + " of the expression [" + + text + "] starting at [" + text.substring(token.index) + "]."); + } + + function peekToken() { + if (tokens.length === 0) + throw Error("Unexpected end of expression: " + text); + return tokens[0]; + } + + function peek(e1, e2, e3, e4) { + if (tokens.length > 0) { + var token = tokens[0]; + var t = token.text; + if (t==e1 || t==e2 || t==e3 || t==e4 || + (!e1 && !e2 && !e3 && !e4)) { + return token; + } + } + return false; + } + + function expect(e1, e2, e3, e4){ + var token = peek(e1, e2, e3, e4); + if (token) { + if (json && !token.json) { + throwError("is not valid json", token); + } + tokens.shift(); + return token; + } + return false; + } + + function consume(e1){ + if (!expect(e1)) { + throwError("is unexpected, expecting [" + e1 + "]", peek()); + } + } + + function unaryFn(fn, right) { + return function(self, locals) { + return fn(self, locals, right); + }; + } + + function binaryFn(left, fn, right) { + return function(self, locals) { + return fn(self, locals, left, right); + }; + } + + function statements() { + var statements = []; + while(true) { + if (tokens.length > 0 && !peek('}', ')', ';', ']')) + statements.push(filterChain()); + if (!expect(';')) { + // optimize for the common case where there is only one statement. + // TODO(size): maybe we should not support multiple statements? + return statements.length == 1 + ? 
statements[0] + : function(self, locals){ + var value; + for ( var i = 0; i < statements.length; i++) { + var statement = statements[i]; + if (statement) + value = statement(self, locals); + } + return value; + }; + } + } + } + + function _filterChain() { + var left = expression(); + var token; + while(true) { + if ((token = expect('|'))) { + left = binaryFn(left, token.fn, filter()); + } else { + return left; + } + } + } + + function filter() { + var token = expect(); + var fn = $filter(token.text); + var argsFn = []; + while(true) { + if ((token = expect(':'))) { + argsFn.push(expression()); + } else { + var fnInvoke = function(self, locals, input){ + var args = [input]; + for ( var i = 0; i < argsFn.length; i++) { + args.push(argsFn[i](self, locals)); + } + return fn.apply(self, args); + }; + return function() { + return fnInvoke; + }; + } + } + } + + function expression() { + return assignment(); + } + + function _assignment() { + var left = logicalOR(); + var right; + var token; + if ((token = expect('='))) { + if (!left.assign) { + throwError("implies assignment but [" + + text.substring(0, token.index) + "] can not be assigned to", token); + } + right = logicalOR(); + return function(scope, locals){ + return left.assign(scope, right(scope, locals), locals); + }; + } else { + return left; + } + } + + function logicalOR() { + var left = logicalAND(); + var token; + while(true) { + if ((token = expect('||'))) { + left = binaryFn(left, token.fn, logicalAND()); + } else { + return left; + } + } + } + + function logicalAND() { + var left = equality(); + var token; + if ((token = expect('&&'))) { + left = binaryFn(left, token.fn, logicalAND()); + } + return left; + } + + function equality() { + var left = relational(); + var token; + if ((token = expect('==','!='))) { + left = binaryFn(left, token.fn, equality()); + } + return left; + } + + function relational() { + var left = additive(); + var token; + if ((token = expect('<', '>', '<=', '>='))) { + left = 
binaryFn(left, token.fn, relational()); + } + return left; + } + + function additive() { + var left = multiplicative(); + var token; + while ((token = expect('+','-'))) { + left = binaryFn(left, token.fn, multiplicative()); + } + return left; + } + + function multiplicative() { + var left = unary(); + var token; + while ((token = expect('*','/','%'))) { + left = binaryFn(left, token.fn, unary()); + } + return left; + } + + function unary() { + var token; + if (expect('+')) { + return primary(); + } else if ((token = expect('-'))) { + return binaryFn(ZERO, token.fn, unary()); + } else if ((token = expect('!'))) { + return unaryFn(token.fn, unary()); + } else { + return primary(); + } + } + + + function primary() { + var primary; + if (expect('(')) { + primary = filterChain(); + consume(')'); + } else if (expect('[')) { + primary = arrayDeclaration(); + } else if (expect('{')) { + primary = object(); + } else { + var token = expect(); + primary = token.fn; + if (!primary) { + throwError("not a primary expression", token); + } + } + + var next, context; + while ((next = expect('(', '[', '.'))) { + if (next.text === '(') { + primary = functionCall(primary, context); + context = null; + } else if (next.text === '[') { + context = primary; + primary = objectIndex(primary); + } else if (next.text === '.') { + context = primary; + primary = fieldAccess(primary); + } else { + throwError("IMPOSSIBLE"); + } + } + return primary; + } + + function _fieldAccess(object) { + var field = expect().text; + var getter = getterFn(field, csp); + return extend( + function(scope, locals, self) { + return getter(self || object(scope, locals), locals); + }, + { + assign:function(scope, value, locals) { + return setter(object(scope, locals), field, value); + } + } + ); + } + + function _objectIndex(obj) { + var indexFn = expression(); + consume(']'); + return extend( + function(self, locals){ + var o = obj(self, locals), + i = indexFn(self, locals), + v, p; + + if (!o) return undefined; + v 
= o[i]; + if (v && v.then) { + p = v; + if (!('$$v' in v)) { + p.$$v = undefined; + p.then(function(val) { p.$$v = val; }); + } + v = v.$$v; + } + return v; + }, { + assign:function(self, value, locals){ + return obj(self, locals)[indexFn(self, locals)] = value; + } + }); + } + + function _functionCall(fn, contextGetter) { + var argsFn = []; + if (peekToken().text != ')') { + do { + argsFn.push(expression()); + } while (expect(',')); + } + consume(')'); + return function(scope, locals){ + var args = [], + context = contextGetter ? contextGetter(scope, locals) : scope; + + for ( var i = 0; i < argsFn.length; i++) { + args.push(argsFn[i](scope, locals)); + } + var fnPtr = fn(scope, locals, context) || noop; + // IE stupidity! + return fnPtr.apply + ? fnPtr.apply(context, args) + : fnPtr(args[0], args[1], args[2], args[3], args[4]); + }; + } + + // This is used with json array declaration + function arrayDeclaration () { + var elementFns = []; + if (peekToken().text != ']') { + do { + elementFns.push(expression()); + } while (expect(',')); + } + consume(']'); + return function(self, locals){ + var array = []; + for ( var i = 0; i < elementFns.length; i++) { + array.push(elementFns[i](self, locals)); + } + return array; + }; + } + + function object () { + var keyValues = []; + if (peekToken().text != '}') { + do { + var token = expect(), + key = token.string || token.text; + consume(":"); + var value = expression(); + keyValues.push({key:key, value:value}); + } while (expect(',')); + } + consume('}'); + return function(self, locals){ + var object = {}; + for ( var i = 0; i < keyValues.length; i++) { + var keyValue = keyValues[i]; + object[keyValue.key] = keyValue.value(self, locals); + } + return object; + }; + } +} + +////////////////////////////////////////////////// +// Parser helper functions +////////////////////////////////////////////////// + +function setter(obj, path, setValue) { + var element = path.split('.'); + for (var i = 0; element.length > 1; i++) { + 
var key = element.shift(); + var propertyObj = obj[key]; + if (!propertyObj) { + propertyObj = {}; + obj[key] = propertyObj; + } + obj = propertyObj; + } + obj[element.shift()] = setValue; + return setValue; +} + +/** + * Return the value accesible from the object by path. Any undefined traversals are ignored + * @param {Object} obj starting object + * @param {string} path path to traverse + * @param {boolean=true} bindFnToScope + * @returns value as accesbile by path + */ +//TODO(misko): this function needs to be removed +function getter(obj, path, bindFnToScope) { + if (!path) return obj; + var keys = path.split('.'); + var key; + var lastInstance = obj; + var len = keys.length; + + for (var i = 0; i < len; i++) { + key = keys[i]; + if (obj) { + obj = (lastInstance = obj)[key]; + } + } + if (!bindFnToScope && isFunction(obj)) { + return bind(lastInstance, obj); + } + return obj; +} + +var getterFnCache = {}; + +/** + * Implementation of the "Black Hole" variant from: + * - http://jsperf.com/angularjs-parse-getter/4 + * - http://jsperf.com/path-evaluation-simplified/7 + */ +function cspSafeGetterFn(key0, key1, key2, key3, key4) { + return function(scope, locals) { + var pathVal = (locals && locals.hasOwnProperty(key0)) ? 
locals : scope, + promise; + + if (pathVal === null || pathVal === undefined) return pathVal; + + pathVal = pathVal[key0]; + if (pathVal && pathVal.then) { + if (!("$$v" in pathVal)) { + promise = pathVal; + promise.$$v = undefined; + promise.then(function(val) { promise.$$v = val; }); + } + pathVal = pathVal.$$v; + } + if (!key1 || pathVal === null || pathVal === undefined) return pathVal; + + pathVal = pathVal[key1]; + if (pathVal && pathVal.then) { + if (!("$$v" in pathVal)) { + promise = pathVal; + promise.$$v = undefined; + promise.then(function(val) { promise.$$v = val; }); + } + pathVal = pathVal.$$v; + } + if (!key2 || pathVal === null || pathVal === undefined) return pathVal; + + pathVal = pathVal[key2]; + if (pathVal && pathVal.then) { + if (!("$$v" in pathVal)) { + promise = pathVal; + promise.$$v = undefined; + promise.then(function(val) { promise.$$v = val; }); + } + pathVal = pathVal.$$v; + } + if (!key3 || pathVal === null || pathVal === undefined) return pathVal; + + pathVal = pathVal[key3]; + if (pathVal && pathVal.then) { + if (!("$$v" in pathVal)) { + promise = pathVal; + promise.$$v = undefined; + promise.then(function(val) { promise.$$v = val; }); + } + pathVal = pathVal.$$v; + } + if (!key4 || pathVal === null || pathVal === undefined) return pathVal; + + pathVal = pathVal[key4]; + if (pathVal && pathVal.then) { + if (!("$$v" in pathVal)) { + promise = pathVal; + promise.$$v = undefined; + promise.then(function(val) { promise.$$v = val; }); + } + pathVal = pathVal.$$v; + } + return pathVal; + }; +} + +function getterFn(path, csp) { + if (getterFnCache.hasOwnProperty(path)) { + return getterFnCache[path]; + } + + var pathKeys = path.split('.'), + pathKeysLength = pathKeys.length, + fn; + + if (csp) { + fn = (pathKeysLength < 6) + ? 
cspSafeGetterFn(pathKeys[0], pathKeys[1], pathKeys[2], pathKeys[3], pathKeys[4]) + : function(scope, locals) { + var i = 0, val; + do { + val = cspSafeGetterFn( + pathKeys[i++], pathKeys[i++], pathKeys[i++], pathKeys[i++], pathKeys[i++] + )(scope, locals); + + locals = undefined; // clear after first iteration + scope = val; + } while (i < pathKeysLength); + return val; + } + } else { + var code = 'var l, fn, p;\n'; + forEach(pathKeys, function(key, index) { + code += 'if(s === null || s === undefined) return s;\n' + + 'l=s;\n' + + 's='+ (index + // we simply dereference 's' on any .dot notation + ? 's' + // but if we are first then we check locals first, and if so read it first + : '((k&&k.hasOwnProperty("' + key + '"))?k:s)') + '["' + key + '"]' + ';\n' + + 'if (s && s.then) {\n' + + ' if (!("$$v" in s)) {\n' + + ' p=s;\n' + + ' p.$$v = undefined;\n' + + ' p.then(function(v) {p.$$v=v;});\n' + + '}\n' + + ' s=s.$$v\n' + + '}\n'; + }); + code += 'return s;'; + fn = Function('s', 'k', code); // s=scope, k=locals + fn.toString = function() { return code; }; + } + + return getterFnCache[path] = fn; +} + +/////////////////////////////////// + +/** + * @ngdoc function + * @name ng.$parse + * @function + * + * @description + * + * Converts Angular {@link guide/expression expression} into a function. + * + *
+ *   var getter = $parse('user.name');
+ *   var setter = getter.assign;
+ *   var context = {user:{name:'angular'}};
+ *   var locals = {user:{name:'local'}};
+ *
+ *   expect(getter(context)).toEqual('angular');
+ *   setter(context, 'newValue');
+ *   expect(context.user.name).toEqual('newValue');
+ *   expect(getter(context, locals)).toEqual('local');
+ * 
+ * + * + * @param {string} expression String expression to compile. + * @returns {function(context, locals)} a function which represents the compiled expression: + * + * * `context` – `{object}` – an object against which any expressions embedded in the strings + * are evaluated against (tipically a scope object). + * * `locals` – `{object=}` – local variables context object, useful for overriding values in + * `context`. + * + * The return function also has an `assign` property, if the expression is assignable, which + * allows one to set values to expressions. + * + */ +function $ParseProvider() { + var cache = {}; + this.$get = ['$filter', '$sniffer', function($filter, $sniffer) { + return function(exp) { + switch(typeof exp) { + case 'string': + return cache.hasOwnProperty(exp) + ? cache[exp] + : cache[exp] = parser(exp, false, $filter, $sniffer.csp); + case 'function': + return exp; + default: + return noop; + } + }; + }]; +} + +/** + * @ngdoc service + * @name ng.$q + * @requires $rootScope + * + * @description + * A promise/deferred implementation inspired by [Kris Kowal's Q](https://github.com/kriskowal/q). + * + * [The CommonJS Promise proposal](http://wiki.commonjs.org/wiki/Promises) describes a promise as an + * interface for interacting with an object that represents the result of an action that is + * performed asynchronously, and may or may not be finished at any given point in time. + * + * From the perspective of dealing with error handling, deferred and promise APIs are to + * asynchronous programming what `try`, `catch` and `throw` keywords are to synchronous programming. + * + *
+ *   // for the purpose of this example let's assume that variables `$q` and `scope` are
+ *   // available in the current lexical scope (they could have been injected or passed in).
+ *
+ *   function asyncGreet(name) {
+ *     var deferred = $q.defer();
+ *
+ *     setTimeout(function() {
+ *       // since this fn executes async in a future turn of the event loop, we need to wrap
+ *       // our code into an $apply call so that the model changes are properly observed.
+ *       scope.$apply(function() {
+ *         if (okToGreet(name)) {
+ *           deferred.resolve('Hello, ' + name + '!');
+ *         } else {
+ *           deferred.reject('Greeting ' + name + ' is not allowed.');
+ *         }
+ *       });
+ *     }, 1000);
+ *
+ *     return deferred.promise;
+ *   }
+ *
+ *   var promise = asyncGreet('Robin Hood');
+ *   promise.then(function(greeting) {
+ *     alert('Success: ' + greeting);
+ *   }, function(reason) {
+ *     alert('Failed: ' + reason);
+ *   });
+ * 
+ * + * At first it might not be obvious why this extra complexity is worth the trouble. The payoff + * comes in the way of + * [guarantees that promise and deferred APIs make](https://github.com/kriskowal/uncommonjs/blob/master/promises/specification.md). + * + * Additionally the promise api allows for composition that is very hard to do with the + * traditional callback ([CPS](http://en.wikipedia.org/wiki/Continuation-passing_style)) approach. + * For more on this please see the [Q documentation](https://github.com/kriskowal/q) especially the + * section on serial or parallel joining of promises. + * + * + * # The Deferred API + * + * A new instance of deferred is constructed by calling `$q.defer()`. + * + * The purpose of the deferred object is to expose the associated Promise instance as well as APIs + * that can be used for signaling the successful or unsuccessful completion of the task. + * + * **Methods** + * + * - `resolve(value)` – resolves the derived promise with the `value`. If the value is a rejection + * constructed via `$q.reject`, the promise will be rejected instead. + * - `reject(reason)` – rejects the derived promise with the `reason`. This is equivalent to + * resolving it with a rejection constructed via `$q.reject`. + * + * **Properties** + * + * - promise – `{Promise}` – promise object associated with this deferred. + * + * + * # The Promise API + * + * A new promise instance is created when a deferred instance is created and can be retrieved by + * calling `deferred.promise`. + * + * The purpose of the promise object is to allow for interested parties to get access to the result + * of the deferred task when it completes. + * + * **Methods** + * + * - `then(successCallback, errorCallback)` – regardless of when the promise was or will be resolved + * or rejected calls one of the success or error callbacks asynchronously as soon as the result + * is available. The callbacks are called with a single argument the result or rejection reason. 
+ * + * This method *returns a new promise* which is resolved or rejected via the return value of the + * `successCallback` or `errorCallback`. + * + * + * # Chaining promises + * + * Because calling `then` api of a promise returns a new derived promise, it is easily possible + * to create a chain of promises: + * + *
+ *   promiseB = promiseA.then(function(result) {
+ *     return result + 1;
+ *   });
+ *
+ *   // promiseB will be resolved immediately after promiseA is resolved and its value will be
+ *   // the result of promiseA incremented by 1
+ * 
+ * + * It is possible to create chains of any length and since a promise can be resolved with another + * promise (which will defer its resolution further), it is possible to pause/defer resolution of + * the promises at any point in the chain. This makes it possible to implement powerful apis like + * $http's response interceptors. + * + * + * # Differences between Kris Kowal's Q and $q + * + * There are three main differences: + * + * - $q is integrated with the {@link ng.$rootScope.Scope} Scope model observation + * mechanism in angular, which means faster propagation of resolution or rejection into your + * models and avoiding unnecessary browser repaints, which would result in flickering UI. + * - $q promises are recognized by the templating engine in angular, which means that in templates + * you can treat promises attached to a scope as if they were the resulting values. + * - Q has many more features than $q, but that comes at a cost of bytes. $q is tiny, but contains + * all the important functionality needed for common async tasks. + * + * # Testing + * + *
+ *    it('should simulate promise', inject(function($q, $rootScope) {
+ *      var deferred = $q.defer();
+ *      var promise = deferred.promise;
+ *      var resolvedValue;
+ * 
+ *      promise.then(function(value) { resolvedValue = value; });
+ *      expect(resolvedValue).toBeUndefined();
+ * 
+ *      // Simulate resolving of promise
+ *      deferred.resolve(123);
+ *      // Note that the 'then' function does not get called synchronously.
+ *      // This is because we want the promise API to always be async, whether or not
+ *      // it got called synchronously or asynchronously.
+ *      expect(resolvedValue).toBeUndefined();
+ * 
+ *      // Propagate promise resolution to 'then' functions using $apply().
+ *      $rootScope.$apply();
+ *      expect(resolvedValue).toEqual(123);
+ *    });
+ *  
+ */ +function $QProvider() { + + this.$get = ['$rootScope', '$exceptionHandler', function($rootScope, $exceptionHandler) { + return qFactory(function(callback) { + $rootScope.$evalAsync(callback); + }, $exceptionHandler); + }]; +} + + +/** + * Constructs a promise manager. + * + * @param {function(function)} nextTick Function for executing functions in the next turn. + * @param {function(...*)} exceptionHandler Function into which unexpected exceptions are passed for + * debugging purposes. + * @returns {object} Promise manager. + */ +function qFactory(nextTick, exceptionHandler) { + + /** + * @ngdoc + * @name ng.$q#defer + * @methodOf ng.$q + * @description + * Creates a `Deferred` object which represents a task which will finish in the future. + * + * @returns {Deferred} Returns a new instance of deferred. + */ + var defer = function() { + var pending = [], + value, deferred; + + deferred = { + + resolve: function(val) { + if (pending) { + var callbacks = pending; + pending = undefined; + value = ref(val); + + if (callbacks.length) { + nextTick(function() { + var callback; + for (var i = 0, ii = callbacks.length; i < ii; i++) { + callback = callbacks[i]; + value.then(callback[0], callback[1]); + } + }); + } + } + }, + + + reject: function(reason) { + deferred.resolve(reject(reason)); + }, + + + promise: { + then: function(callback, errback) { + var result = defer(); + + var wrappedCallback = function(value) { + try { + result.resolve((callback || defaultCallback)(value)); + } catch(e) { + exceptionHandler(e); + result.reject(e); + } + }; + + var wrappedErrback = function(reason) { + try { + result.resolve((errback || defaultErrback)(reason)); + } catch(e) { + exceptionHandler(e); + result.reject(e); + } + }; + + if (pending) { + pending.push([wrappedCallback, wrappedErrback]); + } else { + value.then(wrappedCallback, wrappedErrback); + } + + return result.promise; + } + } + }; + + return deferred; + }; + + + var ref = function(value) { + if (value && value.then) 
return value; + return { + then: function(callback) { + var result = defer(); + nextTick(function() { + result.resolve(callback(value)); + }); + return result.promise; + } + }; + }; + + + /** + * @ngdoc + * @name ng.$q#reject + * @methodOf ng.$q + * @description + * Creates a promise that is resolved as rejected with the specified `reason`. This api should be + * used to forward rejection in a chain of promises. If you are dealing with the last promise in + * a promise chain, you don't need to worry about it. + * + * When comparing deferreds/promises to the familiar behavior of try/catch/throw, think of + * `reject` as the `throw` keyword in JavaScript. This also means that if you "catch" an error via + * a promise error callback and you want to forward the error to the promise derived from the + * current promise, you have to "rethrow" the error by returning a rejection constructed via + * `reject`. + * + *
+   *   promiseB = promiseA.then(function(result) {
+   *     // success: do something and resolve promiseB
+   *     //          with the old or a new result
+   *     return result;
+   *   }, function(reason) {
+   *     // error: handle the error if possible and
+   *     //        resolve promiseB with newPromiseOrValue,
+   *     //        otherwise forward the rejection to promiseB
+   *     if (canHandle(reason)) {
+   *      // handle the error and recover
+   *      return newPromiseOrValue;
+   *     }
+   *     return $q.reject(reason);
+   *   });
+   * 
+ * + * @param {*} reason Constant, message, exception or an object representing the rejection reason. + * @returns {Promise} Returns a promise that was already resolved as rejected with the `reason`. + */ + var reject = function(reason) { + return { + then: function(callback, errback) { + var result = defer(); + nextTick(function() { + result.resolve((errback || defaultErrback)(reason)); + }); + return result.promise; + } + }; + }; + + + /** + * @ngdoc + * @name ng.$q#when + * @methodOf ng.$q + * @description + * Wraps an object that might be a value or a (3rd party) then-able promise into a $q promise. + * This is useful when you are dealing with an object that might or might not be a promise, or if + * the promise comes from a source that can't be trusted. + * + * @param {*} value Value or a promise + * @returns {Promise} Returns a promise of the passed value or promise + */ + var when = function(value, callback, errback) { + var result = defer(), + done; + + var wrappedCallback = function(value) { + try { + return (callback || defaultCallback)(value); + } catch (e) { + exceptionHandler(e); + return reject(e); + } + }; + + var wrappedErrback = function(reason) { + try { + return (errback || defaultErrback)(reason); + } catch (e) { + exceptionHandler(e); + return reject(e); + } + }; + + nextTick(function() { + ref(value).then(function(value) { + if (done) return; + done = true; + result.resolve(ref(value).then(wrappedCallback, wrappedErrback)); + }, function(reason) { + if (done) return; + done = true; + result.resolve(wrappedErrback(reason)); + }); + }); + + return result.promise; + }; + + + function defaultCallback(value) { + return value; + } + + + function defaultErrback(reason) { + return reject(reason); + } + + + /** + * @ngdoc + * @name ng.$q#all + * @methodOf ng.$q + * @description + * Combines multiple promises into a single promise that is resolved when all of the input + * promises are resolved. + * + * @param {Array.} promises An array of promises. 
+ * @returns {Promise} Returns a single promise that will be resolved with an array of values, + * each value corresponding to the promise at the same index in the `promises` array. If any of + * the promises is resolved with a rejection, this resulting promise will be resolved with the + * same rejection. + */ + function all(promises) { + var deferred = defer(), + counter = promises.length, + results = []; + + if (counter) { + forEach(promises, function(promise, index) { + ref(promise).then(function(value) { + if (index in results) return; + results[index] = value; + if (!(--counter)) deferred.resolve(results); + }, function(reason) { + if (index in results) return; + deferred.reject(reason); + }); + }); + } else { + deferred.resolve(results); + } + + return deferred.promise; + } + + return { + defer: defer, + reject: reject, + when: when, + all: all + }; +} + +/** + * @ngdoc object + * @name ng.$routeProvider + * @function + * + * @description + * + * Used for configuring routes. See {@link ng.$route $route} for an example. + */ +function $RouteProvider(){ + var routes = {}; + + /** + * @ngdoc method + * @name ng.$routeProvider#when + * @methodOf ng.$routeProvider + * + * @param {string} path Route path (matched against `$location.path`). If `$location.path` + * contains redundant trailing slash or is missing one, the route will still match and the + * `$location.path` will be updated to add or drop the trailing slash to exactly match the + * route definition. + * + * `path` can contain named groups starting with a colon (`:name`). All characters up to the + * next slash are matched and stored in `$routeParams` under the given `name` when the route + * matches. + * + * @param {Object} route Mapping information to be assigned to `$route.current` on route + * match. 
+ * + * Object properties: + * + * - `controller` – `{(string|function()=}` – Controller fn that should be associated with newly + * created scope or the name of a {@link angular.Module#controller registered controller} + * if passed as a string. + * - `template` – `{string=}` – html template as a string that should be used by + * {@link ng.directive:ngView ngView} or + * {@link ng.directive:ngInclude ngInclude} directives. + * this property takes precedence over `templateUrl`. + * - `templateUrl` – `{string=}` – path to an html template that should be used by + * {@link ng.directive:ngView ngView}. + * - `resolve` - `{Object.=}` - An optional map of dependencies which should + * be injected into the controller. If any of these dependencies are promises, they will be + * resolved and converted to a value before the controller is instantiated and the + * `$routeChangeSuccess` event is fired. The map object is: + * + * - `key` – `{string}`: a name of a dependency to be injected into the controller. + * - `factory` - `{string|function}`: If `string` then it is an alias for a service. + * Otherwise if function, then it is {@link api/AUTO.$injector#invoke injected} + * and the return value is treated as the dependency. If the result is a promise, it is resolved + * before its value is injected into the controller. + * + * - `redirectTo` – {(string|function())=} – value to update + * {@link ng.$location $location} path with and trigger route redirection. + * + * If `redirectTo` is a function, it will be called with the following parameters: + * + * - `{Object.}` - route parameters extracted from the current + * `$location.path()` by applying the current route templateUrl. + * - `{string}` - current `$location.path()` + * - `{Object}` - current `$location.search()` + * + * The custom `redirectTo` function is expected to return a string which will be used + * to update `$location.path()` and `$location.search()`. 
+ * + * - `[reloadOnSearch=true]` - {boolean=} - reload route when only $location.search() + * changes. + * + * If the option is set to `false` and url in the browser changes, then + * `$routeUpdate` event is broadcasted on the root scope. + * + * @returns {Object} self + * + * @description + * Adds a new route definition to the `$route` service. + */ + this.when = function(path, route) { + routes[path] = extend({reloadOnSearch: true}, route); + + // create redirection for trailing slashes + if (path) { + var redirectPath = (path[path.length-1] == '/') + ? path.substr(0, path.length-1) + : path +'/'; + + routes[redirectPath] = {redirectTo: path}; + } + + return this; + }; + + /** + * @ngdoc method + * @name ng.$routeProvider#otherwise + * @methodOf ng.$routeProvider + * + * @description + * Sets route definition that will be used on route change when no other route definition + * is matched. + * + * @param {Object} params Mapping information to be assigned to `$route.current`. + * @returns {Object} self + */ + this.otherwise = function(params) { + this.when(null, params); + return this; + }; + + + this.$get = ['$rootScope', '$location', '$routeParams', '$q', '$injector', '$http', '$templateCache', + function( $rootScope, $location, $routeParams, $q, $injector, $http, $templateCache) { + + /** + * @ngdoc object + * @name ng.$route + * @requires $location + * @requires $routeParams + * + * @property {Object} current Reference to the current route definition. + * The route definition contains: + * + * - `controller`: The controller constructor as define in route definition. + * - `locals`: A map of locals which is used by {@link ng.$controller $controller} service for + * controller instantiation. The `locals` contain + * the resolved values of the `resolve` map. Additionally the `locals` also contain: + * + * - `$scope` - The current route scope. + * - `$template` - The current route template HTML. + * + * @property {Array.} routes Array of all configured routes. 
+ * + * @description + * Is used for deep-linking URLs to controllers and views (HTML partials). + * It watches `$location.url()` and tries to map the path to an existing route definition. + * + * You can define routes through {@link ng.$routeProvider $routeProvider}'s API. + * + * The `$route` service is typically used in conjunction with {@link ng.directive:ngView ngView} + * directive and the {@link ng.$routeParams $routeParams} service. + * + * @example + This example shows how changing the URL hash causes the `$route` to match a route against the + URL, and the `ngView` pulls in the partial. + + Note that this example is using {@link ng.directive:script inlined templates} + to get it working on jsfiddle as well. + + + +
+ Choose: + Moby | + Moby: Ch1 | + Gatsby | + Gatsby: Ch4 | + Scarlet Letter
+ +
+
+ +
$location.path() = {{$location.path()}}
+
$route.current.templateUrl = {{$route.current.templateUrl}}
+
$route.current.params = {{$route.current.params}}
+
$route.current.scope.name = {{$route.current.scope.name}}
+
$routeParams = {{$routeParams}}
+
+
+ + + controller: {{name}}
+ Book Id: {{params.bookId}}
+
+ + + controller: {{name}}
+ Book Id: {{params.bookId}}
+ Chapter Id: {{params.chapterId}} +
+ + + angular.module('ngView', [], function($routeProvider, $locationProvider) { + $routeProvider.when('/Book/:bookId', { + templateUrl: 'book.html', + controller: BookCntl, + resolve: { + // I will cause a 1 second delay + delay: function($q, $timeout) { + var delay = $q.defer(); + $timeout(delay.resolve, 1000); + return delay.promise; + } + } + }); + $routeProvider.when('/Book/:bookId/ch/:chapterId', { + templateUrl: 'chapter.html', + controller: ChapterCntl + }); + + // configure html5 to get links working on jsfiddle + $locationProvider.html5Mode(true); + }); + + function MainCntl($scope, $route, $routeParams, $location) { + $scope.$route = $route; + $scope.$location = $location; + $scope.$routeParams = $routeParams; + } + + function BookCntl($scope, $routeParams) { + $scope.name = "BookCntl"; + $scope.params = $routeParams; + } + + function ChapterCntl($scope, $routeParams) { + $scope.name = "ChapterCntl"; + $scope.params = $routeParams; + } + + + + it('should load and compile correct template', function() { + element('a:contains("Moby: Ch1")').click(); + var content = element('.doc-example-live [ng-view]').text(); + expect(content).toMatch(/controller\: ChapterCntl/); + expect(content).toMatch(/Book Id\: Moby/); + expect(content).toMatch(/Chapter Id\: 1/); + + element('a:contains("Scarlet")').click(); + sleep(2); // promises are not part of scenario waiting + content = element('.doc-example-live [ng-view]').text(); + expect(content).toMatch(/controller\: BookCntl/); + expect(content).toMatch(/Book Id\: Scarlet/); + }); + +
+ */ + + /** + * @ngdoc event + * @name ng.$route#$routeChangeStart + * @eventOf ng.$route + * @eventType broadcast on root scope + * @description + * Broadcasted before a route change. At this point the route services starts + * resolving all of the dependencies needed for the route change to occurs. + * Typically this involves fetching the view template as well as any dependencies + * defined in `resolve` route property. Once all of the dependencies are resolved + * `$routeChangeSuccess` is fired. + * + * @param {Route} next Future route information. + * @param {Route} current Current route information. + */ + + /** + * @ngdoc event + * @name ng.$route#$routeChangeSuccess + * @eventOf ng.$route + * @eventType broadcast on root scope + * @description + * Broadcasted after a route dependencies are resolved. + * {@link ng.directive:ngView ngView} listens for the directive + * to instantiate the controller and render the view. + * + * @param {Object} angularEvent Synthetic event object. + * @param {Route} current Current route information. + * @param {Route|Undefined} previous Previous route information, or undefined if current is first route entered. + */ + + /** + * @ngdoc event + * @name ng.$route#$routeChangeError + * @eventOf ng.$route + * @eventType broadcast on root scope + * @description + * Broadcasted if any of the resolve promises are rejected. + * + * @param {Route} current Current route information. + * @param {Route} previous Previous route information. + * @param {Route} rejection Rejection of the promise. Usually the error of the failed promise. + */ + + /** + * @ngdoc event + * @name ng.$route#$routeUpdate + * @eventOf ng.$route + * @eventType broadcast on root scope + * @description + * + * The `reloadOnSearch` property has been set to false, and we are reusing the same + * instance of the Controller. 
+ */ + + var forceReload = false, + $route = { + routes: routes, + + /** + * @ngdoc method + * @name ng.$route#reload + * @methodOf ng.$route + * + * @description + * Causes `$route` service to reload the current route even if + * {@link ng.$location $location} hasn't changed. + * + * As a result of that, {@link ng.directive:ngView ngView} + * creates new scope, reinstantiates the controller. + */ + reload: function() { + forceReload = true; + $rootScope.$evalAsync(updateRoute); + } + }; + + $rootScope.$on('$locationChangeSuccess', updateRoute); + + return $route; + + ///////////////////////////////////////////////////// + + /** + * @param on {string} current url + * @param when {string} route when template to match the url against + * @return {?Object} + */ + function switchRouteMatcher(on, when) { + // TODO(i): this code is convoluted and inefficient, we should construct the route matching + // regex only once and then reuse it + + // Escape regexp special characters. + when = '^' + when.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&") + '$'; + var regex = '', + params = [], + dst = {}; + + var re = /:(\w+)/g, + paramMatch, + lastMatchedIndex = 0; + + while ((paramMatch = re.exec(when)) !== null) { + // Find each :param in `when` and replace it with a capturing group. + // Append all other sections of when unchanged. + regex += when.slice(lastMatchedIndex, paramMatch.index); + regex += '([^\\/]*)'; + params.push(paramMatch[1]); + lastMatchedIndex = re.lastIndex; + } + // Append trailing path part. + regex += when.substr(lastMatchedIndex); + + var match = on.match(new RegExp(regex)); + if (match) { + forEach(params, function(name, index) { + dst[name] = match[index + 1]; + }); + } + return match ? 
dst : null; + } + + function updateRoute() { + var next = parseRoute(), + last = $route.current; + + if (next && last && next.$$route === last.$$route + && equals(next.pathParams, last.pathParams) && !next.reloadOnSearch && !forceReload) { + last.params = next.params; + copy(last.params, $routeParams); + $rootScope.$broadcast('$routeUpdate', last); + } else if (next || last) { + forceReload = false; + $rootScope.$broadcast('$routeChangeStart', next, last); + $route.current = next; + if (next) { + if (next.redirectTo) { + if (isString(next.redirectTo)) { + $location.path(interpolate(next.redirectTo, next.params)).search(next.params) + .replace(); + } else { + $location.url(next.redirectTo(next.pathParams, $location.path(), $location.search())) + .replace(); + } + } + } + + $q.when(next). + then(function() { + if (next) { + var keys = [], + values = [], + template; + + forEach(next.resolve || {}, function(value, key) { + keys.push(key); + values.push(isString(value) ? $injector.get(value) : $injector.invoke(value)); + }); + if (isDefined(template = next.template)) { + } else if (isDefined(template = next.templateUrl)) { + template = $http.get(template, {cache: $templateCache}). + then(function(response) { return response.data; }); + } + if (isDefined(template)) { + keys.push('$template'); + values.push(template); + } + return $q.all(values).then(function(values) { + var locals = {}; + forEach(values, function(value, index) { + locals[keys[index]] = value; + }); + return locals; + }); + } + }). 
+ // after route change + then(function(locals) { + if (next == $route.current) { + if (next) { + next.locals = locals; + copy(next.params, $routeParams); + } + $rootScope.$broadcast('$routeChangeSuccess', next, last); + } + }, function(error) { + if (next == $route.current) { + $rootScope.$broadcast('$routeChangeError', next, last, error); + } + }); + } + } + + + /** + * @returns the current active route, by matching it against the URL + */ + function parseRoute() { + // Match a route + var params, match; + forEach(routes, function(route, path) { + if (!match && (params = switchRouteMatcher($location.path(), path))) { + match = inherit(route, { + params: extend({}, $location.search(), params), + pathParams: params}); + match.$$route = route; + } + }); + // No route matched; fallback to "otherwise" route + return match || routes[null] && inherit(routes[null], {params: {}, pathParams:{}}); + } + + /** + * @returns interpolation of the redirect path with the parametrs + */ + function interpolate(string, params) { + var result = []; + forEach((string||'').split(':'), function(segment, i) { + if (i == 0) { + result.push(segment); + } else { + var segmentMatch = segment.match(/(\w+)(.*)/); + var key = segmentMatch[1]; + result.push(params[key]); + result.push(segmentMatch[2] || ''); + delete params[key]; + } + }); + return result.join(''); + } + }]; +} + +/** + * @ngdoc object + * @name ng.$routeParams + * @requires $route + * + * @description + * Current set of route parameters. The route parameters are a combination of the + * {@link ng.$location $location} `search()`, and `path()`. The `path` parameters + * are extracted when the {@link ng.$route $route} path is matched. + * + * In case of parameter name collision, `path` params take precedence over `search` params. + * + * The service guarantees that the identity of the `$routeParams` object will remain unchanged + * (but its properties will likely change) even when a route change occurs. + * + * @example + *
+ *  // Given:
+ *  // URL: http://server.com/index.html#/Chapter/1/Section/2?search=moby
+ *  // Route: /Chapter/:chapterId/Section/:sectionId
+ *  //
+ *  // Then
+ *  $routeParams ==> {chapterId:1, sectionId:2, search:'moby'}
+ * 
+ */ +function $RouteParamsProvider() { + this.$get = valueFn({}); +} + +/** + * DESIGN NOTES + * + * The design decisions behind the scope are heavily favored for speed and memory consumption. + * + * The typical use of scope is to watch the expressions, which most of the time return the same + * value as last time so we optimize the operation. + * + * Closures construction is expensive in terms of speed as well as memory: + * - No closures, instead use prototypical inheritance for API + * - Internal state needs to be stored on scope directly, which means that private state is + * exposed as $$____ properties + * + * Loop operations are optimized by using while(count--) { ... } + * - this means that in order to keep the same order of execution as addition we have to add + * items to the array at the beginning (shift) instead of at the end (push) + * + * Child scopes are created and removed often + * - Using an array would be slow since inserts in middle are expensive so we use linked list + * + * There are few watches then a lot of observers. This is why you don't want the observer to be + * implemented in the same way as watch. Watch requires return of initialization function which + * are expensive to construct. + */ + + +/** + * @ngdoc object + * @name ng.$rootScopeProvider + * @description + * + * Provider for the $rootScope service. + */ + +/** + * @ngdoc function + * @name ng.$rootScopeProvider#digestTtl + * @methodOf ng.$rootScopeProvider + * @description + * + * Sets the number of digest iterations the scope should attempt to execute before giving up and + * assuming that the model is unstable. + * + * The current default is 10 iterations. + * + * @param {number} limit The number of digest iterations. + */ + + +/** + * @ngdoc object + * @name ng.$rootScope + * @description + * + * Every application has a single root {@link ng.$rootScope.Scope scope}. + * All other scopes are child scopes of the root scope. 
Scopes provide mechanism for watching the model and provide + * event processing life-cycle. See {@link guide/scope developer guide on scopes}. + */ +function $RootScopeProvider(){ + var TTL = 10; + + this.digestTtl = function(value) { + if (arguments.length) { + TTL = value; + } + return TTL; + }; + + this.$get = ['$injector', '$exceptionHandler', '$parse', + function( $injector, $exceptionHandler, $parse) { + + /** + * @ngdoc function + * @name ng.$rootScope.Scope + * + * @description + * A root scope can be retrieved using the {@link ng.$rootScope $rootScope} key from the + * {@link AUTO.$injector $injector}. Child scopes are created using the + * {@link ng.$rootScope.Scope#$new $new()} method. (Most scopes are created automatically when + * compiled HTML template is executed.) + * + * Here is a simple scope snippet to show how you can interact with the scope. + *
+        angular.injector(['ng']).invoke(function($rootScope) {
+           var scope = $rootScope.$new();
+           scope.salutation = 'Hello';
+           scope.name = 'World';
+
+           expect(scope.greeting).toEqual(undefined);
+
+           scope.$watch('name', function() {
+             scope.greeting = scope.salutation + ' ' + scope.name + '!';
+           }); // initialize the watch
+
+           expect(scope.greeting).toEqual(undefined);
+           scope.name = 'Misko';
+           // still old value, since watches have not been called yet
+           expect(scope.greeting).toEqual(undefined);
+
+           scope.$digest(); // fire all  the watches
+           expect(scope.greeting).toEqual('Hello Misko!');
+        });
+     * 
+ * + * # Inheritance + * A scope can inherit from a parent scope, as in this example: + *
+         var parent = $rootScope;
+         var child = parent.$new();
+
+         parent.salutation = "Hello";
+         child.name = "World";
+         expect(child.salutation).toEqual('Hello');
+
+         child.salutation = "Welcome";
+         expect(child.salutation).toEqual('Welcome');
+         expect(parent.salutation).toEqual('Hello');
+     * 
+ * + * + * @param {Object.=} providers Map of service factory which need to be provided + * for the current scope. Defaults to {@link ng}. + * @param {Object.=} instanceCache Provides pre-instantiated services which should + * append/override services provided by `providers`. This is handy when unit-testing and having + * the need to override a default service. + * @returns {Object} Newly created scope. + * + */ + function Scope() { + this.$id = nextUid(); + this.$$phase = this.$parent = this.$$watchers = + this.$$nextSibling = this.$$prevSibling = + this.$$childHead = this.$$childTail = null; + this['this'] = this.$root = this; + this.$$destroyed = false; + this.$$asyncQueue = []; + this.$$listeners = {}; + this.$$isolateBindings = {}; + } + + /** + * @ngdoc property + * @name ng.$rootScope.Scope#$id + * @propertyOf ng.$rootScope.Scope + * @returns {number} Unique scope ID (monotonically increasing alphanumeric sequence) useful for + * debugging. + */ + + + Scope.prototype = { + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$new + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Creates a new child {@link ng.$rootScope.Scope scope}. + * + * The parent scope will propagate the {@link ng.$rootScope.Scope#$digest $digest()} and + * {@link ng.$rootScope.Scope#$digest $digest()} events. The scope can be removed from the scope + * hierarchy using {@link ng.$rootScope.Scope#$destroy $destroy()}. + * + * {@link ng.$rootScope.Scope#$destroy $destroy()} must be called on a scope when it is desired for + * the scope and its child scopes to be permanently detached from the parent and thus stop + * participating in model change detection and listener notification by invoking. + * + * @param {boolean} isolate if true then the scope does not prototypically inherit from the + * parent scope. The scope is isolated, as it can not see parent scope properties. 
+ * When creating widgets it is useful for the widget to not accidentally read parent + * state. + * + * @returns {Object} The newly created child scope. + * + */ + $new: function(isolate) { + var Child, + child; + + if (isFunction(isolate)) { + // TODO: remove at some point + throw Error('API-CHANGE: Use $controller to instantiate controllers.'); + } + if (isolate) { + child = new Scope(); + child.$root = this.$root; + } else { + Child = function() {}; // should be anonymous; This is so that when the minifier munges + // the name it does not become random set of chars. These will then show up as class + // name in the debugger. + Child.prototype = this; + child = new Child(); + child.$id = nextUid(); + } + child['this'] = child; + child.$$listeners = {}; + child.$parent = this; + child.$$asyncQueue = []; + child.$$watchers = child.$$nextSibling = child.$$childHead = child.$$childTail = null; + child.$$prevSibling = this.$$childTail; + if (this.$$childHead) { + this.$$childTail.$$nextSibling = child; + this.$$childTail = child; + } else { + this.$$childHead = this.$$childTail = child; + } + return child; + }, + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$watch + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Registers a `listener` callback to be executed whenever the `watchExpression` changes. + * + * - The `watchExpression` is called on every call to {@link ng.$rootScope.Scope#$digest $digest()} and + * should return the value which will be watched. (Since {@link ng.$rootScope.Scope#$digest $digest()} + * reruns when it detects changes the `watchExpression` can execute multiple times per + * {@link ng.$rootScope.Scope#$digest $digest()} and should be idempotent.) + * - The `listener` is called only when the value from the current `watchExpression` and the + * previous call to `watchExpression` are not equal (with the exception of the initial run, + * see below). 
The inequality is determined according to + * {@link angular.equals} function. To save the value of the object for later comparison, the + * {@link angular.copy} function is used. It also means that watching complex objects will + * have adverse memory and performance implications. + * - The watch `listener` may change the model, which may trigger other `listener`s to fire. This + * is achieved by rerunning the watchers until no changes are detected. The rerun iteration + * limit is 10 to prevent an infinite loop deadlock. + * + * + * If you want to be notified whenever {@link ng.$rootScope.Scope#$digest $digest} is called, + * you can register a `watchExpression` function with no `listener`. (Since `watchExpression` + * can execute multiple times per {@link ng.$rootScope.Scope#$digest $digest} cycle when a change is + * detected, be prepared for multiple calls to your listener.) + * + * After a watcher is registered with the scope, the `listener` fn is called asynchronously + * (via {@link ng.$rootScope.Scope#$evalAsync $evalAsync}) to initialize the + * watcher. In rare cases, this is undesirable because the listener is called when the result + * of `watchExpression` didn't change. To detect this scenario within the `listener` fn, you + * can compare the `newVal` and `oldVal`. If these two values are identical (`===`) then the + * listener was called due to initialization. + * + * + * # Example + *
+           // let's assume that scope was dependency injected as the $rootScope
+           var scope = $rootScope;
+           scope.name = 'misko';
+           scope.counter = 0;
+
+           expect(scope.counter).toEqual(0);
+           scope.$watch('name', function(newValue, oldValue) { scope.counter = scope.counter + 1; });
+           expect(scope.counter).toEqual(0);
+
+           scope.$digest();
+           // no variable change
+           expect(scope.counter).toEqual(0);
+
+           scope.name = 'adam';
+           scope.$digest();
+           expect(scope.counter).toEqual(1);
+       * 
+ * + * + * + * @param {(function()|string)} watchExpression Expression that is evaluated on each + * {@link ng.$rootScope.Scope#$digest $digest} cycle. A change in the return value triggers a + * call to the `listener`. + * + * - `string`: Evaluated as {@link guide/expression expression} + * - `function(scope)`: called with current `scope` as a parameter. + * @param {(function()|string)=} listener Callback called whenever the return value of + * the `watchExpression` changes. + * + * - `string`: Evaluated as {@link guide/expression expression} + * - `function(newValue, oldValue, scope)`: called with current and previous values as parameters. + * + * @param {boolean=} objectEquality Compare object for equality rather than for reference. + * @returns {function()} Returns a deregistration function for this listener. + */ + $watch: function(watchExp, listener, objectEquality) { + var scope = this, + get = compileToFn(watchExp, 'watch'), + array = scope.$$watchers, + watcher = { + fn: listener, + last: initWatchVal, + get: get, + exp: watchExp, + eq: !!objectEquality + }; + + // in the case user pass string, we need to compile it, do we really need this ? + if (!isFunction(listener)) { + var listenFn = compileToFn(listener || noop, 'listener'); + watcher.fn = function(newVal, oldVal, scope) {listenFn(scope);}; + } + + if (!array) { + array = scope.$$watchers = []; + } + // we use unshift since we use a while loop in $digest for speed. + // the while loop reads in reverse order. + array.unshift(watcher); + + return function() { + arrayRemove(array, watcher); + }; + }, + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$digest + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Processes all of the {@link ng.$rootScope.Scope#$watch watchers} of the current scope and its children. 
+ * Because a {@link ng.$rootScope.Scope#$watch watcher}'s listener can change the model, the + * `$digest()` keeps calling the {@link ng.$rootScope.Scope#$watch watchers} until no more listeners are + * firing. This means that it is possible to get into an infinite loop. This function will throw + * an error whose message starts with the iteration limit followed by + * `' $digest() iterations reached. Aborting!'` if the number of iterations exceeds 10. + * + * Usually you don't call `$digest()` directly in + * {@link ng.directive:ngController controllers} or in + * {@link ng.$compileProvider#directive directives}. + * Instead a call to {@link ng.$rootScope.Scope#$apply $apply()} (typically from within a + * {@link ng.$compileProvider#directive directive}) will force a `$digest()`. + * + * If you want to be notified whenever `$digest()` is called, + * you can register a `watchExpression` function with {@link ng.$rootScope.Scope#$watch $watch()} + * with no `listener`. + * + * You may have a need to call `$digest()` from within unit-tests, to simulate the scope + * life-cycle. + * + * # Example + *
+           var scope = ...;
+           scope.name = 'misko';
+           scope.counter = 0;
+
+           expect(scope.counter).toEqual(0);
+           scope.$watch('name', function(newValue, oldValue) {
+             scope.counter = scope.counter + 1;
+           });
+           expect(scope.counter).toEqual(0);
+
+           scope.$digest();
+           // no variable change
+           expect(scope.counter).toEqual(0);
+
+           scope.name = 'adam';
+           scope.$digest();
+           expect(scope.counter).toEqual(1);
+       * 
+       *
+       */
+      $digest: function() {
+        // `ttl` counts down the remaining outer passes, `target` is the scope $digest was
+        // called on, and `watchLog` collects diagnostics for the last few passes.
+        var watch, value, last,
+            watchers,
+            asyncQueue,
+            length,
+            dirty, ttl = TTL,
+            next, current, target = this,
+            watchLog = [],
+            logIdx, logMsg;
+
+        beginPhase('$digest');
+
+        // Outer loop: one full pass over `target` and its descendants. It repeats while any
+        // watcher fired during the pass, or while the async queue of the last scope visited
+        // still has entries.
+        do {
+          dirty = false;
+          current = target;
+          do {
+            // Drain this scope's $evalAsync queue first; failures go to $exceptionHandler
+            // and do not stop the digest.
+            asyncQueue = current.$$asyncQueue;
+            while(asyncQueue.length) {
+              try {
+                current.$eval(asyncQueue.shift());
+              } catch (e) {
+                $exceptionHandler(e);
+              }
+            }
+            if ((watchers = current.$$watchers)) {
+              // process our watches
+              length = watchers.length;
+              // Walk the watcher list from the last index down to 0.
+              while (length--) {
+                try {
+                  watch = watchers[length];
+                  // Most common watches are on primitives, in which case we can short
+                  // circuit it with === operator, only when === fails do we use .equals
+                  // (two NaN numbers are also treated as unchanged when not using .equals)
+                  if ((value = watch.get(current)) !== (last = watch.last) &&
+                      !(watch.eq
+                          ? equals(value, last)
+                          : (typeof value == 'number' && typeof last == 'number'
+                             && isNaN(value) && isNaN(last)))) {
+                    dirty = true;
+                    // Remember the new value (a copy when comparing by equality).
+                    watch.last = watch.eq ? copy(value) : value;
+                    // On the very first run `last` is still initWatchVal, so the listener
+                    // receives the new value as both newVal and oldVal.
+                    watch.fn(value, ((last === initWatchVal) ? value : last), current);
+                    // Once fewer than 5 passes remain, record what fired so the error
+                    // thrown below can report it.
+                    if (ttl < 5) {
+                      logIdx = 4 - ttl;
+                      if (!watchLog[logIdx]) watchLog[logIdx] = [];
+                      logMsg = (isFunction(watch.exp))
+                          ? 'fn: ' + (watch.exp.name || watch.exp.toString())
+                          : watch.exp;
+                      logMsg += '; newVal: ' + toJson(value) + '; oldVal: ' + toJson(last);
+                      watchLog[logIdx].push(logMsg);
+                    }
+                  }
+                } catch (e) {
+                  $exceptionHandler(e);
+                }
+              }
+            }
+
+            // Insanity Warning: scope depth-first traversal
+            // yes, this code is a bit crazy, but it works and we have tests to prove it!
+            // this piece should be kept in sync with the traversal in $broadcast
+            // Order of preference: first child, then next sibling (never a sibling of
+            // `target` itself), otherwise climb towards `target` until a sibling is found.
+            if (!(next = (current.$$childHead || (current !== target && current.$$nextSibling)))) {
+              while(current !== target && !(next = current.$$nextSibling)) {
+                current = current.$parent;
+              }
+            }
+          } while ((current = next));
+
+          // Still dirty with no passes left: leave the phase and abort with diagnostics.
+          if(dirty && !(ttl--)) {
+            clearPhase();
+            throw Error(TTL + ' $digest() iterations reached. Aborting!\n' +
+                'Watchers fired in the last 5 iterations: ' + toJson(watchLog));
+          }
+        } while (dirty || asyncQueue.length);
+
+        clearPhase();
+      },
+
+
+      /**
+       * @ngdoc event
+       * @name ng.$rootScope.Scope#$destroy
+       * @eventOf ng.$rootScope.Scope
+       * @eventType broadcast on scope being destroyed
+       *
+       * @description
+       * Broadcasted when a scope and its children are being destroyed.
+       */
+
+      /**
+       * @ngdoc function
+       * @name ng.$rootScope.Scope#$destroy
+       * @methodOf ng.$rootScope.Scope
+       * @function
+       *
+       * @description
+       * Removes the current scope (and all of its children) from the parent scope. Removal implies
+       * that calls to {@link ng.$rootScope.Scope#$digest $digest()} will no longer
+       * propagate to the current scope and its children. Removal also implies that the current
+       * scope is eligible for garbage collection.
+       *
+       * The `$destroy()` is usually used by directives such as
+       * {@link ng.directive:ngRepeat ngRepeat} for managing the
+       * unrolling of the loop.
+       *
+       * Just before a scope is destroyed a `$destroy` event is broadcasted on this scope.
+       * Application code can register a `$destroy` event handler that will give it chance to
+       * perform any necessary cleanup.
+ */ + $destroy: function() { + // we can't destroy the root scope or a scope that has been already destroyed + if ($rootScope == this || this.$$destroyed) return; + var parent = this.$parent; + + this.$broadcast('$destroy'); + this.$$destroyed = true; + + if (parent.$$childHead == this) parent.$$childHead = this.$$nextSibling; + if (parent.$$childTail == this) parent.$$childTail = this.$$prevSibling; + if (this.$$prevSibling) this.$$prevSibling.$$nextSibling = this.$$nextSibling; + if (this.$$nextSibling) this.$$nextSibling.$$prevSibling = this.$$prevSibling; + + // This is bogus code that works around Chrome's GC leak + // see: https://github.com/angular/angular.js/issues/1313#issuecomment-10378451 + this.$parent = this.$$nextSibling = this.$$prevSibling = this.$$childHead = + this.$$childTail = null; + }, + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$eval + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Executes the `expression` on the current scope returning the result. Any exceptions in the + * expression are propagated (uncaught). This is useful when evaluating Angular expressions. + * + * # Example + *
+           var scope = ng.$rootScope.Scope();
+           scope.a = 1;
+           scope.b = 2;
+
+           expect(scope.$eval('a+b')).toEqual(3);
+           expect(scope.$eval(function(scope){ return scope.a + scope.b; })).toEqual(3);
+       * 
+ * + * @param {(string|function())=} expression An angular expression to be executed. + * + * - `string`: execute using the rules as defined in {@link guide/expression expression}. + * - `function(scope)`: execute the function with the current `scope` parameter. + * + * @returns {*} The result of evaluating the expression. + */ + $eval: function(expr, locals) { + return $parse(expr)(this, locals); + }, + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$evalAsync + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Executes the expression on the current scope at a later point in time. + * + * The `$evalAsync` makes no guarantees as to when the `expression` will be executed, only that: + * + * - it will execute in the current script execution context (before any DOM rendering). + * - at least one {@link ng.$rootScope.Scope#$digest $digest cycle} will be performed after + * `expression` execution. + * + * Any exceptions from the execution of the expression are forwarded to the + * {@link ng.$exceptionHandler $exceptionHandler} service. + * + * @param {(string|function())=} expression An angular expression to be executed. + * + * - `string`: execute using the rules as defined in {@link guide/expression expression}. + * - `function(scope)`: execute the function with the current `scope` parameter. + * + */ + $evalAsync: function(expr) { + this.$$asyncQueue.push(expr); + }, + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$apply + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * `$apply()` is used to execute an expression in angular from outside of the angular framework. + * (For example from browser DOM events, setTimeout, XHR or third party libraries). + * Because we are calling into the angular framework we need to perform proper scope life-cycle + * of {@link ng.$exceptionHandler exception handling}, + * {@link ng.$rootScope.Scope#$digest executing watches}. 
+ * + * ## Life cycle + * + * # Pseudo-Code of `$apply()` + *
+           function $apply(expr) {
+             try {
+               return $eval(expr);
+             } catch (e) {
+               $exceptionHandler(e);
+             } finally {
+               $root.$digest();
+             }
+           }
+       * 
+       *
+       *
+       * Scope's `$apply()` method transitions through the following stages:
+       *
+       * 1. The {@link guide/expression expression} is executed using the
+       *    {@link ng.$rootScope.Scope#$eval $eval()} method.
+       * 2. Any exceptions from the execution of the expression are forwarded to the
+       *    {@link ng.$exceptionHandler $exceptionHandler} service.
+       * 3. The {@link ng.$rootScope.Scope#$watch watch} listeners are fired immediately after the expression
+       *    was executed using the {@link ng.$rootScope.Scope#$digest $digest()} method.
+       *
+       *
+       * @param {(string|function())=} exp An angular expression to be executed.
+       *
+       *    - `string`: execute using the rules as defined in {@link guide/expression expression}.
+       *    - `function(scope)`: execute the function with current `scope` parameter.
+       *
+       * @returns {*} The result of evaluating the expression.
+       */
+      $apply: function(expr) {
+        try {
+          beginPhase('$apply');
+          return this.$eval(expr);
+        } catch (e) {
+          // An error from the expression is reported, not rethrown; $apply then returns
+          // undefined.
+          $exceptionHandler(e);
+        } finally {
+          // The '$apply' phase is cleared before the digest starts, whether or not the
+          // expression threw.
+          clearPhase();
+          try {
+            // The digest always runs from the root scope, not from `this`.
+            $rootScope.$digest();
+          } catch (e) {
+            // A digest failure is both reported and rethrown to the caller of $apply.
+            $exceptionHandler(e);
+            throw e;
+          }
+        }
+      },
+
+      /**
+       * @ngdoc function
+       * @name ng.$rootScope.Scope#$on
+       * @methodOf ng.$rootScope.Scope
+       * @function
+       *
+       * @description
+       * Listens on events of a given type. See {@link ng.$rootScope.Scope#$emit $emit} for discussion of
+       * event life cycle.
+       *
+       * The event listener function format is: `function(event, args...)`. The `event` object
+       * passed into the listener has the following attributes:
+       *
+       *   - `targetScope` - `{Scope}`: the scope on which the event was `$emit`-ed or `$broadcast`-ed.
+       *   - `currentScope` - `{Scope}`: the current scope which is handling the event.
+       *   - `name` - `{string}`: Name of the event.
+       *   - `stopPropagation` - `{function=}`: calling `stopPropagation` function will cancel further event
+       *     propagation (available only for events that were `$emit`-ed).
+ * - `preventDefault` - `{function}`: calling `preventDefault` sets `defaultPrevented` flag to true. + * - `defaultPrevented` - `{boolean}`: true if `preventDefault` was called. + * + * @param {string} name Event name to listen on. + * @param {function(event, args...)} listener Function to call when the event is emitted. + * @returns {function()} Returns a deregistration function for this listener. + */ + $on: function(name, listener) { + var namedListeners = this.$$listeners[name]; + if (!namedListeners) { + this.$$listeners[name] = namedListeners = []; + } + namedListeners.push(listener); + + return function() { + namedListeners[indexOf(namedListeners, listener)] = null; + }; + }, + + + /** + * @ngdoc function + * @name ng.$rootScope.Scope#$emit + * @methodOf ng.$rootScope.Scope + * @function + * + * @description + * Dispatches an event `name` upwards through the scope hierarchy notifying the + * registered {@link ng.$rootScope.Scope#$on} listeners. + * + * The event life cycle starts at the scope on which `$emit` was called. All + * {@link ng.$rootScope.Scope#$on listeners} listening for `name` event on this scope get notified. + * Afterwards, the event traverses upwards toward the root scope and calls all registered + * listeners along the way. The event will stop propagating if one of the listeners cancels it. + * + * Any exception emitted from the {@link ng.$rootScope.Scope#$on listeners} will be passed + * onto the {@link ng.$exceptionHandler $exceptionHandler} service. + * + * @param {string} name Event name to emit. + * @param {...*} args Optional set of arguments which will be passed onto the event listeners. 
+ * @return {Object} Event object, see {@link ng.$rootScope.Scope#$on} + */ + $emit: function(name, args) { + var empty = [], + namedListeners, + scope = this, + stopPropagation = false, + event = { + name: name, + targetScope: scope, + stopPropagation: function() {stopPropagation = true;}, + preventDefault: function() { + event.defaultPrevented = true; + }, + defaultPrevented: false + }, + listenerArgs = concat([event], arguments, 1), + i, length; + + do { + namedListeners = scope.$$listeners[name] || empty; + event.currentScope = scope; + for (i=0, length=namedListeners.length; i 7), + hasEvent: function(event) { + // IE9 implements 'input' event it's so fubared that we rather pretend that it doesn't have + // it. In particular the event is not fired when backspace or delete key are pressed or + // when cut operation is performed. + if (event == 'input' && msie == 9) return false; + + if (isUndefined(eventSupport[event])) { + var divElm = $window.document.createElement('div'); + eventSupport[event] = 'on' + event in divElm; + } + + return eventSupport[event]; + }, + // TODO(i): currently there is no way to feature detect CSP without triggering alerts + csp: false + }; + }]; +} + +/** + * @ngdoc object + * @name ng.$window + * + * @description + * A reference to the browser's `window` object. While `window` + * is globally available in JavaScript, it causes testability problems, because + * it is a global variable. In angular we always refer to it through the + * `$window` service, so it may be overriden, removed or mocked for testing. + * + * All expressions are evaluated with respect to current scope so they don't + * suffer from window globality. + * + * @example + + + +
+ + +
+
+ + it('should display the greeting in the input box', function() { + input('greeting').enter('Hello, E2E Tests'); + // If we click the button it will block the test runner + // element(':button').click(); + }); + +
+ */ +function $WindowProvider(){ + this.$get = valueFn(window); +} + +/** + * Parse headers into key value object + * + * @param {string} headers Raw headers as a string + * @returns {Object} Parsed headers as key value object + */ +function parseHeaders(headers) { + var parsed = {}, key, val, i; + + if (!headers) return parsed; + + forEach(headers.split('\n'), function(line) { + i = line.indexOf(':'); + key = lowercase(trim(line.substr(0, i))); + val = trim(line.substr(i + 1)); + + if (key) { + if (parsed[key]) { + parsed[key] += ', ' + val; + } else { + parsed[key] = val; + } + } + }); + + return parsed; +} + + +/** + * Returns a function that provides access to parsed headers. + * + * Headers are lazy parsed when first requested. + * @see parseHeaders + * + * @param {(string|Object)} headers Headers to provide access to. + * @returns {function(string=)} Returns a getter function which if called with: + * + * - if called with single an argument returns a single header value or null + * - if called with no arguments returns an object containing all headers. + */ +function headersGetter(headers) { + var headersObj = isObject(headers) ? headers : undefined; + + return function(name) { + if (!headersObj) headersObj = parseHeaders(headers); + + if (name) { + return headersObj[lowercase(name)] || null; + } + + return headersObj; + }; +} + + +/** + * Chain all given functions + * + * This function is used for both request and response transforming + * + * @param {*} data Data to transform. + * @param {function(string=)} headers Http headers getter fn. + * @param {(function|Array.)} fns Function or an array of functions. + * @returns {*} Transformed data. 
+ */ +function transformData(data, headers, fns) { + if (isFunction(fns)) + return fns(data, headers); + + forEach(fns, function(fn) { + data = fn(data, headers); + }); + + return data; +} + + +function isSuccess(status) { + return 200 <= status && status < 300; +} + + +function $HttpProvider() { + var JSON_START = /^\s*(\[|\{[^\{])/, + JSON_END = /[\}\]]\s*$/, + PROTECTION_PREFIX = /^\)\]\}',?\n/; + + var $config = this.defaults = { + // transform incoming response data + transformResponse: [function(data) { + if (isString(data)) { + // strip json vulnerability protection prefix + data = data.replace(PROTECTION_PREFIX, ''); + if (JSON_START.test(data) && JSON_END.test(data)) + data = fromJson(data, true); + } + return data; + }], + + // transform outgoing request data + transformRequest: [function(d) { + return isObject(d) && !isFile(d) ? toJson(d) : d; + }], + + // default headers + headers: { + common: { + 'Accept': 'application/json, text/plain, */*', + 'X-Requested-With': 'XMLHttpRequest' + }, + post: {'Content-Type': 'application/json;charset=utf-8'}, + put: {'Content-Type': 'application/json;charset=utf-8'} + } + }; + + var providerResponseInterceptors = this.responseInterceptors = []; + + this.$get = ['$httpBackend', '$browser', '$cacheFactory', '$rootScope', '$q', '$injector', + function($httpBackend, $browser, $cacheFactory, $rootScope, $q, $injector) { + + var defaultCache = $cacheFactory('$http'), + responseInterceptors = []; + + forEach(providerResponseInterceptors, function(interceptor) { + responseInterceptors.push( + isString(interceptor) + ? 
$injector.get(interceptor) + : $injector.invoke(interceptor) + ); + }); + + + /** + * @ngdoc function + * @name ng.$http + * @requires $httpBackend + * @requires $browser + * @requires $cacheFactory + * @requires $rootScope + * @requires $q + * @requires $injector + * + * @description + * The `$http` service is a core Angular service that facilitates communication with the remote + * HTTP servers via the browser's {@link https://developer.mozilla.org/en/xmlhttprequest + * XMLHttpRequest} object or via {@link http://en.wikipedia.org/wiki/JSONP JSONP}. + * + * For unit testing applications that use `$http` service, see + * {@link ngMock.$httpBackend $httpBackend mock}. + * + * For a higher level of abstraction, please check out the {@link ngResource.$resource + * $resource} service. + * + * The $http API is based on the {@link ng.$q deferred/promise APIs} exposed by + * the $q service. While for simple usage patterns this doesn't matter much, for advanced usage + * it is important to familiarize yourself with these APIs and the guarantees they provide. + * + * + * # General usage + * The `$http` service is a function which takes a single argument — a configuration object — + * that is used to generate an HTTP request and returns a {@link ng.$q promise} + * with two $http specific methods: `success` and `error`. + * + *
+     *   $http({method: 'GET', url: '/someUrl'}).
+     *     success(function(data, status, headers, config) {
+     *       // this callback will be called asynchronously
+     *       // when the response is available
+     *     }).
+     *     error(function(data, status, headers, config) {
+     *       // called asynchronously if an error occurs
+     *       // or server returns response with an error status.
+     *     });
+     * 
+ * + * Since the returned value of calling the $http function is a `promise`, you can also use + * the `then` method to register callbacks, and these callbacks will receive a single argument – + * an object representing the response. See the API signature and type info below for more + * details. + * + * A response status code between 200 and 299 is considered a success status and + * will result in the success callback being called. Note that if the response is a redirect, + * XMLHttpRequest will transparently follow it, meaning that the error callback will not be + * called for such responses. + * + * # Shortcut methods + * + * Since all invocations of the $http service require passing in an HTTP method and URL, and + * POST/PUT requests require request data to be provided as well, shortcut methods + * were created: + * + *
+     *   $http.get('/someUrl').success(successCallback);
+     *   $http.post('/someUrl', data).success(successCallback);
+     * 
+ * + * Complete list of shortcut methods: + * + * - {@link ng.$http#get $http.get} + * - {@link ng.$http#head $http.head} + * - {@link ng.$http#post $http.post} + * - {@link ng.$http#put $http.put} + * - {@link ng.$http#delete $http.delete} + * - {@link ng.$http#jsonp $http.jsonp} + * + * + * # Setting HTTP Headers + * + * The $http service will automatically add certain HTTP headers to all requests. These defaults + * can be fully configured by accessing the `$httpProvider.defaults.headers` configuration + * object, which currently contains this default configuration: + * + * - `$httpProvider.defaults.headers.common` (headers that are common for all requests): + * - `Accept: application/json, text/plain, * / *` + * - `X-Requested-With: XMLHttpRequest` + * - `$httpProvider.defaults.headers.post`: (header defaults for POST requests) + * - `Content-Type: application/json` + * - `$httpProvider.defaults.headers.put` (header defaults for PUT requests) + * - `Content-Type: application/json` + * + * To add or overwrite these defaults, simply add or remove a property from these configuration + * objects. To add headers for an HTTP method other than POST or PUT, simply add a new object + * with the lowercased HTTP method name as the key, e.g. + * `$httpProvider.defaults.headers.get['My-Header']='value'`. + * + * Additionally, the defaults can be set at runtime via the `$http.defaults` object in the same + * fashion. + * + * + * # Transforming Requests and Responses + * + * Both requests and responses can be transformed using transform functions. By default, Angular + * applies these transformations: + * + * Request transformations: + * + * - If the `data` property of the request configuration object contains an object, serialize it into + * JSON format. + * + * Response transformations: + * + * - If XSRF prefix is detected, strip it (see Security Considerations section below). + * - If JSON response is detected, deserialize it using a JSON parser. 
+ * + * To globally augment or override the default transforms, modify the `$httpProvider.defaults.transformRequest` and + * `$httpProvider.defaults.transformResponse` properties. These properties are by default an + * array of transform functions, which allows you to `push` or `unshift` a new transformation function into the + * transformation chain. You can also decide to completely override any default transformations by assigning your + * transformation functions to these properties directly without the array wrapper. + * + * Similarly, to locally override the request/response transforms, augment the `transformRequest` and/or + * `transformResponse` properties of the configuration object passed into `$http`. + * + * + * # Caching + * + * To enable caching, set the configuration property `cache` to `true`. When the cache is + * enabled, `$http` stores the response from the server in local cache. Next time the + * response is served from the cache without sending a request to the server. + * + * Note that even if the response is served from cache, delivery of the data is asynchronous in + * the same way that real requests are. + * + * If there are multiple GET requests for the same URL that should be cached using the same + * cache, but the cache is not populated yet, only one request to the server will be made and + * the remaining requests will be fulfilled using the response from the first request. + * + * + * # Response interceptors + * + * Before you start creating interceptors, be sure to understand the + * {@link ng.$q $q and deferred/promise APIs}. + * + * For purposes of global error handling, authentication or any kind of synchronous or + * asynchronous preprocessing of received responses, it is desirable to be able to intercept + * responses for http requests before they are handed over to the application code that + * initiated these requests. 
The response interceptors leverage the {@link ng.$q + * promise apis} to fulfil this need for both synchronous and asynchronous preprocessing. + * + * The interceptors are service factories that are registered with the $httpProvider by + * adding them to the `$httpProvider.responseInterceptors` array. The factory is called and + * injected with dependencies (if specified) and returns the interceptor — a function that + * takes a {@link ng.$q promise} and returns the original or a new promise. + * + *
+     *   // register the interceptor as a service
+     *   $provide.factory('myHttpInterceptor', function($q, dependency1, dependency2) {
+     *     return function(promise) {
+     *       return promise.then(function(response) {
+     *         // do something on success
+     *       }, function(response) {
+     *         // do something on error
+     *         if (canRecover(response)) {
+     *           return responseOrNewPromise
+     *         }
+     *         return $q.reject(response);
+     *       });
+     *     }
+     *   });
+     *
+     *   $httpProvider.responseInterceptors.push('myHttpInterceptor');
+     *
+     *
+     *   // register the interceptor via an anonymous factory
+     *   $httpProvider.responseInterceptors.push(function($q, dependency1, dependency2) {
+     *     return function(promise) {
+     *       // same as above
+     *     }
+     *   });
+     * 
+ * + * + * # Security Considerations + * + * When designing web applications, consider security threats from: + * + * - {@link http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx + * JSON vulnerability} + * - {@link http://en.wikipedia.org/wiki/Cross-site_request_forgery XSRF} + * + * Both server and the client must cooperate in order to eliminate these threats. Angular comes + * pre-configured with strategies that address these issues, but for this to work backend server + * cooperation is required. + * + * ## JSON Vulnerability Protection + * + * A {@link http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx + * JSON vulnerability} allows third party website to turn your JSON resource URL into + * {@link http://en.wikipedia.org/wiki/JSONP JSONP} request under some conditions. To + * counter this your server can prefix all JSON requests with following string `")]}',\n"`. + * Angular will automatically strip the prefix before processing it as JSON. + * + * For example if your server needs to return: + *
+     * ['one','two']
+     * 
+ * + * which is vulnerable to attack, your server can return: + *
+     * )]}',
+     * ['one','two']
+     * 
+ * + * Angular will strip the prefix, before processing the JSON. + * + * + * ## Cross Site Request Forgery (XSRF) Protection + * + * {@link http://en.wikipedia.org/wiki/Cross-site_request_forgery XSRF} is a technique by which + * an unauthorized site can gain your user's private data. Angular provides a mechanism + * to counter XSRF. When performing XHR requests, the $http service reads a token from a cookie + * called `XSRF-TOKEN` and sets it as the HTTP header `X-XSRF-TOKEN`. Since only JavaScript that + * runs on your domain could read the cookie, your server can be assured that the XHR came from + * JavaScript running on your domain. + * + * To take advantage of this, your server needs to set a token in a JavaScript readable session + * cookie called `XSRF-TOKEN` on the first HTTP GET request. On subsequent XHR requests the + * server can verify that the cookie matches `X-XSRF-TOKEN` HTTP header, and therefore be sure + * that only JavaScript running on your domain could have sent the request. The token must be + * unique for each user and must be verifiable by the server (to prevent the JavaScript from making + * up its own tokens). We recommend that the token is a digest of your site's authentication + * cookie with a {@link https://en.wikipedia.org/wiki/Salt_(cryptography) salt} for added security. + * + * + * @param {object} config Object describing the request to be made and how it should be + * processed. The object has following properties: + * + * - **method** – `{string}` – HTTP method (e.g. 'GET', 'POST', etc) + * - **url** – `{string}` – Absolute or relative URL of the resource that is being requested. + * - **params** – `{Object.}` – Map of strings or objects which will be turned to + * `?key1=value1&key2=value2` after the url. If the value is not a string, it will be JSONified. + * - **data** – `{string|Object}` – Data to be sent as the request message data. 
+ * - **headers** – `{Object}` – Map of strings representing HTTP headers to send to the server. + * - **transformRequest** – `{function(data, headersGetter)|Array.<function(data, headersGetter)>}` – + * transform function or an array of such functions. The transform function takes the http + * request body and headers and returns its transformed (typically serialized) version. + * - **transformResponse** – `{function(data, headersGetter)|Array.<function(data, headersGetter)>}` – + * transform function or an array of such functions. The transform function takes the http + * response body and headers and returns its transformed (typically deserialized) version. + * - **cache** – `{boolean|Cache}` – If true, a default $http cache will be used to cache the + * GET request, otherwise if a cache instance built with + * {@link ng.$cacheFactory $cacheFactory}, this cache will be used for + * caching. + * - **timeout** – `{number}` – timeout in milliseconds. + * - **withCredentials** - `{boolean}` - whether to set the `withCredentials` flag on the + * XHR object. See {@link https://developer.mozilla.org/en/http_access_control#section_5 + * requests with credentials} for more information. + * + * @returns {HttpPromise} Returns a {@link ng.$q promise} object with the + * standard `then` method and two http specific methods: `success` and `error`. The `then` + * method takes two arguments: a success and an error callback which will be called with a + * response object. The `success` and `error` methods take a single argument - a function that + * will be called when the request succeeds or fails respectively. The arguments passed into + * these functions are a destructured representation of the response object passed into the + * `then` method. The response object has these properties: + * + * - **data** – `{string|Object}` – The response body transformed with the transform functions. + * - **status** – `{number}` – HTTP status code of the response. + * - **headers** – `{function([headerName])}` – Header getter function. 
+ * - **config** – `{Object}` – The configuration object that was used to generate the request. + * + * @property {Array.} pendingRequests Array of config objects for currently pending + * requests. This is primarily meant to be used for debugging purposes. + * + * + * @example + + +
+ + +
+ + + +
http status code: {{status}}
+
http response data: {{data}}
+
+
+ + function FetchCtrl($scope, $http, $templateCache) { + $scope.method = 'GET'; + $scope.url = 'http-hello.html'; + + $scope.fetch = function() { + $scope.code = null; + $scope.response = null; + + $http({method: $scope.method, url: $scope.url, cache: $templateCache}). + success(function(data, status) { + $scope.status = status; + $scope.data = data; + }). + error(function(data, status) { + $scope.data = data || "Request failed"; + $scope.status = status; + }); + }; + + $scope.updateModel = function(method, url) { + $scope.method = method; + $scope.url = url; + }; + } + + + Hello, $http! + + + it('should make an xhr GET request', function() { + element(':button:contains("Sample GET")').click(); + element(':button:contains("fetch")').click(); + expect(binding('status')).toBe('200'); + expect(binding('data')).toMatch(/Hello, \$http!/); + }); + + it('should make a JSONP request to angularjs.org', function() { + element(':button:contains("Sample JSONP")').click(); + element(':button:contains("fetch")').click(); + expect(binding('status')).toBe('200'); + expect(binding('data')).toMatch(/Super Hero!/); + }); + + it('should make JSONP request to invalid URL and invoke the error handler', + function() { + element(':button:contains("Invalid JSONP")').click(); + element(':button:contains("fetch")').click(); + expect(binding('status')).toBe('0'); + expect(binding('data')).toBe('Request failed'); + }); + +
+ */ + function $http(config) { + config.method = uppercase(config.method); + + var reqTransformFn = config.transformRequest || $config.transformRequest, + respTransformFn = config.transformResponse || $config.transformResponse, + defHeaders = $config.headers, + reqHeaders = extend({'X-XSRF-TOKEN': $browser.cookies()['XSRF-TOKEN']}, + defHeaders.common, defHeaders[lowercase(config.method)], config.headers), + reqData = transformData(config.data, headersGetter(reqHeaders), reqTransformFn), + promise; + + // strip content-type if data is undefined + if (isUndefined(config.data)) { + delete reqHeaders['Content-Type']; + } + + // send request + promise = sendReq(config, reqData, reqHeaders); + + + // transform future response + promise = promise.then(transformResponse, transformResponse); + + // apply interceptors + forEach(responseInterceptors, function(interceptor) { + promise = interceptor(promise); + }); + + promise.success = function(fn) { + promise.then(function(response) { + fn(response.data, response.status, response.headers, config); + }); + return promise; + }; + + promise.error = function(fn) { + promise.then(null, function(response) { + fn(response.data, response.status, response.headers, config); + }); + return promise; + }; + + return promise; + + function transformResponse(response) { + // make a copy since the response must be cacheable + var resp = extend({}, response, { + data: transformData(response.data, response.headers, respTransformFn) + }); + return (isSuccess(response.status)) + ? resp + : $q.reject(resp); + } + } + + $http.pendingRequests = []; + + /** + * @ngdoc method + * @name ng.$http#get + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `GET` request. 
+ * + * @param {string} url Relative or absolute URL specifying the destination of the request + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + + /** + * @ngdoc method + * @name ng.$http#delete + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `DELETE` request. + * + * @param {string} url Relative or absolute URL specifying the destination of the request + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + + /** + * @ngdoc method + * @name ng.$http#head + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `HEAD` request. + * + * @param {string} url Relative or absolute URL specifying the destination of the request + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + + /** + * @ngdoc method + * @name ng.$http#jsonp + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `JSONP` request. + * + * @param {string} url Relative or absolute URL specifying the destination of the request. + * Should contain `JSON_CALLBACK` string. + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + createShortMethods('get', 'delete', 'head', 'jsonp'); + + /** + * @ngdoc method + * @name ng.$http#post + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `POST` request. + * + * @param {string} url Relative or absolute URL specifying the destination of the request + * @param {*} data Request content + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + + /** + * @ngdoc method + * @name ng.$http#put + * @methodOf ng.$http + * + * @description + * Shortcut method to perform `PUT` request. 
+ * + * @param {string} url Relative or absolute URL specifying the destination of the request + * @param {*} data Request content + * @param {Object=} config Optional configuration object + * @returns {HttpPromise} Future object + */ + createShortMethodsWithData('post', 'put'); + + /** + * @ngdoc property + * @name ng.$http#defaults + * @propertyOf ng.$http + * + * @description + * Runtime equivalent of the `$httpProvider.defaults` property. Allows configuration of + * default headers as well as request and response transformations. + * + * See "Setting HTTP Headers" and "Transforming Requests and Responses" sections above. + */ + $http.defaults = $config; + + + return $http; + + + function createShortMethods(names) { + forEach(arguments, function(name) { + $http[name] = function(url, config) { + return $http(extend(config || {}, { + method: name, + url: url + })); + }; + }); + } + + + function createShortMethodsWithData(name) { + forEach(arguments, function(name) { + $http[name] = function(url, data, config) { + return $http(extend(config || {}, { + method: name, + url: url, + data: data + })); + }; + }); + } + + + /** + * Makes the request. + * + * !!! ACCESSES CLOSURE VARS: + * $httpBackend, $config, $log, $rootScope, defaultCache, $http.pendingRequests + */ + function sendReq(config, reqData, reqHeaders) { + var deferred = $q.defer(), + promise = deferred.promise, + cache, + cachedResp, + url = buildUrl(config.url, config.params); + + $http.pendingRequests.push(config); + promise.then(removePendingReq, removePendingReq); + + + if (config.cache && config.method == 'GET') { + cache = isObject(config.cache) ? 
config.cache : defaultCache; + } + + if (cache) { + cachedResp = cache.get(url); + if (cachedResp) { + if (cachedResp.then) { + // cached request has already been sent, but there is no response yet + cachedResp.then(removePendingReq, removePendingReq); + return cachedResp; + } else { + // serving from cache + if (isArray(cachedResp)) { + resolvePromise(cachedResp[1], cachedResp[0], copy(cachedResp[2])); + } else { + resolvePromise(cachedResp, 200, {}); + } + } + } else { + // put the promise for the non-transformed response into cache as a placeholder + cache.put(url, promise); + } + } + + // if we won't have the response in cache, send the request to the backend + if (!cachedResp) { + $httpBackend(config.method, url, reqData, done, reqHeaders, config.timeout, + config.withCredentials); + } + + return promise; + + + /** + * Callback registered to $httpBackend(): + * - caches the response if desired + * - resolves the raw $http promise + * - calls $apply + */ + function done(status, response, headersString) { + if (cache) { + if (isSuccess(status)) { + cache.put(url, [status, response, parseHeaders(headersString)]); + } else { + // remove promise from the cache + cache.remove(url); + } + } + + resolvePromise(response, status, headersString); + $rootScope.$apply(); + } + + + /** + * Resolves the raw $http promise. + */ + function resolvePromise(response, status, headers) { + // normalize internal statuses to 0 + status = Math.max(status, 0); + + (isSuccess(status) ? 
deferred.resolve : deferred.reject)({ + data: response, + status: status, + headers: headersGetter(headers), + config: config + }); + } + + + function removePendingReq() { + var idx = indexOf($http.pendingRequests, config); + if (idx !== -1) $http.pendingRequests.splice(idx, 1); + } + } + + + function buildUrl(url, params) { + if (!params) return url; + var parts = []; + forEachSorted(params, function(value, key) { + if (value == null || value == undefined) return; + if (isObject(value)) { + value = toJson(value); + } + parts.push(encodeURIComponent(key) + '=' + encodeURIComponent(value)); + }); + return url + ((url.indexOf('?') == -1) ? '?' : '&') + parts.join('&'); + } + + + }]; +} + +var XHR = window.XMLHttpRequest || function() { + try { return new ActiveXObject("Msxml2.XMLHTTP.6.0"); } catch (e1) {} + try { return new ActiveXObject("Msxml2.XMLHTTP.3.0"); } catch (e2) {} + try { return new ActiveXObject("Msxml2.XMLHTTP"); } catch (e3) {} + throw new Error("This browser does not support XMLHttpRequest."); +}; + + +/** + * @ngdoc object + * @name ng.$httpBackend + * @requires $browser + * @requires $window + * @requires $document + * + * @description + * HTTP backend used by the {@link ng.$http service} that delegates to + * XMLHttpRequest object or JSONP and deals with browser incompatibilities. + * + * You should never need to use this service directly, instead use the higher-level abstractions: + * {@link ng.$http $http} or {@link ngResource.$resource $resource}. + * + * During testing this implementation is swapped with {@link ngMock.$httpBackend mock + * $httpBackend} which can be trained with responses. 
+ */ +function $HttpBackendProvider() { + this.$get = ['$browser', '$window', '$document', function($browser, $window, $document) { + return createHttpBackend($browser, XHR, $browser.defer, $window.angular.callbacks, + $document[0], $window.location.protocol.replace(':', '')); + }]; +} + +function createHttpBackend($browser, XHR, $browserDefer, callbacks, rawDocument, locationProtocol) { + // TODO(vojta): fix the signature + return function(method, url, post, callback, headers, timeout, withCredentials) { + $browser.$$incOutstandingRequestCount(); + url = url || $browser.url(); + + if (lowercase(method) == 'jsonp') { + var callbackId = '_' + (callbacks.counter++).toString(36); + callbacks[callbackId] = function(data) { + callbacks[callbackId].data = data; + }; + + jsonpReq(url.replace('JSON_CALLBACK', 'angular.callbacks.' + callbackId), + function() { + if (callbacks[callbackId].data) { + completeRequest(callback, 200, callbacks[callbackId].data); + } else { + completeRequest(callback, -2); + } + delete callbacks[callbackId]; + }); + } else { + var xhr = new XHR(); + xhr.open(method, url, true); + forEach(headers, function(value, key) { + if (value) xhr.setRequestHeader(key, value); + }); + + var status; + + // In IE6 and 7, this might be called synchronously when xhr.send below is called and the + // response is in the cache. the promise api will ensure that to the app code the api is + // always async + xhr.onreadystatechange = function() { + if (xhr.readyState == 4) { + var responseHeaders = xhr.getAllResponseHeaders(); + + // TODO(vojta): remove once Firefox 21 gets released. + // begin: workaround to overcome Firefox CORS http response headers bug + // https://bugzilla.mozilla.org/show_bug.cgi?id=608735 + // Firefox already patched in nightly. Should land in Firefox 21. 
+ + // CORS "simple response headers" http://www.w3.org/TR/cors/ + var value, + simpleHeaders = ["Cache-Control", "Content-Language", "Content-Type", + "Expires", "Last-Modified", "Pragma"]; + if (!responseHeaders) { + responseHeaders = ""; + forEach(simpleHeaders, function (header) { + var value = xhr.getResponseHeader(header); + if (value) { + responseHeaders += header + ": " + value + "\n"; + } + }); + } + // end of the workaround. + + completeRequest(callback, status || xhr.status, xhr.responseText, + responseHeaders); + } + }; + + if (withCredentials) { + xhr.withCredentials = true; + } + + xhr.send(post || ''); + + if (timeout > 0) { + $browserDefer(function() { + status = -1; + xhr.abort(); + }, timeout); + } + } + + + function completeRequest(callback, status, response, headersString) { + // URL_MATCH is defined in src/service/location.js + var protocol = (url.match(URL_MATCH) || ['', locationProtocol])[1]; + + // fix status code for file protocol (it's always 0) + status = (protocol == 'file') ? (response ? 200 : 404) : status; + + // normalize IE bug (http://bugs.jquery.com/ticket/1450) + status = status == 1223 ? 
204 : status; + + callback(status, response, headersString); + $browser.$$completeOutstandingRequest(noop); + } + }; + + function jsonpReq(url, done) { + // we can't use jQuery/jqLite here because jQuery does crazy shit with script elements, e.g.: + // - fetches local scripts via XHR and evals them + // - adds and immediately removes script elements from the document + var script = rawDocument.createElement('script'), + doneWrapper = function() { + rawDocument.body.removeChild(script); + if (done) done(); + }; + + script.type = 'text/javascript'; + script.src = url; + + if (msie) { + script.onreadystatechange = function() { + if (/loaded|complete/.test(script.readyState)) doneWrapper(); + }; + } else { + script.onload = script.onerror = doneWrapper; + } + + rawDocument.body.appendChild(script); + } +} + +/** + * @ngdoc object + * @name ng.$locale + * + * @description + * $locale service provides localization rules for various Angular components. As of right now the + * only public api is: + * + * * `id` – `{string}` – locale id formatted as `languageId-countryId` (e.g. 
`en-us`) + */ +function $LocaleProvider(){ + this.$get = function() { + return { + id: 'en-us', + + NUMBER_FORMATS: { + DECIMAL_SEP: '.', + GROUP_SEP: ',', + PATTERNS: [ + { // Decimal Pattern + minInt: 1, + minFrac: 0, + maxFrac: 3, + posPre: '', + posSuf: '', + negPre: '-', + negSuf: '', + gSize: 3, + lgSize: 3 + },{ //Currency Pattern + minInt: 1, + minFrac: 2, + maxFrac: 2, + posPre: '\u00A4', + posSuf: '', + negPre: '(\u00A4', + negSuf: ')', + gSize: 3, + lgSize: 3 + } + ], + CURRENCY_SYM: '$' + }, + + DATETIME_FORMATS: { + MONTH: 'January,February,March,April,May,June,July,August,September,October,November,December' + .split(','), + SHORTMONTH: 'Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec'.split(','), + DAY: 'Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday'.split(','), + SHORTDAY: 'Sun,Mon,Tue,Wed,Thu,Fri,Sat'.split(','), + AMPMS: ['AM','PM'], + medium: 'MMM d, y h:mm:ss a', + short: 'M/d/yy h:mm a', + fullDate: 'EEEE, MMMM d, y', + longDate: 'MMMM d, y', + mediumDate: 'MMM d, y', + shortDate: 'M/d/yy', + mediumTime: 'h:mm:ss a', + shortTime: 'h:mm a' + }, + + pluralCat: function(num) { + if (num === 1) { + return 'one'; + } + return 'other'; + } + }; + }; +} + +function $TimeoutProvider() { + this.$get = ['$rootScope', '$browser', '$q', '$exceptionHandler', + function($rootScope, $browser, $q, $exceptionHandler) { + var deferreds = {}; + + + /** + * @ngdoc function + * @name ng.$timeout + * @requires $browser + * + * @description + * Angular's wrapper for `window.setTimeout`. The `fn` function is wrapped into a try/catch + * block and delegates any exceptions to + * {@link ng.$exceptionHandler $exceptionHandler} service. + * + * The return value of registering a timeout function is a promise, which will be resolved when + * the timeout is reached and the timeout function is executed. + * + * To cancel a timeout request, call `$timeout.cancel(promise)`. 
+ * + * In tests you can use {@link ngMock.$timeout `$timeout.flush()`} to + * synchronously flush the queue of deferred functions. + * + * @param {function()} fn A function, whose execution should be delayed. + * @param {number=} [delay=0] Delay in milliseconds. + * @param {boolean=} [invokeApply=true] If set to `false` skips model dirty checking, otherwise + * will invoke `fn` within the {@link ng.$rootScope.Scope#$apply $apply} block. + * @returns {Promise} Promise that will be resolved when the timeout is reached. The value this + * promise will be resolved with is the return value of the `fn` function. + */ + function timeout(fn, delay, invokeApply) { + var deferred = $q.defer(), + promise = deferred.promise, + skipApply = (isDefined(invokeApply) && !invokeApply), + timeoutId, cleanup; + + timeoutId = $browser.defer(function() { + try { + deferred.resolve(fn()); + } catch(e) { + deferred.reject(e); + $exceptionHandler(e); + } + + if (!skipApply) $rootScope.$apply(); + }, delay); + + cleanup = function() { + delete deferreds[promise.$$timeoutId]; + }; + + promise.$$timeoutId = timeoutId; + deferreds[timeoutId] = deferred; + promise.then(cleanup, cleanup); + + return promise; + } + + + /** + * @ngdoc function + * @name ng.$timeout#cancel + * @methodOf ng.$timeout + * + * @description + * Cancels a task associated with the `promise`. As a result of this, the promise will be + * resolved with a rejection. + * + * @param {Promise=} promise Promise returned by the `$timeout` function. + * @returns {boolean} Returns `true` if the task hasn't executed yet and was successfully + * canceled. 
+ */ + timeout.cancel = function(promise) { + if (promise && promise.$$timeoutId in deferreds) { + deferreds[promise.$$timeoutId].reject('canceled'); + return $browser.defer.cancel(promise.$$timeoutId); + } + return false; + }; + + return timeout; + }]; +} + +/** + * @ngdoc object + * @name ng.$filterProvider + * @description + * + * Filters are just functions which transform input to an output. However filters need to be Dependency Injected. To + * achieve this a filter definition consists of a factory function which is annotated with dependencies and is + * responsible for creating a filter function. + * + *
+ *   // Filter registration
+ *   function MyModule($provide, $filterProvider) {
+ *     // create a service to demonstrate injection (not always needed)
+ *     $provide.value('greet', function(name){
+ *       return 'Hello ' + name + '!';
+ *     });
+ *
+ *     // register a filter factory which uses the
+ *     // greet service to demonstrate DI.
+ *     $filterProvider.register('greet', function(greet){
+ *       // return the filter function which uses the greet service
+ *       // to generate salutation
+ *       return function(text) {
+ *         // filters need to be forgiving so check input validity
+ *         return text && greet(text) || text;
+ *       };
+ *     });
+ *   }
+ * 
+ * + * The filter function is registered with the `$injector` under the filter name suffixed with `Filter`. + * 
+ *   it('should be the same instance', inject(
+ *     function($filterProvider) {
+ *       $filterProvider.register('reverse', function(){
+ *         return ...;
+ *       });
+ *     },
+ *     function($filter, reverseFilter) {
+ *       expect($filter('reverse')).toBe(reverseFilter);
+ *     });
+ * 
+ * + * + * For more information about how angular filters work, and how to create your own filters, see + * {@link guide/dev_guide.templates.filters Understanding Angular Filters} in the angular Developer + * Guide. + */ +/** + * @ngdoc method + * @name ng.$filterProvider#register + * @methodOf ng.$filterProvider + * @description + * Register filter factory function. + * + * @param {String} name Name of the filter. + * @param {function} fn The filter factory function which is injectable. + */ + + +/** + * @ngdoc function + * @name ng.$filter + * @function + * @description + * Filters are used for formatting data displayed to the user. + * + * The general syntax in templates is as follows: + * + * {{ expression [| filter_name[:parameter_value] ... ] }} + * + * @param {String} name Name of the filter function to retrieve + * @return {Function} the filter function + */ +$FilterProvider.$inject = ['$provide']; +function $FilterProvider($provide) { + var suffix = 'Filter'; + + function register(name, factory) { + return $provide.factory(name + suffix, factory); + } + this.register = register; + + this.$get = ['$injector', function($injector) { + return function(name) { + return $injector.get(name + suffix); + } + }]; + + //////////////////////////////////////// + + register('currency', currencyFilter); + register('date', dateFilter); + register('filter', filterFilter); + register('json', jsonFilter); + register('limitTo', limitToFilter); + register('lowercase', lowercaseFilter); + register('number', numberFilter); + register('orderBy', orderByFilter); + register('uppercase', uppercaseFilter); +} + +/** + * @ngdoc filter + * @name ng.filter:filter + * @function + * + * @description + * Selects a subset of items from `array` and returns it as a new array. + * + * Note: This function is used to augment the `Array` type in Angular expressions. See + * {@link ng.$filter} for more information about Angular arrays. + * + * @param {Array} array The source array. 
+ * @param {string|Object|function()} expression The predicate to be used for selecting items from + * `array`. + * + * Can be one of: + * + * - `string`: Predicate that results in a substring match using the value of `expression` + * string. All strings or objects with string properties in `array` that contain this string + * will be returned. The predicate can be negated by prefixing the string with `!`. + * + * - `Object`: A pattern object can be used to filter specific properties on objects contained + * by `array`. For example `{name:"M", phone:"1"}` predicate will return an array of items + * which have property `name` containing "M" and property `phone` containing "1". A special + * property name `$` can be used (as in `{$:"text"}`) to accept a match against any + * property of the object. That's equivalent to the simple substring match with a `string` + * as described above. + * + * - `function`: A predicate function can be used to write arbitrary filters. The function is + * called for each element of `array`. The final result is an array of those elements that + * the predicate returned true for. + * + * @example + + +
+ + Search: + + + + + + +
NamePhone
{{friend.name}}{{friend.phone}}
+
+ Any:
+ Name only
+ Phone only
+ + + + + + +
NamePhone
{{friend.name}}{{friend.phone}}
+
+ + it('should search across all fields when filtering with a string', function() { + input('searchText').enter('m'); + expect(repeater('#searchTextResults tr', 'friend in friends').column('friend.name')). + toEqual(['Mary', 'Mike', 'Adam']); + + input('searchText').enter('76'); + expect(repeater('#searchTextResults tr', 'friend in friends').column('friend.name')). + toEqual(['John', 'Julie']); + }); + + it('should search in specific fields when filtering with a predicate object', function() { + input('search.$').enter('i'); + expect(repeater('#searchObjResults tr', 'friend in friends').column('friend.name')). + toEqual(['Mary', 'Mike', 'Julie']); + }); + +
+ */ +function filterFilter() { + return function(array, expression) { + if (!isArray(array)) return array; + var predicates = []; + predicates.check = function(value) { + for (var j = 0; j < predicates.length; j++) { + if(!predicates[j](value)) { + return false; + } + } + return true; + }; + var search = function(obj, text){ + if (text.charAt(0) === '!') { + return !search(obj, text.substr(1)); + } + switch (typeof obj) { + case "boolean": + case "number": + case "string": + return ('' + obj).toLowerCase().indexOf(text) > -1; + case "object": + for ( var objKey in obj) { + if (objKey.charAt(0) !== '$' && search(obj[objKey], text)) { + return true; + } + } + return false; + case "array": + for ( var i = 0; i < obj.length; i++) { + if (search(obj[i], text)) { + return true; + } + } + return false; + default: + return false; + } + }; + switch (typeof expression) { + case "boolean": + case "number": + case "string": + expression = {$:expression}; + case "object": + for (var key in expression) { + if (key == '$') { + (function() { + var text = (''+expression[key]).toLowerCase(); + if (!text) return; + predicates.push(function(value) { + return search(value, text); + }); + })(); + } else { + (function() { + var path = key; + var text = (''+expression[key]).toLowerCase(); + if (!text) return; + predicates.push(function(value) { + return search(getter(value, path), text); + }); + })(); + } + } + break; + case 'function': + predicates.push(expression); + break; + default: + return array; + } + var filtered = []; + for ( var j = 0; j < array.length; j++) { + var value = array[j]; + if (predicates.check(value)) { + filtered.push(value); + } + } + return filtered; + } +} + +/** + * @ngdoc filter + * @name ng.filter:currency + * @function + * + * @description + * Formats a number as a currency (ie $1,234.56). When no currency symbol is provided, default + * symbol for current locale is used. + * + * @param {number} amount Input to filter. 
+ * @param {string=} symbol Currency symbol or identifier to be displayed. + * @returns {string} Formatted number. + * + * + * @example + + + +
+
+ default currency symbol ($): {{amount | currency}}
+ custom currency identifier (USD$): {{amount | currency:"USD$"}} +
+
+ + it('should init with 1234.56', function() { + expect(binding('amount | currency')).toBe('$1,234.56'); + expect(binding('amount | currency:"USD$"')).toBe('USD$1,234.56'); + }); + it('should update', function() { + input('amount').enter('-1234'); + expect(binding('amount | currency')).toBe('($1,234.00)'); + expect(binding('amount | currency:"USD$"')).toBe('(USD$1,234.00)'); + }); + +
+ */ +currencyFilter.$inject = ['$locale']; +function currencyFilter($locale) { + var formats = $locale.NUMBER_FORMATS; + return function(amount, currencySymbol){ + if (isUndefined(currencySymbol)) currencySymbol = formats.CURRENCY_SYM; + return formatNumber(amount, formats.PATTERNS[1], formats.GROUP_SEP, formats.DECIMAL_SEP, 2). + replace(/\u00A4/g, currencySymbol); + }; +} + +/** + * @ngdoc filter + * @name ng.filter:number + * @function + * + * @description + * Formats a number as text. + * + * If the input is not a number an empty string is returned. + * + * @param {number|string} number Number to format. + * @param {(number|string)=} [fractionSize=2] Number of decimal places to round the number to. + * @returns {string} Number rounded to decimalPlaces and places a “,” after each third digit. + * + * @example + + + +
+ Enter number:
+ Default formatting: {{val | number}}
+ No fractions: {{val | number:0}}
+ Negative number: {{-val | number:4}} +
+
+ + it('should format numbers', function() { + expect(binding('val | number')).toBe('1,234.568'); + expect(binding('val | number:0')).toBe('1,235'); + expect(binding('-val | number:4')).toBe('-1,234.5679'); + }); + + it('should update', function() { + input('val').enter('3374.333'); + expect(binding('val | number')).toBe('3,374.333'); + expect(binding('val | number:0')).toBe('3,374'); + expect(binding('-val | number:4')).toBe('-3,374.3330'); + }); + +
+ */ + + +numberFilter.$inject = ['$locale']; +function numberFilter($locale) { + var formats = $locale.NUMBER_FORMATS; + return function(number, fractionSize) { + return formatNumber(number, formats.PATTERNS[0], formats.GROUP_SEP, formats.DECIMAL_SEP, + fractionSize); + }; +} + +var DECIMAL_SEP = '.'; +function formatNumber(number, pattern, groupSep, decimalSep, fractionSize) { + if (isNaN(number) || !isFinite(number)) return ''; + + var isNegative = number < 0; + number = Math.abs(number); + var numStr = number + '', + formatedText = '', + parts = []; + + var hasExponent = false; + if (numStr.indexOf('e') !== -1) { + var match = numStr.match(/([\d\.]+)e(-?)(\d+)/); + if (match && match[2] == '-' && match[3] > fractionSize + 1) { + numStr = '0'; + } else { + formatedText = numStr; + hasExponent = true; + } + } + + if (!hasExponent) { + var fractionLen = (numStr.split(DECIMAL_SEP)[1] || '').length; + + // determine fractionSize if it is not specified + if (isUndefined(fractionSize)) { + fractionSize = Math.min(Math.max(pattern.minFrac, fractionLen), pattern.maxFrac); + } + + var pow = Math.pow(10, fractionSize); + number = Math.round(number * pow) / pow; + var fraction = ('' + number).split(DECIMAL_SEP); + var whole = fraction[0]; + fraction = fraction[1] || ''; + + var pos = 0, + lgroup = pattern.lgSize, + group = pattern.gSize; + + if (whole.length >= (lgroup + group)) { + pos = whole.length - lgroup; + for (var i = 0; i < pos; i++) { + if ((pos - i)%group === 0 && i !== 0) { + formatedText += groupSep; + } + formatedText += whole.charAt(i); + } + } + + for (i = pos; i < whole.length; i++) { + if ((whole.length - i)%lgroup === 0 && i !== 0) { + formatedText += groupSep; + } + formatedText += whole.charAt(i); + } + + // format fraction part. + while(fraction.length < fractionSize) { + fraction += '0'; + } + + if (fractionSize && fractionSize !== "0") formatedText += decimalSep + fraction.substr(0, fractionSize); + } + + parts.push(isNegative ? 
pattern.negPre : pattern.posPre); + parts.push(formatedText); + parts.push(isNegative ? pattern.negSuf : pattern.posSuf); + return parts.join(''); +} + +function padNumber(num, digits, trim) { + var neg = ''; + if (num < 0) { + neg = '-'; + num = -num; + } + num = '' + num; + while(num.length < digits) num = '0' + num; + if (trim) + num = num.substr(num.length - digits); + return neg + num; +} + + +function dateGetter(name, size, offset, trim) { + offset = offset || 0; + return function(date) { + var value = date['get' + name](); + if (offset > 0 || value > -offset) + value += offset; + if (value === 0 && offset == -12 ) value = 12; + return padNumber(value, size, trim); + }; +} + +function dateStrGetter(name, shortForm) { + return function(date, formats) { + var value = date['get' + name](); + var get = uppercase(shortForm ? ('SHORT' + name) : name); + + return formats[get][value]; + }; +} + +function timeZoneGetter(date) { + var zone = -1 * date.getTimezoneOffset(); + var paddedZone = (zone >= 0) ? "+" : ""; + + paddedZone += padNumber(Math[zone > 0 ? 'floor' : 'ceil'](zone / 60), 2) + + padNumber(Math.abs(zone % 60), 2); + + return paddedZone; +} + +function ampmGetter(date, formats) { + return date.getHours() < 12 ? 
formats.AMPMS[0] : formats.AMPMS[1]; +} + +var DATE_FORMATS = { + yyyy: dateGetter('FullYear', 4), + yy: dateGetter('FullYear', 2, 0, true), + y: dateGetter('FullYear', 1), + MMMM: dateStrGetter('Month'), + MMM: dateStrGetter('Month', true), + MM: dateGetter('Month', 2, 1), + M: dateGetter('Month', 1, 1), + dd: dateGetter('Date', 2), + d: dateGetter('Date', 1), + HH: dateGetter('Hours', 2), + H: dateGetter('Hours', 1), + hh: dateGetter('Hours', 2, -12), + h: dateGetter('Hours', 1, -12), + mm: dateGetter('Minutes', 2), + m: dateGetter('Minutes', 1), + ss: dateGetter('Seconds', 2), + s: dateGetter('Seconds', 1), + EEEE: dateStrGetter('Day'), + EEE: dateStrGetter('Day', true), + a: ampmGetter, + Z: timeZoneGetter +}; + +var DATE_FORMATS_SPLIT = /((?:[^yMdHhmsaZE']+)|(?:'(?:[^']|'')*')|(?:E+|y+|M+|d+|H+|h+|m+|s+|a|Z))(.*)/, + NUMBER_STRING = /^\d+$/; + +/** + * @ngdoc filter + * @name ng.filter:date + * @function + * + * @description + * Formats `date` to a string based on the requested `format`. + * + * `format` string can be composed of the following elements: + * + * * `'yyyy'`: 4 digit representation of year (e.g. AD 1 => 0001, AD 2010 => 2010) + * * `'yy'`: 2 digit representation of year, padded (00-99). (e.g. AD 2001 => 01, AD 2010 => 10) + * * `'y'`: 1 digit representation of year, e.g. 
(AD 1 => 1, AD 199 => 199) + * * `'MMMM'`: Month in year (January-December) + * * `'MMM'`: Month in year (Jan-Dec) + * * `'MM'`: Month in year, padded (01-12) + * * `'M'`: Month in year (1-12) + * * `'dd'`: Day in month, padded (01-31) + * * `'d'`: Day in month (1-31) + * * `'EEEE'`: Day in Week, (Sunday-Saturday) + * * `'EEE'`: Day in Week, (Sun-Sat) + * * `'HH'`: Hour in day, padded (00-23) + * * `'H'`: Hour in day (0-23) + * * `'hh'`: Hour in am/pm, padded (01-12) + * * `'h'`: Hour in am/pm, (1-12) + * * `'mm'`: Minute in hour, padded (00-59) + * * `'m'`: Minute in hour (0-59) + * * `'ss'`: Second in minute, padded (00-59) + * * `'s'`: Second in minute (0-59) + * * `'a'`: am/pm marker + * * `'Z'`: 4 digit (+sign) representation of the timezone offset (-1200-+1200) + * + * `format` string can also be one of the following predefined + * {@link guide/i18n localizable formats}: + * + * * `'medium'`: equivalent to `'MMM d, y h:mm:ss a'` for en_US locale + * (e.g. Sep 3, 2010 12:05:08 pm) + * * `'short'`: equivalent to `'M/d/yy h:mm a'` for en_US locale (e.g. 9/3/10 12:05 pm) + * * `'fullDate'`: equivalent to `'EEEE, MMMM d,y'` for en_US locale + * (e.g. Friday, September 3, 2010) + * * `'longDate'`: equivalent to `'MMMM d, y'` for en_US locale (e.g. September 3, 2010) + * * `'mediumDate'`: equivalent to `'MMM d, y'` for en_US locale (e.g. Sep 3, 2010) + * * `'shortDate'`: equivalent to `'M/d/yy'` for en_US locale (e.g. 9/3/10) + * * `'mediumTime'`: equivalent to `'h:mm:ss a'` for en_US locale (e.g. 12:05:08 pm) + * * `'shortTime'`: equivalent to `'h:mm a'` for en_US locale (e.g. 12:05 pm) + * + * `format` string can contain literal values. These need to be quoted with single quotes (e.g. + * `"h 'in the morning'"`). In order to output single quote, use two single quotes in a sequence + * (e.g. `"h o''clock"`). 
+ * + * @param {(Date|number|string)} date Date to format either as Date object, milliseconds (string or + * number) or various ISO 8601 datetime string formats (e.g. yyyy-MM-ddTHH:mm:ss.SSSZ and its + * shorter versions like yyyy-MM-ddTHH:mmZ, yyyy-MM-dd or yyyyMMddTHHmmssZ). If no timezone is + * specified in the string input, the time is considered to be in the local timezone. + * @param {string=} format Formatting rules (see Description). If not specified, + * `mediumDate` is used. + * @returns {string} Formatted string or the input if input is not recognized as date/millis. + * + * @example + + + {{1288323623006 | date:'medium'}}: + {{1288323623006 | date:'medium'}}
+ {{1288323623006 | date:'yyyy-MM-dd HH:mm:ss Z'}}: + {{1288323623006 | date:'yyyy-MM-dd HH:mm:ss Z'}}
+ {{1288323623006 | date:'MM/dd/yyyy @ h:mma'}}: + {{'1288323623006' | date:'MM/dd/yyyy @ h:mma'}}
+
+ + it('should format date', function() { + expect(binding("1288323623006 | date:'medium'")). + toMatch(/Oct 2\d, 2010 \d{1,2}:\d{2}:\d{2} (AM|PM)/); + expect(binding("1288323623006 | date:'yyyy-MM-dd HH:mm:ss Z'")). + toMatch(/2010\-10\-2\d \d{2}:\d{2}:\d{2} (\-|\+)?\d{4}/); + expect(binding("'1288323623006' | date:'MM/dd/yyyy @ h:mma'")). + toMatch(/10\/2\d\/2010 @ \d{1,2}:\d{2}(AM|PM)/); + }); + +
+ */ +dateFilter.$inject = ['$locale']; +function dateFilter($locale) { + + + var R_ISO8601_STR = /^(\d{4})-?(\d\d)-?(\d\d)(?:T(\d\d)(?::?(\d\d)(?::?(\d\d)(?:\.(\d+))?)?)?(Z|([+-])(\d\d):?(\d\d))?)?$/; + function jsonStringToDate(string){ + var match; + if (match = string.match(R_ISO8601_STR)) { + var date = new Date(0), + tzHour = 0, + tzMin = 0; + if (match[9]) { + tzHour = int(match[9] + match[10]); + tzMin = int(match[9] + match[11]); + } + date.setUTCFullYear(int(match[1]), int(match[2]) - 1, int(match[3])); + date.setUTCHours(int(match[4]||0) - tzHour, int(match[5]||0) - tzMin, int(match[6]||0), int(match[7]||0)); + return date; + } + return string; + } + + + return function(date, format) { + var text = '', + parts = [], + fn, match; + + format = format || 'mediumDate'; + format = $locale.DATETIME_FORMATS[format] || format; + if (isString(date)) { + if (NUMBER_STRING.test(date)) { + date = int(date); + } else { + date = jsonStringToDate(date); + } + } + + if (isNumber(date)) { + date = new Date(date); + } + + if (!isDate(date)) { + return date; + } + + while(format) { + match = DATE_FORMATS_SPLIT.exec(format); + if (match) { + parts = concat(parts, match, 1); + format = parts.pop(); + } else { + parts.push(format); + format = null; + } + } + + forEach(parts, function(value){ + fn = DATE_FORMATS[value]; + text += fn ? fn(date, $locale.DATETIME_FORMATS) + : value.replace(/(^'|'$)/g, '').replace(/''/g, "'"); + }); + + return text; + }; +} + + +/** + * @ngdoc filter + * @name ng.filter:json + * @function + * + * @description + * Allows you to convert a JavaScript object into JSON string. + * + * This filter is mostly useful for debugging. When using the double curly {{value}} notation + * the binding is automatically converted to JSON. + * + * @param {*} object Any JavaScript object (including arrays and primitive types) to filter. + * @returns {string} JSON string. + * + * + * @example: + + +
{{ {'name':'value'} | json }}
+
+ + it('should jsonify filtered objects', function() { + expect(binding("{'name':'value'}")).toMatch(/\{\n "name": ?"value"\n}/); + }); + +
+ * + */ +function jsonFilter() { + return function(object) { + return toJson(object, true); + }; +} + + +/** + * @ngdoc filter + * @name ng.filter:lowercase + * @function + * @description + * Converts string to lowercase. + * @see angular.lowercase + */ +var lowercaseFilter = valueFn(lowercase); + + +/** + * @ngdoc filter + * @name ng.filter:uppercase + * @function + * @description + * Converts string to uppercase. + * @see angular.uppercase + */ +var uppercaseFilter = valueFn(uppercase); + +/** + * @ngdoc function + * @name ng.filter:limitTo + * @function + * + * @description + * Creates a new array containing only a specified number of elements in an array. The elements + * are taken from either the beginning or the end of the source array, as specified by the + * value and sign (positive or negative) of `limit`. + * + * Note: This function is used to augment the `Array` type in Angular expressions. See + * {@link ng.$filter} for more information about Angular arrays. + * + * @param {Array} array Source array to be limited. + * @param {string|Number} limit The length of the returned array. If the `limit` number is + * positive, `limit` number of items from the beginning of the source array are copied. + * If the number is negative, `limit` number of items from the end of the source array are + * copied. The `limit` will be trimmed if it exceeds `array.length` + * @returns {Array} A new sub-array of length `limit` or less if input array had less than `limit` + * elements. + * + * @example + + + +
+ Limit {{numbers}} to: +

Output: {{ numbers | limitTo:limit }}

+
+
+ + it('should limit the numer array to first three items', function() { + expect(element('.doc-example-live input[ng-model=limit]').val()).toBe('3'); + expect(binding('numbers | limitTo:limit')).toEqual('[1,2,3]'); + }); + + it('should update the output when -3 is entered', function() { + input('limit').enter(-3); + expect(binding('numbers | limitTo:limit')).toEqual('[7,8,9]'); + }); + + it('should not exceed the maximum size of input array', function() { + input('limit').enter(100); + expect(binding('numbers | limitTo:limit')).toEqual('[1,2,3,4,5,6,7,8,9]'); + }); + +
+ */ +function limitToFilter(){ + return function(array, limit) { + if (!(array instanceof Array)) return array; + limit = int(limit); + var out = [], + i, n; + + // check that array is iterable + if (!array || !(array instanceof Array)) + return out; + + // if abs(limit) exceeds maximum length, trim it + if (limit > array.length) + limit = array.length; + else if (limit < -array.length) + limit = -array.length; + + if (limit > 0) { + i = 0; + n = limit; + } else { + i = array.length + limit; + n = array.length; + } + + for (; i} expression A predicate to be + * used by the comparator to determine the order of elements. + * + * Can be one of: + * + * - `function`: Getter function. The result of this function will be sorted using the + * `<`, `=`, `>` operator. + * - `string`: An Angular expression which evaluates to an object to order by, such as 'name' + * to sort by a property called 'name'. Optionally prefixed with `+` or `-` to control + * ascending or descending sort order (for example, +name or -name). + * - `Array`: An array of function or string predicates. The first predicate in the array + * is used for sorting, but when two items are equivalent, the next predicate is used. + * + * @param {boolean=} reverse Reverse the order the array. + * @returns {Array} Sorted copy of the source array. + * + * @example + + + +
+
Sorting predicate = {{predicate}}; reverse = {{reverse}}
+
+ [ unsorted ] + + + + + + + + + + + +
Name + (^)Phone NumberAge
{{friend.name}}{{friend.phone}}{{friend.age}}
+
+
+ + it('should be reverse ordered by aged', function() { + expect(binding('predicate')).toBe('-age'); + expect(repeater('table.friend', 'friend in friends').column('friend.age')). + toEqual(['35', '29', '21', '19', '10']); + expect(repeater('table.friend', 'friend in friends').column('friend.name')). + toEqual(['Adam', 'Julie', 'Mike', 'Mary', 'John']); + }); + + it('should reorder the table when user selects different predicate', function() { + element('.doc-example-live a:contains("Name")').click(); + expect(repeater('table.friend', 'friend in friends').column('friend.name')). + toEqual(['Adam', 'John', 'Julie', 'Mary', 'Mike']); + expect(repeater('table.friend', 'friend in friends').column('friend.age')). + toEqual(['35', '10', '29', '19', '21']); + + element('.doc-example-live a:contains("Phone")').click(); + expect(repeater('table.friend', 'friend in friends').column('friend.phone')). + toEqual(['555-9876', '555-8765', '555-5678', '555-4321', '555-1212']); + expect(repeater('table.friend', 'friend in friends').column('friend.name')). + toEqual(['Mary', 'Julie', 'Adam', 'Mike', 'John']); + }); + +
+ */ +orderByFilter.$inject = ['$parse']; +function orderByFilter($parse){ + return function(array, sortPredicate, reverseOrder) { + if (!isArray(array)) return array; + if (!sortPredicate) return array; + sortPredicate = isArray(sortPredicate) ? sortPredicate: [sortPredicate]; + sortPredicate = map(sortPredicate, function(predicate){ + var descending = false, get = predicate || identity; + if (isString(predicate)) { + if ((predicate.charAt(0) == '+' || predicate.charAt(0) == '-')) { + descending = predicate.charAt(0) == '-'; + predicate = predicate.substring(1); + } + get = $parse(predicate); + } + return reverseComparator(function(a,b){ + return compare(get(a),get(b)); + }, descending); + }); + var arrayCopy = []; + for ( var i = 0; i < array.length; i++) { arrayCopy.push(array[i]); } + return arrayCopy.sort(reverseComparator(comparator, reverseOrder)); + + function comparator(o1, o2){ + for ( var i = 0; i < sortPredicate.length; i++) { + var comp = sortPredicate[i](o1, o2); + if (comp !== 0) return comp; + } + return 0; + } + function reverseComparator(comp, descending) { + return toBoolean(descending) + ? function(a,b){return comp(b,a);} + : comp; + } + function compare(v1, v2){ + var t1 = typeof v1; + var t2 = typeof v2; + if (t1 == t2) { + if (t1 == "string") v1 = v1.toLowerCase(); + if (t1 == "string") v2 = v2.toLowerCase(); + if (v1 === v2) return 0; + return v1 < v2 ? -1 : 1; + } else { + return t1 < t2 ? -1 : 1; + } + } + } +} + +function ngDirective(directive) { + if (isFunction(directive)) { + directive = { + link: directive + } + } + directive.restrict = directive.restrict || 'AC'; + return valueFn(directive); +} + +/** + * @ngdoc directive + * @name ng.directive:a + * @restrict E + * + * @description + * Modifies the default behavior of html A tag, so that the default action is prevented when href + * attribute is empty. 
+ * + * The reasoning for this change is to allow easy creation of action links with `ngClick` directive + * without changing the location or causing page reloads, e.g.: + * `Save` + */ +var htmlAnchorDirective = valueFn({ + restrict: 'E', + compile: function(element, attr) { + + if (msie <= 8) { + + // turn link into a stylable link in IE + // but only if it doesn't have name attribute, in which case it's an anchor + if (!attr.href && !attr.name) { + attr.$set('href', ''); + } + + // add a comment node to anchors to workaround IE bug that causes element content to be reset + // to new attribute content if attribute is updated with value containing @ and element also + // contains value with @ + // see issue #1949 + element.append(document.createComment('IE fix')); + } + + return function(scope, element) { + element.bind('click', function(event){ + // if we have no href url, then don't navigate anywhere. + if (!element.attr('href')) { + event.preventDefault(); + } + }); + } + } +}); + +/** + * @ngdoc directive + * @name ng.directive:ngHref + * @restrict A + * + * @description + * Using Angular markup like {{hash}} in an href attribute makes + * the page open to a wrong URL, if the user clicks that link before + * angular has a chance to replace the {{hash}} with actual URL, the + * link will be broken and will most likely return a 404 error. + * The `ngHref` directive solves this problem. + * + * The buggy way to write it: + *
+ * 
+ * 
+ * + * The correct way to write it: + *
+ * 
+ * 
+ * + * @element A + * @param {template} ngHref any string which can contain `{{}}` markup. + * + * @example + * This example uses `link` variable inside `href` attribute: + + +
+
link 1 (link, don't reload)
+ link 2 (link, don't reload)
+ link 3 (link, reload!)
+ anchor (link, don't reload)
+ anchor (no link)
+ link (link, change location) + + + it('should execute ng-click but not reload when href without value', function() { + element('#link-1').click(); + expect(input('value').val()).toEqual('1'); + expect(element('#link-1').attr('href')).toBe(""); + }); + + it('should execute ng-click but not reload when href empty string', function() { + element('#link-2').click(); + expect(input('value').val()).toEqual('2'); + expect(element('#link-2').attr('href')).toBe(""); + }); + + it('should execute ng-click and change url when ng-href specified', function() { + expect(element('#link-3').attr('href')).toBe("/123"); + + element('#link-3').click(); + expect(browser().window().path()).toEqual('/123'); + }); + + it('should execute ng-click but not reload when href empty string and name specified', function() { + element('#link-4').click(); + expect(input('value').val()).toEqual('4'); + expect(element('#link-4').attr('href')).toBe(''); + }); + + it('should execute ng-click but not reload when no href but name specified', function() { + element('#link-5').click(); + expect(input('value').val()).toEqual('5'); + expect(element('#link-5').attr('href')).toBe(undefined); + }); + + it('should only change url when only ng-href', function() { + input('value').enter('6'); + expect(element('#link-6').attr('href')).toBe('6'); + + element('#link-6').click(); + expect(browser().location().url()).toEqual('/6'); + }); + + + */ + +/** + * @ngdoc directive + * @name ng.directive:ngSrc + * @restrict A + * + * @description + * Using Angular markup like `{{hash}}` in a `src` attribute doesn't + * work right: The browser will fetch from the URL with the literal + * text `{{hash}}` until Angular replaces the expression inside + * `{{hash}}`. The `ngSrc` directive solves this problem. + * + * The buggy way to write it: + *
+ * 
+ * 
+ * + * The correct way to write it: + *
+ * 
+ * 
+ * + * @element IMG + * @param {template} ngSrc any string which can contain `{{}}` markup. + */ + +/** + * @ngdoc directive + * @name ng.directive:ngDisabled + * @restrict A + * + * @description + * + * The following markup will make the button enabled on Chrome/Firefox but not on IE8 and older IEs: + *
+ * 
+ * + *
+ *
+ * + * The HTML specs do not require browsers to preserve the special attributes such as disabled. + * (The presence of them means true and absence means false) + * This prevents the angular compiler from correctly retrieving the binding expression. + * To solve this problem, we introduce the `ngDisabled` directive. + * + * @example + + + Click me to toggle:
+ +
+ + it('should toggle button', function() { + expect(element('.doc-example-live :button').prop('disabled')).toBeFalsy(); + input('checked').check(); + expect(element('.doc-example-live :button').prop('disabled')).toBeTruthy(); + }); + +
+ * + * @element INPUT + * @param {expression} ngDisabled Angular expression that will be evaluated. + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngChecked + * @restrict A + * + * @description + * The HTML specs do not require browsers to preserve the special attributes such as checked. + * (The presence of them means true and absence means false) + * This prevents the angular compiler from correctly retrieving the binding expression. + * To solve this problem, we introduce the `ngChecked` directive. + * @example + + + Check me to check both:
+ +
+ + it('should check both checkBoxes', function() { + expect(element('.doc-example-live #checkSlave').prop('checked')).toBeFalsy(); + input('master').check(); + expect(element('.doc-example-live #checkSlave').prop('checked')).toBeTruthy(); + }); + +
+ * + * @element INPUT + * @param {expression} ngChecked Angular expression that will be evaluated. + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMultiple + * @restrict A + * + * @description + * The HTML specs do not require browsers to preserve the special attributes such as multiple. + * (The presence of them means true and absence means false) + * This prevents the angular compiler from correctly retrieving the binding expression. + * To solve this problem, we introduce the `ngMultiple` directive. + * + * @example + + + Check me check multiple:
+ +
+ + it('should toggle multiple', function() { + expect(element('.doc-example-live #select').prop('multiple')).toBeFalsy(); + input('checked').check(); + expect(element('.doc-example-live #select').prop('multiple')).toBeTruthy(); + }); + +
+ * + * @element SELECT + * @param {expression} ngMultiple Angular expression that will be evaluated. + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngReadonly + * @restrict A + * + * @description + * The HTML specs do not require browsers to preserve the special attributes such as readonly. + * (The presence of them means true and absence means false) + * This prevents the angular compiler from correctly retrieving the binding expression. + * To solve this problem, we introduce the `ngReadonly` directive. + * @example + + + Check me to make text readonly:
+ +
+ + it('should toggle readonly attr', function() { + expect(element('.doc-example-live :text').prop('readonly')).toBeFalsy(); + input('checked').check(); + expect(element('.doc-example-live :text').prop('readonly')).toBeTruthy(); + }); + +
+ * + * @element INPUT + * @param {string} expression Angular expression that will be evaluated. + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngSelected + * @restrict A + * + * @description + * The HTML specs do not require browsers to preserve the special attributes such as selected. + * (The presence of them means true and absence means false) + * This prevents the angular compiler from correctly retrieving the binding expression. + * To solve this problem, we introduced the `ngSelected` directive. + * @example + + + Check me to select:
+ +
+ + it('should select Greetings!', function() { + expect(element('.doc-example-live #greet').prop('selected')).toBeFalsy(); + input('selected').check(); + expect(element('.doc-example-live #greet').prop('selected')).toBeTruthy(); + }); + +
+ * + * @element OPTION + * @param {string} expression Angular expression that will be evaluated. + */ + + +var ngAttributeAliasDirectives = {}; + + +// boolean attrs are evaluated +forEach(BOOLEAN_ATTR, function(propName, attrName) { + var normalized = directiveNormalize('ng-' + attrName); + ngAttributeAliasDirectives[normalized] = function() { + return { + priority: 100, + compile: function() { + return function(scope, element, attr) { + scope.$watch(attr[normalized], function ngBooleanAttrWatchAction(value) { + attr.$set(attrName, !!value); + }); + }; + } + }; + }; +}); + + +// ng-src, ng-href are interpolated +forEach(['src', 'href'], function(attrName) { + var normalized = directiveNormalize('ng-' + attrName); + ngAttributeAliasDirectives[normalized] = function() { + return { + priority: 99, // it needs to run after the attributes are interpolated + link: function(scope, element, attr) { + attr.$observe(normalized, function(value) { + if (!value) + return; + + attr.$set(attrName, value); + + // on IE, if "ng:src" directive declaration is used and "src" attribute doesn't exist + // then calling element.setAttribute('src', 'foo') doesn't do anything, so we need + // to set the property as well to achieve the desired effect. + // we use attr[attrName] value since $set can sanitize the url. + if (msie) element.prop(attrName, attr[attrName]); + }); + } + }; + }; +}); + +var nullFormCtrl = { + $addControl: noop, + $removeControl: noop, + $setValidity: noop, + $setDirty: noop +}; + +/** + * @ngdoc object + * @name ng.directive:form.FormController + * + * @property {boolean} $pristine True if user has not interacted with the form yet. + * @property {boolean} $dirty True if user has already interacted with the form. + * @property {boolean} $valid True if all of the containing forms and controls are valid. + * @property {boolean} $invalid True if at least one containing control or form is invalid. 
+ * + * @property {Object} $error Is an object hash, containing references to all invalid controls or + * forms, where: + * + * - keys are validation tokens (error names) — such as `required`, `url` or `email`), + * - values are arrays of controls or forms that are invalid with given error. + * + * @description + * `FormController` keeps track of all its controls and nested forms as well as state of them, + * such as being valid/invalid or dirty/pristine. + * + * Each {@link ng.directive:form form} directive creates an instance + * of `FormController`. + * + */ +//asks for $scope to fool the BC controller module +FormController.$inject = ['$element', '$attrs', '$scope']; +function FormController(element, attrs) { + var form = this, + parentForm = element.parent().controller('form') || nullFormCtrl, + invalidCount = 0, // used to easily determine if we are valid + errors = form.$error = {}; + + // init state + form.$name = attrs.name; + form.$dirty = false; + form.$pristine = true; + form.$valid = true; + form.$invalid = false; + + parentForm.$addControl(form); + + // Setup initial state of the control + element.addClass(PRISTINE_CLASS); + toggleValidCss(true); + + // convenience method for easy toggling of classes + function toggleValidCss(isValid, validationErrorKey) { + validationErrorKey = validationErrorKey ? '-' + snake_case(validationErrorKey, '-') : ''; + element. + removeClass((isValid ? INVALID_CLASS : VALID_CLASS) + validationErrorKey). + addClass((isValid ? 
VALID_CLASS : INVALID_CLASS) + validationErrorKey); + } + + form.$addControl = function(control) { + if (control.$name && !form.hasOwnProperty(control.$name)) { + form[control.$name] = control; + } + }; + + form.$removeControl = function(control) { + if (control.$name && form[control.$name] === control) { + delete form[control.$name]; + } + forEach(errors, function(queue, validationToken) { + form.$setValidity(validationToken, true, control); + }); + }; + + form.$setValidity = function(validationToken, isValid, control) { + var queue = errors[validationToken]; + + if (isValid) { + if (queue) { + arrayRemove(queue, control); + if (!queue.length) { + invalidCount--; + if (!invalidCount) { + toggleValidCss(isValid); + form.$valid = true; + form.$invalid = false; + } + errors[validationToken] = false; + toggleValidCss(true, validationToken); + parentForm.$setValidity(validationToken, true, form); + } + } + + } else { + if (!invalidCount) { + toggleValidCss(isValid); + } + if (queue) { + if (includes(queue, control)) return; + } else { + errors[validationToken] = queue = []; + invalidCount++; + toggleValidCss(false, validationToken); + parentForm.$setValidity(validationToken, false, form); + } + queue.push(control); + + form.$valid = false; + form.$invalid = true; + } + }; + + form.$setDirty = function() { + element.removeClass(PRISTINE_CLASS).addClass(DIRTY_CLASS); + form.$dirty = true; + form.$pristine = false; + parentForm.$setDirty(); + }; + +} + + +/** + * @ngdoc directive + * @name ng.directive:ngForm + * @restrict EAC + * + * @description + * Nestable alias of {@link ng.directive:form `form`} directive. HTML + * does not allow nesting of form elements. It is useful to nest forms, for example if the validity of a + * sub-group of controls needs to be determined. + * + * @param {string=} name|ngForm Name of the form. If specified, the form controller will be published into + * related scope, under this name. 
+ * + */ + + /** + * @ngdoc directive + * @name ng.directive:form + * @restrict E + * + * @description + * Directive that instantiates + * {@link ng.directive:form.FormController FormController}. + * + * If `name` attribute is specified, the form controller is published onto the current scope under + * this name. + * + * # Alias: {@link ng.directive:ngForm `ngForm`} + * + * In angular forms can be nested. This means that the outer form is valid when all of the child + * forms are valid as well. However browsers do not allow nesting of `
` elements, for this + * reason angular provides {@link ng.directive:ngForm `ngForm`} alias + * which behaves identical to `` but allows form nesting. + * + * + * # CSS classes + * - `ng-valid` Is set if the form is valid. + * - `ng-invalid` Is set if the form is invalid. + * - `ng-pristine` Is set if the form is pristine. + * - `ng-dirty` Is set if the form is dirty. + * + * + * # Submitting a form and preventing default action + * + * Since the role of forms in client-side Angular applications is different than in classical + * roundtrip apps, it is desirable for the browser not to translate the form submission into a full + * page reload that sends the data to the server. Instead some javascript logic should be triggered + * to handle the form submission in application specific way. + * + * For this reason, Angular prevents the default action (form submission to the server) unless the + * `` element has an `action` attribute specified. + * + * You can use one of the following two ways to specify what javascript method should be called when + * a form is submitted: + * + * - {@link ng.directive:ngSubmit ngSubmit} directive on the form element + * - {@link ng.directive:ngClick ngClick} directive on the first + * button or input field of type submit (input[type=submit]) + * + * To prevent double execution of the handler, use only one of ngSubmit or ngClick directives. 
This + * is because of the following form submission rules coming from the html spec: + * + * - If a form has only one input field then hitting enter in this field triggers form submit + * (`ngSubmit`) + * - if a form has 2+ input fields and no buttons or input[type=submit] then hitting enter + * doesn't trigger submit + * - if a form has one or more input fields and one or more buttons or input[type=submit] then + * hitting enter in any of the input fields will trigger the click handler on the *first* button or + * input[type=submit] (`ngClick`) *and* a submit handler on the enclosing form (`ngSubmit`) + * + * @param {string=} name Name of the form. If specified, the form controller will be published into + * related scope, under this name. + * + * @example + + + + + userType: + Required!
+ userType = {{userType}}
+ myForm.input.$valid = {{myForm.input.$valid}}
+ myForm.input.$error = {{myForm.input.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+ +
+ + it('should initialize to model', function() { + expect(binding('userType')).toEqual('guest'); + expect(binding('myForm.input.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('userType').enter(''); + expect(binding('userType')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + +
+ */ +var formDirectiveFactory = function(isNgForm) { /* builds the `form` directive definition; a truthy isNgForm yields the `ngForm` variant (restrict 'EAC') */ + return ['$timeout', function($timeout) { + var formDirective = { + name: 'form', + restrict: 'E', + controller: FormController, + compile: function() { + return { + pre: function(scope, formElement, attr, controller) { + if (!attr.action) { + // we can't use jq events because if a form is destroyed during submission the default + // action is not prevented. see #1238 + // + // IE 9 is not affected because it doesn't fire a submit event and try to do a full + // page reload if the form was destroyed by submission of the form via a click handler + // on a button in the form. Looks like an IE9 specific bug. + var preventDefaultListener = function(event) { + event.preventDefault + ? event.preventDefault() + : event.returnValue = false; // IE + }; + + addEventListenerFn(formElement[0], 'submit', preventDefaultListener); + + // unregister the preventDefault listener so that we do not leak memory, but defer the removal + // (via $timeout) so that the default action of an in-flight submit is still prevented. + formElement.bind('$destroy', function() { + $timeout(function() { + removeEventListenerFn(formElement[0], 'submit', preventDefaultListener); + }, 0, false); + }); + } + /* publish the form controller on the scope under the form's name / ngForm alias */ + var parentFormCtrl = formElement.parent().controller('form'), + alias = attr.name || attr.ngForm; + + if (alias) { + scope[alias] = controller; + } + if (parentFormCtrl) { + formElement.bind('$destroy', function() { + parentFormCtrl.$removeControl(controller); + if (alias) { + scope[alias] = undefined; + } + extend(controller, nullFormCtrl); // stop propagating child destruction handlers upwards + }); + } + } + }; + } + }; + + return isNgForm ? 
extend(copy(formDirective), {restrict: 'EAC'}) : formDirective; + }]; +}; +/** `form` keeps the default `restrict: 'E'`; `ngForm` is the same definition copied with `restrict: 'EAC'`. */ +var formDirective = formDirectiveFactory(); +var ngFormDirective = formDirectiveFactory(true); +/** Patterns behind the `url`, `email` and `number` validators. NOTE(review): EMAIL_REGEXP only accepts 2-4 letter top-level domains, and NUMBER_REGEXP also matches a lone "." (which parseFloat turns into NaN) - confirm both are intended. */ +var URL_REGEXP = /^(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?$/; +var EMAIL_REGEXP = /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}$/; +var NUMBER_REGEXP = /^\s*(\-|\+)?(\d+|(\d*(\.\d*)))\s*$/; +/** Maps the value of an input's `type` attribute to the function that wires that input to its NgModelController. */ +var inputType = { + + /** + * @ngdoc inputType + * @name ng.directive:input.text + * + * @description + * Standard HTML text input with angular data binding. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {string=} ngRequired Adds `required` attribute and `required` validation constraint to + * the element when the ngRequired expression evaluates to true. Use `ngRequired` instead of + * `required` when you want to data-bind to the `required` attribute. + * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. 
Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ Single word: + + Required! + + Single word only! + + text = {{text}}
+ myForm.input.$valid = {{myForm.input.$valid}}
+ myForm.input.$error = {{myForm.input.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+
+
+ + it('should initialize to model', function() { + expect(binding('text')).toEqual('guest'); + expect(binding('myForm.input.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('text').enter(''); + expect(binding('text')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + + it('should be invalid if multi word', function() { + input('text').enter('hello world'); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + +
+ */ + 'text': textInputType, + + + /** + * @ngdoc inputType + * @name ng.directive:input.number + * + * @description + * Text input with number validation and transformation (valid input is stored in the model as a + * number via `parseFloat`). Sets the `number` validation error key if not a valid number. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} min Sets the `min` validation error key if the value entered is less than `min`. + * @param {string=} max Sets the `max` validation error key if the value entered is greater than `max`. + * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {string=} ngRequired Adds `required` attribute and `required` validation constraint to + * the element when the ngRequired expression evaluates to true. Use `ngRequired` instead of + * `required` when you want to data-bind to the `required` attribute. + * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ Number: + + Required! + + Not valid number! + value = {{value}}
+ myForm.input.$valid = {{myForm.input.$valid}}
+ myForm.input.$error = {{myForm.input.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+
+
+ + it('should initialize to model', function() { + expect(binding('value')).toEqual('12'); + expect(binding('myForm.input.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('value').enter(''); + expect(binding('value')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + + it('should be invalid if over max', function() { + input('value').enter('123'); + expect(binding('value')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + +
+ */ + 'number': numberInputType, + + + /** + * @ngdoc inputType + * @name ng.directive:input.url + * + * @description + * Text input with URL validation. Sets the `url` validation error key if the content is not a + * valid URL (only `ftp`, `http` and `https` URLs match the validation pattern). + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {string=} ngRequired Adds `required` attribute and `required` validation constraint to + * the element when the ngRequired expression evaluates to true. Use `ngRequired` instead of + * `required` when you want to data-bind to the `required` attribute. + * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ URL: + + Required! + + Not valid url! + text = {{text}}
+ myForm.input.$valid = {{myForm.input.$valid}}
+ myForm.input.$error = {{myForm.input.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+ myForm.$error.url = {{!!myForm.$error.url}}
+
+
+ + it('should initialize to model', function() { + expect(binding('text')).toEqual('http://google.com'); + expect(binding('myForm.input.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('text').enter(''); + expect(binding('text')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + + it('should be invalid if not url', function() { + input('text').enter('xxx'); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + +
+ */ + 'url': urlInputType, + + + /** + * @ngdoc inputType + * @name ng.directive:input.email + * + * @description + * Text input with email validation. Sets the `email` validation error key if not a valid email + * address (the validation pattern only accepts top-level domains of 2 to 4 letters). + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {string=} ngRequired Adds `required` attribute and `required` validation constraint to + * the element when the ngRequired expression evaluates to true. Use `ngRequired` instead of + * `required` when you want to data-bind to the `required` attribute. + * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ Email: + + Required! + + Not valid email! + text = {{text}}
+ myForm.input.$valid = {{myForm.input.$valid}}
+ myForm.input.$error = {{myForm.input.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+ myForm.$error.email = {{!!myForm.$error.email}}
+
+
+ + it('should initialize to model', function() { + expect(binding('text')).toEqual('me@example.com'); + expect(binding('myForm.input.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('text').enter(''); + expect(binding('text')).toEqual(''); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + + it('should be invalid if not email', function() { + input('text').enter('xxx'); + expect(binding('myForm.input.$valid')).toEqual('false'); + }); + +
+ */ + 'email': emailInputType, + + + /** + * @ngdoc inputType + * @name ng.directive:input.radio + * + * @description + * HTML radio button. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string} value The value to which the expression should be set when selected. + * @param {string=} name Property name of the form under which the control is published. When no `name` is given, a generated one is set on the element. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ Red
+ Green
+ Blue
+ color = {{color}}
+
+
+ + it('should change state', function() { + expect(binding('color')).toEqual('blue'); + + input('color').select('red'); + expect(binding('color')).toEqual('red'); + }); + +
+ */ + 'radio': radioInputType, + + + /** + * @ngdoc inputType + * @name ng.directive:input.checkbox + * + * @description + * HTML checkbox. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} ngTrueValue The value to which the expression should be set when selected (defaults to `true`). + * @param {string=} ngFalseValue The value to which the expression should be set when not selected (defaults to `false`). + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+ Value1:
+ Value2:
+ value1 = {{value1}}
+ value2 = {{value2}}
+
+
+ + it('should change state', function() { + expect(binding('value1')).toEqual('true'); + expect(binding('value2')).toEqual('YES'); + + input('value1').check(); + input('value2').check(); + expect(binding('value1')).toEqual('false'); + expect(binding('value2')).toEqual('NO'); + }); + +
+ */ + 'checkbox': checkboxInputType, + /* the remaining types get no model wiring (noop) */ + 'hidden': noop, + 'button': noop, + 'submit': noop, + 'reset': noop +}; + +/** True for undefined, '', null and NaN (NaN is the only value for which `value !== value`). */ +function isEmpty(value) { + return isUndefined(value) || value === '' || value === null || value !== value; +} + +/** Wires a text-like input to `ctrl`: DOM events feed the trimmed element value to $setViewValue, $render writes the view value back, and the optional ngPattern / ngMinlength / ngMaxlength validators are registered. */ +function textInputType(scope, element, attr, ctrl, $sniffer, $browser) { + + var listener = function() { + var value = trim(element.val()); + + if (ctrl.$viewValue !== value) { + scope.$apply(function() { + ctrl.$setViewValue(value); + }); + } + }; + + // if the browser does support "input" event, we are fine - except on IE9 which doesn't fire the + // input event on backspace, delete or cut + if ($sniffer.hasEvent('input')) { + element.bind('input', listener); + } else { + var timeout; + + var deferListener = function() { + if (!timeout) { + timeout = $browser.defer(function() { + listener(); + timeout = null; + }); + } + }; + + element.bind('keydown', function(event) { + var key = event.keyCode; + + // ignore + // command modifiers arrows + if (key === 91 || (15 < key && key < 19) || (37 <= key && key <= 40)) return; + + deferListener(); + }); + + // if the user pastes into the input using the mouse, we need the "change" event to catch it + element.bind('change', listener); + + // if user modifies input value using context menu in IE, we need "paste" and "cut" events to catch it + if ($sniffer.hasEvent('paste')) { + element.bind('paste cut', deferListener); + } + } + + + ctrl.$render = function() { + element.val(isEmpty(ctrl.$viewValue) ? 
'' : ctrl.$viewValue); + }; + + // pattern validator: a `/regexp/` literal is compiled once; any other value is evaluated on the scope at each validation + var pattern = attr.ngPattern, + patternValidator; + + var validate = function(regexp, value) { + if (isEmpty(value) || regexp.test(value)) { + ctrl.$setValidity('pattern', true); + return value; + } else { + ctrl.$setValidity('pattern', false); + return undefined; + } + }; + + if (pattern) { + if (pattern.match(/^\/(.*)\/$/)) { + pattern = new RegExp(pattern.substr(1, pattern.length - 2)); + patternValidator = function(value) { + return validate(pattern, value) + }; + } else { + patternValidator = function(value) { + var patternObj = scope.$eval(pattern); + + if (!patternObj || !patternObj.test) { + throw new Error('Expected ' + pattern + ' to be a RegExp but was ' + patternObj); + } + return validate(patternObj, value); + }; + } + + ctrl.$formatters.push(patternValidator); + ctrl.$parsers.push(patternValidator); + } + + // min length validator (empty values pass) + if (attr.ngMinlength) { + var minlength = int(attr.ngMinlength); + var minLengthValidator = function(value) { + if (!isEmpty(value) && value.length < minlength) { + ctrl.$setValidity('minlength', false); + return undefined; + } else { + ctrl.$setValidity('minlength', true); + return value; + } + }; + + ctrl.$parsers.push(minLengthValidator); + ctrl.$formatters.push(minLengthValidator); + } + + // max length validator (empty values pass) + if (attr.ngMaxlength) { + var maxlength = int(attr.ngMaxlength); + var maxLengthValidator = function(value) { + if (!isEmpty(value) && value.length > maxlength) { + ctrl.$setValidity('maxlength', false); + return undefined; + } else { + ctrl.$setValidity('maxlength', true); + return value; + } + }; + + ctrl.$parsers.push(maxLengthValidator); + ctrl.$formatters.push(maxLengthValidator); + } +} +/** Text input whose parsed value is a number: adds the `number`, `min` and `max` validators on top of textInputType. 
*/ +function numberInputType(scope, element, attr, ctrl, $sniffer, $browser) { + textInputType(scope, element, attr, ctrl, $sniffer, $browser); + + ctrl.$parsers.push(function(value) { + var empty = isEmpty(value); + if (empty || NUMBER_REGEXP.test(value)) { 
ctrl.$setValidity('number', true); + return value === '' ? null : (empty ? value : parseFloat(value)); + } else { + ctrl.$setValidity('number', false); + return undefined; + } + }); + /* model -> view: render the number as a string, empty values as '' */ + ctrl.$formatters.push(function(value) { + return isEmpty(value) ? '' : '' + value; + }); + + if (attr.min) { + var min = parseFloat(attr.min); + var minValidator = function(value) { + if (!isEmpty(value) && value < min) { + ctrl.$setValidity('min', false); + return undefined; + } else { + ctrl.$setValidity('min', true); + return value; + } + }; + + ctrl.$parsers.push(minValidator); + ctrl.$formatters.push(minValidator); + } + + if (attr.max) { + var max = parseFloat(attr.max); + var maxValidator = function(value) { + if (!isEmpty(value) && value > max) { + ctrl.$setValidity('max', false); + return undefined; + } else { + ctrl.$setValidity('max', true); + return value; + } + }; + + ctrl.$parsers.push(maxValidator); + ctrl.$formatters.push(maxValidator); + } + + ctrl.$formatters.push(function(value) { + /* pushed last, so it runs first: NgModelController applies formatters from the end of the array */ + if (isEmpty(value) || isNumber(value)) { + ctrl.$setValidity('number', true); + return value; + } else { + ctrl.$setValidity('number', false); + return undefined; + } + }); +} +/** Text input that additionally sets the `url` validity key by testing non-empty values against URL_REGEXP. */ +function urlInputType(scope, element, attr, ctrl, $sniffer, $browser) { + textInputType(scope, element, attr, ctrl, $sniffer, $browser); + + var urlValidator = function(value) { + if (isEmpty(value) || URL_REGEXP.test(value)) { + ctrl.$setValidity('url', true); + return value; + } else { + ctrl.$setValidity('url', false); + return undefined; + } + }; + + ctrl.$formatters.push(urlValidator); + ctrl.$parsers.push(urlValidator); +} +/** Text input that additionally sets the `email` validity key by testing non-empty values against EMAIL_REGEXP. 
*/ +function emailInputType(scope, element, attr, ctrl, $sniffer, $browser) { + textInputType(scope, element, attr, ctrl, $sniffer, $browser); + + var emailValidator = function(value) { + if (isEmpty(value) || EMAIL_REGEXP.test(value)) { + ctrl.$setValidity('email', true); + return value; + } else { + ctrl.$setValidity('email', false); + return undefined; + } + }; + + 
ctrl.$formatters.push(emailValidator); + ctrl.$parsers.push(emailValidator); +} +/** Radio button: a click on a checked button writes `attr.value` to the model; $render checks the button when `attr.value` loosely equals the view value. */ +function radioInputType(scope, element, attr, ctrl) { + // make the name unique, if not defined + if (isUndefined(attr.name)) { + element.attr('name', nextUid()); + } + + element.bind('click', function() { + if (element[0].checked) { + scope.$apply(function() { + ctrl.$setViewValue(attr.value); + }); + } + }); + + ctrl.$render = function() { + var value = attr.value; + element[0].checked = (value == ctrl.$viewValue); + }; + + attr.$observe('value', ctrl.$render); +} +/** Checkbox: the view value is the boolean `checked` state; the parser / formatter translate it to and from ngTrueValue / ngFalseValue (default true / false). */ +function checkboxInputType(scope, element, attr, ctrl) { + var trueValue = attr.ngTrueValue, + falseValue = attr.ngFalseValue; + + if (!isString(trueValue)) trueValue = true; + if (!isString(falseValue)) falseValue = false; + + element.bind('click', function() { + scope.$apply(function() { + ctrl.$setViewValue(element[0].checked); + }); + }); + + ctrl.$render = function() { + element[0].checked = ctrl.$viewValue; + }; + + ctrl.$formatters.push(function(value) { + return value === trueValue; + }); + + ctrl.$parsers.push(function(value) { + return value ? trueValue : falseValue; + }); +} + + +/** + * @ngdoc directive + * @name ng.directive:textarea + * @restrict E + * + * @description + * HTML textarea element control with angular data-binding. The data-binding and validation + * properties of this element are exactly the same as those of the + * {@link ng.directive:input input element}. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. 
+ * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {string=} ngRequired Adds `required` attribute and `required` validation constraint to + * the element when the ngRequired expression evaluates to true. Use `ngRequired` instead of + * `required` when you want to data-bind to the `required` attribute. 
+ * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + */ + + +/** + * @ngdoc directive + * @name ng.directive:input + * @restrict E + * + * @description + * HTML input element control with angular data-binding. Input control follows HTML5 input types + * and polyfills the HTML5 validation behavior for older browsers. + * + * @param {string} ngModel Assignable angular expression to data-bind to. + * @param {string=} name Property name of the form under which the control is published. + * @param {string=} required Sets `required` validation error key if the value is not entered. + * @param {boolean=} ngRequired Sets `required` attribute if set to true + * @param {number=} ngMinlength Sets `minlength` validation error key if the value is shorter than + * minlength. + * @param {number=} ngMaxlength Sets `maxlength` validation error key if the value is longer than + * maxlength. + * @param {string=} ngPattern Sets `pattern` validation error key if the value does not match the + * RegExp pattern expression. Expected value is `/regexp/` for inline patterns or `regexp` for + * patterns defined as scope expressions. + * @param {string=} ngChange Angular expression to be executed when input changes due to user + * interaction with the input element. + * + * @example + + + +
+
+ User name: + + Required!
+ Last name: + + Too short! + + Too long!
+
+
+ user = {{user}}
+ myForm.userName.$valid = {{myForm.userName.$valid}}
+ myForm.userName.$error = {{myForm.userName.$error}}
+ myForm.lastName.$valid = {{myForm.lastName.$valid}}
+ myForm.lastName.$error = {{myForm.lastName.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+ myForm.$error.minlength = {{!!myForm.$error.minlength}}
+ myForm.$error.maxlength = {{!!myForm.$error.maxlength}}
+
+
+ + it('should initialize to model', function() { + expect(binding('user')).toEqual('{"name":"guest","last":"visitor"}'); + expect(binding('myForm.userName.$valid')).toEqual('true'); + expect(binding('myForm.$valid')).toEqual('true'); + }); + + it('should be invalid if empty when required', function() { + input('user.name').enter(''); + expect(binding('user')).toEqual('{"last":"visitor"}'); + expect(binding('myForm.userName.$valid')).toEqual('false'); + expect(binding('myForm.$valid')).toEqual('false'); + }); + + it('should be valid if empty when min length is set', function() { + input('user.last').enter(''); + expect(binding('user')).toEqual('{"name":"guest","last":""}'); + expect(binding('myForm.lastName.$valid')).toEqual('true'); + expect(binding('myForm.$valid')).toEqual('true'); + }); + + it('should be invalid if less than required min length', function() { + input('user.last').enter('xx'); + expect(binding('user')).toEqual('{"name":"guest"}'); + expect(binding('myForm.lastName.$valid')).toEqual('false'); + expect(binding('myForm.lastName.$error')).toMatch(/minlength/); + expect(binding('myForm.$valid')).toEqual('false'); + }); + + it('should be invalid if longer than max length', function() { + input('user.last').enter('some ridiculously long name'); + expect(binding('user')) + .toEqual('{"name":"guest"}'); + expect(binding('myForm.lastName.$valid')).toEqual('false'); + expect(binding('myForm.lastName.$error')).toMatch(/maxlength/); + expect(binding('myForm.$valid')).toEqual('false'); + }); + +
+ */ +var inputDirective = ['$browser', '$sniffer', function($browser, $sniffer) { + return { + restrict: 'E', + require: '?ngModel', + link: function(scope, element, attr, ctrl) { + if (ctrl) { /* ngModel is optional: inputs without it are left alone; unknown `type` values fall back to the text handler */ + (inputType[lowercase(attr.type)] || inputType.text)(scope, element, attr, ctrl, $sniffer, + $browser); + } + } + }; +}]; +/** CSS class names that NgModelController toggles on the control's element. */ +var VALID_CLASS = 'ng-valid', + INVALID_CLASS = 'ng-invalid', + PRISTINE_CLASS = 'ng-pristine', + DIRTY_CLASS = 'ng-dirty'; + +/** + * @ngdoc object + * @name ng.directive:ngModel.NgModelController + * + * @property {string} $viewValue Actual string value in the view. + * @property {*} $modelValue The value in the model, that the control is bound to. + * @property {Array.<Function>} $parsers Whenever the control reads value from the DOM, it executes + * all of these functions, in array order, to sanitize / convert the value as well as validate. + * + * @property {Array.<Function>} $formatters Whenever the model value changes, it executes all of + * these functions, last registered first, to convert the value as well as validate. + * + * @property {Object} $error An object hash with all errors as keys. + * + * @property {boolean} $pristine True if user has not interacted with the control yet. + * @property {boolean} $dirty True if user has already interacted with the control. + * @property {boolean} $valid True if there is no error. + * @property {boolean} $invalid True if there is at least one error on the control. + * + * @description + * + * `NgModelController` provides the API for the `ng-model` directive. The controller contains + * services for data-binding, validation, CSS update, value formatting and parsing. It + * specifically does not contain any logic which deals with DOM rendering or listening to + * DOM events. 
The `NgModelController` is meant to be extended by other directives where the + * directive provides DOM manipulation and the `NgModelController` provides the data-binding. + * + * This example shows how to use `NgModelController` with a custom control to achieve + * data-binding. 
Notice how different directives (`contenteditable`, `ng-model`, and `required`) + * collaborate together to achieve the desired result. + * + * + + [contenteditable] { + border: 1px solid black; + background-color: white; + min-height: 20px; + } + + .ng-invalid { + border: 1px solid red; + } + + + + angular.module('customControl', []). + directive('contenteditable', function() { + return { + restrict: 'A', // only activate on element attribute + require: '?ngModel', // get a hold of NgModelController + link: function(scope, element, attrs, ngModel) { + if(!ngModel) return; // do nothing if no ng-model + + // Specify how UI should be updated + ngModel.$render = function() { + element.html(ngModel.$viewValue || ''); + }; + + // Listen for change events to enable binding + element.bind('blur keyup change', function() { + scope.$apply(read); + }); + read(); // initialize + + // Write data to the model + function read() { + ngModel.$setViewValue(element.html()); + } + } + }; + }); + + +
+
Change me!
+ Required! +
+ +
+
+ + it('should data-bind and become invalid', function() { + var contentEditable = element('[contenteditable]'); + + expect(contentEditable.text()).toEqual('Change me!'); + input('userContent').enter(''); + expect(contentEditable.text()).toEqual(''); + expect(contentEditable.prop('className')).toMatch(/ng-invalid-required/); + }); + + *
+ * + */ +var NgModelController = ['$scope', '$exceptionHandler', '$attrs', '$element', '$parse', + function($scope, $exceptionHandler, $attr, $element, $parse) { + this.$viewValue = Number.NaN; + this.$modelValue = Number.NaN; + this.$parsers = []; + this.$formatters = []; + this.$viewChangeListeners = []; + this.$pristine = true; + this.$dirty = false; + this.$valid = true; + this.$invalid = false; + this.$name = $attr.name; + /* $viewValue / $modelValue start as NaN, which is !== to every value, so the first model watch always formats and renders */ + var ngModelGet = $parse($attr.ngModel), + ngModelSet = ngModelGet.assign; + + if (!ngModelSet) { + throw Error(NON_ASSIGNABLE_MODEL_EXPRESSION + $attr.ngModel + + ' (' + startingTag($element) + ')'); + } + + /** + * @ngdoc function + * @name ng.directive:ngModel.NgModelController#$render + * @methodOf ng.directive:ngModel.NgModelController + * + * @description + * Called when the view needs to be updated. It is expected that the user of the ng-model + * directive will implement this method. + */ + this.$render = noop; + + var parentForm = $element.inheritedData('$formController') || nullFormCtrl, + invalidCount = 0, // used to easily determine if we are valid + $error = this.$error = {}; // keep invalid keys here + + + // Setup initial state of the control + $element.addClass(PRISTINE_CLASS); + toggleValidCss(true); + + // convenience method for easy toggling of classes + function toggleValidCss(isValid, validationErrorKey) { + validationErrorKey = validationErrorKey ? '-' + snake_case(validationErrorKey, '-') : ''; + $element. + removeClass((isValid ? INVALID_CLASS : VALID_CLASS) + validationErrorKey). + addClass((isValid ? 
VALID_CLASS : INVALID_CLASS) + validationErrorKey); + } + + /** + * @ngdoc function + * @name ng.directive:ngModel.NgModelController#$setValidity + * @methodOf ng.directive:ngModel.NgModelController + * + * @description + * Changes the validity state and notifies the parent form when the control changes validity (i.e. it + * does nothing if the given validator is already recorded in the requested state). 
+ * + * This method should be called by validators - i.e. the parser or formatter functions. + * + * @param {string} validationErrorKey Name of the validator. The result is stored as + * `$error[validationErrorKey] = !isValid` so that it is available for data-binding. + * The `validationErrorKey` should be in camelCase and will get converted into dash-case + * for class name. Example: `myError` will result in `ng-valid-my-error` and `ng-invalid-my-error` + * class and can be bound to as `{{someForm.someControl.$error.myError}}` . + * @param {boolean} isValid Whether the current state is valid (true) or invalid (false). + */ + this.$setValidity = function(validationErrorKey, isValid) { + if ($error[validationErrorKey] === !isValid) return; + /* invalidCount is the number of keys currently invalid; the control is valid only when it is 0 */ + if (isValid) { + if ($error[validationErrorKey]) invalidCount--; + if (!invalidCount) { + toggleValidCss(true); + this.$valid = true; + this.$invalid = false; + } + } else { + toggleValidCss(false); + this.$invalid = true; + this.$valid = false; + invalidCount++; + } + + $error[validationErrorKey] = !isValid; + toggleValidCss(isValid, validationErrorKey); + + parentForm.$setValidity(validationErrorKey, isValid, this); + }; + + + /** + * @ngdoc function + * @name ng.directive:ngModel.NgModelController#$setViewValue + * @methodOf ng.directive:ngModel.NgModelController + * + * @description + * Read a value from view. The first call also marks the control (and its parent form) as dirty. + * + * This method should be called from within a DOM event handler. + * For example {@link ng.directive:input input} or + * {@link ng.directive:select select} directives call it. + * + * It internally calls all `$parsers` in order and, if the resulting value differs from the current + * `$modelValue`, updates the model and calls all registered view change listeners. + * + * @param {string} value Value from the view. 
+ */ + this.$setViewValue = function(value) { + this.$viewValue = value; + + // change to dirty + if (this.$pristine) { + this.$dirty = true; + this.$pristine = false; + $element.removeClass(PRISTINE_CLASS).addClass(DIRTY_CLASS); + parentForm.$setDirty(); + } + + forEach(this.$parsers, function(fn) { + value = fn(value); + }); + + if (this.$modelValue !== value) { + this.$modelValue = value; + ngModelSet($scope, value); + forEach(this.$viewChangeListeners, function(listener) { + try { + listener(); + } catch(e) { + $exceptionHandler(e); + } + }) + } + }; + + // model -> view: when the scope model changes, run the formatters (last registered first) and re-render + var ctrl = this; + + $scope.$watch(function ngModelWatch() { + var value = ngModelGet($scope); + + // if scope model value and ngModel value are out of sync + if (ctrl.$modelValue !== value) { + + var formatters = ctrl.$formatters, + idx = formatters.length; + + ctrl.$modelValue = value; + while(idx--) { + value = formatters[idx](value); + } + + if (ctrl.$viewValue !== value) { + ctrl.$viewValue = value; + ctrl.$render(); + } + } + }); +}]; + + +/** + * @ngdoc directive + * @name ng.directive:ngModel + * + * @element input + * + * @description + * Is a directive that tells Angular to do two-way data binding. It works together with `input`, + * `select`, `textarea`. You can easily write your own directives to use `ngModel` as well. + * + * `ngModel` is responsible for: + * + * - binding the view into the model, which other directives such as `input`, `textarea` or `select` + * require, + * - providing validation behavior (i.e. required, number, email, url), + * - keeping state of the control (valid/invalid, dirty/pristine, validation errors), + * - setting related css class onto the element (`ng-valid`, `ng-invalid`, `ng-dirty`, `ng-pristine`), + * - registering the control with the parent {@link ng.directive:form form}. 
+ * + * For basic examples of how to use `ngModel`, see: + * + * - {@link ng.directive:input input} + * - {@link ng.directive:input.text text} + * - {@link ng.directive:input.checkbox checkbox} + * - {@link ng.directive:input.radio radio} + * - {@link ng.directive:input.number number} + * - {@link ng.directive:input.email email} + * - {@link ng.directive:input.url url} + * - {@link ng.directive:select select} + * - {@link ng.directive:textarea textarea} + * + */ +var ngModelDirective = function() { + return { + require: ['ngModel', '^?form'], + controller: NgModelController, + link: function(scope, element, attr, ctrls) { + // notify others, especially parent forms: register with the enclosing form controller (or nullFormCtrl when there is none) and unregister on $destroy + + var modelCtrl = ctrls[0], + formCtrl = ctrls[1] || nullFormCtrl; + + formCtrl.$addControl(modelCtrl); + + element.bind('$destroy', function() { + formCtrl.$removeControl(modelCtrl); + }); + } + }; +}; + + +/** + * @ngdoc directive + * @name ng.directive:ngChange + * @restrict E + * + * @description + * Evaluates the given expression when the user changes the input. + * The expression is not evaluated when the value change is coming from the model. + * + * Note, this directive requires `ngModel` to be present. + * + * @element input + * + * @example + * + * + * + *
+ * + * + *
+ * debug = {{confirmed}}
+ * counter = {{counter}} + *
+ *
+ * + * it('should evaluate the expression if changing from view', function() { + * expect(binding('counter')).toEqual('0'); + * element('#ng-change-example1').click(); + * expect(binding('counter')).toEqual('1'); + * expect(binding('confirmed')).toEqual('true'); + * }); + * + * it('should not evaluate the expression if changing from model', function() { + * element('#ng-change-example2').click(); + * expect(binding('counter')).toEqual('0'); + * expect(binding('confirmed')).toEqual('true'); + * }); + * + *
+ */ +var ngChangeDirective = valueFn({ + require: 'ngModel', + link: function(scope, element, attr, ctrl) { + ctrl.$viewChangeListeners.push(function() { + scope.$eval(attr.ngChange); + }); + } +}); + + +var requiredDirective = function() { + return { + require: '?ngModel', + link: function(scope, elm, attr, ctrl) { + if (!ctrl) return; + attr.required = true; // force truthy in case we are on non input element + + var validator = function(value) { + if (attr.required && (isEmpty(value) || value === false)) { + ctrl.$setValidity('required', false); + return; + } else { + ctrl.$setValidity('required', true); + return value; + } + }; + + ctrl.$formatters.push(validator); + ctrl.$parsers.unshift(validator); + + attr.$observe('required', function() { + validator(ctrl.$viewValue); + }); + } + }; +}; + + +/** + * @ngdoc directive + * @name ng.directive:ngList + * + * @description + * Text input that converts between comma-separated string into an array of strings. + * + * @element input + * @param {string=} ngList optional delimiter that should be used to split the value. If + * specified in form `/something/` then the value will be converted into a regular expression. + * + * @example + + + +
+ List: + + Required! + names = {{names}}
+ myForm.namesInput.$valid = {{myForm.namesInput.$valid}}
+ myForm.namesInput.$error = {{myForm.namesInput.$error}}
+ myForm.$valid = {{myForm.$valid}}
+ myForm.$error.required = {{!!myForm.$error.required}}
+
+
+ + it('should initialize to model', function() { + expect(binding('names')).toEqual('["igor","misko","vojta"]'); + expect(binding('myForm.namesInput.$valid')).toEqual('true'); + }); + + it('should be invalid if empty', function() { + input('names').enter(''); + expect(binding('names')).toEqual('[]'); + expect(binding('myForm.namesInput.$valid')).toEqual('false'); + }); + +
+ */ +var ngListDirective = function() { + return { + require: 'ngModel', + link: function(scope, element, attr, ctrl) { + var match = /\/(.*)\//.exec(attr.ngList), + separator = match && new RegExp(match[1]) || attr.ngList || ','; + + var parse = function(viewValue) { + var list = []; + + if (viewValue) { + forEach(viewValue.split(separator), function(value) { + if (value) list.push(trim(value)); + }); + } + + return list; + }; + + ctrl.$parsers.push(parse); + ctrl.$formatters.push(function(value) { + if (isArray(value)) { + return value.join(', '); + } + + return undefined; + }); + } + }; +}; + + +var CONSTANT_VALUE_REGEXP = /^(true|false|\d+)$/; + +var ngValueDirective = function() { + return { + priority: 100, + compile: function(tpl, tplAttr) { + if (CONSTANT_VALUE_REGEXP.test(tplAttr.ngValue)) { + return function(scope, elm, attr) { + attr.$set('value', scope.$eval(attr.ngValue)); + }; + } else { + return function(scope, elm, attr) { + scope.$watch(attr.ngValue, function valueWatchAction(value) { + attr.$set('value', value, false); + }); + }; + } + } + }; +}; + +/** + * @ngdoc directive + * @name ng.directive:ngBind + * + * @description + * The `ngBind` attribute tells Angular to replace the text content of the specified HTML element + * with the value of a given expression, and to update the text content when the value of that + * expression changes. + * + * Typically, you don't use `ngBind` directly, but instead you use the double curly markup like + * `{{ expression }}` which is similar but less verbose. + * + * One scenario in which the use of `ngBind` is preferred over `{{ expression }}` binding is when + * it's desirable to put bindings into template that is momentarily displayed by the browser in its + * raw state before Angular compiles it. Since `ngBind` is an element attribute, it makes the + * bindings invisible to the user while the page is loading. 
+ * + * An alternative solution to this problem would be using the + * {@link ng.directive:ngCloak ngCloak} directive. + * + * + * @element ANY + * @param {expression} ngBind {@link guide/expression Expression} to evaluate. + * + * @example + * Enter a name in the Live Preview text box; the greeting below the text box changes instantly. + + + +
+ Enter name:
+ Hello ! +
+
+ + it('should check ng-bind', function() { + expect(using('.doc-example-live').binding('name')).toBe('Whirled'); + using('.doc-example-live').input('name').enter('world'); + expect(using('.doc-example-live').binding('name')).toBe('world'); + }); + +
+ */ +var ngBindDirective = ngDirective(function(scope, element, attr) { + element.addClass('ng-binding').data('$binding', attr.ngBind); + scope.$watch(attr.ngBind, function ngBindWatchAction(value) { + element.text(value == undefined ? '' : value); + }); +}); + + +/** + * @ngdoc directive + * @name ng.directive:ngBindTemplate + * + * @description + * The `ngBindTemplate` directive specifies that the element + * text should be replaced with the template in ngBindTemplate. + * Unlike ngBind the ngBindTemplate can contain multiple `{{` `}}` + * expressions. (This is required since some HTML elements + * can not have SPAN elements such as TITLE, or OPTION to name a few.) + * + * @element ANY + * @param {string} ngBindTemplate template of form + * {{ expression }} to eval. + * + * @example + * Try it here: enter text in text box and watch the greeting change. + + + +
+ Salutation:
+ Name:
+

+       
+
+ + it('should check ng-bind', function() { + expect(using('.doc-example-live').binding('salutation')). + toBe('Hello'); + expect(using('.doc-example-live').binding('name')). + toBe('World'); + using('.doc-example-live').input('salutation').enter('Greetings'); + using('.doc-example-live').input('name').enter('user'); + expect(using('.doc-example-live').binding('salutation')). + toBe('Greetings'); + expect(using('.doc-example-live').binding('name')). + toBe('user'); + }); + +
+ */ +var ngBindTemplateDirective = ['$interpolate', function($interpolate) { + return function(scope, element, attr) { + // TODO: move this to scenario runner + var interpolateFn = $interpolate(element.attr(attr.$attr.ngBindTemplate)); + element.addClass('ng-binding').data('$binding', interpolateFn); + attr.$observe('ngBindTemplate', function(value) { + element.text(value); + }); + } +}]; + + +/** + * @ngdoc directive + * @name ng.directive:ngBindHtmlUnsafe + * + * @description + * Creates a binding that will innerHTML the result of evaluating the `expression` into the current + * element. *The innerHTML-ed content will not be sanitized!* You should use this directive only if + * {@link ngSanitize.directive:ngBindHtml ngBindHtml} directive is too + * restrictive and when you absolutely trust the source of the content you are binding to. + * + * See {@link ngSanitize.$sanitize $sanitize} docs for examples. + * + * @element ANY + * @param {expression} ngBindHtmlUnsafe {@link guide/expression Expression} to evaluate. 
+ */ +var ngBindHtmlUnsafeDirective = [function() { + return function(scope, element, attr) { + element.addClass('ng-binding').data('$binding', attr.ngBindHtmlUnsafe); + scope.$watch(attr.ngBindHtmlUnsafe, function ngBindHtmlUnsafeWatchAction(value) { + element.html(value || ''); + }); + }; +}]; + +function classDirective(name, selector) { + name = 'ngClass' + name; + return ngDirective(function(scope, element, attr) { + var oldVal = undefined; + + scope.$watch(attr[name], ngClassWatchAction, true); + + attr.$observe('class', function(value) { + var ngClass = scope.$eval(attr[name]); + ngClassWatchAction(ngClass, ngClass); + }); + + + if (name !== 'ngClass') { + scope.$watch('$index', function($index, old$index) { + var mod = $index & 1; + if (mod !== old$index & 1) { + if (mod === selector) { + addClass(scope.$eval(attr[name])); + } else { + removeClass(scope.$eval(attr[name])); + } + } + }); + } + + + function ngClassWatchAction(newVal) { + if (selector === true || scope.$index % 2 === selector) { + if (oldVal && !equals(newVal,oldVal)) { + removeClass(oldVal); + } + addClass(newVal); + } + oldVal = copy(newVal); + } + + + function removeClass(classVal) { + if (isObject(classVal) && !isArray(classVal)) { + classVal = map(classVal, function(v, k) { if (v) return k }); + } + element.removeClass(isArray(classVal) ? classVal.join(' ') : classVal); + } + + + function addClass(classVal) { + if (isObject(classVal) && !isArray(classVal)) { + classVal = map(classVal, function(v, k) { if (v) return k }); + } + if (classVal) { + element.addClass(isArray(classVal) ? classVal.join(' ') : classVal); + } + } + }); +} + +/** + * @ngdoc directive + * @name ng.directive:ngClass + * + * @description + * The `ngClass` allows you to set CSS class on HTML element dynamically by databinding an + * expression that represents all classes to be added. + * + * The directive won't add duplicate classes if a particular class was already set. 
+ * + * When the expression changes, the previously added classes are removed and only then the + * new classes are added. + * + * @element ANY + * @param {expression} ngClass {@link guide/expression Expression} to eval. The result + * of the evaluation can be a string representing space delimited class + * names, an array, or a map of class names to boolean values. + * + * @example + + + + +
+ Sample Text +
+ + .my-class { + color: red; + } + + + it('should check ng-class', function() { + expect(element('.doc-example-live span').prop('className')).not(). + toMatch(/my-class/); + + using('.doc-example-live').element(':button:first').click(); + + expect(element('.doc-example-live span').prop('className')). + toMatch(/my-class/); + + using('.doc-example-live').element(':button:last').click(); + + expect(element('.doc-example-live span').prop('className')).not(). + toMatch(/my-class/); + }); + +
+ */
+// `selector` is `true`: classDirective applies the classes regardless of `$index` and, for the
+// plain `ngClass` attribute, registers no `$index` watcher.
+var ngClassDirective = classDirective('', true);
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:ngClassOdd
+ *
+ * @description
+ * The `ngClassOdd` and `ngClassEven` directives work exactly as
+ * {@link ng.directive:ngClass ngClass}, except they work in
+ * conjunction with `ngRepeat` and take effect only on odd (even) rows.
+ *
+ * This directive can be applied only within a scope of an
+ * {@link ng.directive:ngRepeat ngRepeat}.
+ *
+ * @element ANY
+ * @param {expression} ngClassOdd {@link guide/expression Expression} to eval. The result
+ *   of the evaluation can be a string representing space delimited class names or an array.
+ *
+ * @example
+
+
+
    +
  1. + + {{name}} + +
  2. +
+
+ + .odd { + color: red; + } + .even { + color: blue; + } + + + it('should check ng-class-odd and ng-class-even', function() { + expect(element('.doc-example-live li:first span').prop('className')). + toMatch(/odd/); + expect(element('.doc-example-live li:last span').prop('className')). + toMatch(/even/); + }); + +
+ */
+// selector 0: classDirective applies the classes when `scope.$index % 2 === 0`
+// (zero-based index, i.e. the 1st, 3rd, ... rows).
+var ngClassOddDirective = classDirective('Odd', 0);
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:ngClassEven
+ *
+ * @description
+ * The `ngClassOdd` and `ngClassEven` directives work exactly as
+ * {@link ng.directive:ngClass ngClass}, except they work in
+ * conjunction with `ngRepeat` and take effect only on odd (even) rows.
+ *
+ * This directive can be applied only within a scope of an
+ * {@link ng.directive:ngRepeat ngRepeat}.
+ *
+ * @element ANY
+ * @param {expression} ngClassEven {@link guide/expression Expression} to eval. The
+ *   result of the evaluation can be a string representing space delimited class names or an array.
+ *
+ * @example
+
+
+
    +
  1. + + {{name}}       + +
  2. +
+
+ + .odd { + color: red; + } + .even { + color: blue; + } + + + it('should check ng-class-odd and ng-class-even', function() { + expect(element('.doc-example-live li:first span').prop('className')). + toMatch(/odd/); + expect(element('.doc-example-live li:last span').prop('className')). + toMatch(/even/); + }); + +
+ */
+// selector 1: classDirective applies the classes when `scope.$index % 2 === 1`
+// (zero-based index, i.e. the 2nd, 4th, ... rows).
+var ngClassEvenDirective = classDirective('Even', 1);
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:ngCloak
+ *
+ * @description
+ * The `ngCloak` directive is used to prevent the Angular html template from being briefly
+ * displayed by the browser in its raw (uncompiled) form while your application is loading. Use this
+ * directive to avoid the undesirable flicker effect caused by the html template display.
+ *
+ * The directive can be applied to the `<body>` element, but typically a fine-grained application is
+ * preferred in order to benefit from progressive rendering of the browser view.
+ *
+ * `ngCloak` works in cooperation with a css rule that is embedded within `angular.js` and
+ * `angular.min.js` files. Following is the css rule:
+ *
+ *
+ * [ng\:cloak], [ng-cloak], [data-ng-cloak], [x-ng-cloak], .ng-cloak, .x-ng-cloak {
+ *   display: none;
+ * }
+ * 
+ * + * When this css rule is loaded by the browser, all html elements (including their children) that + * are tagged with the `ng-cloak` directive are hidden. When Angular comes across this directive + * during the compilation of the template it deletes the `ngCloak` element attribute, which + * makes the compiled element visible. + * + * For the best result, `angular.js` script must be loaded in the head section of the html file; + * alternatively, the css rule (above) must be included in the external stylesheet of the + * application. + * + * Legacy browsers, like IE7, do not provide attribute selector support (added in CSS 2.1) so they + * cannot match the `[ng\:cloak]` selector. To work around this limitation, you must add the css + * class `ng-cloak` in addition to the `ngCloak` directive as shown in the example below. + * + * @element ANY + * + * @example + + +
{{ 'hello' }}
+
{{ 'hello IE7' }}
+
+ + it('should remove the template directive and css class', function() { + expect(element('.doc-example-live #template1').attr('ng-cloak')). + not().toBeDefined(); + expect(element('.doc-example-live #template2').attr('ng-cloak')). + not().toBeDefined(); + }); + +
+ * + */ +var ngCloakDirective = ngDirective({ + compile: function(element, attr) { + attr.$set('ngCloak', undefined); + element.removeClass('ng-cloak'); + } +}); + +/** + * @ngdoc directive + * @name ng.directive:ngController + * + * @description + * The `ngController` directive assigns behavior to a scope. This is a key aspect of how angular + * supports the principles behind the Model-View-Controller design pattern. + * + * MVC components in angular: + * + * * Model — The Model is data in scope properties; scopes are attached to the DOM. + * * View — The template (HTML with data bindings) is rendered into the View. + * * Controller — The `ngController` directive specifies a Controller class; the class has + * methods that typically express the business logic behind the application. + * + * Note that an alternative way to define controllers is via the {@link ng.$route $route} service. + * + * @element ANY + * @scope + * @param {expression} ngController Name of a globally accessible constructor function or an + * {@link guide/expression expression} that on the current scope evaluates to a + * constructor function. + * + * @example + * Here is a simple form for editing user contact information. Adding, removing, clearing, and + * greeting are methods declared on the controller (see source tab). These methods can + * easily be called from the angular markup. Notice that the scope becomes the `this` for the + * controller's instance. This allows for easy access to the view data from the controller. Also + * notice that any changes to the data are automatically reflected in the View without the need + * for a manual update. + + + +
+ Name: + [ greet ]
+ Contact: +
    +
  • + + + [ clear + | X ] +
  • +
  • [ add ]
  • +
+
+
+ + it('should check controller', function() { + expect(element('.doc-example-live div>:input').val()).toBe('John Smith'); + expect(element('.doc-example-live li:nth-child(1) input').val()) + .toBe('408 555 1212'); + expect(element('.doc-example-live li:nth-child(2) input').val()) + .toBe('john.smith@example.org'); + + element('.doc-example-live li:first a:contains("clear")').click(); + expect(element('.doc-example-live li:first input').val()).toBe(''); + + element('.doc-example-live li:last a:contains("add")').click(); + expect(element('.doc-example-live li:nth-child(3) input').val()) + .toBe('yourname@example.org'); + }); + +
+ */ +var ngControllerDirective = [function() { + return { + scope: true, + controller: '@' + }; +}]; + +/** + * @ngdoc directive + * @name ng.directive:ngCsp + * @priority 1000 + * + * @element html + * @description + * Enables [CSP (Content Security Policy)](https://developer.mozilla.org/en/Security/CSP) support. + * + * This is necessary when developing things like Google Chrome Extensions. + * + * CSP forbids apps to use `eval` or `Function(string)` generated functions (among other things). + * For us to be compatible, we just need to implement the "getterFn" in $parse without violating + * any of these restrictions. + * + * AngularJS uses `Function(string)` generated functions as a speed optimization. By applying `ngCsp` + * it is be possible to opt into the CSP compatible mode. When this mode is on AngularJS will + * evaluate all expressions up to 30% slower than in non-CSP mode, but no security violations will + * be raised. + * + * In order to use this feature put `ngCsp` directive on the root element of the application. + * + * @example + * This example shows how to apply the `ngCsp` directive to the `html` tag. +
+     
+     
+     ...
+     ...
+     
+   
+ */ + +var ngCspDirective = ['$sniffer', function($sniffer) { + return { + priority: 1000, + compile: function() { + $sniffer.csp = true; + } + }; +}]; + +/** + * @ngdoc directive + * @name ng.directive:ngClick + * + * @description + * The ngClick allows you to specify custom behavior when + * element is clicked. + * + * @element ANY + * @param {expression} ngClick {@link guide/expression Expression} to evaluate upon + * click. (Event object is available as `$event`) + * + * @example + + + + count: {{count}} + + + it('should check ng-click', function() { + expect(binding('count')).toBe('0'); + element('.doc-example-live :button').click(); + expect(binding('count')).toBe('1'); + }); + + + */ +/* + * A directive that allows creation of custom onclick handlers that are defined as angular + * expressions and are compiled and executed within the current scope. + * + * Events that are handled via these handler are always configured not to propagate further. + */ +var ngEventDirectives = {}; +forEach( + 'click dblclick mousedown mouseup mouseover mouseout mousemove mouseenter mouseleave'.split(' '), + function(name) { + var directiveName = directiveNormalize('ng-' + name); + ngEventDirectives[directiveName] = ['$parse', function($parse) { + return function(scope, element, attr) { + var fn = $parse(attr[directiveName]); + element.bind(lowercase(name), function(event) { + scope.$apply(function() { + fn(scope, {$event:event}); + }); + }); + }; + }]; + } +); + +/** + * @ngdoc directive + * @name ng.directive:ngDblclick + * + * @description + * The `ngDblclick` directive allows you to specify custom behavior on dblclick event. + * + * @element ANY + * @param {expression} ngDblclick {@link guide/expression Expression} to evaluate upon + * dblclick. 
(Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMousedown + * + * @description + * The ngMousedown directive allows you to specify custom behavior on mousedown event. + * + * @element ANY + * @param {expression} ngMousedown {@link guide/expression Expression} to evaluate upon + * mousedown. (Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMouseup + * + * @description + * Specify custom behavior on mouseup event. + * + * @element ANY + * @param {expression} ngMouseup {@link guide/expression Expression} to evaluate upon + * mouseup. (Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + +/** + * @ngdoc directive + * @name ng.directive:ngMouseover + * + * @description + * Specify custom behavior on mouseover event. + * + * @element ANY + * @param {expression} ngMouseover {@link guide/expression Expression} to evaluate upon + * mouseover. (Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMouseenter + * + * @description + * Specify custom behavior on mouseenter event. + * + * @element ANY + * @param {expression} ngMouseenter {@link guide/expression Expression} to evaluate upon + * mouseenter. (Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMouseleave + * + * @description + * Specify custom behavior on mouseleave event. + * + * @element ANY + * @param {expression} ngMouseleave {@link guide/expression Expression} to evaluate upon + * mouseleave. 
(Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngMousemove + * + * @description + * Specify custom behavior on mousemove event. + * + * @element ANY + * @param {expression} ngMousemove {@link guide/expression Expression} to evaluate upon + * mousemove. (Event object is available as `$event`) + * + * @example + * See {@link ng.directive:ngClick ngClick} + */ + + +/** + * @ngdoc directive + * @name ng.directive:ngSubmit + * + * @description + * Enables binding angular expressions to onsubmit events. + * + * Additionally it prevents the default action (which for form means sending the request to the + * server and reloading the current page). + * + * @element form + * @param {expression} ngSubmit {@link guide/expression Expression} to eval. + * + * @example + + + +
+ Enter text and hit enter: + + +
list={{list}}
+
+
+ + it('should check ng-submit', function() { + expect(binding('list')).toBe('[]'); + element('.doc-example-live #submit').click(); + expect(binding('list')).toBe('["hello"]'); + expect(input('text').val()).toBe(''); + }); + it('should ignore empty strings', function() { + expect(binding('list')).toBe('[]'); + element('.doc-example-live #submit').click(); + element('.doc-example-live #submit').click(); + expect(binding('list')).toBe('["hello"]'); + }); + +
+ */ +var ngSubmitDirective = ngDirective(function(scope, element, attrs) { + element.bind('submit', function() { + scope.$apply(attrs.ngSubmit); + }); +}); + +/** + * @ngdoc directive + * @name ng.directive:ngInclude + * @restrict ECA + * + * @description + * Fetches, compiles and includes an external HTML fragment. + * + * Keep in mind that Same Origin Policy applies to included resources + * (e.g. ngInclude won't work for cross-domain requests on all browsers and for + * file:// access on some browsers). + * + * @scope + * + * @param {string} ngInclude|src angular expression evaluating to URL. If the source is a string constant, + * make sure you wrap it in quotes, e.g. `src="'myPartialTemplate.html'"`. + * @param {string=} onload Expression to evaluate when a new partial is loaded. + * + * @param {string=} autoscroll Whether `ngInclude` should call {@link ng.$anchorScroll + * $anchorScroll} to scroll the viewport after the content is loaded. + * + * - If the attribute is not set, disable scrolling. + * - If the attribute is set without value, enable scrolling. + * - Otherwise enable scrolling only if the expression evaluates to truthy value. + * + * @example + + +
+ + url of the template: {{template.url}} +
+
+
+
+ + function Ctrl($scope) { + $scope.templates = + [ { name: 'template1.html', url: 'template1.html'} + , { name: 'template2.html', url: 'template2.html'} ]; + $scope.template = $scope.templates[0]; + } + + + Content of template1.html + + + Content of template2.html + + + it('should load template1.html', function() { + expect(element('.doc-example-live [ng-include]').text()). + toMatch(/Content of template1.html/); + }); + it('should load template2.html', function() { + select('template').option('1'); + expect(element('.doc-example-live [ng-include]').text()). + toMatch(/Content of template2.html/); + }); + it('should change to blank', function() { + select('template').option(''); + expect(element('.doc-example-live [ng-include]').text()).toEqual(''); + }); + +
+ */ + + +/** + * @ngdoc event + * @name ng.directive:ngInclude#$includeContentLoaded + * @eventOf ng.directive:ngInclude + * @eventType emit on the current ngInclude scope + * @description + * Emitted every time the ngInclude content is reloaded. + */ +var ngIncludeDirective = ['$http', '$templateCache', '$anchorScroll', '$compile', + function($http, $templateCache, $anchorScroll, $compile) { + return { + restrict: 'ECA', + terminal: true, + compile: function(element, attr) { + var srcExp = attr.ngInclude || attr.src, + onloadExp = attr.onload || '', + autoScrollExp = attr.autoscroll; + + return function(scope, element) { + var changeCounter = 0, + childScope; + + var clearContent = function() { + if (childScope) { + childScope.$destroy(); + childScope = null; + } + + element.html(''); + }; + + scope.$watch(srcExp, function ngIncludeWatchAction(src) { + var thisChangeId = ++changeCounter; + + if (src) { + $http.get(src, {cache: $templateCache}).success(function(response) { + if (thisChangeId !== changeCounter) return; + + if (childScope) childScope.$destroy(); + childScope = scope.$new(); + + element.html(response); + $compile(element.contents())(childScope); + + if (isDefined(autoScrollExp) && (!autoScrollExp || scope.$eval(autoScrollExp))) { + $anchorScroll(); + } + + childScope.$emit('$includeContentLoaded'); + scope.$eval(onloadExp); + }).error(function() { + if (thisChangeId === changeCounter) clearContent(); + }); + } else clearContent(); + }); + }; + } + }; +}]; + +/** + * @ngdoc directive + * @name ng.directive:ngInit + * + * @description + * The `ngInit` directive specifies initialization tasks to be executed + * before the template enters execution mode during bootstrap. + * + * @element ANY + * @param {expression} ngInit {@link guide/expression Expression} to eval. + * + * @example + + +
+ {{greeting}} {{person}}! +
+
+ + it('should check greeting', function() { + expect(binding('greeting')).toBe('Hello'); + expect(binding('person')).toBe('World'); + }); + +
+ */ +var ngInitDirective = ngDirective({ + compile: function() { + return { + pre: function(scope, element, attrs) { + scope.$eval(attrs.ngInit); + } + } + } +}); + +/** + * @ngdoc directive + * @name ng.directive:ngNonBindable + * @priority 1000 + * + * @description + * Sometimes it is necessary to write code which looks like bindings but which should be left alone + * by angular. Use `ngNonBindable` to make angular ignore a chunk of HTML. + * + * @element ANY + * + * @example + * In this example there are two location where a simple binding (`{{}}`) is present, but the one + * wrapped in `ngNonBindable` is left alone. + * + * @example + + +
Normal: {{1 + 2}}
+
Ignored: {{1 + 2}}
+
+ + it('should check ng-non-bindable', function() { + expect(using('.doc-example-live').binding('1 + 2')).toBe('3'); + expect(using('.doc-example-live').element('div:last').text()). + toMatch(/1 \+ 2/); + }); + +
+ */
+// The definition consists only of `terminal: true` and `priority: 1000`; no compile or link
+// function is supplied here.
+var ngNonBindableDirective = ngDirective({ terminal: true, priority: 1000 });
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:ngPluralize
+ * @restrict EA
+ *
+ * @description
+ * # Overview
+ * `ngPluralize` is a directive that displays messages according to en-US localization rules.
+ * These rules are bundled with angular.js and the rules can be overridden
+ * (see {@link guide/i18n Angular i18n} dev guide). You configure ngPluralize directive
+ * by specifying the mappings between
+ * {@link http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
+ * plural categories} and the strings to be displayed.
+ *
+ * # Plural categories and explicit number rules
+ * There are two
+ * {@link http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
+ * plural categories} in Angular's default en-US locale: "one" and "other".
+ *
+ * While a plural category may match many numbers (for example, in en-US locale, "other" can match
+ * any number that is not 1), an explicit number rule can only match one number. For example, the
+ * explicit number rule for "3" matches the number 3. You will see the use of plural categories
+ * and explicit number rules throughout later parts of this documentation.
+ *
+ * # Configuring ngPluralize
+ * You configure ngPluralize by providing 2 attributes: `count` and `when`.
+ * You can also provide an optional attribute, `offset`.
+ *
+ * The value of the `count` attribute can be either a string or an {@link guide/expression
+ * Angular expression}; these are evaluated on the current scope for its bound value.
+ *
+ * The `when` attribute specifies the mappings between plural categories and the actual
+ * string to be displayed. The value of the attribute should be a JSON object so that Angular
+ * can interpret it correctly.
+ *
+ * The following example shows how to configure ngPluralize:
+ *
+ *
+ * 
+ * 
+ *
+ * + * In the example, `"0: Nobody is viewing."` is an explicit number rule. If you did not + * specify this rule, 0 would be matched to the "other" category and "0 people are viewing" + * would be shown instead of "Nobody is viewing". You can specify an explicit number rule for + * other numbers, for example 12, so that instead of showing "12 people are viewing", you can + * show "a dozen people are viewing". + * + * You can use a set of closed braces(`{}`) as a placeholder for the number that you want substituted + * into pluralized strings. In the previous example, Angular will replace `{}` with + * `{{personCount}}`. The closed braces `{}` is a placeholder + * for {{numberExpression}}. + * + * # Configuring ngPluralize with offset + * The `offset` attribute allows further customization of pluralized text, which can result in + * a better user experience. For example, instead of the message "4 people are viewing this document", + * you might display "John, Kate and 2 others are viewing this document". + * The offset attribute allows you to offset a number by any desired value. + * Let's take a look at an example: + * + *
+ * 
+ * 
+ * 
+ * + * Notice that we are still using two plural categories(one, other), but we added + * three explicit number rules 0, 1 and 2. + * When one person, perhaps John, views the document, "John is viewing" will be shown. + * When three people view the document, no explicit number rule is found, so + * an offset of 2 is taken off 3, and Angular uses 1 to decide the plural category. + * In this case, plural category 'one' is matched and "John, Mary and one other person are viewing" + * is shown. + * + * Note that when you specify offsets, you must provide explicit number rules for + * numbers from 0 up to and including the offset. If you use an offset of 3, for example, + * you must provide explicit number rules for 0, 1, 2 and 3. You must also provide plural strings for + * plural categories "one" and "other". + * + * @param {string|expression} count The variable to be bound to. + * @param {string} when The mapping between plural category and its corresponding strings. + * @param {number=} offset Offset to deduct from the total number. + * + * @example + + + +
+ Person 1:
+ Person 2:
+ Number of People:
+ + + Without Offset: + +
+ + + With Offset(2): + + +
+
+ + it('should show correct pluralized string', function() { + expect(element('.doc-example-live ng-pluralize:first').text()). + toBe('1 person is viewing.'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Igor is viewing.'); + + using('.doc-example-live').input('personCount').enter('0'); + expect(element('.doc-example-live ng-pluralize:first').text()). + toBe('Nobody is viewing.'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Nobody is viewing.'); + + using('.doc-example-live').input('personCount').enter('2'); + expect(element('.doc-example-live ng-pluralize:first').text()). + toBe('2 people are viewing.'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Igor and Misko are viewing.'); + + using('.doc-example-live').input('personCount').enter('3'); + expect(element('.doc-example-live ng-pluralize:first').text()). + toBe('3 people are viewing.'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Igor, Misko and one other person are viewing.'); + + using('.doc-example-live').input('personCount').enter('4'); + expect(element('.doc-example-live ng-pluralize:first').text()). + toBe('4 people are viewing.'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Igor, Misko and 2 other people are viewing.'); + }); + + it('should show data-binded names', function() { + using('.doc-example-live').input('personCount').enter('4'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Igor, Misko and 2 other people are viewing.'); + + using('.doc-example-live').input('person1').enter('Di'); + using('.doc-example-live').input('person2').enter('Vojta'); + expect(element('.doc-example-live ng-pluralize:last').text()). + toBe('Di, Vojta and 2 other people are viewing.'); + }); + +
+ */ +var ngPluralizeDirective = ['$locale', '$interpolate', function($locale, $interpolate) { + var BRACE = /{}/g; + return { + restrict: 'EA', + link: function(scope, element, attr) { + var numberExp = attr.count, + whenExp = element.attr(attr.$attr.when), // this is because we have {{}} in attrs + offset = attr.offset || 0, + whens = scope.$eval(whenExp), + whensExpFns = {}, + startSymbol = $interpolate.startSymbol(), + endSymbol = $interpolate.endSymbol(); + + forEach(whens, function(expression, key) { + whensExpFns[key] = + $interpolate(expression.replace(BRACE, startSymbol + numberExp + '-' + + offset + endSymbol)); + }); + + scope.$watch(function ngPluralizeWatch() { + var value = parseFloat(scope.$eval(numberExp)); + + if (!isNaN(value)) { + //if explicit number rule such as 1, 2, 3... is defined, just use it. Otherwise, + //check it against pluralization rules in $locale service + if (!(value in whens)) value = $locale.pluralCat(value - offset); + return whensExpFns[value](scope, element, true); + } else { + return ''; + } + }, function ngPluralizeWatchAction(newVal) { + element.text(newVal); + }); + } + }; +}]; + +/** + * @ngdoc directive + * @name ng.directive:ngRepeat + * + * @description + * The `ngRepeat` directive instantiates a template once per item from a collection. Each template + * instance gets its own scope, where the given loop variable is set to the current collection item, + * and `$index` is set to the item index or key. + * + * Special properties are exposed on the local scope of each template instance, including: + * + * * `$index` – `{number}` – iterator offset of the repeated element (0..length-1) + * * `$first` – `{boolean}` – true if the repeated element is first in the iterator. + * * `$middle` – `{boolean}` – true if the repeated element is between the first and last in the iterator. + * * `$last` – `{boolean}` – true if the repeated element is last in the iterator. 
+ * + * + * @element ANY + * @scope + * @priority 1000 + * @param {repeat_expression} ngRepeat The expression indicating how to enumerate a collection. Two + * formats are currently supported: + * + * * `variable in expression` – where variable is the user defined loop variable and `expression` + * is a scope expression giving the collection to enumerate. + * + * For example: `track in cd.tracks`. + * + * * `(key, value) in expression` – where `key` and `value` can be any user defined identifiers, + * and `expression` is the scope expression giving the collection to enumerate. + * + * For example: `(name, age) in {'adam':10, 'amalie':12}`. + * + * @example + * This example initializes the scope to a list of names and + * then uses `ngRepeat` to display every person: + + +
+ I have {{friends.length}} friends. They are: +
    +
  • + [{{$index + 1}}] {{friend.name}} who is {{friend.age}} years old. +
  • +
+
+
+ + it('should check ng-repeat', function() { + var r = using('.doc-example-live').repeater('ul li'); + expect(r.count()).toBe(2); + expect(r.row(0)).toEqual(["1","John","25"]); + expect(r.row(1)).toEqual(["2","Mary","28"]); + }); + +
+ */
+var ngRepeatDirective = ngDirective({
+  transclude: 'element',
+  priority: 1000,
+  terminal: true,
+  compile: function(element, attr, linker) {
+    // The returned function is the link step; `linker` is invoked below to produce one
+    // clone per collection item.
+    return function(scope, iterStartElement, attr){
+      var expression = attr.ngRepeat;
+      // Split "<lhs> in <rhs>": match[1] is the loop-variable part, match[2] the collection
+      // expression.
+      var match = expression.match(/^\s*(.+)\s+in\s+(.*)\s*$/),
+        lhs, rhs, valueIdent, keyIdent;
+      if (! match) {
+        throw Error("Expected ngRepeat in form of '_item_ in _collection_' but got '" +
+          expression + "'.");
+      }
+      lhs = match[1];
+      rhs = match[2];
+      // lhs is either a single identifier (group 1) or "(key, value)" (groups 2 and 3).
+      match = lhs.match(/^(?:([\$\w]+)|\(([\$\w]+)\s*,\s*([\$\w]+)\))$/);
+      if (!match) {
+        throw Error("'item' in 'item in collection' should be identifier or (key, value) but got '" +
+            lhs + "'.");
+      }
+      valueIdent = match[3] || match[1];
+      // Only set for the "(key, value)" form; undefined otherwise.
+      keyIdent = match[2];
+
+      // Store a list of elements from previous run. This is a hash where key is the item from the
+      // iterator, and the value is an array of objects with following properties.
+      //   - scope: bound scope
+      //   - element: previous element.
+      //   - index: position
+      // We need an array of these objects since the same object can be returned from the iterator.
+      // We expect this to be a rare case.
+      // NOTE(review): HashQueueMap is defined elsewhere; the code below relies on shift(key)
+      // removing and returning one stored entry for that key - confirm against its definition.
+      var lastOrder = new HashQueueMap();
+
+      // All DOM work happens inside the watch function itself: $watch is called with a single
+      // argument (no listener) and the function has no return statement.
+      scope.$watch(function ngRepeatWatch(scope){
+        var index, length,
+            collection = scope.$eval(rhs),
+            cursor = iterStartElement,     // current position of the node
+            // Same as lastOrder but it has the current state. It will become the
+            // lastOrder on the next iteration.
+            nextOrder = new HashQueueMap(),
+            arrayBound,
+            childScope,
+            key, value, // key/value of iteration
+            array,
+            last;       // last object information {scope, element, index}
+
+
+
+        if (!isArray(collection)) {
+          // if object, extract keys, sort them and use to determine order of iteration over obj props
+          array = [];
+          for(key in collection) {
+            // Skip inherited properties and keys starting with '$'.
+            if (collection.hasOwnProperty(key) && key.charAt(0) != '$') {
+              array.push(key);
+            }
+          }
+          array.sort();
+        } else {
+          array = collection || [];
+        }
+
+        // Index of the final item; used for $last below.
+        arrayBound = array.length-1;
+
+        // we are not using forEach for perf reasons (trying to avoid #call)
+        for (index = 0, length = array.length; index < length; index++) {
+          // `collection === array` only holds for the array branch above, where the key is the
+          // numeric index; for objects `array` holds the sorted property names.
+          key = (collection === array) ? index : array[index];
+          value = collection[key];
+
+          // Look for an entry left over from the previous run for this same value.
+          last = lastOrder.shift(value);
+
+          if (last) {
+            // if we have already seen this object, then we need to reuse the
+            // associated scope/element
+            childScope = last.scope;
+            nextOrder.push(value, last);
+
+            if (index === last.index) {
+              // do nothing
+              cursor = last.element;
+            } else {
+              // existing item which got moved
+              last.index = index;
+              // This may be a noop, if the element is next, but I don't know of a good way to
+              // figure this out,  since it would require extra DOM access, so let's just hope that
+              // the browsers realizes that it is noop, and treats it as such.
+              cursor.after(last.element);
+              cursor = last.element;
+            }
+          } else {
+            // new item which we don't know about
+            childScope = scope.$new();
+          }
+
+          // Publish the loop variable(s) and position helpers on the item's scope.
+          childScope[valueIdent] = value;
+          if (keyIdent) childScope[keyIdent] = key;
+          childScope.$index = index;
+
+          childScope.$first = (index === 0);
+          childScope.$last = (index === arrayBound);
+          childScope.$middle = !(childScope.$first || childScope.$last);
+
+          if (!last) {
+            // New item: clone the template, insert it after the cursor and record it so it can
+            // be found again on the next run.
+            linker(childScope, function(clone){
+              cursor.after(clone);
+              last = {
+                  scope: childScope,
+                  element: (cursor = clone),
+                  index: index
+                };
+              nextOrder.push(value, last);
+            });
+          }
+        }
+
+        //shrink children
+        // Whatever is still stored in lastOrder was not claimed during this run: remove its
+        // element and destroy its scope.
+        for (key in lastOrder) {
+          if (lastOrder.hasOwnProperty(key)) {
+            array = lastOrder[key];
+            while(array.length) {
+              value = array.pop();
+              value.element.remove();
+              value.scope.$destroy();
+            }
+          }
+        }
+
+        lastOrder = nextOrder;
+      });
+    };
+  }
+});
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:ngShow
+ *
+ * @description
+ * The `ngShow` and `ngHide` directives show or hide a portion of the DOM tree (HTML)
+ * conditionally.
+ *
+ * @element ANY
+ * @param {expression} ngShow If the {@link guide/expression expression} is truthy
+ *     then the element is shown, otherwise it is hidden.
+ *
+ * @example
+
+
+ Click me:
+ Show: I show up when your checkbox is checked.
+ Hide: I hide when your checkbox is checked. +
+ + it('should check ng-show / ng-hide', function() { + expect(element('.doc-example-live span:first:hidden').count()).toEqual(1); + expect(element('.doc-example-live span:last:visible').count()).toEqual(1); + + input('checked').check(); + + expect(element('.doc-example-live span:first:visible').count()).toEqual(1); + expect(element('.doc-example-live span:last:hidden').count()).toEqual(1); + }); + +
+ */ +//TODO(misko): refactor to remove element from the DOM +var ngShowDirective = ngDirective(function(scope, element, attr){ + scope.$watch(attr.ngShow, function ngShowWatchAction(value){ + element.css('display', toBoolean(value) ? '' : 'none'); + }); +}); + + +/** + * @ngdoc directive + * @name ng.directive:ngHide + * + * @description + * The `ngHide` and `ngShow` directives hide or show a portion of the DOM tree (HTML) + * conditionally. + * + * @element ANY + * @param {expression} ngHide If the {@link guide/expression expression} is truthy then + * the element is shown or hidden respectively. + * + * @example + + + Click me:
+ Show: I show up when you checkbox is checked?
+ Hide: I hide when you checkbox is checked? +
+ + it('should check ng-show / ng-hide', function() { + expect(element('.doc-example-live span:first:hidden').count()).toEqual(1); + expect(element('.doc-example-live span:last:visible').count()).toEqual(1); + + input('checked').check(); + + expect(element('.doc-example-live span:first:visible').count()).toEqual(1); + expect(element('.doc-example-live span:last:hidden').count()).toEqual(1); + }); + +
+ */ +//TODO(misko): refactor to remove element from the DOM +var ngHideDirective = ngDirective(function(scope, element, attr){ + scope.$watch(attr.ngHide, function ngHideWatchAction(value){ + element.css('display', toBoolean(value) ? 'none' : ''); + }); +}); + +/** + * @ngdoc directive + * @name ng.directive:ngStyle + * + * @description + * The `ngStyle` directive allows you to set CSS style on an HTML element conditionally. + * + * @element ANY + * @param {expression} ngStyle {@link guide/expression Expression} which evals to an + * object whose keys are CSS style names and values are corresponding values for those CSS + * keys. + * + * @example + + + + +
+ Sample Text +
myStyle={{myStyle}}
+
+ + span { + color: black; + } + + + it('should check ng-style', function() { + expect(element('.doc-example-live span').css('color')).toBe('rgb(0, 0, 0)'); + element('.doc-example-live :button[value=set]').click(); + expect(element('.doc-example-live span').css('color')).toBe('rgb(255, 0, 0)'); + element('.doc-example-live :button[value=clear]').click(); + expect(element('.doc-example-live span').css('color')).toBe('rgb(0, 0, 0)'); + }); + +
+ */ +var ngStyleDirective = ngDirective(function(scope, element, attr) { + scope.$watch(attr.ngStyle, function ngStyleWatchAction(newStyles, oldStyles) { + if (oldStyles && (newStyles !== oldStyles)) { + forEach(oldStyles, function(val, style) { element.css(style, '');}); + } + if (newStyles) element.css(newStyles); + }, true); +}); + +/** + * @ngdoc directive + * @name ng.directive:ngSwitch + * @restrict EA + * + * @description + * Conditionally change the DOM structure. + * + * @usage + * + * ... + * ... + * ... + * ... + * + * + * @scope + * @param {*} ngSwitch|on expression to match against ng-switch-when. + * @paramDescription + * On child elments add: + * + * * `ngSwitchWhen`: the case statement to match against. If match then this + * case will be displayed. + * * `ngSwitchDefault`: the default case when no other casses match. + * + * @example + + + +
+ + selection={{selection}} +
+
+
Settings Div
+ Home Span + default +
+
+
+ + it('should start in settings', function() { + expect(element('.doc-example-live [ng-switch]').text()).toMatch(/Settings Div/); + }); + it('should change to home', function() { + select('selection').option('home'); + expect(element('.doc-example-live [ng-switch]').text()).toMatch(/Home Span/); + }); + it('should select deafault', function() { + select('selection').option('other'); + expect(element('.doc-example-live [ng-switch]').text()).toMatch(/default/); + }); + +
+ */ +var NG_SWITCH = 'ng-switch'; +var ngSwitchDirective = valueFn({ + restrict: 'EA', + require: 'ngSwitch', + // asks for $scope to fool the BC controller module + controller: ['$scope', function ngSwitchController() { + this.cases = {}; + }], + link: function(scope, element, attr, ctrl) { + var watchExpr = attr.ngSwitch || attr.on, + selectedTransclude, + selectedElement, + selectedScope; + + scope.$watch(watchExpr, function ngSwitchWatchAction(value) { + if (selectedElement) { + selectedScope.$destroy(); + selectedElement.remove(); + selectedElement = selectedScope = null; + } + if ((selectedTransclude = ctrl.cases['!' + value] || ctrl.cases['?'])) { + scope.$eval(attr.change); + selectedScope = scope.$new(); + selectedTransclude(selectedScope, function(caseElement) { + selectedElement = caseElement; + element.append(caseElement); + }); + } + }); + } +}); + +var ngSwitchWhenDirective = ngDirective({ + transclude: 'element', + priority: 500, + require: '^ngSwitch', + compile: function(element, attrs, transclude) { + return function(scope, element, attr, ctrl) { + ctrl.cases['!' + attrs.ngSwitchWhen] = transclude; + }; + } +}); + +var ngSwitchDefaultDirective = ngDirective({ + transclude: 'element', + priority: 500, + require: '^ngSwitch', + compile: function(element, attrs, transclude) { + return function(scope, element, attr, ctrl) { + ctrl.cases['?'] = transclude; + }; + } +}); + +/** + * @ngdoc directive + * @name ng.directive:ngTransclude + * + * @description + * Insert the transcluded DOM here. + * + * @element ANY + * + * @example + + + +
+
+
+ {{text}} +
+
+ + it('should have transcluded', function() { + input('title').enter('TITLE'); + input('text').enter('TEXT'); + expect(binding('title')).toEqual('TITLE'); + expect(binding('text')).toEqual('TEXT'); + }); + +
+ * + */ +var ngTranscludeDirective = ngDirective({ + controller: ['$transclude', '$element', function($transclude, $element) { + $transclude(function(clone) { + $element.append(clone); + }); + }] +}); + +/** + * @ngdoc directive + * @name ng.directive:ngView + * @restrict ECA + * + * @description + * # Overview + * `ngView` is a directive that complements the {@link ng.$route $route} service by + * including the rendered template of the current route into the main layout (`index.html`) file. + * Every time the current route changes, the included view changes with it according to the + * configuration of the `$route` service. + * + * @scope + * @example + + +
+ Choose: + Moby | + Moby: Ch1 | + Gatsby | + Gatsby: Ch4 | + Scarlet Letter
+ +
+
+ +
$location.path() = {{$location.path()}}
+
$route.current.templateUrl = {{$route.current.templateUrl}}
+
$route.current.params = {{$route.current.params}}
+
$route.current.scope.name = {{$route.current.scope.name}}
+
$routeParams = {{$routeParams}}
+
+
+ + + controller: {{name}}
+ Book Id: {{params.bookId}}
+
+ + + controller: {{name}}
+ Book Id: {{params.bookId}}
+ Chapter Id: {{params.chapterId}} +
+ + + angular.module('ngView', [], function($routeProvider, $locationProvider) { + $routeProvider.when('/Book/:bookId', { + templateUrl: 'book.html', + controller: BookCntl + }); + $routeProvider.when('/Book/:bookId/ch/:chapterId', { + templateUrl: 'chapter.html', + controller: ChapterCntl + }); + + // configure html5 to get links working on jsfiddle + $locationProvider.html5Mode(true); + }); + + function MainCntl($scope, $route, $routeParams, $location) { + $scope.$route = $route; + $scope.$location = $location; + $scope.$routeParams = $routeParams; + } + + function BookCntl($scope, $routeParams) { + $scope.name = "BookCntl"; + $scope.params = $routeParams; + } + + function ChapterCntl($scope, $routeParams) { + $scope.name = "ChapterCntl"; + $scope.params = $routeParams; + } + + + + it('should load and compile correct template', function() { + element('a:contains("Moby: Ch1")').click(); + var content = element('.doc-example-live [ng-view]').text(); + expect(content).toMatch(/controller\: ChapterCntl/); + expect(content).toMatch(/Book Id\: Moby/); + expect(content).toMatch(/Chapter Id\: 1/); + + element('a:contains("Scarlet")').click(); + content = element('.doc-example-live [ng-view]').text(); + expect(content).toMatch(/controller\: BookCntl/); + expect(content).toMatch(/Book Id\: Scarlet/); + }); + +
+ */
+
+
+/**
+ * @ngdoc event
+ * @name ng.directive:ngView#$viewContentLoaded
+ * @eventOf ng.directive:ngView
+ * @eventType emit on the current ngView scope
+ * @description
+ * Emitted every time the ngView content is reloaded.
+ */
+// Note: `$http` and `$templateCache` are injected but not referenced inside this function.
+var ngViewDirective = ['$http', '$templateCache', '$route', '$anchorScroll', '$compile',
+                       '$controller',
+               function($http,   $templateCache,   $route,   $anchorScroll,   $compile,
+                        $controller) {
+  return {
+    restrict: 'ECA',
+    terminal: true,
+    link: function(scope, element, attr) {
+      var lastScope,                       // scope created for the currently rendered view
+          onloadExp = attr.onload || '';   // expression evaluated after each view is linked
+
+      // Re-render on every successful route change, and once right away at link time.
+      scope.$on('$routeChangeSuccess', update);
+      update();
+
+
+      // Destroy the scope of the previously rendered view, if any.
+      function destroyLastScope() {
+        if (lastScope) {
+          lastScope.$destroy();
+          lastScope = null;
+        }
+      }
+
+      // Empty the element and drop the previous view's scope.
+      function clearContent() {
+        element.html('');
+        destroyLastScope();
+      }
+
+      // Render the current route's template into the element, or clear the element when the
+      // current route has no `locals.$template`.
+      function update() {
+        var locals = $route.current && $route.current.locals,
+            template = locals && locals.$template;
+
+        if (template) {
+          // New markup goes in first; the previous view's scope is destroyed afterwards.
+          element.html(template);
+          destroyLastScope();
+
+          var link = $compile(element.contents()),
+              current = $route.current,
+              controller;
+
+          // The new child scope is also exposed on the route as `current.scope`.
+          lastScope = current.scope = scope.$new();
+          if (current.controller) {
+            // Instantiate the route controller with the route locals plus the new scope and
+            // store it on the element's children under '$ngControllerController'.
+            locals.$scope = lastScope;
+            controller = $controller(current.controller, locals);
+            element.children().data('$ngControllerController', controller);
+          }
+
+          link(lastScope);
+          lastScope.$emit('$viewContentLoaded');
+          lastScope.$eval(onloadExp);
+
+          // $anchorScroll might listen on event...
+          $anchorScroll();
+        } else {
+          clearContent();
+        }
+      }
+    }
+  };
+}];
+
+/**
+ * @ngdoc directive
+ * @name ng.directive:script
+ *
+ * @description
+ * Load content of a script tag, with type `text/ng-template`, into `$templateCache`, so that the
+ * template can be used by `ngInclude`, `ngView` or directive templates.
+ *
+ * @restrict E
+ * @param {'text/ng-template'} type must be set to `'text/ng-template'`
+ *
+ * @example
+
+
+
+
+ Load inlined template
+
+ + it('should load template defined inside script tag', function() { + element('#tpl-link').click(); + expect(element('#tpl-content').text()).toMatch(/Content of the template/); + }); + +
+ */ +var scriptDirective = ['$templateCache', function($templateCache) { + return { + restrict: 'E', + terminal: true, + compile: function(element, attr) { + if (attr.type == 'text/ng-template') { + var templateUrl = attr.id, + // IE is not consistent, in scripts we have to read .text but in other nodes we have to read .textContent + text = element[0].text; + + $templateCache.put(templateUrl, text); + } + } + }; +}]; + +/** + * @ngdoc directive + * @name ng.directive:select + * @restrict E + * + * @description + * HTML `SELECT` element with angular data-binding. + * + * # `ngOptions` + * + * Optionally `ngOptions` attribute can be used to dynamically generate a list of `