diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ddec525c8..cce2df670 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -71,6 +71,7 @@ Other improvements ------------------ - ``fish_indent`` and ``fish_key_reader`` are now available as builtins, and if fish is called with that name it will act like the given tool (as a multi-call binary). This allows truly distributing fish as a single file. (:issue:`10876`) +- ``fish_indent --dump-parse-tree`` now emits simple metrics about the tree including its memory consumption. For distributors ---------------- diff --git a/src/ast.rs b/src/ast.rs index 284ebec9d..2a57de0b2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -98,9 +98,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { /// Node is the base trait of all AST nodes. pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug { - /// The parent node, or null if this is root. - fn parent(&self) -> Option<&dyn Node>; - /// The type of this node. fn typ(&self) -> Type; @@ -139,15 +136,57 @@ fn source<'s>(&self, orig: &'s wstr) -> &'s wstr { self.try_source(orig).unwrap_or_default() } - // The address of the object, for comparison. - fn as_ptr(&self) -> *const (); - - fn pointer_eq(&self, rhs: &dyn Node) -> bool { - std::ptr::eq(self.as_ptr(), rhs.as_ptr()) + /// The raw byte size of this node, excluding children. + /// This also excludes the allocations stored in lists. + fn self_memory_size(&self) -> usize { + std::mem::size_of_val(self) } + + /// Convert to the dynamic Node type. fn as_node(&self) -> &dyn Node; } +/// Return true if two nodes are the same object. +#[inline(always)] +pub fn is_same_node(lhs: &dyn Node, rhs: &dyn Node) -> bool { + // There are two subtleties here: + // + // 1. Two &dyn Node may reference the same underlying object + // with different vtables - for example if one is a &dyn Node + // and the other is a &dyn NodeMut. + // + // 2. Two &dyn Node may reference different underlying objects + // with the same base pointer - for example if a Node is a + // the first field in another Node. + // + // Therefore we make the following assumptions, which seem sound enough: + // 1. If two nodes have different base pointers, they reference different objects + // (though NOT true in C++). + // 2. If two nodes have the same base pointer and same vtable, they reference + // the same object. + // 3. If two nodes have the same base pointer but different vtables, they are the same iff + // their types are equal: a Node cannot have a Node of the same type at the same address + // (unless it's a ZST, which does not apply here). + // + // Note this is performance-sensitive. + + // Different base pointers => not the same. + let lptr = lhs as *const dyn Node as *const (); + let rptr = rhs as *const dyn Node as *const (); + if !std::ptr::eq(lptr, rptr) { + return false; + } + + // Same base pointer and same vtable => same object. + if std::ptr::eq(lhs, rhs) { + return true; + } + + // Same base pointer, but different vtables. + // Compare the types. + lhs.typ() == rhs.typ() +} + /// NodeMut is a mutable node. trait NodeMut: Node + AcceptorMut + ConcreteNodeMut {} @@ -404,7 +443,6 @@ pub trait Leaf: Node { fn has_source(&self) -> bool { self.range().is_some() } - fn leaf_as_node(&self) -> &dyn Node; } // A token node is a node which contains a token, which must be one of a fixed set. @@ -432,8 +470,8 @@ fn allows_keyword(&self, kw: ParseKeyword) -> bool { // A simple variable-sized array, possibly empty. pub trait List: Node { type ContentsNode: Node + Default; - fn contents(&self) -> &[Box]; - fn contents_mut(&mut self) -> &mut Vec>; + fn contents(&self) -> &[Self::ContentsNode]; + fn contents_mut(&mut self) -> &mut Box<[Self::ContentsNode]>; /// Return our count. fn count(&self) -> usize { self.contents().len() @@ -443,11 +481,11 @@ fn is_empty(&self) -> bool { self.contents().is_empty() } /// Iteration support. - fn iter(&self) -> std::slice::Iter> { + fn iter(&self) -> std::slice::Iter { self.contents().iter() } fn get(&self, index: usize) -> Option<&Self::ContentsNode> { - self.contents().get(index).map(|b| &**b) + self.contents().get(index) } } @@ -468,9 +506,6 @@ impl Node for $name { fn typ(&self) -> Type { Type::$type } - fn parent(&self) -> Option<&dyn Node> { - self.parent.map(|p| unsafe { &*p }) - } fn category(&self) -> Category { Category::$category } @@ -486,9 +521,6 @@ fn try_source_range(&self) -> Option { Some(visitor.total) } } - fn as_ptr(&self) -> *const () { - (self as *const $name).cast() - } fn as_node(&self) -> &dyn Node { self } @@ -507,9 +539,6 @@ fn range(&self) -> Option { fn range_mut(&mut self) -> &mut Option { &mut self.range } - fn leaf_as_node(&self) -> &dyn Node { - self - } } impl Acceptor for $name { #[allow(unused_variables)] @@ -522,10 +551,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, VisitResult::Continue(())); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) {} - } }; } @@ -534,7 +559,6 @@ macro_rules! define_keyword_node { ( $name:ident, $($allowed:ident),* $(,)? ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, range: Option, keyword: ParseKeyword, } @@ -575,7 +599,6 @@ macro_rules! define_token_node { ( $name:ident, $($allowed:ident),* $(,)? ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, range: Option, parse_token_type: ParseTokenType, } @@ -629,22 +652,21 @@ macro_rules! define_list_node { ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, - list_contents: Vec>, + list_contents: Box<[$contents]>, } implement_node!($name, list, $type); impl List for $name { type ContentsNode = $contents; - fn contents(&self) -> &[Box] { + fn contents(&self) -> &[Self::ContentsNode] { &self.list_contents } - fn contents_mut(&mut self) -> &mut Vec> { + fn contents_mut(&mut self) -> &mut Box<[Self::ContentsNode]> { &mut self.list_contents } } impl<'a> IntoIterator for &'a $name { - type Item = &'a Box<$contents>; - type IntoIter = std::slice::Iter<'a, Box<$contents>>; + type Item = &'a $contents; + type IntoIter = std::slice::Iter<'a, $contents>; fn into_iter(self) -> Self::IntoIter { self.contents().into_iter() } @@ -652,12 +674,12 @@ fn into_iter(self) -> Self::IntoIter { impl Index for $name { type Output = <$name as List>::ContentsNode; fn index(&self, index: usize) -> &Self::Output { - &*self.contents()[index] + &self.contents()[index] } } impl IndexMut for $name { fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut *self.contents_mut()[index] + &mut self.contents_mut()[index] } } impl Acceptor for $name { @@ -677,15 +699,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, flow); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) { - for i in 0..self.count() { - self[i].parent = Some(self); - self[i].set_parents(); - } - } - } }; } @@ -776,14 +789,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, flow); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) { - $( - set_parent_of_field!(self, $field_name, $field_type); - )* - } - } } } @@ -982,108 +987,10 @@ macro_rules! visit_result { }; } -macro_rules! set_parent_of_field { - ( - $self:ident, - $field_name:ident, - (variant<$field_type:ident>) - ) => { - set_parent_of_union_field!($self, $field_name, $field_type); - }; - ( - $self:ident, - $field_name:ident, - (Option<$field_type:ident>) - ) => { - if $self.$field_name.is_some() { - $self.$field_name.as_mut().unwrap().parent = Some($self); - $self.$field_name.as_mut().unwrap().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - $field_type:tt - ) => { - $self.$field_name.parent = Some($self); - $self.$field_name.set_parents(); - }; -} - -macro_rules! set_parent_of_union_field { - ( - $self:ident, - $field_name:ident, - ArgumentOrRedirectionVariant - ) => { - if matches!($self.$field_name, ArgumentOrRedirectionVariant::Argument(_)) { - $self.$field_name.as_mut_argument().parent = Some($self); - $self.$field_name.as_mut_argument().set_parents(); - } else { - $self.$field_name.as_mut_redirection().parent = Some($self); - $self.$field_name.as_mut_redirection().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - StatementVariant - ) => { - if matches!($self.$field_name, StatementVariant::NotStatement(_)) { - $self.$field_name.as_mut_not_statement().parent = Some($self); - $self.$field_name.as_mut_not_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::BlockStatement(_)) { - $self.$field_name.as_mut_block_statement().parent = Some($self); - $self.$field_name.as_mut_block_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::BraceStatement(_)) { - $self.$field_name.as_mut_brace_statement().parent = Some($self); - $self.$field_name.as_mut_brace_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::IfStatement(_)) { - $self.$field_name.as_mut_if_statement().parent = Some($self); - $self.$field_name.as_mut_if_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::SwitchStatement(_)) { - $self.$field_name.as_mut_switch_statement().parent = Some($self); - $self.$field_name.as_mut_switch_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::DecoratedStatement(_)) { - $self.$field_name.as_mut_decorated_statement().parent = Some($self); - $self.$field_name.as_mut_decorated_statement().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - BlockStatementHeaderVariant - ) => { - if matches!($self.$field_name, BlockStatementHeaderVariant::ForHeader(_)) { - $self.$field_name.as_mut_for_header().parent = Some($self); - $self.$field_name.as_mut_for_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::WhileHeader(_) - ) { - $self.$field_name.as_mut_while_header().parent = Some($self); - $self.$field_name.as_mut_while_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::FunctionHeader(_) - ) { - $self.$field_name.as_mut_function_header().parent = Some($self); - $self.$field_name.as_mut_function_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::BeginHeader(_) - ) { - $self.$field_name.as_mut_begin_header().parent = Some($self); - $self.$field_name.as_mut_begin_header().set_parents(); - } - }; -} - /// A redirection has an operator like > or 2>, and a target like /dev/null or &1. /// Note that pipes are not redirections. #[derive(Default, Debug)] pub struct Redirection { - parent: Option<*const dyn Node>, pub oper: TokenRedirection, pub target: String_, } @@ -1124,7 +1031,6 @@ fn as_mut_variable_assignment_list(&mut self) -> Option<&mut VariableAssignmentL /// An argument or redirection holds either an argument or redirection. #[derive(Default, Debug)] pub struct ArgumentOrRedirection { - parent: Option<*const dyn Node>, pub contents: ArgumentOrRedirectionVariant, } implement_node!(ArgumentOrRedirection, branch, argument_or_redirection); @@ -1168,7 +1074,6 @@ fn as_mut_argument_or_redirection_list(&mut self) -> Option<&mut ArgumentOrRedir /// A statement is a normal command, or an if / while / etc #[derive(Default, Debug)] pub struct Statement { - parent: Option<*const dyn Node>, pub contents: StatementVariant, } implement_node!(Statement, branch, statement); @@ -1188,7 +1093,6 @@ fn as_mut_statement(&mut self) -> Option<&mut Statement> { /// like if statements, where we require a command). #[derive(Default, Debug)] pub struct JobPipeline { - parent: Option<*const dyn Node>, /// Maybe the time keyword. pub time: Option, /// A (possibly empty) list of variable assignments. @@ -1223,7 +1127,6 @@ fn as_mut_job_pipeline(&mut self) -> Option<&mut JobPipeline> { /// A job_conjunction is a job followed by a && or || continuations. #[derive(Default, Debug)] pub struct JobConjunction { - parent: Option<*const dyn Node>, /// The job conjunction decorator. pub decorator: Option, /// The job itself. @@ -1261,14 +1164,13 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { || (token.typ == ParseTokenType::string && !matches!( token.keyword, - ParseKeyword::kw_case | ParseKeyword::kw_end | ParseKeyword::kw_else + ParseKeyword::Case | ParseKeyword::End | ParseKeyword::Else )) } } #[derive(Default, Debug)] pub struct ForHeader { - parent: Option<*const dyn Node>, /// 'for' pub kw_for: KeywordFor, /// var_name @@ -1302,7 +1204,6 @@ fn as_mut_for_header(&mut self) -> Option<&mut ForHeader> { #[derive(Default, Debug)] pub struct WhileHeader { - parent: Option<*const dyn Node>, /// 'while' pub kw_while: KeywordWhile, pub condition: JobConjunction, @@ -1328,7 +1229,6 @@ fn as_mut_while_header(&mut self) -> Option<&mut WhileHeader> { #[derive(Default, Debug)] pub struct FunctionHeader { - parent: Option<*const dyn Node>, pub kw_function: KeywordFunction, /// functions require at least one argument. pub first_arg: Argument, @@ -1356,7 +1256,6 @@ fn as_mut_function_header(&mut self) -> Option<&mut FunctionHeader> { #[derive(Default, Debug)] pub struct BeginHeader { - parent: Option<*const dyn Node>, pub kw_begin: KeywordBegin, /// Note that 'begin' does NOT require a semi or nl afterwards. /// This is valid: begin echo hi; end @@ -1381,7 +1280,6 @@ fn as_mut_begin_header(&mut self) -> Option<&mut BeginHeader> { #[derive(Default, Debug)] pub struct BlockStatement { - parent: Option<*const dyn Node>, /// A header like for, while, etc. pub header: BlockStatementHeaderVariant, /// List of jobs in this block. @@ -1412,7 +1310,6 @@ fn as_mut_block_statement(&mut self) -> Option<&mut BlockStatement> { #[derive(Default, Debug)] pub struct BraceStatement { - parent: Option<*const dyn Node>, /// The opening brace, in command position. pub left_brace: TokenLeftBrace, /// List of jobs in this block. @@ -1443,7 +1340,6 @@ fn as_mut_brace_statement(&mut self) -> Option<&mut BraceStatement> { #[derive(Default, Debug)] pub struct IfClause { - parent: Option<*const dyn Node>, /// The 'if' keyword. pub kw_if: KeywordIf, /// The 'if' condition. @@ -1474,7 +1370,6 @@ fn as_mut_if_clause(&mut self) -> Option<&mut IfClause> { #[derive(Default, Debug)] pub struct ElseifClause { - parent: Option<*const dyn Node>, /// The 'else' keyword. pub kw_else: KeywordElse, /// The 'if' clause following it. @@ -1498,8 +1393,8 @@ fn as_mut_elseif_clause(&mut self) -> Option<&mut ElseifClause> { } impl CheckParse for ElseifClause { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_else - && pop.peek_token(1).keyword == ParseKeyword::kw_if + pop.peek_token(0).keyword == ParseKeyword::Else + && pop.peek_token(1).keyword == ParseKeyword::If } } @@ -1517,7 +1412,6 @@ fn as_mut_elseif_clause_list(&mut self) -> Option<&mut ElseifClauseList> { #[derive(Default, Debug)] pub struct ElseClause { - parent: Option<*const dyn Node>, /// else ; body pub kw_else: KeywordElse, pub semi_nl: Option, @@ -1542,13 +1436,12 @@ fn as_mut_else_clause(&mut self) -> Option<&mut ElseClause> { } impl CheckParse for ElseClause { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_else + pop.peek_token(0).keyword == ParseKeyword::Else } } #[derive(Default, Debug)] pub struct IfStatement { - parent: Option<*const dyn Node>, /// if part pub if_clause: IfClause, /// else if list @@ -1582,7 +1475,6 @@ fn as_mut_if_statement(&mut self) -> Option<&mut IfStatement> { #[derive(Default, Debug)] pub struct CaseItem { - parent: Option<*const dyn Node>, /// case \ ; body pub kw_case: KeywordCase, pub arguments: ArgumentList, @@ -1609,13 +1501,12 @@ fn as_mut_case_item(&mut self) -> Option<&mut CaseItem> { } impl CheckParse for CaseItem { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_case + pop.peek_token(0).keyword == ParseKeyword::Case } } #[derive(Default, Debug)] pub struct SwitchStatement { - parent: Option<*const dyn Node>, /// switch \ ; body ; end args_redirs pub kw_switch: KeywordSwitch, pub argument: Argument, @@ -1649,7 +1540,6 @@ fn as_mut_switch_statement(&mut self) -> Option<&mut SwitchStatement> { /// "builtin" or "command" or "exec" #[derive(Default, Debug)] pub struct DecoratedStatement { - parent: Option<*const dyn Node>, /// An optional decoration (command, builtin, exec, etc). pub opt_decoration: Option, /// Command to run. @@ -1678,7 +1568,6 @@ fn as_mut_decorated_statement(&mut self) -> Option<&mut DecoratedStatement> { /// A not statement like `not true` or `! true` #[derive(Default, Debug)] pub struct NotStatement { - parent: Option<*const dyn Node>, /// Keyword, either not or exclam. pub kw: KeywordNot, pub time: Option, @@ -1706,7 +1595,6 @@ fn as_mut_not_statement(&mut self) -> Option<&mut NotStatement> { #[derive(Default, Debug)] pub struct JobContinuation { - parent: Option<*const dyn Node>, pub pipe: TokenPipe, pub newlines: MaybeNewlines, pub variables: VariableAssignmentList, @@ -1750,7 +1638,6 @@ fn as_mut_job_continuation_list(&mut self) -> Option<&mut JobContinuationList> { #[derive(Default, Debug)] pub struct JobConjunctionContinuation { - parent: Option<*const dyn Node>, /// The && or || token. pub conjunction: TokenConjunction, pub newlines: MaybeNewlines, @@ -1790,7 +1677,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { /// instances of this. #[derive(Default, Debug)] pub struct AndorJob { - parent: Option<*const dyn Node>, pub job: JobConjunction, } implement_node!(AndorJob, branch, andor_job); @@ -1808,7 +1694,7 @@ fn as_mut_andor_job(&mut self) -> Option<&mut AndorJob> { impl CheckParse for AndorJob { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_and | ParseKeyword::kw_or) { + if !matches!(keyword, ParseKeyword::And | ParseKeyword::Or) { return false; } // Check that the argument to and/or is a string that's not help. Otherwise @@ -1838,7 +1724,6 @@ fn as_mut_andor_job_list(&mut self) -> Option<&mut AndorJobList> { /// In practice the tok_ends are ignored by fish code so we do not bother to store them. #[derive(Default, Debug)] pub struct FreestandingArgumentList { - parent: Option<*const dyn Node>, pub arguments: ArgumentList, } implement_node!(FreestandingArgumentList, branch, freestanding_argument_list); @@ -1912,7 +1797,6 @@ fn as_mut_case_item_list(&mut self) -> Option<&mut CaseItemList> { /// A variable_assignment contains a source range like FOO=bar. #[derive(Default, Debug)] pub struct VariableAssignment { - parent: Option<*const dyn Node>, range: Option, } implement_node!(VariableAssignment, leaf, variable_assignment); @@ -1953,7 +1837,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { /// Zero or more newlines. #[derive(Default, Debug)] pub struct MaybeNewlines { - parent: Option<*const dyn Node>, range: Option, } implement_node!(MaybeNewlines, leaf, maybe_newlines); @@ -1979,7 +1862,6 @@ fn as_mut_maybe_newlines(&mut self) -> Option<&mut MaybeNewlines> { /// This is a separate type because it is sometimes useful to find all arguments. #[derive(Default, Debug)] pub struct Argument { - parent: Option<*const dyn Node>, range: Option, } implement_node!(Argument, leaf, argument); @@ -2015,27 +1897,27 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { define_token_node!(TokenRightBrace, right_brace); define_token_node!(TokenRedirection, redirection); -define_keyword_node!(DecoratedStatementDecorator, kw_command, kw_builtin, kw_exec); -define_keyword_node!(JobConjunctionDecorator, kw_and, kw_or); -define_keyword_node!(KeywordBegin, kw_begin); -define_keyword_node!(KeywordCase, kw_case); -define_keyword_node!(KeywordElse, kw_else); -define_keyword_node!(KeywordEnd, kw_end); -define_keyword_node!(KeywordFor, kw_for); -define_keyword_node!(KeywordFunction, kw_function); -define_keyword_node!(KeywordIf, kw_if); -define_keyword_node!(KeywordIn, kw_in); -define_keyword_node!(KeywordNot, kw_not, kw_exclam); -define_keyword_node!(KeywordSwitch, kw_switch); -define_keyword_node!(KeywordTime, kw_time); -define_keyword_node!(KeywordWhile, kw_while); +define_keyword_node!(DecoratedStatementDecorator, Command, Builtin, Exec); +define_keyword_node!(JobConjunctionDecorator, And, Or); +define_keyword_node!(KeywordBegin, Begin); +define_keyword_node!(KeywordCase, Case); +define_keyword_node!(KeywordElse, Else); +define_keyword_node!(KeywordEnd, End); +define_keyword_node!(KeywordFor, For); +define_keyword_node!(KeywordFunction, Function); +define_keyword_node!(KeywordIf, If); +define_keyword_node!(KeywordIn, In); +define_keyword_node!(KeywordNot, Not, Exclam); +define_keyword_node!(KeywordSwitch, Switch); +define_keyword_node!(KeywordTime, Time); +define_keyword_node!(KeywordWhile, While); impl CheckParse for JobConjunctionDecorator { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { // This is for a job conjunction like `and stuff` // But if it's `and --help` then we treat it as an ordinary command. let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_and | ParseKeyword::kw_or) { + if !matches!(keyword, ParseKeyword::And | ParseKeyword::Or) { return false; } !pop.peek_token(1).is_help_argument @@ -2051,7 +1933,7 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { let keyword = pop.peek_token(0).keyword; if !matches!( keyword, - ParseKeyword::kw_command | ParseKeyword::kw_builtin | ParseKeyword::kw_exec + ParseKeyword::Command | ParseKeyword::Builtin | ParseKeyword::Exec ) { return false; } @@ -2064,7 +1946,7 @@ impl CheckParse for KeywordTime { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { // Time keyword is only the time builtin if the next argument doesn't have a dash. let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_time) { + if !matches!(keyword, ParseKeyword::Time) { return false; } !pop.peek_token(1).is_dash_prefix_string() @@ -2079,9 +1961,9 @@ pub fn decoration(&self) -> StatementDecoration { }; let decorator: &dyn Keyword = decorator; match decorator.keyword() { - ParseKeyword::kw_command => StatementDecoration::command, - ParseKeyword::kw_builtin => StatementDecoration::builtin, - ParseKeyword::kw_exec => StatementDecoration::exec, + ParseKeyword::Command => StatementDecoration::command, + ParseKeyword::Builtin => StatementDecoration::builtin, + ParseKeyword::Exec => StatementDecoration::exec, _ => panic!("Unexpected keyword in statement decoration"), } } @@ -2130,18 +2012,6 @@ fn embedded_node(&self) -> &dyn NodeMut { ArgumentOrRedirectionVariant::Redirection(node) => node, } } - fn as_mut_argument(&mut self) -> &mut Argument { - match self { - ArgumentOrRedirectionVariant::Argument(node) => node, - _ => panic!(), - } - } - fn as_mut_redirection(&mut self) -> &mut Redirection { - match self { - ArgumentOrRedirectionVariant::Redirection(redirection) => redirection, - _ => panic!(), - } - } } impl ArgumentOrRedirection { @@ -2252,30 +2122,6 @@ fn embedded_node(&self) -> &dyn NodeMut { BlockStatementHeaderVariant::BeginHeader(node) => node, } } - fn as_mut_for_header(&mut self) -> &mut ForHeader { - match self { - BlockStatementHeaderVariant::ForHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_while_header(&mut self) -> &mut WhileHeader { - match self { - BlockStatementHeaderVariant::WhileHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_function_header(&mut self) -> &mut FunctionHeader { - match self { - BlockStatementHeaderVariant::FunctionHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_begin_header(&mut self) -> &mut BeginHeader { - match self { - BlockStatementHeaderVariant::BeginHeader(node) => node, - _ => panic!(), - } - } } #[derive(Debug)] @@ -2378,42 +2224,6 @@ fn embedded_node(&self) -> &dyn NodeMut { StatementVariant::DecoratedStatement(node) => node, } } - fn as_mut_not_statement(&mut self) -> &mut NotStatement { - match self { - StatementVariant::NotStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_block_statement(&mut self) -> &mut BlockStatement { - match self { - StatementVariant::BlockStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_brace_statement(&mut self) -> &mut BraceStatement { - match self { - StatementVariant::BraceStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_if_statement(&mut self) -> &mut IfStatement { - match self { - StatementVariant::IfStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_switch_statement(&mut self) -> &mut SwitchStatement { - match self { - StatementVariant::SwitchStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_decorated_statement(&mut self) -> &mut DecoratedStatement { - match self { - StatementVariant::DecoratedStatement(node) => node, - _ => panic!(), - } - } } /// Return a string literal name for an ast type. @@ -2461,35 +2271,99 @@ pub fn ast_type_to_string(t: Type) -> &'static wstr { } } +// An entry in the traversal stack. We retain nodes after visiting them, so that we can reconstruct the parent path +// as the list of visited nodes remaining on the stack. Once we've visited a node twice, then it's popped, as that +// means all of its children have been visited (or skipped). +enum TraversalEntry<'a> { + NeedsVisit(&'a dyn Node), + Visited(&'a dyn Node), +} + // A way to visit nodes iteratively. // This is pre-order. Each node is visited before its children. // Example: // let tv = Traversal::new(start); // while let Some(node) = tv.next() {...} pub struct Traversal<'a> { - stack: Vec<&'a dyn Node>, + stack: Vec>, } impl<'a> Traversal<'a> { // Construct starting with a node pub fn new(n: &'a dyn Node) -> Self { - Self { stack: vec![n] } + Self { + stack: vec![TraversalEntry::NeedsVisit(n)], + } + } + + // Return an iterator over the parent nodes - those which have been visited but not yet popped. + // Parents are returned in reverse order (immediate parent, grandparent, etc). + // The most recently visited node is first; that is first parent node will be the node most recently + // returned by next(); + pub fn parent_nodes(&self) -> impl Iterator + '_ { + self.stack.iter().rev().filter_map(|entry| match entry { + TraversalEntry::Visited(node) => Some(*node), + _ => None, + }) + } + + // Return the parent node of the given node, asserting it is on the stack + // (i.e. we are processing it or one of its children, transitively). + // Note this does NOT return parents of nodes that have not yet been yielded by the iterator; + // for example `traversal.parent(¤t.child)` will panic because `current.child` has not been + // yielded yet. + pub fn parent(&self, node: &dyn Node) -> &'a dyn Node { + let mut iter = self.parent_nodes(); + while let Some(n) = iter.next() { + if is_same_node(node, n) { + return iter.next().expect("Node is root and has no parent"); + } + } + panic!("Node {:?} has either been popped off of the stack or not yet visited. Cannot find parent.", node.describe()); + } + + // Skip the children of the last visited node, which must be passed + // as a sanity check. This node must be the last visited node on the stack. + // For convenience, also remove the (visited) node itself. + pub fn skip_children(&mut self, node: &dyn Node) { + for idx in (0..self.stack.len()).rev() { + if let TraversalEntry::Visited(n) = self.stack[idx] { + assert!( + is_same_node(node, n), + "Passed node is not the last visited node" + ); + self.stack.truncate(idx); + return; + } + } + panic!("Passed node is not on the stack"); } } impl<'a> Iterator for Traversal<'a> { type Item = &'a dyn Node; + // Return the next node. fn next(&mut self) -> Option<&'a dyn Node> { - let node = self.stack.pop()?; - // We want to visit in reverse order so the first child ends up on top of the stack. + let node = loop { + match self.stack.pop()? { + TraversalEntry::NeedsVisit(n) => { + // Leave a marker for the node we just visited. + self.stack.push(TraversalEntry::Visited(n)); + break n; + } + TraversalEntry::Visited(_) => {} + } + }; + // Append this node's children to our stack. node.accept(self, true /* reverse */); Some(node) } } impl<'a, 'v: 'a> NodeVisitor<'v> for Traversal<'a> { + // Record that a child of a node needs to be visited. fn visit(&mut self, node: &'a dyn Node) { - self.stack.push(node) + self.stack.push(TraversalEntry::NeedsVisit(node)); } } @@ -2513,6 +2387,8 @@ pub struct Extras { pub struct Ast { // The top node. // Its type depends on what was requested to parse. + // Note Node parent pointers are implemented via raw pointers, + // so we must enforce pointer stability. top: Box, /// Whether any errors were encountered during parsing. any_error: bool, @@ -2558,9 +2434,6 @@ pub fn walk(&'_ self) -> Traversal<'_> { pub fn top(&self) -> &dyn Node { self.top.as_node() } - fn top_mut(&mut self) -> &mut dyn NodeMut { - &mut *self.top - } /// Return whether any errors were encountered during parsing. pub fn errored(&self) -> bool { self.any_error @@ -2571,8 +2444,9 @@ pub fn errored(&self) -> bool { pub fn dump(&self, orig: &wstr) -> WString { let mut result = WString::new(); - for node in self.walk() { - let depth = get_depth(node); + let mut traversal = self.walk(); + while let Some(node) = traversal.next() { + let depth = traversal.parent_nodes().count() - 1; // dot-| padding result += &str::repeat("! ", depth)[..]; @@ -2606,7 +2480,7 @@ pub fn dump(&self, orig: &wstr) -> WString { WString::from_str("") } _ => { - token_type_user_presentable_description(n.token_type(), ParseKeyword::none) + token_type_user_presentable_description(n.token_type(), ParseKeyword::None) } }; result += &desc[..]; @@ -2619,17 +2493,6 @@ pub fn dump(&self, orig: &wstr) -> WString { } } -// Return the depth of a node, i.e. number of parent links. -fn get_depth(node: &dyn Node) -> usize { - let mut result = 0; - let mut cursor = node; - while let Some(parent) = cursor.parent() { - result += 1; - cursor = parent; - } - result -} - struct SourceRangeVisitor { /// Total range we have encountered. total: SourceRange, @@ -3056,7 +2919,7 @@ fn did_visit_fields_of<'a>(&'a mut self, node: &'a dyn NodeMut, flow: VisitResul if next_token.typ == ParseTokenType::string && matches!( next_token.keyword, - ParseKeyword::kw_case | ParseKeyword::kw_else | ParseKeyword::kw_end + ParseKeyword::Case | ParseKeyword::Else | ParseKeyword::End ) { self.consume_excess_token_generating_error(); @@ -3087,9 +2950,9 @@ fn visit_argument_or_redirection( node: &mut ArgumentOrRedirectionVariant, ) -> VisitResult { if let Some(arg) = self.try_parse::() { - *node = ArgumentOrRedirectionVariant::Argument(*arg); + *node = ArgumentOrRedirectionVariant::Argument(arg); } else if let Some(redir) = self.try_parse::() { - *node = ArgumentOrRedirectionVariant::Redirection(*redir); + *node = ArgumentOrRedirectionVariant::Redirection(redir); } else { internal_error!( self, @@ -3115,22 +2978,22 @@ fn visit_decorated_statement_decorator( &mut self, node: &mut Option, ) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_job_conjunction_decorator(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_else_clause(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_semi_nl(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_time(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_token_background(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } } @@ -3160,7 +3023,7 @@ fn token_types_user_presentable_description(types: &'static [ParseTokenType]) -> if !res.is_empty() { res += L!(" or "); } - res += &token_type_user_presentable_description(*typ, ParseKeyword::none)[..]; + res += &token_type_user_presentable_description(*typ, ParseKeyword::None)[..]; } res } @@ -3386,7 +3249,7 @@ fn consume_token_type(&mut self, typ: ParseTokenType) -> SourceRange { tok, ParseErrorCode::generic, "Expected %ls, but found %ls", - token_type_user_presentable_description(typ, ParseKeyword::none), + token_type_user_presentable_description(typ, ParseKeyword::None), tok.user_presentable_description() ); return SourceRange::new(0, 0); @@ -3411,7 +3274,7 @@ fn consume_excess_token_generating_error(&mut self) { tok, ParseErrorCode::generic, "Expected %ls, but found %ls", - token_type_user_presentable_description(ParseTokenType::string, ParseKeyword::none), + token_type_user_presentable_description(ParseTokenType::string, ParseKeyword::None), tok.user_presentable_description() ); return; @@ -3422,7 +3285,7 @@ fn consume_excess_token_generating_error(&mut self) { ParseTokenType::string => { // There are three keywords which end a job list. match tok.keyword { - ParseKeyword::kw_case => { + ParseKeyword::Case => { parse_error!( self, tok, @@ -3430,7 +3293,7 @@ fn consume_excess_token_generating_error(&mut self) { "'case' builtin not inside of switch block" ); } - ParseKeyword::kw_end => { + ParseKeyword::End => { parse_error!( self, tok, @@ -3438,7 +3301,7 @@ fn consume_excess_token_generating_error(&mut self) { "'end' outside of a block" ); } - ParseKeyword::kw_else => { + ParseKeyword::Else => { parse_error!( self, tok, @@ -3541,7 +3404,6 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: } // We're going to populate a vector with our nodes. - // Later on we will copy this to the heap with a single allocation. let mut contents = vec![]; loop { @@ -3608,7 +3470,7 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: "Contents size out of bounds" ); assert!(list.contents().is_empty(), "List should still be empty"); - *list.contents_mut() = contents; + *list.contents_mut() = contents.into_boxed_slice(); } FLOGF!( @@ -3641,12 +3503,12 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { "Expected %s, but found %ls", token_type_user_presentable_description( ParseTokenType::end, - ParseKeyword::none + ParseKeyword::None ), slf.peek_token(0).user_presentable_description() ); } - StatementVariant::DecoratedStatement(*embedded) + StatementVariant::DecoratedStatement(embedded) } if self.peek_token(0).typ == ParseTokenType::terminate && self.allow_incomplete() { @@ -3654,7 +3516,7 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // Construct a decorated statement, which will be unsourced. self.allocate_visit::(); } else if self.peek_token(0).typ == ParseTokenType::left_brace { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); return StatementVariant::BraceStatement(embedded); } else if self.peek_token(0).typ != ParseTokenType::string { // We may be unwinding already; do not produce another error. @@ -3699,11 +3561,11 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // If we are one of these, then look for specifically help arguments. Otherwise, if the next token // looks like an option (starts with a dash), then parse it as a decorated statement. let help_only_kws = [ - ParseKeyword::kw_begin, - ParseKeyword::kw_function, - ParseKeyword::kw_if, - ParseKeyword::kw_switch, - ParseKeyword::kw_while, + ParseKeyword::Begin, + ParseKeyword::Function, + ParseKeyword::If, + ParseKeyword::Switch, + ParseKeyword::While, ]; if if help_only_kws.contains(&self.peek_token(0).keyword) { self.peek_token(1).is_help_argument @@ -3715,8 +3577,8 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // Likewise if the next token doesn't look like an argument at all. This corresponds to // e.g. a "naked if". - let naked_invocation_invokes_help = ![ParseKeyword::kw_begin, ParseKeyword::kw_end] - .contains(&self.peek_token(0).keyword); + let naked_invocation_invokes_help = + ![ParseKeyword::Begin, ParseKeyword::End].contains(&self.peek_token(0).keyword); if naked_invocation_invokes_help && [ParseTokenType::end, ParseTokenType::terminate] .contains(&self.peek_token(1).typ) @@ -3726,26 +3588,26 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { } match self.peek_token(0).keyword { - ParseKeyword::kw_not | ParseKeyword::kw_exclam => { - let embedded = self.allocate_visit::(); + ParseKeyword::Not | ParseKeyword::Exclam => { + let embedded = self.allocate_boxed_visit::(); StatementVariant::NotStatement(embedded) } - ParseKeyword::kw_for - | ParseKeyword::kw_while - | ParseKeyword::kw_function - | ParseKeyword::kw_begin => { - let embedded = self.allocate_visit::(); + ParseKeyword::For + | ParseKeyword::While + | ParseKeyword::Function + | ParseKeyword::Begin => { + let embedded = self.allocate_boxed_visit::(); StatementVariant::BlockStatement(embedded) } - ParseKeyword::kw_if => { - let embedded = self.allocate_visit::(); + ParseKeyword::If => { + let embedded = self.allocate_boxed_visit::(); StatementVariant::IfStatement(embedded) } - ParseKeyword::kw_switch => { - let embedded = self.allocate_visit::(); + ParseKeyword::Switch => { + let embedded = self.allocate_boxed_visit::(); StatementVariant::SwitchStatement(embedded) } - ParseKeyword::kw_end => { + ParseKeyword::End => { // 'end' is forbidden as a command. // For example, `if end` or `while end` will produce this error. // We still have to descend into the decorated statement because @@ -3767,21 +3629,21 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { /// This must never return null. fn allocate_populate_block_header(&mut self) -> BlockStatementHeaderVariant { match self.peek_token(0).keyword { - ParseKeyword::kw_for => { + ParseKeyword::For => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::ForHeader(*embedded) + BlockStatementHeaderVariant::ForHeader(embedded) } - ParseKeyword::kw_while => { + ParseKeyword::While => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::WhileHeader(*embedded) + BlockStatementHeaderVariant::WhileHeader(embedded) } - ParseKeyword::kw_function => { + ParseKeyword::Function => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::FunctionHeader(*embedded) + BlockStatementHeaderVariant::FunctionHeader(embedded) } - ParseKeyword::kw_begin => { + ParseKeyword::Begin => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::BeginHeader(*embedded) + BlockStatementHeaderVariant::BeginHeader(embedded) } _ => { internal_error!( @@ -3793,7 +3655,7 @@ fn allocate_populate_block_header(&mut self) -> BlockStatementHeaderVariant { } } - fn try_parse(&mut self) -> Option> { + fn try_parse(&mut self) -> Option { if !T::can_be_parsed(self) { return None; } @@ -3815,10 +3677,17 @@ fn allocate(&self) -> Box { result } - // Given a node type, allocate it, invoke its default constructor, + // Given a node type, allocate it, invoking its default constructor, // and then visit it as a field. - // Return the resulting Node pointer. It is never null. - fn allocate_visit(&mut self) -> Box { + // Return the resulting Node. + fn allocate_visit(&mut self) -> T { + let mut result = T::default(); + self.visit_mut(&mut result); + result + } + + // Like allocate_visit, but returns the value as a Box. + fn allocate_boxed_visit(&mut self) -> Box { let mut result = Box::::default(); let _ = self.visit_mut(&mut *result); result @@ -3849,7 +3718,7 @@ fn visit_variable_assignment(&mut self, varas: &mut VariableAssignment) { fn visit_job_continuation(&mut self, node: &mut JobContinuation) { // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`. - if [ParseKeyword::kw_and, ParseKeyword::kw_or].contains(&self.peek_token(1).keyword) { + if [ParseKeyword::And, ParseKeyword::Or].contains(&self.peek_token(1).keyword) { parse_error!( self, self.peek_token(1), @@ -3917,7 +3786,7 @@ fn visit_keyword(&mut self, keyword: &mut dyn Keyword) -> VisitResult { // Special error reporting for keyword_t. let allowed_keywords = keyword.allowed_keywords(); - if keyword.allowed_keywords() == [ParseKeyword::kw_end] { + if keyword.allowed_keywords() == [ParseKeyword::End] { return VisitResult::Break(MissingEndError { allowed_keywords, token: *self.peek_token(0), @@ -4007,23 +3876,6 @@ fn parse_from_top( semis: pops.semis, errors: pops.errors, }; - - if top_type == Type::job_list { - // Set all parent nodes. - // It turns out to be more convenient to do this after the parse phase. - ast.top_mut() - .as_mut_job_list() - .as_mut() - .unwrap() - .set_parents(); - } else { - ast.top_mut() - .as_mut_freestanding_argument_list() - .as_mut() - .unwrap() - .set_parents(); - } - ast } @@ -4105,7 +3957,7 @@ pub(crate) fn unescape_keyword(tok: TokenType, token: &wstr) -> Cow<'_, wstr> { Cow::Owned(unescape_string(token, UnescapeStringStyle::default()).unwrap_or_default()) } -/// Given a token, returns the keyword it matches, or ParseKeyword::none. +/// Given a token, returns the keyword it matches, or ParseKeyword::None. fn keyword_for_token(tok: TokenType, token: &wstr) -> ParseKeyword { ParseKeyword::from(&unescape_keyword(tok, token)[..]) } diff --git a/src/builtins/fish_indent.rs b/src/builtins/fish_indent.rs index 87e26bee1..acc6f8578 100644 --- a/src/builtins/fish_indent.rs +++ b/src/builtins/fish_indent.rs @@ -48,17 +48,20 @@ /// certain runs, weight line breaks, have a cost model, etc. struct PrettyPrinter<'source, 'ast> { /// The parsed ast. - ast: Ast, + ast: &'ast Ast, state: PrettyPrinterState<'source, 'ast>, } struct PrettyPrinterState<'source, 'ast> { - /// Original source. + // Original source. source: &'source wstr, - /// The indents of our string. - /// This has the same length as 'source' and describes the indentation level. + // The traversal of the ast. + traversal: Traversal<'ast>, + + // The indents of our string. + // This has the same length as 'source' and describes the indentation level. indents: Vec, /// The prettifier output. @@ -85,6 +88,60 @@ struct PrettyPrinterState<'source, 'ast> { errors: Option<&'ast SourceRangeList>, } +#[derive(Copy, Clone, Default, Debug)] +struct AstSizeMetrics { + /// The total number of nodes. + node_count: usize, + /// The number of branches, leaves, and lists, tokens, and keywords. + /// Note tokens and keywords are also counted as leaves. + branch_count: usize, + leaf_count: usize, + list_count: usize, + token_count: usize, + keyword_count: usize, + // An estimate of the total allocated size of the ast in bytes. + memory_size: usize, +} + +impl std::fmt::Display for AstSizeMetrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "AstSizeMetrics:")?; + writeln!(f, " nodes: {}", self.node_count)?; + writeln!(f, " branches: {}", self.branch_count)?; + writeln!(f, " leaves: {}", self.leaf_count)?; + writeln!(f, " lists: {}", self.list_count)?; + writeln!(f, " tokens: {}", self.token_count)?; + writeln!(f, " keywords: {}", self.keyword_count)?; + + let memsize = self.memory_size; + let (val, unit) = if memsize >= 1024 * 1024 { + (memsize as f64 / (1024.0 * 1024.0), "MB") + } else { + (memsize as f64 / 1024.0, "KB") + }; + writeln!(f, " memory: {} bytes ({:.2} {})", memsize, val, unit) + } +} + +impl<'a> NodeVisitor<'a> for AstSizeMetrics { + fn visit(&mut self, node: &'a dyn Node) { + self.node_count += 1; + self.memory_size += node.self_memory_size(); + match node.category() { + Category::branch => self.branch_count += 1, + Category::leaf => self.leaf_count += 1, + Category::list => self.list_count += 1, + } + if node.as_token().is_some() { + self.token_count += 1; + } + if node.as_keyword().is_some() { + self.keyword_count += 1; + } + node.accept(self, false); + } +} + /// Flags we support. #[derive(Copy, Clone, Default)] struct GapFlags { @@ -105,11 +162,13 @@ struct GapFlags { } impl<'source, 'ast> PrettyPrinter<'source, 'ast> { - fn new(source: &'source wstr, do_indent: bool) -> Self { + fn new(source: &'source wstr, ast: &'ast Ast, do_indent: bool) -> Self { + let traversal = Traversal::new(ast.top()); let mut zelf = Self { - ast: Ast::parse(source, parse_flags(), None), + ast, state: PrettyPrinterState { source, + traversal, indents: if do_indent /* Whether to indent, or just insert spaces. */ { @@ -135,10 +194,10 @@ fn new(source: &'source wstr, do_indent: bool) -> Self { } // Entry point. Prettify our source code and return it. - fn prettify(&'ast mut self) -> WString { + fn prettify(&mut self) -> WString { self.state.output.clear(); self.state.errors = Some(&self.ast.extras.errors); - self.state.visit(self.ast.top()); + self.state.prettify_traversal(); // Trailing gap text. self.state.emit_gap_text_before( @@ -340,13 +399,13 @@ fn gap_text_flags_before_node(&self, node: &dyn Node) -> GapFlags { ParseTokenType::string => { // Allow escaped newlines before commands that follow a variable assignment // since both can be long (#7955). - let p = node.parent().unwrap(); + let p = self.traversal.parent(node); if p.typ() != Type::decorated_statement { return result; } - let p = p.parent().unwrap(); + let p = self.traversal.parent(p); assert_eq!(p.typ(), Type::statement); - let p = p.parent().unwrap(); + let p = self.traversal.parent(p); if let Some(job) = p.as_job_pipeline() { if !job.variables.is_empty() { result.allow_escaped_newlines = true; @@ -672,8 +731,8 @@ fn visit_semi_nl(&mut self, node: &dyn ast::Token) { } fn is_multi_line_brace(&self, node: &dyn ast::Token) -> bool { - node.parent() - .unwrap() + self.traversal + .parent(node.as_node()) .as_brace_statement() .is_some_and(|brace_statement| { self.multi_line_brace_statement_locations @@ -750,11 +809,61 @@ fn visit_maybe_newlines(&mut self, node: &ast::MaybeNewlines) { self.emit_gap_text(gap_range, flags); } - fn visit_begin_header(&mut self) { + fn visit_begin_header(&mut self, node: &ast::BeginHeader) { + self.emit_node_text(&node.kw_begin); + if let Some(semi_nl) = &node.semi_nl { + self.visit_semi_nl(semi_nl); + } + // 'begin' does not require a newline after it, but we insert one. if !self.at_line_start() { self.emit_newline(); } } + + // Prettify our ast traversal, populating the output. + fn prettify_traversal(&mut self) { + while let Some(node) = self.traversal.next() { + // Leaf nodes we just visit their text. + if node.as_keyword().is_some() { + self.emit_node_text(node); + continue; + } + if let Some(token) = node.as_token() { + match token.token_type() { + ParseTokenType::end => self.visit_semi_nl(token), + ParseTokenType::left_brace => self.visit_left_brace(token), + ParseTokenType::right_brace => self.visit_right_brace(token), + _ => self.emit_node_text(node), + } + continue; + } + match node.typ() { + Type::argument | Type::variable_assignment => { + self.emit_node_text(node); + self.traversal.skip_children(node); + } + Type::redirection => { + self.visit_redirection(node.as_redirection().unwrap()); + self.traversal.skip_children(node); + } + Type::maybe_newlines => { + self.visit_maybe_newlines(node.as_maybe_newlines().unwrap()); + self.traversal.skip_children(node); + } + Type::begin_header => { + self.visit_begin_header(node.as_begin_header().unwrap()); + self.traversal.skip_children(node); + } + _ => { + // For branch and list nodes, default is to visit their children. + if [Category::branch, Category::list].contains(&node.category()) { + continue; + } + panic!("unexpected node type"); + } + } + } + } } // The flags we use to parse. @@ -765,50 +874,6 @@ fn parse_flags() -> ParseTreeFlags { | ParseTreeFlags::SHOW_BLANK_LINES } -impl<'source, 'ast> NodeVisitor<'_> for PrettyPrinterState<'source, 'ast> { - // Default implementation is to just visit children. - fn visit(&mut self, node: &'_ dyn Node) { - // Leaf nodes we just visit their text. - if node.as_keyword().is_some() { - self.emit_node_text(node); - return; - } - if let Some(token) = node.as_token() { - match token.token_type() { - ParseTokenType::end => self.visit_semi_nl(token), - ParseTokenType::left_brace => self.visit_left_brace(token), - ParseTokenType::right_brace => self.visit_right_brace(token), - _ => self.emit_node_text(node), - } - return; - } - match node.typ() { - Type::argument | Type::variable_assignment => { - self.emit_node_text(node); - } - Type::redirection => { - self.visit_redirection(node.as_redirection().unwrap()); - } - Type::maybe_newlines => { - self.visit_maybe_newlines(node.as_maybe_newlines().unwrap()); - } - Type::begin_header => { - // 'begin' does not require a newline after it, but we insert one. - node.accept(self, false); - self.visit_begin_header(); - } - _ => { - // For branch and list nodes, default is to visit their children. - if [Category::branch, Category::list].contains(&node.category()) { - node.accept(self, false); - return; - } - panic!("unexpected node type"); - } - } - } -} - /// Return whether a character at a given index is escaped. /// A character is escaped if it has an odd number of backslashes. fn char_is_escaped(text: &wstr, idx: usize) -> bool { @@ -1190,8 +1255,14 @@ fn prettify(streams: &mut IoStreams, src: &wstr, do_indent: bool) -> WString { ); let ast_dump = ast.dump(src); streams.err.appendln(ast_dump); + + // Output metrics too. + let mut metrics = AstSizeMetrics::default(); + metrics.visit(ast.top()); + streams.err.appendln(format!("{}", metrics)); } - let mut printer = PrettyPrinter::new(src, do_indent); + let ast = Ast::parse(src, parse_flags(), None); + let mut printer = PrettyPrinter::new(src, &ast, do_indent); printer.prettify() } diff --git a/src/highlight/highlight.rs b/src/highlight/highlight.rs index 33092a4ae..8cdcb15e9 100644 --- a/src/highlight/highlight.rs +++ b/src/highlight/highlight.rs @@ -2,7 +2,7 @@ use crate::abbrs::{self, with_abbrs}; use crate::ast::{ self, Argument, Ast, BlockStatement, BlockStatementHeaderVariant, BraceStatement, - DecoratedStatement, Keyword, Leaf, List, Node, NodeVisitor, Redirection, Token, Type, + DecoratedStatement, Keyword, List, Node, NodeVisitor, Redirection, Token, Type, VariableAssignment, }; use crate::builtins::shared::builtin_exists; @@ -844,27 +844,27 @@ fn visit_children(&mut self, node: &dyn Node) { fn visit_keyword(&mut self, node: &dyn Keyword) { let mut role = HighlightRole::normal; match node.keyword() { - ParseKeyword::kw_begin - | ParseKeyword::kw_builtin - | ParseKeyword::kw_case - | ParseKeyword::kw_command - | ParseKeyword::kw_else - | ParseKeyword::kw_end - | ParseKeyword::kw_exec - | ParseKeyword::kw_for - | ParseKeyword::kw_function - | ParseKeyword::kw_if - | ParseKeyword::kw_in - | ParseKeyword::kw_switch - | ParseKeyword::kw_while => role = HighlightRole::keyword, - ParseKeyword::kw_and - | ParseKeyword::kw_or - | ParseKeyword::kw_not - | ParseKeyword::kw_exclam - | ParseKeyword::kw_time => role = HighlightRole::operat, - ParseKeyword::none => (), + ParseKeyword::Begin + | ParseKeyword::Builtin + | ParseKeyword::Case + | ParseKeyword::Command + | ParseKeyword::Else + | ParseKeyword::End + | ParseKeyword::Exec + | ParseKeyword::For + | ParseKeyword::Function + | ParseKeyword::If + | ParseKeyword::In + | ParseKeyword::Switch + | ParseKeyword::While => role = HighlightRole::keyword, + ParseKeyword::And + | ParseKeyword::Or + | ParseKeyword::Not + | ParseKeyword::Exclam + | ParseKeyword::Time => role = HighlightRole::operat, + ParseKeyword::None => (), }; - self.color_node(node.leaf_as_node(), HighlightSpec::with_fg(role)); + self.color_node(node.as_node(), HighlightSpec::with_fg(role)); } fn visit_token(&mut self, tok: &dyn Token) { let mut role = HighlightRole::normal; @@ -884,11 +884,11 @@ fn visit_token(&mut self, tok: &dyn Token) { } _ => (), } - self.color_node(tok.leaf_as_node(), HighlightSpec::with_fg(role)); + self.color_node(tok.as_node(), HighlightSpec::with_fg(role)); } // Visit an argument, perhaps knowing that our command is cd. fn visit_argument(&mut self, arg: &Argument, cmd_is_cd: bool, options_allowed: bool) { - self.color_as_argument(arg.as_node(), options_allowed); + self.color_as_argument(arg, options_allowed); if !self.io_still_ok() { return; } @@ -912,7 +912,7 @@ fn visit_argument(&mut self, arg: &Argument, cmd_is_cd: bool, options_allowed: b self.color_array[i].valid_path = true; } } - Err(..) => self.color_node(arg.as_node(), HighlightSpec::with_fg(HighlightRole::error)), + Err(..) => self.color_node(arg, HighlightSpec::with_fg(HighlightRole::error)), } } @@ -926,10 +926,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) // If so, color the whole thing invalid and stop. if !oper.is_valid() { - self.color_node( - redir.as_node(), - HighlightSpec::with_fg(HighlightRole::error), - ); + self.color_node(redir, HighlightSpec::with_fg(HighlightRole::error)); return; } @@ -943,7 +940,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { // Check if the argument contains a command substitution. If so, highlight it as a param // even though it's a command redirection, and don't try to do any other validation. if has_cmdsub(&target) { - self.color_as_argument(redir.target.leaf_as_node(), true); + self.color_as_argument(&redir.target, true); return; } // No command substitution, so we can highlight the target file or fd. For example, @@ -959,7 +956,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { self.file_tester.test_redirection_target(&target, oper.mode) }; self.color_node( - redir.target.leaf_as_node(), + &redir.target, HighlightSpec::with_fg(if target_is_valid { HighlightRole::redirection } else { diff --git a/src/parse_constants.rs b/src/parse_constants.rs index 46f9b8587..08249e137 100644 --- a/src/parse_constants.rs +++ b/src/parse_constants.rs @@ -81,30 +81,29 @@ pub enum ParseTokenType { comment, } -#[repr(u8)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ParseKeyword { - // 'none' is not a keyword, it is a sentinel indicating nothing. - none, - - kw_and, - kw_begin, - kw_builtin, - kw_case, - kw_command, - kw_else, - kw_end, - kw_exclam, - kw_exec, - kw_for, - kw_function, - kw_if, - kw_in, - kw_not, - kw_or, - kw_switch, - kw_time, - kw_while, + // 'None' is not a keyword, it is a sentinel indicating nothing. + // Note it proves convenient to keep this as a value rather than using Option. + None, + And, + Begin, + Builtin, + Case, + Command, + Else, + End, + Exclam, + Exec, + For, + Function, + If, + In, + Not, + Or, + Switch, + Time, + While, } // Statement decorations like 'command' or 'exec'. @@ -224,7 +223,7 @@ pub fn to_wstr(self) -> &'static wstr { impl Default for ParseKeyword { fn default() -> Self { - ParseKeyword::none + ParseKeyword::None } } @@ -232,24 +231,24 @@ impl ParseKeyword { /// Return the keyword as a string. pub fn to_wstr(self) -> &'static wstr { match self { - ParseKeyword::kw_and => L!("and"), - ParseKeyword::kw_begin => L!("begin"), - ParseKeyword::kw_builtin => L!("builtin"), - ParseKeyword::kw_case => L!("case"), - ParseKeyword::kw_command => L!("command"), - ParseKeyword::kw_else => L!("else"), - ParseKeyword::kw_end => L!("end"), - ParseKeyword::kw_exclam => L!("!"), - ParseKeyword::kw_exec => L!("exec"), - ParseKeyword::kw_for => L!("for"), - ParseKeyword::kw_function => L!("function"), - ParseKeyword::kw_if => L!("if"), - ParseKeyword::kw_in => L!("in"), - ParseKeyword::kw_not => L!("not"), - ParseKeyword::kw_or => L!("or"), - ParseKeyword::kw_switch => L!("switch"), - ParseKeyword::kw_time => L!("time"), - ParseKeyword::kw_while => L!("while"), + ParseKeyword::And => L!("and"), + ParseKeyword::Begin => L!("begin"), + ParseKeyword::Builtin => L!("builtin"), + ParseKeyword::Case => L!("case"), + ParseKeyword::Command => L!("command"), + ParseKeyword::Else => L!("else"), + ParseKeyword::End => L!("end"), + ParseKeyword::Exclam => L!("!"), + ParseKeyword::Exec => L!("exec"), + ParseKeyword::For => L!("for"), + ParseKeyword::Function => L!("function"), + ParseKeyword::If => L!("if"), + ParseKeyword::In => L!("in"), + ParseKeyword::Not => L!("not"), + ParseKeyword::Or => L!("or"), + ParseKeyword::Switch => L!("switch"), + ParseKeyword::Time => L!("time"), + ParseKeyword::While => L!("while"), _ => L!("unknown_keyword"), } } @@ -263,26 +262,28 @@ fn to_arg(self) -> fish_printf::Arg<'static> { impl From<&wstr> for ParseKeyword { fn from(s: &wstr) -> Self { - match s { - _ if s == "!" => ParseKeyword::kw_exclam, - _ if s == "and" => ParseKeyword::kw_and, - _ if s == "begin" => ParseKeyword::kw_begin, - _ if s == "builtin" => ParseKeyword::kw_builtin, - _ if s == "case" => ParseKeyword::kw_case, - _ if s == "command" => ParseKeyword::kw_command, - _ if s == "else" => ParseKeyword::kw_else, - _ if s == "end" => ParseKeyword::kw_end, - _ if s == "exec" => ParseKeyword::kw_exec, - _ if s == "for" => ParseKeyword::kw_for, - _ if s == "function" => ParseKeyword::kw_function, - _ if s == "if" => ParseKeyword::kw_if, - _ if s == "in" => ParseKeyword::kw_in, - _ if s == "not" => ParseKeyword::kw_not, - _ if s == "or" => ParseKeyword::kw_or, - _ if s == "switch" => ParseKeyword::kw_switch, - _ if s == "time" => ParseKeyword::kw_time, - _ if s == "while" => ParseKeyword::kw_while, - _ => ParseKeyword::none, + // Note this is called in hot loops. + let c0 = s.as_char_slice().get(0).copied().unwrap_or('\0'); + match c0 { + '!' if s == L!("!") => ParseKeyword::Exclam, + 'a' if s == L!("and") => ParseKeyword::And, + 'b' if s == L!("begin") => ParseKeyword::Begin, + 'b' if s == L!("builtin") => ParseKeyword::Builtin, + 'c' if s == L!("case") => ParseKeyword::Case, + 'c' if s == L!("command") => ParseKeyword::Command, + 'e' if s == L!("else") => ParseKeyword::Else, + 'e' if s == L!("end") => ParseKeyword::End, + 'e' if s == L!("exec") => ParseKeyword::Exec, + 'f' if s == L!("for") => ParseKeyword::For, + 'f' if s == L!("function") => ParseKeyword::Function, + 'i' if s == L!("if") => ParseKeyword::If, + 'i' if s == L!("in") => ParseKeyword::In, + 'n' if s == L!("not") => ParseKeyword::Not, + 'o' if s == L!("or") => ParseKeyword::Or, + 's' if s == L!("switch") => ParseKeyword::Switch, + 't' if s == L!("time") => ParseKeyword::Time, + 'w' if s == L!("while") => ParseKeyword::While, + _ => ParseKeyword::None, } } } @@ -423,7 +424,7 @@ pub fn token_type_user_presentable_description( type_: ParseTokenType, keyword: ParseKeyword, ) -> WString { - if keyword != ParseKeyword::none { + if keyword != ParseKeyword::None { return sprintf!("keyword: '%ls'", keyword.to_wstr()); } match type_ { diff --git a/src/parse_execution.rs b/src/parse_execution.rs index 5e92a09af..9f6039343 100644 --- a/src/parse_execution.rs +++ b/src/parse_execution.rs @@ -611,7 +611,7 @@ fn apply_variable_assignments( } *block = Some(ctx.parser().push_block(Block::variable_assignment_block())); for variable_assignment in variable_assignment_list { - let source = self.node_source(&**variable_assignment); + let source = self.node_source(variable_assignment); let equals_pos = variable_assignment_equals_pos(source).unwrap(); let variable_name = &source[..equals_pos]; let expression = &source[equals_pos + 1..]; @@ -1347,7 +1347,7 @@ fn run_begin_statement( fn get_argument_nodes(args: &ast::ArgumentList) -> AstArgsList<'_> { let mut result = AstArgsList::new(); for arg in args { - result.push(&**arg); + result.push(arg); } result } @@ -1725,11 +1725,11 @@ fn test_and_run_1_job_conjunction( if let Some(deco) = &jc.decorator { let last_status = ctx.parser().get_last_status(); match deco.keyword() { - ParseKeyword::kw_and => { + ParseKeyword::And => { // AND. Skip if the last job failed. skip = last_status != 0; } - ParseKeyword::kw_or => { + ParseKeyword::Or => { // OR. Skip if the last job succeeded. skip = last_status == 0; } diff --git a/src/parse_tree.rs b/src/parse_tree.rs index bfa4d2912..4ed99e853 100644 --- a/src/parse_tree.rs +++ b/src/parse_tree.rs @@ -39,7 +39,7 @@ impl ParseToken { pub fn new(typ: ParseTokenType) -> Self { ParseToken { typ, - keyword: ParseKeyword::none, + keyword: ParseKeyword::None, has_dash_prefix: false, is_help_argument: false, is_newline: false, @@ -73,7 +73,7 @@ pub fn is_dash_prefix_string(&self) -> bool { /// Returns a string description of the given parse token. pub fn describe(&self) -> WString { let mut result = self.typ.to_wstr().to_owned(); - if self.keyword != ParseKeyword::none { + if self.keyword != ParseKeyword::None { sprintf!(=> &mut result, " <%ls>", self.keyword.to_wstr()) } result diff --git a/src/parse_util.rs b/src/parse_util.rs index f853a309f..eb3ec9b6d 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -1,5 +1,7 @@ //! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code. -use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token}; +use crate::ast::{ + self, is_same_node, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token, Traversal, +}; use crate::builtins::shared::builtin_exists; use crate::common::{ escape_string, unescape_string, valid_var_name, valid_var_name_char, EscapeFlags, @@ -825,7 +827,10 @@ pub fn apply_indents(src: &wstr, indents: &[i32]) -> WString { // Visit all of our nodes. When we get a job_list or case_item_list, increment indent while // visiting its children. struct IndentVisitor<'a> { - // companion: Pin<&'a mut indent_visitor_t>, + // The parent node of the node we are currently visiting, or None if we are the root. + parent: Option<&'a dyn ast::Node>, + + // companion: Pin<&'a mut IndentVisitor>, // The one-past-the-last index of the most recently encountered leaf node. // We use this to populate the indents even if there's no tokens in the range. last_leaf_end: usize, @@ -849,9 +854,11 @@ struct IndentVisitor<'a> { // List of locations of escaped newline characters. line_continuations: Vec, } + impl<'a> IndentVisitor<'a> { fn new(src: &'a wstr, indents: &'a mut Vec, initial_indent: i32) -> Self { Self { + parent: None, last_leaf_end: 0, last_indent: initial_indent - 1, unclosed: false, @@ -972,6 +979,7 @@ fn indent_string_part(&mut self, range: Range, is_double_quoted: bool) { } } } + impl<'a> NodeVisitor<'a> for IndentVisitor<'a> { // Default implementation is to just visit children. fn visit(&mut self, node: &'a dyn Node) { @@ -987,13 +995,14 @@ fn visit(&mut self, node: &'a dyn Node) { // Increment indents for conditions in headers (#1665). Type::job_conjunction => { - if [Type::while_header, Type::if_clause].contains(&node.parent().unwrap().typ()) { + let typ = self.parent.unwrap().typ(); + if matches!(typ, Type::if_clause | Type::while_header) { inc = 1; dec = 1; } } - // Increment indents for job_continuation_t if it contains a newline. + // Increment indents for JobContinuation if it contains a newline. // This is a bit of a hack - it indents cases like: // cmd1 | // ....cmd2 @@ -1036,12 +1045,12 @@ fn visit(&mut self, node: &'a dyn Node) { // To address this, if we see that the switch statement was not closed, do not // decrement the indent afterwards. inc = 1; - let switchs = node.parent().unwrap().as_switch_statement().unwrap(); + let switchs = self.parent.unwrap().as_switch_statement().unwrap(); dec = if switchs.end.has_source() { 1 } else { 0 }; } Type::token_base => { let token_type = node.as_token().unwrap().token_type(); - let parent_type = node.parent().unwrap().typ(); + let parent_type = self.parent.unwrap().typ(); if parent_type == Type::begin_header && token_type == ParseTokenType::end { // The newline after "begin" is optional, so it is part of the header. // The header is not in the indented block, so indent the newline here. @@ -1091,7 +1100,9 @@ fn visit(&mut self, node: &'a dyn Node) { self.last_indent = self.indent; } + let saved = self.parent.replace(node); node.accept(self, false); + self.parent = saved; self.indent -= dec; } } @@ -1185,7 +1196,8 @@ pub fn parse_util_detect_errors_in_ast( // Verify return only within a function. // Verify no variable expansions. - for node in ast::Traversal::new(ast.top()) { + let mut traversal = ast::Traversal::new(ast.top()); + while let Some(node) = traversal.next() { if let Some(jc) = node.as_job_continuation() { // Somewhat clumsy way of checking for a statement without source in a pipeline. // See if our pipe has source but our statement does not. @@ -1214,23 +1226,28 @@ pub fn parse_util_detect_errors_in_ast( // while foo & ; end // If it's not a background job, nothing to do. if job.bg.is_some() { - errored |= detect_errors_in_backgrounded_job(job, &mut out_errors); + errored |= detect_errors_in_backgrounded_job(&traversal, job, &mut out_errors); } } else if let Some(stmt) = node.as_decorated_statement() { - errored |= detect_errors_in_decorated_statement(buff_src, stmt, &mut out_errors); + errored |= + detect_errors_in_decorated_statement(buff_src, &traversal, stmt, &mut out_errors); } else if let Some(block) = node.as_block_statement() { // If our 'end' had no source, we are unsourced. if !block.end.has_source() { has_unclosed_block = true; } - errored |= - detect_errors_in_block_redirection_list(&block.args_or_redirs, &mut out_errors); + errored |= detect_errors_in_block_redirection_list( + node, + &block.args_or_redirs, + &mut out_errors, + ); } else if let Some(brace_statement) = node.as_brace_statement() { // If our closing brace had no source, we are unsourced. if !brace_statement.right_brace.has_source() { has_unclosed_block = true; } errored |= detect_errors_in_block_redirection_list( + node, &brace_statement.args_or_redirs, &mut out_errors, ); @@ -1240,14 +1257,17 @@ pub fn parse_util_detect_errors_in_ast( has_unclosed_block = true; } errored |= - detect_errors_in_block_redirection_list(&ifs.args_or_redirs, &mut out_errors); + detect_errors_in_block_redirection_list(node, &ifs.args_or_redirs, &mut out_errors); } else if let Some(switchs) = node.as_switch_statement() { // If our 'end' had no source, we are unsourced. if !switchs.end.has_source() { has_unclosed_block = true; } - errored |= - detect_errors_in_block_redirection_list(&switchs.args_or_redirs, &mut out_errors); + errored |= detect_errors_in_block_redirection_list( + node, + &switchs.args_or_redirs, + &mut out_errors, + ); } } @@ -1505,8 +1525,9 @@ fn detect_errors_in_job_conjunction( false } -/// Given that the job given by node should be backgrounded, return true if we detect any errors. +/// Given that the job should be backgrounded, return true if we detect any errors. fn detect_errors_in_backgrounded_job( + traversal: &Traversal, job: &ast::JobPipeline, parse_errors: &mut Option<&mut ParseErrorList>, ) -> bool { @@ -1520,35 +1541,34 @@ fn detect_errors_in_backgrounded_job( // foo & ; or bar // if foo & ; end // while foo & ; end - let Some(job_conj) = job.parent().unwrap().as_job_conjunction() else { + let Some(job_conj) = traversal.parent(job).as_job_conjunction() else { return false; }; - if job_conj.parent().unwrap().as_if_clause().is_some() - || job_conj.parent().unwrap().as_while_header().is_some() - { + let job_conj_parent = traversal.parent(job_conj); + if job_conj_parent.as_if_clause().is_some() || job_conj_parent.as_while_header().is_some() { errored = append_syntax_error!( parse_errors, source_range.start(), source_range.length(), BACKGROUND_IN_CONDITIONAL_ERROR_MSG ); - } else if let Some(jlist) = job_conj.parent().unwrap().as_job_list() { + } else if let Some(jlist) = job_conj_parent.as_job_list() { // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. // Find the index of ourselves in the job list. let index = jlist .iter() - .position(|job| job.pointer_eq(job_conj)) + .position(|job| is_same_node(job, job_conj)) .expect("Should have found the job in the list"); // Try getting the next job and check its decorator. if let Some(next) = jlist.get(index + 1) { if let Some(deco) = &next.decorator { assert!( - [ParseKeyword::kw_and, ParseKeyword::kw_or].contains(&deco.keyword()), + [ParseKeyword::And, ParseKeyword::Or].contains(&deco.keyword()), "Unexpected decorator keyword" ); - let deco_name = if deco.keyword() == ParseKeyword::kw_and { + let deco_name = if deco.keyword() == ParseKeyword::And { L!("and") } else { L!("or") @@ -1567,9 +1587,10 @@ fn detect_errors_in_backgrounded_job( } /// Given a source buffer `buff_src` and decorated statement `dst` within it, return true if there -/// is an error and false if not. `storage` may be used to reduce allocations. +/// is an error and false if not. fn detect_errors_in_decorated_statement( buff_src: &wstr, + traversal: &ast::Traversal, dst: &ast::DecoratedStatement, parse_errors: &mut Option<&mut ParseErrorList>, ) -> bool { @@ -1586,22 +1607,18 @@ fn detect_errors_in_decorated_statement( } // Get the statement we are part of. - let st = dst.parent().unwrap().as_statement().unwrap(); + let st = traversal.parent(dst).as_statement().unwrap(); // Walk up to the job. - let mut job = None; - let mut cursor = dst.parent(); - while job.is_none() { - let c = cursor.expect("Reached root without finding a job"); - job = c.as_job_pipeline(); - cursor = c.parent(); - } - let job = job.expect("Should have found the job"); + let job = traversal + .parent_nodes() + .find_map(|n| n.as_job_pipeline()) + .expect("Should have found the job"); // Check our pipeline position. let pipe_pos = if job.continuation.is_empty() { PipelinePosition::none - } else if job.statement.pointer_eq(st) { + } else if is_same_node(&job.statement, st) { PipelinePosition::first } else { PipelinePosition::subsequent @@ -1700,30 +1717,31 @@ fn detect_errors_in_decorated_statement( } // Check that we don't break or continue from outside a loop. - if !errored && [L!("break"), L!("continue")].contains(&&command[..]) && !first_arg_is_help { + if !errored && (command == "break" || command == "continue") && !first_arg_is_help { // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, // stop the search; we can't break an outer loop from inside a function. // This is a little funny because we can't tell if it's a 'for' or 'while' // loop from the ancestor alone; we need the header. That is, we hit a // block_statement, and have to check its header. let mut found_loop = false; - let mut ancestor: Option<&dyn Node> = Some(dst); - while let Some(anc) = ancestor { - if let Some(block) = anc.as_block_statement() { - if [ast::Type::for_header, ast::Type::while_header] - .contains(&block.header.typ()) - { + for block in traversal + .parent_nodes() + .filter_map(|anc| anc.as_block_statement()) + { + match block.header.typ() { + ast::Type::for_header | ast::Type::while_header => { // This is a loop header, so we can break or continue. found_loop = true; break; - } else if block.header.typ() == ast::Type::function_header { + } + ast::Type::function_header => { // This is a function header, so we cannot break or // continue. We stop our search here. found_loop = false; break; } + _ => {} } - ancestor = anc.parent(); } if !found_loop { @@ -1779,34 +1797,23 @@ fn detect_errors_in_decorated_statement( errored } -// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that -// there are no arguments in the list. +// Given we have a trailing ArgumentOrRedirectionList, like `begin; end > /dev/null`, verify that +// there are no arguments in the list. The parent of the list is provided. fn detect_errors_in_block_redirection_list( + parent: &dyn Node, args_or_redirs: &ast::ArgumentOrRedirectionList, out_errors: &mut Option<&mut ParseErrorList>, ) -> bool { let Some(first_arg) = get_first_arg(args_or_redirs) else { return false; }; - if args_or_redirs - .parent() - .unwrap() - .as_brace_statement() - .is_some() - { - return append_syntax_error!( - out_errors, - first_arg.source_range().start(), - first_arg.source_range().length(), - RIGHT_BRACE_ARG_ERR_MSG - ); + let r = first_arg.source_range(); + if parent.as_brace_statement().is_some() { + append_syntax_error!(out_errors, r.start(), r.length(), RIGHT_BRACE_ARG_ERR_MSG); + } else { + append_syntax_error!(out_errors, r.start(), r.length(), END_ARG_ERR_MSG); } - append_syntax_error!( - out_errors, - first_arg.source_range().start(), - first_arg.source_range().length(), - END_ARG_ERR_MSG - ) + true } /// Given a string containing a variable expansion error, append an appropriate error to the errors diff --git a/src/reader.rs b/src/reader.rs index 50a26333d..1b7f321ad 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -42,7 +42,7 @@ use errno::{errno, Errno}; use crate::abbrs::abbrs_match; -use crate::ast::{self, Ast, Category, Traversal}; +use crate::ast::{is_same_node, Ast, Category}; use crate::builtins::shared::ErrorCode; use crate::builtins::shared::STATUS_CMD_ERROR; use crate::builtins::shared::STATUS_CMD_OK; @@ -5352,22 +5352,9 @@ fn extract_tokens(s: &wstr) -> Vec { | ParseTreeFlags::LEAVE_UNTERMINATED; let ast = Ast::parse(s, ast_flags, None); - // Helper to check if a node is the command portion of a decorated statement. - let is_command = |node: &dyn ast::Node| { - let mut cursor = Some(node); - while let Some(cur) = cursor { - if let Some(stmt) = cur.as_decorated_statement() { - if node.pointer_eq(&stmt.command) { - return true; - } - } - cursor = cur.parent(); - } - false - }; - let mut result = vec![]; - for node in Traversal::new(ast.top()) { + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { // We are only interested in leaf nodes with source. if node.category() != Category::leaf { continue; @@ -5404,10 +5391,12 @@ fn extract_tokens(s: &wstr) -> Vec { if !has_cmd_subs { // Common case of no command substitutions in this leaf node. - result.push(PositionedToken { - range, - is_cmd: is_command(node), - }) + // Check if a node is the command portion of a decorated statement. + let is_cmd = traversal + .parent(node) + .as_decorated_statement() + .is_some_and(|stmt| is_same_node(node, &stmt.command)); + result.push(PositionedToken { range, is_cmd }) } } diff --git a/src/tests/ast.rs b/src/tests/ast.rs new file mode 100644 index 000000000..e43f4c2eb --- /dev/null +++ b/src/tests/ast.rs @@ -0,0 +1,46 @@ +use crate::ast::{is_same_node, Ast, Node}; +use crate::wchar::prelude::*; + +const FISH_FUNC: &str = r#" +function stuff --description 'Stuff' + set -l log "/tmp/chaos_log.(random)" + set -x PATH /custom/bin $PATH + + echo "[$USER] Hooray" | tee -a $log 2>/dev/null + + time if test (count $argv) -eq 0 + echo "No targets specified" >> $log 2>&1 + return 1 + end + + for target in $argv + command bash -c "echo" >> $log 2> /dev/null + switch $status + case 0 + echo "Success" | tee -a $log + case '*' + echo "Failure" >> $log + end + end + set_color green +end +"#; + +#[test] +fn test_is_same_node() { + // is_same_node is pretty subtle! Let's check it. + let src = WString::from_str(FISH_FUNC); + let ast = Ast::parse(&src, Default::default(), None); + assert!(!ast.errored()); + let all_nodes: Vec<&dyn Node> = ast.walk().collect(); + for i in 0..all_nodes.len() { + for j in 0..all_nodes.len() { + let same = is_same_node(all_nodes[i], all_nodes[j]); + if i == j { + assert!(same, "Node {} should be the same as itself", i); + } else { + assert!(!same, "Node {} should not be the same as node {}", i, j); + } + } + } +} diff --git a/src/tests/ast_bench.rs b/src/tests/ast_bench.rs new file mode 100644 index 000000000..d22430728 --- /dev/null +++ b/src/tests/ast_bench.rs @@ -0,0 +1,45 @@ +// Run with cargo +nightly bench --features=benchmark +#[cfg(feature = "benchmark")] +mod bench { + extern crate test; + use crate::ast::Ast; + use crate::wchar::prelude::*; + use test::Bencher; + + // Return a long string suitable for benchmarking. + fn generate_fish_script() -> WString { + let mut buff = WString::new(); + let s = &mut buff; + + for i in 0..1000 { + // command with args and redirections + sprintf!(=> s, + "echo arg%d arg%d > out%d.txt 2> err%d.txt\n", + i, i + 1, i, i + ); + + // simple block + sprintf!(=> s, "begin\n echo inside block %d\nend\n", i ); + + // conditional + sprintf!(=> s, "if test %d\n echo even\nelse\n echo odd\nend\n", i % 2); + + // loop + sprintf!(=> s, "for x in a b c\n echo $x %d\nend\n", i); + + // pipeline + sprintf!(=> s, "echo foo%d | grep f | wc -l\n", i); + } + + buff + } + + #[bench] + fn bench_ast_construction(b: &mut Bencher) { + let src = generate_fish_script(); + b.bytes = (src.len() * 4) as u64; // 4 bytes per character + b.iter(|| { + let _ast = Ast::parse(&src, Default::default(), None); + }); + } +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 207b1b257..61bd1945f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,4 +1,6 @@ mod abbrs; +mod ast; +mod ast_bench; mod common; mod complete; mod debounce; diff --git a/src/tests/parser.rs b/src/tests/parser.rs index 9f4f826fa..c3b32f3e3 100644 --- a/src/tests/parser.rs +++ b/src/tests/parser.rs @@ -1,10 +1,10 @@ -use crate::ast::{self, Ast, JobPipeline, List, Node, Traversal}; +use crate::ast::{self, is_same_node, Ast, JobPipeline, List, Node, Traversal}; use crate::common::ScopeGuard; use crate::env::EnvStack; use crate::expand::ExpandFlags; use crate::io::{IoBufferfill, IoChain}; use crate::parse_constants::{ - ParseErrorCode, ParseTreeFlags, ParserTestErrorBits, StatementDecoration, + ParseErrorCode, ParseTokenType, ParseTreeFlags, ParserTestErrorBits, StatementDecoration, }; use crate::parse_tree::{parse_source, LineCounter}; use crate::parse_util::{parse_util_detect_errors, parse_util_detect_errors_in_argument}; @@ -818,3 +818,166 @@ fn test_line_counter_empty() { assert_eq!(line_counter.line_offset_of_node(), None); assert_eq!(line_counter.source_offset_of_node(), None); } + +// Helper for testing a simple ast traversal. +// The ast is always for the command 'true;'. +struct TrueSemiAstTester<'a> { + // The AST we are testing. + ast: &'a Ast, + + // Expected parent-child relationships, in the order we expect to encounter them. + parent_child: Box<[(&'a dyn Node, &'a dyn Node)]>, +} + +impl<'a> TrueSemiAstTester<'a> { + const TRUE_SEMI: &'static wstr = L!("true;"); + fn new(ast: &'a Ast) -> Self { + let job_list = ast.top().as_job_list().expect("Expected job_list"); + let job_conjunction = &job_list[0]; + let job_pipeline = &job_conjunction.job; + let variable_assignment_list = &job_pipeline.variables; + let statement = &job_pipeline.statement; + + let decorated_statement = statement + .contents + .as_decorated_statement() + .expect("Expected decorated_statement"); + let command = &decorated_statement.command; + let args_or_redirs = &decorated_statement.args_or_redirs; + let job_continuation = &job_pipeline.continuation; + let job_conjunction_continuation = &job_conjunction.continuations; + let semi_nl = job_conjunction.semi_nl.as_ref().expect("Expected semi_nl"); + + // Helpful parent-child map, such that the children are in the order that we expect to encounter them + // in the AST. + let parent_child: &[(&'a dyn Node, &'a dyn Node)] = &[ + (job_list, job_conjunction), + (job_conjunction, job_pipeline), + (job_pipeline, variable_assignment_list), + (job_pipeline, statement), + (statement, decorated_statement), + (decorated_statement, command), + (decorated_statement, args_or_redirs), + (job_pipeline, job_continuation), + (job_conjunction, job_conjunction_continuation), + (job_conjunction, semi_nl), + ]; + Self { + ast, + parent_child: Box::from(parent_child), + } + } + + // Expected nodes, in-order. + fn expected_nodes(&self) -> Vec<&'a dyn Node> { + let mut expected: Vec<&dyn Node> = vec![self.ast.top()]; + expected.extend(self.parent_child.iter().map(|&(_p, c)| c)); + expected + } + + // Helper function to construct the parent list of a given node, such at the first entry is + // the node itself, and the last entry is the root node. + fn get_parents<'s>(&'s self, node: &'a dyn Node) -> impl Iterator + 's { + let mut next = Some(node); + std::iter::from_fn(move || { + let out = next?; + next = self + .parent_child + .iter() + .find_map(|&(p, c)| is_same_node(c, out).then_some(p)); + Some(out) + }) + } +} + +#[test] +fn test_ast() { + // Light testing of the AST and traversals. + let ast = Ast::parse(TrueSemiAstTester::TRUE_SEMI, ParseTreeFlags::empty(), None); + let tester = TrueSemiAstTester::new(&ast); + assert!(ast.top().as_job_list().is_some(), "Expected job_list"); + + // Walk the AST and collect all nodes. + // See is_same_node comments for why we can't use assert_eq! here. + let found = ast.walk().collect::>(); + let expected = tester.expected_nodes(); + assert_eq!(found.len(), expected.len()); + for idx in 0..found.len() { + assert!(is_same_node(found[idx], expected[idx])); + } + + // Walk and check parents. + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + let expected_parents = tester.get_parents(node).collect::>(); + let found_parents = traversal.parent_nodes().collect::>(); + assert_eq!(found_parents.len(), expected_parents.len()); + for idx in 0..found_parents.len() { + assert!(is_same_node(found_parents[idx], expected_parents[idx])); + } + } + + // Find the decorated statement. + let decorated_statement = ast + .walk() + .find(|n| n.typ() == ast::Type::decorated_statement) + .expect("Expected decorated statement"); + + // Test the skip feature. Don't descend into the decorated_statement. + let expected_skip: Vec<&dyn Node> = expected + .iter() + .copied() + .filter(|&n| { + // Discard nodes who have the decorated_statement as a parent, + // excepting the decorated_statement itself. + tester + .get_parents(n) + .skip(1) + .all(|p| !is_same_node(p, decorated_statement)) + }) + .collect(); + + let mut found = vec![]; + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + if is_same_node(node, decorated_statement) { + traversal.skip_children(node); + } + found.push(node); + } + assert_eq!(found.len(), expected_skip.len()); + for idx in 0..found.len() { + assert!(is_same_node(found[idx], expected_skip[idx])); + } +} + +#[test] +#[should_panic] +fn test_traversal_skip_children_panics() { + // Test that we panic if we try to skip children of a node that is not the current node. + let ast = Ast::parse(L!("true;"), ParseTreeFlags::empty(), None); + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + if node.typ() == ast::Type::decorated_statement { + // Should panic as we can only skip the current node. + traversal.skip_children(ast.top()); + } + } +} + +#[test] +#[should_panic] +fn test_traversal_parent_panics() { + // Can only get the parent of nodes still on the stack. + let ast = Ast::parse(L!("true;"), ParseTreeFlags::empty(), None); + let mut traversal = ast.walk(); + let mut decorated_statement = None; + while let Some(node) = traversal.next() { + if node.as_decorated_statement().is_some() { + decorated_statement = Some(node); + } else if node.as_token().map(|t| t.token_type()) == Some(ParseTokenType::end) { + // should panic as the decorated_statement is not on the stack. + let _ = traversal.parent(decorated_statement.unwrap()); + } + } +}