From 5f0584a6e69d24ffd0ce241fa209b14eb415f7e7 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 13:28:35 -0700 Subject: [PATCH 01/19] Teach fish_indent to emit basic parse tree size metrics A good basis to begin optimization. --- CHANGELOG.rst | 1 + src/ast.rs | 6 +++ src/builtins/fish_indent.rs | 81 +++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ddec525c8..cce2df670 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -71,6 +71,7 @@ Other improvements ------------------ - ``fish_indent`` and ``fish_key_reader`` are now available as builtins, and if fish is called with that name it will act like the given tool (as a multi-call binary). This allows truly distributing fish as a single file. (:issue:`10876`) +- ``fish_indent --dump-parse-tree`` now emits simple metrics about the tree including its memory consumption. For distributors ---------------- diff --git a/src/ast.rs b/src/ast.rs index 284ebec9d..643bea9ca 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -139,6 +139,12 @@ fn source<'s>(&self, orig: &'s wstr) -> &'s wstr { self.try_source(orig).unwrap_or_default() } + /// The raw byte size of this node, excluding children. + /// This also excludes the allocations stored in lists. + fn self_memory_size(&self) -> usize { + std::mem::size_of_val(self) + } + // The address of the object, for comparison. fn as_ptr(&self) -> *const (); diff --git a/src/builtins/fish_indent.rs b/src/builtins/fish_indent.rs index 87e26bee1..972cb698e 100644 --- a/src/builtins/fish_indent.rs +++ b/src/builtins/fish_indent.rs @@ -85,6 +85,82 @@ struct PrettyPrinterState<'source, 'ast> { errors: Option<&'ast SourceRangeList>, } +#[derive(Copy, Clone, Default, Debug)] +struct AstSizeMetrics { + /// The total number of nodes. + node_count: usize, + /// The number of branches, leaves, and lists, tokens, and keywords. + /// Note tokens and keywords are also counted as leaves. + branch_count: usize, + leaf_count: usize, + list_count: usize, + token_count: usize, + keyword_count: usize, + // An estimate of the total allocated size of the ast in bytes. + memory_size: usize, +} + +impl std::fmt::Display for AstSizeMetrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "AstSizeMetrics:")?; + writeln!(f, " nodes: {}", self.node_count)?; + writeln!(f, " branches: {}", self.branch_count)?; + writeln!(f, " leaves: {}", self.leaf_count)?; + writeln!(f, " lists: {}", self.list_count)?; + writeln!(f, " tokens: {}", self.token_count)?; + writeln!(f, " keywords: {}", self.keyword_count)?; + + let memsize = self.memory_size; + let (val, unit) = if memsize >= 1024 * 1024 { + (memsize as f64 / (1024.0 * 1024.0), "MB") + } else { + (memsize as f64 / 1024.0, "KB") + }; + writeln!(f, " memory: {} bytes ({:.2} {})", memsize, val, unit) + } +} + +/// If this is a list node, return the amount of memory in bytes occupied by its allocated pointers. +/// Note this is separate from the memory occupied by the nodes themselves, +/// as nodes are boxed to ensure pointer stability. We don't bother to look at the boxed type. +fn list_pointer_memory_size(n: &dyn Node) -> usize { + let count = match n.typ() { + Type::variable_assignment_list => n.as_variable_assignment_list().unwrap().count(), + Type::argument_or_redirection_list => n.as_argument_or_redirection_list().unwrap().count(), + Type::elseif_clause_list => n.as_elseif_clause_list().unwrap().count(), + Type::job_continuation_list => n.as_job_continuation_list().unwrap().count(), + Type::andor_job_list => n.as_andor_job_list().unwrap().count(), + Type::freestanding_argument_list => 0, // not actually a list node + Type::job_conjunction_continuation_list => { + n.as_job_conjunction_continuation_list().unwrap().count() + } + Type::case_item_list => n.as_case_item_list().unwrap().count(), + Type::argument_list => n.as_argument_list().unwrap().count(), + Type::job_list => n.as_job_list().unwrap().count(), + _ => 0, + }; + count * std::mem::size_of::>() +} + +impl<'a> NodeVisitor<'a> for AstSizeMetrics { + fn visit(&mut self, node: &'a dyn Node) { + self.node_count += 1; + self.memory_size += node.self_memory_size() + list_pointer_memory_size(node); + match node.category() { + Category::branch => self.branch_count += 1, + Category::leaf => self.leaf_count += 1, + Category::list => self.list_count += 1, + } + if node.as_token().is_some() { + self.token_count += 1; + } + if node.as_keyword().is_some() { + self.keyword_count += 1; + } + node.accept(self, false); + } +} + /// Flags we support. #[derive(Copy, Clone, Default)] struct GapFlags { @@ -1190,6 +1266,11 @@ fn prettify(streams: &mut IoStreams, src: &wstr, do_indent: bool) -> WString { ); let ast_dump = ast.dump(src); streams.err.appendln(ast_dump); + + // Output metrics too. + let mut metrics = AstSizeMetrics::default(); + metrics.visit(ast.top()); + streams.err.appendln(format!("{}", metrics)); } let mut printer = PrettyPrinter::new(src, do_indent); printer.prettify() From 271a85571de27d44b7399f2039760933bba85294 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 14:21:34 -0700 Subject: [PATCH 02/19] Add a benchmark for AST construction Run with `cargo +nightly bench --features=benchmark` --- src/tests/ast_bench.rs | 45 ++++++++++++++++++++++++++++++++++++++++++ src/tests/mod.rs | 1 + 2 files changed, 46 insertions(+) create mode 100644 src/tests/ast_bench.rs diff --git a/src/tests/ast_bench.rs b/src/tests/ast_bench.rs new file mode 100644 index 000000000..d22430728 --- /dev/null +++ b/src/tests/ast_bench.rs @@ -0,0 +1,45 @@ +// Run with cargo +nightly bench --features=benchmark +#[cfg(feature = "benchmark")] +mod bench { + extern crate test; + use crate::ast::Ast; + use crate::wchar::prelude::*; + use test::Bencher; + + // Return a long string suitable for benchmarking. + fn generate_fish_script() -> WString { + let mut buff = WString::new(); + let s = &mut buff; + + for i in 0..1000 { + // command with args and redirections + sprintf!(=> s, + "echo arg%d arg%d > out%d.txt 2> err%d.txt\n", + i, i + 1, i, i + ); + + // simple block + sprintf!(=> s, "begin\n echo inside block %d\nend\n", i ); + + // conditional + sprintf!(=> s, "if test %d\n echo even\nelse\n echo odd\nend\n", i % 2); + + // loop + sprintf!(=> s, "for x in a b c\n echo $x %d\nend\n", i); + + // pipeline + sprintf!(=> s, "echo foo%d | grep f | wc -l\n", i); + } + + buff + } + + #[bench] + fn bench_ast_construction(b: &mut Bencher) { + let src = generate_fish_script(); + b.bytes = (src.len() * 4) as u64; // 4 bytes per character + b.iter(|| { + let _ast = Ast::parse(&src, Default::default(), None); + }); + } +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 207b1b257..dba77503f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,4 +1,5 @@ mod abbrs; +mod ast_bench; mod common; mod complete; mod debounce; From be88d103ba3a678459f65e4aebc3399b4b738fa4 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 13:45:48 -0700 Subject: [PATCH 03/19] ast: Use boxed slice instead of vec for list nodes This saves a decent amount of memory, both because we no longer have excess capacity sitting around that we'll never use, but also because we no longer need to store the "capacity" value. --- src/ast.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 643bea9ca..f98a796d4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -439,7 +439,7 @@ fn allows_keyword(&self, kw: ParseKeyword) -> bool { pub trait List: Node { type ContentsNode: Node + Default; fn contents(&self) -> &[Box]; - fn contents_mut(&mut self) -> &mut Vec>; + fn contents_mut(&mut self) -> &mut Box<[Box]>; /// Return our count. fn count(&self) -> usize { self.contents().len() @@ -636,7 +636,8 @@ macro_rules! define_list_node { #[derive(Default, Debug)] pub struct $name { parent: Option<*const dyn Node>, - list_contents: Vec>, + // Note we box the nodes themselves, for pointer stability. + list_contents: Box<[Box<$contents>]>, } implement_node!($name, list, $type); impl List for $name { @@ -644,7 +645,7 @@ impl List for $name { fn contents(&self) -> &[Box] { &self.list_contents } - fn contents_mut(&mut self) -> &mut Vec> { + fn contents_mut(&mut self) -> &mut Box<[Box]> { &mut self.list_contents } } @@ -3547,7 +3548,6 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: } // We're going to populate a vector with our nodes. - // Later on we will copy this to the heap with a single allocation. let mut contents = vec![]; loop { @@ -3614,7 +3614,7 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: "Contents size out of bounds" ); assert!(list.contents().is_empty(), "List should still be empty"); - *list.contents_mut() = contents; + *list.contents_mut() = contents.into_boxed_slice(); } FLOGF!( From 3b3063287ba10af6f3448befa2b19053db30306b Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 14:47:26 -0700 Subject: [PATCH 04/19] ast: Add some comments about raw pointers and stability --- src/ast.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ast.rs b/src/ast.rs index f98a796d4..8a5591a9f 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -98,7 +98,7 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { /// Node is the base trait of all AST nodes. pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug { - /// The parent node, or null if this is root. + /// The parent node, or None if this is root. fn parent(&self) -> Option<&dyn Node>; /// The type of this node. @@ -2520,6 +2520,8 @@ pub struct Extras { pub struct Ast { // The top node. // Its type depends on what was requested to parse. + // Note Node parent pointers are implemented via raw pointers, + // so we must enforce pointer stability. top: Box, /// Whether any errors were encountered during parsing. any_error: bool, @@ -4017,6 +4019,8 @@ fn parse_from_top( if top_type == Type::job_list { // Set all parent nodes. // It turns out to be more convenient to do this after the parse phase. + // Note: parent nodes are implemented as raw pointers! This means that the contents Ast must not + // change or move after construction. ast.top_mut() .as_mut_job_list() .as_mut() From 0b4883d07f5b41f37d0c2b5915707d0beb7429bf Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 14:48:25 -0700 Subject: [PATCH 05/19] ast: Bravely stop boxing items in lists Prior to this commit, lists of items (e.g. an argument list to a command) would each be Boxed, i.e. we had effectively Vec>. The rationale here is that we had raw pointers and pointer stability was important to enforce. But we have fewer raw pointers now - only the parent pointers - and we can be confident that the Ast will not change or move after construction. So remove this intermediate Box, simplifying some logic and reducing ast size by ~5%. This slows down Ast construction because we're still constructing the Box and moving things in and out of it - that will be addressed in subsequent commits. --- src/ast.rs | 25 ++++++++++++------------- src/builtins/fish_indent.rs | 24 +----------------------- src/parse_execution.rs | 4 ++-- 3 files changed, 15 insertions(+), 38 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 8a5591a9f..f55db8c5f 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -438,8 +438,8 @@ fn allows_keyword(&self, kw: ParseKeyword) -> bool { // A simple variable-sized array, possibly empty. pub trait List: Node { type ContentsNode: Node + Default; - fn contents(&self) -> &[Box]; - fn contents_mut(&mut self) -> &mut Box<[Box]>; + fn contents(&self) -> &[Self::ContentsNode]; + fn contents_mut(&mut self) -> &mut Box<[Self::ContentsNode]>; /// Return our count. fn count(&self) -> usize { self.contents().len() @@ -449,11 +449,11 @@ fn is_empty(&self) -> bool { self.contents().is_empty() } /// Iteration support. - fn iter(&self) -> std::slice::Iter> { + fn iter(&self) -> std::slice::Iter { self.contents().iter() } fn get(&self, index: usize) -> Option<&Self::ContentsNode> { - self.contents().get(index).map(|b| &**b) + self.contents().get(index) } } @@ -636,22 +636,21 @@ macro_rules! define_list_node { #[derive(Default, Debug)] pub struct $name { parent: Option<*const dyn Node>, - // Note we box the nodes themselves, for pointer stability. - list_contents: Box<[Box<$contents>]>, + list_contents: Box<[$contents]>, } implement_node!($name, list, $type); impl List for $name { type ContentsNode = $contents; - fn contents(&self) -> &[Box] { + fn contents(&self) -> &[Self::ContentsNode] { &self.list_contents } - fn contents_mut(&mut self) -> &mut Box<[Box]> { + fn contents_mut(&mut self) -> &mut Box<[Self::ContentsNode]> { &mut self.list_contents } } impl<'a> IntoIterator for &'a $name { - type Item = &'a Box<$contents>; - type IntoIter = std::slice::Iter<'a, Box<$contents>>; + type Item = &'a $contents; + type IntoIter = std::slice::Iter<'a, $contents>; fn into_iter(self) -> Self::IntoIter { self.contents().into_iter() } @@ -659,12 +658,12 @@ fn into_iter(self) -> Self::IntoIter { impl Index for $name { type Output = <$name as List>::ContentsNode; fn index(&self, index: usize) -> &Self::Output { - &*self.contents()[index] + &self.contents()[index] } } impl IndexMut for $name { fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut *self.contents_mut()[index] + &mut self.contents_mut()[index] } } impl Acceptor for $name { @@ -3598,7 +3597,7 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: if contents.is_empty() { contents.reserve(16); } - contents.push(node); + contents.push(*node); } else if exhaust_stream && self.peek_type(0) != ParseTokenType::terminate { // We aren't allowed to stop. Produce an error and keep going. self.consume_excess_token_generating_error() diff --git a/src/builtins/fish_indent.rs b/src/builtins/fish_indent.rs index 972cb698e..511d9b1c2 100644 --- a/src/builtins/fish_indent.rs +++ b/src/builtins/fish_indent.rs @@ -120,32 +120,10 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { } } -/// If this is a list node, return the amount of memory in bytes occupied by its allocated pointers. -/// Note this is separate from the memory occupied by the nodes themselves, -/// as nodes are boxed to ensure pointer stability. We don't bother to look at the boxed type. -fn list_pointer_memory_size(n: &dyn Node) -> usize { - let count = match n.typ() { - Type::variable_assignment_list => n.as_variable_assignment_list().unwrap().count(), - Type::argument_or_redirection_list => n.as_argument_or_redirection_list().unwrap().count(), - Type::elseif_clause_list => n.as_elseif_clause_list().unwrap().count(), - Type::job_continuation_list => n.as_job_continuation_list().unwrap().count(), - Type::andor_job_list => n.as_andor_job_list().unwrap().count(), - Type::freestanding_argument_list => 0, // not actually a list node - Type::job_conjunction_continuation_list => { - n.as_job_conjunction_continuation_list().unwrap().count() - } - Type::case_item_list => n.as_case_item_list().unwrap().count(), - Type::argument_list => n.as_argument_list().unwrap().count(), - Type::job_list => n.as_job_list().unwrap().count(), - _ => 0, - }; - count * std::mem::size_of::>() -} - impl<'a> NodeVisitor<'a> for AstSizeMetrics { fn visit(&mut self, node: &'a dyn Node) { self.node_count += 1; - self.memory_size += node.self_memory_size() + list_pointer_memory_size(node); + self.memory_size += node.self_memory_size(); match node.category() { Category::branch => self.branch_count += 1, Category::leaf => self.leaf_count += 1, diff --git a/src/parse_execution.rs b/src/parse_execution.rs index 5e92a09af..0a52a255c 100644 --- a/src/parse_execution.rs +++ b/src/parse_execution.rs @@ -611,7 +611,7 @@ fn apply_variable_assignments( } *block = Some(ctx.parser().push_block(Block::variable_assignment_block())); for variable_assignment in variable_assignment_list { - let source = self.node_source(&**variable_assignment); + let source = self.node_source(variable_assignment); let equals_pos = variable_assignment_equals_pos(source).unwrap(); let variable_name = &source[..equals_pos]; let expression = &source[equals_pos + 1..]; @@ -1347,7 +1347,7 @@ fn run_begin_statement( fn get_argument_nodes(args: &ast::ArgumentList) -> AstArgsList<'_> { let mut result = AstArgsList::new(); for arg in args { - result.push(&**arg); + result.push(arg); } result } From a874237bff506ccddccc34d277a84a9b1495a245 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 16 Mar 2025 15:14:12 -0700 Subject: [PATCH 06/19] ast: Bravely stop allocating so much in Boxes Now that we have more confidence in our pointers, we can allocate directly more often, instead of always through Box. This recovers the performance lost from the previous commit. --- src/ast.rs | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index f55db8c5f..91921613c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3095,9 +3095,9 @@ fn visit_argument_or_redirection( node: &mut ArgumentOrRedirectionVariant, ) -> VisitResult { if let Some(arg) = self.try_parse::() { - *node = ArgumentOrRedirectionVariant::Argument(*arg); + *node = ArgumentOrRedirectionVariant::Argument(arg); } else if let Some(redir) = self.try_parse::() { - *node = ArgumentOrRedirectionVariant::Redirection(*redir); + *node = ArgumentOrRedirectionVariant::Redirection(redir); } else { internal_error!( self, @@ -3123,22 +3123,22 @@ fn visit_decorated_statement_decorator( &mut self, node: &mut Option, ) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_job_conjunction_decorator(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_else_clause(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_semi_nl(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_time(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } fn visit_token_background(&mut self, node: &mut Option) { - *node = self.try_parse::().map(|b| *b); + *node = self.try_parse::(); } } @@ -3597,7 +3597,7 @@ fn populate_list(&mut self, list: &mut ListType, exhaust_stream: if contents.is_empty() { contents.reserve(16); } - contents.push(*node); + contents.push(node); } else if exhaust_stream && self.peek_type(0) != ParseTokenType::terminate { // We aren't allowed to stop. Produce an error and keep going. self.consume_excess_token_generating_error() @@ -3653,7 +3653,7 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { slf.peek_token(0).user_presentable_description() ); } - StatementVariant::DecoratedStatement(*embedded) + StatementVariant::DecoratedStatement(embedded) } if self.peek_token(0).typ == ParseTokenType::terminate && self.allow_incomplete() { @@ -3661,7 +3661,7 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // Construct a decorated statement, which will be unsourced. self.allocate_visit::(); } else if self.peek_token(0).typ == ParseTokenType::left_brace { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); return StatementVariant::BraceStatement(embedded); } else if self.peek_token(0).typ != ParseTokenType::string { // We may be unwinding already; do not produce another error. @@ -3734,22 +3734,22 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { match self.peek_token(0).keyword { ParseKeyword::kw_not | ParseKeyword::kw_exclam => { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); StatementVariant::NotStatement(embedded) } ParseKeyword::kw_for | ParseKeyword::kw_while | ParseKeyword::kw_function | ParseKeyword::kw_begin => { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); StatementVariant::BlockStatement(embedded) } ParseKeyword::kw_if => { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); StatementVariant::IfStatement(embedded) } ParseKeyword::kw_switch => { - let embedded = self.allocate_visit::(); + let embedded = self.allocate_boxed_visit::(); StatementVariant::SwitchStatement(embedded) } ParseKeyword::kw_end => { @@ -3776,19 +3776,19 @@ fn allocate_populate_block_header(&mut self) -> BlockStatementHeaderVariant { match self.peek_token(0).keyword { ParseKeyword::kw_for => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::ForHeader(*embedded) + BlockStatementHeaderVariant::ForHeader(embedded) } ParseKeyword::kw_while => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::WhileHeader(*embedded) + BlockStatementHeaderVariant::WhileHeader(embedded) } ParseKeyword::kw_function => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::FunctionHeader(*embedded) + BlockStatementHeaderVariant::FunctionHeader(embedded) } ParseKeyword::kw_begin => { let embedded = self.allocate_visit::(); - BlockStatementHeaderVariant::BeginHeader(*embedded) + BlockStatementHeaderVariant::BeginHeader(embedded) } _ => { internal_error!( @@ -3800,7 +3800,7 @@ fn allocate_populate_block_header(&mut self) -> BlockStatementHeaderVariant { } } - fn try_parse(&mut self) -> Option> { + fn try_parse(&mut self) -> Option { if !T::can_be_parsed(self) { return None; } @@ -3822,10 +3822,17 @@ fn allocate(&self) -> Box { result } - // Given a node type, allocate it, invoke its default constructor, + // Given a node type, allocate it, invoking its default constructor, // and then visit it as a field. - // Return the resulting Node pointer. It is never null. - fn allocate_visit(&mut self) -> Box { + // Return the resulting Node. + fn allocate_visit(&mut self) -> T { + let mut result = T::default(); + self.visit_mut(&mut result); + result + } + + // Like allocate_visit, but returns the value as a Box. + fn allocate_boxed_visit(&mut self) -> Box { let mut result = Box::::default(); let _ = self.visit_mut(&mut *result); result From cef7c3c5c4f83c3cde2f9d9ac127d795fad56d51 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Wed, 19 Mar 2025 15:11:28 -0700 Subject: [PATCH 07/19] Switch ParseKeyword to Rust naming conventions --- src/ast.rs | 114 +++++++++++++++++------------------ src/highlight/highlight.rs | 38 ++++++------ src/parse_constants.rs | 120 ++++++++++++++++++------------------- src/parse_execution.rs | 4 +- src/parse_tree.rs | 4 +- src/parse_util.rs | 4 +- 6 files changed, 142 insertions(+), 142 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 91921613c..c4061864a 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1267,7 +1267,7 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { || (token.typ == ParseTokenType::string && !matches!( token.keyword, - ParseKeyword::kw_case | ParseKeyword::kw_end | ParseKeyword::kw_else + ParseKeyword::Case | ParseKeyword::End | ParseKeyword::Else )) } } @@ -1504,8 +1504,8 @@ fn as_mut_elseif_clause(&mut self) -> Option<&mut ElseifClause> { } impl CheckParse for ElseifClause { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_else - && pop.peek_token(1).keyword == ParseKeyword::kw_if + pop.peek_token(0).keyword == ParseKeyword::Else + && pop.peek_token(1).keyword == ParseKeyword::If } } @@ -1548,7 +1548,7 @@ fn as_mut_else_clause(&mut self) -> Option<&mut ElseClause> { } impl CheckParse for ElseClause { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_else + pop.peek_token(0).keyword == ParseKeyword::Else } } @@ -1615,7 +1615,7 @@ fn as_mut_case_item(&mut self) -> Option<&mut CaseItem> { } impl CheckParse for CaseItem { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { - pop.peek_token(0).keyword == ParseKeyword::kw_case + pop.peek_token(0).keyword == ParseKeyword::Case } } @@ -1814,7 +1814,7 @@ fn as_mut_andor_job(&mut self) -> Option<&mut AndorJob> { impl CheckParse for AndorJob { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_and | ParseKeyword::kw_or) { + if !matches!(keyword, ParseKeyword::And | ParseKeyword::Or) { return false; } // Check that the argument to and/or is a string that's not help. Otherwise @@ -2021,27 +2021,27 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { define_token_node!(TokenRightBrace, right_brace); define_token_node!(TokenRedirection, redirection); -define_keyword_node!(DecoratedStatementDecorator, kw_command, kw_builtin, kw_exec); -define_keyword_node!(JobConjunctionDecorator, kw_and, kw_or); -define_keyword_node!(KeywordBegin, kw_begin); -define_keyword_node!(KeywordCase, kw_case); -define_keyword_node!(KeywordElse, kw_else); -define_keyword_node!(KeywordEnd, kw_end); -define_keyword_node!(KeywordFor, kw_for); -define_keyword_node!(KeywordFunction, kw_function); -define_keyword_node!(KeywordIf, kw_if); -define_keyword_node!(KeywordIn, kw_in); -define_keyword_node!(KeywordNot, kw_not, kw_exclam); -define_keyword_node!(KeywordSwitch, kw_switch); -define_keyword_node!(KeywordTime, kw_time); -define_keyword_node!(KeywordWhile, kw_while); +define_keyword_node!(DecoratedStatementDecorator, Command, Builtin, Exec); +define_keyword_node!(JobConjunctionDecorator, And, Or); +define_keyword_node!(KeywordBegin, Begin); +define_keyword_node!(KeywordCase, Case); +define_keyword_node!(KeywordElse, Else); +define_keyword_node!(KeywordEnd, End); +define_keyword_node!(KeywordFor, For); +define_keyword_node!(KeywordFunction, Function); +define_keyword_node!(KeywordIf, If); +define_keyword_node!(KeywordIn, In); +define_keyword_node!(KeywordNot, Not, Exclam); +define_keyword_node!(KeywordSwitch, Switch); +define_keyword_node!(KeywordTime, Time); +define_keyword_node!(KeywordWhile, While); impl CheckParse for JobConjunctionDecorator { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { // This is for a job conjunction like `and stuff` // But if it's `and --help` then we treat it as an ordinary command. let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_and | ParseKeyword::kw_or) { + if !matches!(keyword, ParseKeyword::And | ParseKeyword::Or) { return false; } !pop.peek_token(1).is_help_argument @@ -2057,7 +2057,7 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { let keyword = pop.peek_token(0).keyword; if !matches!( keyword, - ParseKeyword::kw_command | ParseKeyword::kw_builtin | ParseKeyword::kw_exec + ParseKeyword::Command | ParseKeyword::Builtin | ParseKeyword::Exec ) { return false; } @@ -2070,7 +2070,7 @@ impl CheckParse for KeywordTime { fn can_be_parsed(pop: &mut Populator<'_>) -> bool { // Time keyword is only the time builtin if the next argument doesn't have a dash. let keyword = pop.peek_token(0).keyword; - if !matches!(keyword, ParseKeyword::kw_time) { + if !matches!(keyword, ParseKeyword::Time) { return false; } !pop.peek_token(1).is_dash_prefix_string() @@ -2085,9 +2085,9 @@ pub fn decoration(&self) -> StatementDecoration { }; let decorator: &dyn Keyword = decorator; match decorator.keyword() { - ParseKeyword::kw_command => StatementDecoration::command, - ParseKeyword::kw_builtin => StatementDecoration::builtin, - ParseKeyword::kw_exec => StatementDecoration::exec, + ParseKeyword::Command => StatementDecoration::command, + ParseKeyword::Builtin => StatementDecoration::builtin, + ParseKeyword::Exec => StatementDecoration::exec, _ => panic!("Unexpected keyword in statement decoration"), } } @@ -2614,7 +2614,7 @@ pub fn dump(&self, orig: &wstr) -> WString { WString::from_str("") } _ => { - token_type_user_presentable_description(n.token_type(), ParseKeyword::none) + token_type_user_presentable_description(n.token_type(), ParseKeyword::None) } }; result += &desc[..]; @@ -3064,7 +3064,7 @@ fn did_visit_fields_of<'a>(&'a mut self, node: &'a dyn NodeMut, flow: VisitResul if next_token.typ == ParseTokenType::string && matches!( next_token.keyword, - ParseKeyword::kw_case | ParseKeyword::kw_else | ParseKeyword::kw_end + ParseKeyword::Case | ParseKeyword::Else | ParseKeyword::End ) { self.consume_excess_token_generating_error(); @@ -3168,7 +3168,7 @@ fn token_types_user_presentable_description(types: &'static [ParseTokenType]) -> if !res.is_empty() { res += L!(" or "); } - res += &token_type_user_presentable_description(*typ, ParseKeyword::none)[..]; + res += &token_type_user_presentable_description(*typ, ParseKeyword::None)[..]; } res } @@ -3394,7 +3394,7 @@ fn consume_token_type(&mut self, typ: ParseTokenType) -> SourceRange { tok, ParseErrorCode::generic, "Expected %ls, but found %ls", - token_type_user_presentable_description(typ, ParseKeyword::none), + token_type_user_presentable_description(typ, ParseKeyword::None), tok.user_presentable_description() ); return SourceRange::new(0, 0); @@ -3419,7 +3419,7 @@ fn consume_excess_token_generating_error(&mut self) { tok, ParseErrorCode::generic, "Expected %ls, but found %ls", - token_type_user_presentable_description(ParseTokenType::string, ParseKeyword::none), + token_type_user_presentable_description(ParseTokenType::string, ParseKeyword::None), tok.user_presentable_description() ); return; @@ -3430,7 +3430,7 @@ fn consume_excess_token_generating_error(&mut self) { ParseTokenType::string => { // There are three keywords which end a job list. match tok.keyword { - ParseKeyword::kw_case => { + ParseKeyword::Case => { parse_error!( self, tok, @@ -3438,7 +3438,7 @@ fn consume_excess_token_generating_error(&mut self) { "'case' builtin not inside of switch block" ); } - ParseKeyword::kw_end => { + ParseKeyword::End => { parse_error!( self, tok, @@ -3446,7 +3446,7 @@ fn consume_excess_token_generating_error(&mut self) { "'end' outside of a block" ); } - ParseKeyword::kw_else => { + ParseKeyword::Else => { parse_error!( self, tok, @@ -3648,7 +3648,7 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { "Expected %s, but found %ls", token_type_user_presentable_description( ParseTokenType::end, - ParseKeyword::none + ParseKeyword::None ), slf.peek_token(0).user_presentable_description() ); @@ -3706,11 +3706,11 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // If we are one of these, then look for specifically help arguments. Otherwise, if the next token // looks like an option (starts with a dash), then parse it as a decorated statement. let help_only_kws = [ - ParseKeyword::kw_begin, - ParseKeyword::kw_function, - ParseKeyword::kw_if, - ParseKeyword::kw_switch, - ParseKeyword::kw_while, + ParseKeyword::Begin, + ParseKeyword::Function, + ParseKeyword::If, + ParseKeyword::Switch, + ParseKeyword::While, ]; if if help_only_kws.contains(&self.peek_token(0).keyword) { self.peek_token(1).is_help_argument @@ -3722,8 +3722,8 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { // Likewise if the next token doesn't look like an argument at all. This corresponds to // e.g. a "naked if". - let naked_invocation_invokes_help = ![ParseKeyword::kw_begin, ParseKeyword::kw_end] - .contains(&self.peek_token(0).keyword); + let naked_invocation_invokes_help = + ![ParseKeyword::Begin, ParseKeyword::End].contains(&self.peek_token(0).keyword); if naked_invocation_invokes_help && [ParseTokenType::end, ParseTokenType::terminate] .contains(&self.peek_token(1).typ) @@ -3733,26 +3733,26 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { } match self.peek_token(0).keyword { - ParseKeyword::kw_not | ParseKeyword::kw_exclam => { + ParseKeyword::Not | ParseKeyword::Exclam => { let embedded = self.allocate_boxed_visit::(); StatementVariant::NotStatement(embedded) } - ParseKeyword::kw_for - | ParseKeyword::kw_while - | ParseKeyword::kw_function - | ParseKeyword::kw_begin => { + ParseKeyword::For + | ParseKeyword::While + | ParseKeyword::Function + | ParseKeyword::Begin => { let embedded = self.allocate_boxed_visit::(); StatementVariant::BlockStatement(embedded) } - ParseKeyword::kw_if => { + ParseKeyword::If => { let embedded = self.allocate_boxed_visit::(); StatementVariant::IfStatement(embedded) } - ParseKeyword::kw_switch => { + ParseKeyword::Switch => { let embedded = self.allocate_boxed_visit::(); StatementVariant::SwitchStatement(embedded) } - ParseKeyword::kw_end => { + ParseKeyword::End => { // 'end' is forbidden as a command. // For example, `if end` or `while end` will produce this error. // We still have to descend into the decorated statement because @@ -3774,19 +3774,19 @@ fn new_decorated_statement(slf: &mut Populator<'_>) -> StatementVariant { /// This must never return null. fn allocate_populate_block_header(&mut self) -> BlockStatementHeaderVariant { match self.peek_token(0).keyword { - ParseKeyword::kw_for => { + ParseKeyword::For => { let embedded = self.allocate_visit::(); BlockStatementHeaderVariant::ForHeader(embedded) } - ParseKeyword::kw_while => { + ParseKeyword::While => { let embedded = self.allocate_visit::(); BlockStatementHeaderVariant::WhileHeader(embedded) } - ParseKeyword::kw_function => { + ParseKeyword::Function => { let embedded = self.allocate_visit::(); BlockStatementHeaderVariant::FunctionHeader(embedded) } - ParseKeyword::kw_begin => { + ParseKeyword::Begin => { let embedded = self.allocate_visit::(); BlockStatementHeaderVariant::BeginHeader(embedded) } @@ -3863,7 +3863,7 @@ fn visit_variable_assignment(&mut self, varas: &mut VariableAssignment) { fn visit_job_continuation(&mut self, node: &mut JobContinuation) { // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`. - if [ParseKeyword::kw_and, ParseKeyword::kw_or].contains(&self.peek_token(1).keyword) { + if [ParseKeyword::And, ParseKeyword::Or].contains(&self.peek_token(1).keyword) { parse_error!( self, self.peek_token(1), @@ -3931,7 +3931,7 @@ fn visit_keyword(&mut self, keyword: &mut dyn Keyword) -> VisitResult { // Special error reporting for keyword_t. let allowed_keywords = keyword.allowed_keywords(); - if keyword.allowed_keywords() == [ParseKeyword::kw_end] { + if keyword.allowed_keywords() == [ParseKeyword::End] { return VisitResult::Break(MissingEndError { allowed_keywords, token: *self.peek_token(0), @@ -4121,7 +4121,7 @@ pub(crate) fn unescape_keyword(tok: TokenType, token: &wstr) -> Cow<'_, wstr> { Cow::Owned(unescape_string(token, UnescapeStringStyle::default()).unwrap_or_default()) } -/// Given a token, returns the keyword it matches, or ParseKeyword::none. +/// Given a token, returns the keyword it matches, or ParseKeyword::None. fn keyword_for_token(tok: TokenType, token: &wstr) -> ParseKeyword { ParseKeyword::from(&unescape_keyword(tok, token)[..]) } diff --git a/src/highlight/highlight.rs b/src/highlight/highlight.rs index 33092a4ae..12b8d3026 100644 --- a/src/highlight/highlight.rs +++ b/src/highlight/highlight.rs @@ -844,25 +844,25 @@ fn visit_children(&mut self, node: &dyn Node) { fn visit_keyword(&mut self, node: &dyn Keyword) { let mut role = HighlightRole::normal; match node.keyword() { - ParseKeyword::kw_begin - | ParseKeyword::kw_builtin - | ParseKeyword::kw_case - | ParseKeyword::kw_command - | ParseKeyword::kw_else - | ParseKeyword::kw_end - | ParseKeyword::kw_exec - | ParseKeyword::kw_for - | ParseKeyword::kw_function - | ParseKeyword::kw_if - | ParseKeyword::kw_in - | ParseKeyword::kw_switch - | ParseKeyword::kw_while => role = HighlightRole::keyword, - ParseKeyword::kw_and - | ParseKeyword::kw_or - | ParseKeyword::kw_not - | ParseKeyword::kw_exclam - | ParseKeyword::kw_time => role = HighlightRole::operat, - ParseKeyword::none => (), + ParseKeyword::Begin + | ParseKeyword::Builtin + | ParseKeyword::Case + | ParseKeyword::Command + | ParseKeyword::Else + | ParseKeyword::End + | ParseKeyword::Exec + | ParseKeyword::For + | ParseKeyword::Function + | ParseKeyword::If + | ParseKeyword::In + | ParseKeyword::Switch + | ParseKeyword::While => role = HighlightRole::keyword, + ParseKeyword::And + | ParseKeyword::Or + | ParseKeyword::Not + | ParseKeyword::Exclam + | ParseKeyword::Time => role = HighlightRole::operat, + ParseKeyword::None => (), }; self.color_node(node.leaf_as_node(), HighlightSpec::with_fg(role)); } diff --git a/src/parse_constants.rs b/src/parse_constants.rs index 46f9b8587..d9a669112 100644 --- a/src/parse_constants.rs +++ b/src/parse_constants.rs @@ -84,27 +84,27 @@ pub enum ParseTokenType { #[repr(u8)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ParseKeyword { - // 'none' is not a keyword, it is a sentinel indicating nothing. - none, - - kw_and, - kw_begin, - kw_builtin, - kw_case, - kw_command, - kw_else, - kw_end, - kw_exclam, - kw_exec, - kw_for, - kw_function, - kw_if, - kw_in, - kw_not, - kw_or, - kw_switch, - kw_time, - kw_while, + // 'None' is not a keyword, it is a sentinel indicating nothing. + // Note it proves convenient to keep this as a value rather than using Option. + None, + And, + Begin, + Builtin, + Case, + Command, + Else, + End, + Exclam, + Exec, + For, + Function, + If, + In, + Not, + Or, + Switch, + Time, + While, } // Statement decorations like 'command' or 'exec'. @@ -224,7 +224,7 @@ pub fn to_wstr(self) -> &'static wstr { impl Default for ParseKeyword { fn default() -> Self { - ParseKeyword::none + ParseKeyword::None } } @@ -232,24 +232,24 @@ impl ParseKeyword { /// Return the keyword as a string. pub fn to_wstr(self) -> &'static wstr { match self { - ParseKeyword::kw_and => L!("and"), - ParseKeyword::kw_begin => L!("begin"), - ParseKeyword::kw_builtin => L!("builtin"), - ParseKeyword::kw_case => L!("case"), - ParseKeyword::kw_command => L!("command"), - ParseKeyword::kw_else => L!("else"), - ParseKeyword::kw_end => L!("end"), - ParseKeyword::kw_exclam => L!("!"), - ParseKeyword::kw_exec => L!("exec"), - ParseKeyword::kw_for => L!("for"), - ParseKeyword::kw_function => L!("function"), - ParseKeyword::kw_if => L!("if"), - ParseKeyword::kw_in => L!("in"), - ParseKeyword::kw_not => L!("not"), - ParseKeyword::kw_or => L!("or"), - ParseKeyword::kw_switch => L!("switch"), - ParseKeyword::kw_time => L!("time"), - ParseKeyword::kw_while => L!("while"), + ParseKeyword::And => L!("and"), + ParseKeyword::Begin => L!("begin"), + ParseKeyword::Builtin => L!("builtin"), + ParseKeyword::Case => L!("case"), + ParseKeyword::Command => L!("command"), + ParseKeyword::Else => L!("else"), + ParseKeyword::End => L!("end"), + ParseKeyword::Exclam => L!("!"), + ParseKeyword::Exec => L!("exec"), + ParseKeyword::For => L!("for"), + ParseKeyword::Function => L!("function"), + ParseKeyword::If => L!("if"), + ParseKeyword::In => L!("in"), + ParseKeyword::Not => L!("not"), + ParseKeyword::Or => L!("or"), + ParseKeyword::Switch => L!("switch"), + ParseKeyword::Time => L!("time"), + ParseKeyword::While => L!("while"), _ => L!("unknown_keyword"), } } @@ -264,25 +264,25 @@ fn to_arg(self) -> fish_printf::Arg<'static> { impl From<&wstr> for ParseKeyword { fn from(s: &wstr) -> Self { match s { - _ if s == "!" => ParseKeyword::kw_exclam, - _ if s == "and" => ParseKeyword::kw_and, - _ if s == "begin" => ParseKeyword::kw_begin, - _ if s == "builtin" => ParseKeyword::kw_builtin, - _ if s == "case" => ParseKeyword::kw_case, - _ if s == "command" => ParseKeyword::kw_command, - _ if s == "else" => ParseKeyword::kw_else, - _ if s == "end" => ParseKeyword::kw_end, - _ if s == "exec" => ParseKeyword::kw_exec, - _ if s == "for" => ParseKeyword::kw_for, - _ if s == "function" => ParseKeyword::kw_function, - _ if s == "if" => ParseKeyword::kw_if, - _ if s == "in" => ParseKeyword::kw_in, - _ if s == "not" => ParseKeyword::kw_not, - _ if s == "or" => ParseKeyword::kw_or, - _ if s == "switch" => ParseKeyword::kw_switch, - _ if s == "time" => ParseKeyword::kw_time, - _ if s == "while" => ParseKeyword::kw_while, - _ => ParseKeyword::none, + _ if s == "!" => ParseKeyword::Exclam, + _ if s == "and" => ParseKeyword::And, + _ if s == "begin" => ParseKeyword::Begin, + _ if s == "builtin" => ParseKeyword::Builtin, + _ if s == "case" => ParseKeyword::Case, + _ if s == "command" => ParseKeyword::Command, + _ if s == "else" => ParseKeyword::Else, + _ if s == "end" => ParseKeyword::End, + _ if s == "exec" => ParseKeyword::Exec, + _ if s == "for" => ParseKeyword::For, + _ if s == "function" => ParseKeyword::Function, + _ if s == "if" => ParseKeyword::If, + _ if s == "in" => ParseKeyword::In, + _ if s == "not" => ParseKeyword::Not, + _ if s == "or" => ParseKeyword::Or, + _ if s == "switch" => ParseKeyword::Switch, + _ if s == "time" => ParseKeyword::Time, + _ if s == "while" => ParseKeyword::While, + _ => ParseKeyword::None, } } } @@ -423,7 +423,7 @@ pub fn token_type_user_presentable_description( type_: ParseTokenType, keyword: ParseKeyword, ) -> WString { - if keyword != ParseKeyword::none { + if keyword != ParseKeyword::None { return sprintf!("keyword: '%ls'", keyword.to_wstr()); } match type_ { diff --git a/src/parse_execution.rs b/src/parse_execution.rs index 0a52a255c..9f6039343 100644 --- a/src/parse_execution.rs +++ b/src/parse_execution.rs @@ -1725,11 +1725,11 @@ fn test_and_run_1_job_conjunction( if let Some(deco) = &jc.decorator { let last_status = ctx.parser().get_last_status(); match deco.keyword() { - ParseKeyword::kw_and => { + ParseKeyword::And => { // AND. Skip if the last job failed. skip = last_status != 0; } - ParseKeyword::kw_or => { + ParseKeyword::Or => { // OR. Skip if the last job succeeded. skip = last_status == 0; } diff --git a/src/parse_tree.rs b/src/parse_tree.rs index bfa4d2912..4ed99e853 100644 --- a/src/parse_tree.rs +++ b/src/parse_tree.rs @@ -39,7 +39,7 @@ impl ParseToken { pub fn new(typ: ParseTokenType) -> Self { ParseToken { typ, - keyword: ParseKeyword::none, + keyword: ParseKeyword::None, has_dash_prefix: false, is_help_argument: false, is_newline: false, @@ -73,7 +73,7 @@ pub fn is_dash_prefix_string(&self) -> bool { /// Returns a string description of the given parse token. pub fn describe(&self) -> WString { let mut result = self.typ.to_wstr().to_owned(); - if self.keyword != ParseKeyword::none { + if self.keyword != ParseKeyword::None { sprintf!(=> &mut result, " <%ls>", self.keyword.to_wstr()) } result diff --git a/src/parse_util.rs b/src/parse_util.rs index f853a309f..3107fec98 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -1545,10 +1545,10 @@ fn detect_errors_in_backgrounded_job( if let Some(next) = jlist.get(index + 1) { if let Some(deco) = &next.decorator { assert!( - [ParseKeyword::kw_and, ParseKeyword::kw_or].contains(&deco.keyword()), + [ParseKeyword::And, ParseKeyword::Or].contains(&deco.keyword()), "Unexpected decorator keyword" ); - let deco_name = if deco.keyword() == ParseKeyword::kw_and { + let deco_name = if deco.keyword() == ParseKeyword::And { L!("and") } else { L!("or") From 3f4839e5f2b406406665a9b7b46eff5f45bcd0ee Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Thu, 20 Mar 2025 10:25:20 -0700 Subject: [PATCH 08/19] Optimize ParseKeyword::from(&wstr) This is a hot function and is easy to optimize. --- src/parse_constants.rs | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/parse_constants.rs b/src/parse_constants.rs index d9a669112..08249e137 100644 --- a/src/parse_constants.rs +++ b/src/parse_constants.rs @@ -81,7 +81,6 @@ pub enum ParseTokenType { comment, } -#[repr(u8)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ParseKeyword { // 'None' is not a keyword, it is a sentinel indicating nothing. @@ -263,25 +262,27 @@ fn to_arg(self) -> fish_printf::Arg<'static> { impl From<&wstr> for ParseKeyword { fn from(s: &wstr) -> Self { - match s { - _ if s == "!" => ParseKeyword::Exclam, - _ if s == "and" => ParseKeyword::And, - _ if s == "begin" => ParseKeyword::Begin, - _ if s == "builtin" => ParseKeyword::Builtin, - _ if s == "case" => ParseKeyword::Case, - _ if s == "command" => ParseKeyword::Command, - _ if s == "else" => ParseKeyword::Else, - _ if s == "end" => ParseKeyword::End, - _ if s == "exec" => ParseKeyword::Exec, - _ if s == "for" => ParseKeyword::For, - _ if s == "function" => ParseKeyword::Function, - _ if s == "if" => ParseKeyword::If, - _ if s == "in" => ParseKeyword::In, - _ if s == "not" => ParseKeyword::Not, - _ if s == "or" => ParseKeyword::Or, - _ if s == "switch" => ParseKeyword::Switch, - _ if s == "time" => ParseKeyword::Time, - _ if s == "while" => ParseKeyword::While, + // Note this is called in hot loops. + let c0 = s.as_char_slice().get(0).copied().unwrap_or('\0'); + match c0 { + '!' if s == L!("!") => ParseKeyword::Exclam, + 'a' if s == L!("and") => ParseKeyword::And, + 'b' if s == L!("begin") => ParseKeyword::Begin, + 'b' if s == L!("builtin") => ParseKeyword::Builtin, + 'c' if s == L!("case") => ParseKeyword::Case, + 'c' if s == L!("command") => ParseKeyword::Command, + 'e' if s == L!("else") => ParseKeyword::Else, + 'e' if s == L!("end") => ParseKeyword::End, + 'e' if s == L!("exec") => ParseKeyword::Exec, + 'f' if s == L!("for") => ParseKeyword::For, + 'f' if s == L!("function") => ParseKeyword::Function, + 'i' if s == L!("if") => ParseKeyword::If, + 'i' if s == L!("in") => ParseKeyword::In, + 'n' if s == L!("not") => ParseKeyword::Not, + 'o' if s == L!("or") => ParseKeyword::Or, + 's' if s == L!("switch") => ParseKeyword::Switch, + 't' if s == L!("time") => ParseKeyword::Time, + 'w' if s == L!("while") => ParseKeyword::While, _ => ParseKeyword::None, } } From b33795533c5d54c10d71d4861c3bf1fc023f6433 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 13 Apr 2025 11:22:41 -0700 Subject: [PATCH 09/19] ast: remove leaf_as_node() Plain old as_node() is fine. --- src/ast.rs | 4 ---- src/highlight/highlight.rs | 10 +++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index c4061864a..0a9380c9c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -410,7 +410,6 @@ pub trait Leaf: Node { fn has_source(&self) -> bool { self.range().is_some() } - fn leaf_as_node(&self) -> &dyn Node; } // A token node is a node which contains a token, which must be one of a fixed set. @@ -513,9 +512,6 @@ fn range(&self) -> Option { fn range_mut(&mut self) -> &mut Option { &mut self.range } - fn leaf_as_node(&self) -> &dyn Node { - self - } } impl Acceptor for $name { #[allow(unused_variables)] diff --git a/src/highlight/highlight.rs b/src/highlight/highlight.rs index 12b8d3026..4258b89ae 100644 --- a/src/highlight/highlight.rs +++ b/src/highlight/highlight.rs @@ -2,7 +2,7 @@ use crate::abbrs::{self, with_abbrs}; use crate::ast::{ self, Argument, Ast, BlockStatement, BlockStatementHeaderVariant, BraceStatement, - DecoratedStatement, Keyword, Leaf, List, Node, NodeVisitor, Redirection, Token, Type, + DecoratedStatement, Keyword, List, Node, NodeVisitor, Redirection, Token, Type, VariableAssignment, }; use crate::builtins::shared::builtin_exists; @@ -864,7 +864,7 @@ fn visit_keyword(&mut self, node: &dyn Keyword) { | ParseKeyword::Time => role = HighlightRole::operat, ParseKeyword::None => (), }; - self.color_node(node.leaf_as_node(), HighlightSpec::with_fg(role)); + self.color_node(node.as_node(), HighlightSpec::with_fg(role)); } fn visit_token(&mut self, tok: &dyn Token) { let mut role = HighlightRole::normal; @@ -884,7 +884,7 @@ fn visit_token(&mut self, tok: &dyn Token) { } _ => (), } - self.color_node(tok.leaf_as_node(), HighlightSpec::with_fg(role)); + self.color_node(tok.as_node(), HighlightSpec::with_fg(role)); } // Visit an argument, perhaps knowing that our command is cd. fn visit_argument(&mut self, arg: &Argument, cmd_is_cd: bool, options_allowed: bool) { @@ -943,7 +943,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { // Check if the argument contains a command substitution. If so, highlight it as a param // even though it's a command redirection, and don't try to do any other validation. if has_cmdsub(&target) { - self.color_as_argument(redir.target.leaf_as_node(), true); + self.color_as_argument(redir.target.as_node(), true); return; } // No command substitution, so we can highlight the target file or fd. For example, @@ -959,7 +959,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { self.file_tester.test_redirection_target(&target, oper.mode) }; self.color_node( - redir.target.leaf_as_node(), + redir.target.as_node(), HighlightSpec::with_fg(if target_is_valid { HighlightRole::redirection } else { From 7d98cc88507b632bddd6660e38d78119ff847ed4 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 13 Apr 2025 11:49:06 -0700 Subject: [PATCH 10/19] ast: stop using Node::ptr_eq This function hid a bug! It converted two Nodes to `*const ()` which discards the vtable pointer, using only the data pointer; but two nodes can and do have the same data pointer (e.g. if one node is the first item in another). Add the (statically dispatched) is_same_node function with a warning comment, and adopt that instead. --- src/ast.rs | 51 ++++++++++++++++++++++++++++++++++++++--------- src/parse_util.rs | 6 +++--- src/reader.rs | 4 ++-- src/tests/ast.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ src/tests/mod.rs | 1 + 5 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 src/tests/ast.rs diff --git a/src/ast.rs b/src/ast.rs index 0a9380c9c..a8c82da8c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -145,15 +145,51 @@ fn self_memory_size(&self) -> usize { std::mem::size_of_val(self) } - // The address of the object, for comparison. - fn as_ptr(&self) -> *const (); - - fn pointer_eq(&self, rhs: &dyn Node) -> bool { - std::ptr::eq(self.as_ptr(), rhs.as_ptr()) - } + /// Convert to the dynamic Node type. fn as_node(&self) -> &dyn Node; } +/// Return true if two nodes are the same object. +#[inline(always)] +pub fn is_same_node(lhs: &dyn Node, rhs: &dyn Node) -> bool { + // There are two subtleties here: + // + // 1. Two &dyn Node may reference the same underlying object + // with different vtables - for example if one is a &dyn Node + // and the other is a &dyn NodeMut. + // + // 2. Two &dyn Node may reference different underlying objects + // with the same base pointer - for example if a Node is a + // the first field in another Node. + // + // Therefore we make the following assumptions, which seem sound enough: + // 1. If two nodes have different base pointers, they reference different objects + // (though NOT true in C++). + // 2. If two nodes have the same base pointer and same vtable, they reference + // the same object. + // 3. If two nodes have the same base pointer but different vtables, they are the same iff + // their types are equal: a Node cannot have a Node of the same type at the same address + // (unless it's a ZST, which does not apply here). + // + // Note this is performance-sensitive. + + // Different base pointers => not the same. + let lptr = lhs as *const dyn Node as *const (); + let rptr = rhs as *const dyn Node as *const (); + if !std::ptr::eq(lptr, rptr) { + return false; + } + + // Same base pointer and same vtable => same object. + if std::ptr::eq(lhs, rhs) { + return true; + } + + // Same base pointer, but different vtables. + // Compare the types. + lhs.typ() == rhs.typ() +} + /// NodeMut is a mutable node. trait NodeMut: Node + AcceptorMut + ConcreteNodeMut {} @@ -491,9 +527,6 @@ fn try_source_range(&self) -> Option { Some(visitor.total) } } - fn as_ptr(&self) -> *const () { - (self as *const $name).cast() - } fn as_node(&self) -> &dyn Node { self } diff --git a/src/parse_util.rs b/src/parse_util.rs index 3107fec98..6a2938fef 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -1,5 +1,5 @@ //! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code. -use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token}; +use crate::ast::{self, is_same_node, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token}; use crate::builtins::shared::builtin_exists; use crate::common::{ escape_string, unescape_string, valid_var_name, valid_var_name_char, EscapeFlags, @@ -1538,7 +1538,7 @@ fn detect_errors_in_backgrounded_job( // Find the index of ourselves in the job list. let index = jlist .iter() - .position(|job| job.pointer_eq(job_conj)) + .position(|job| is_same_node(job, job_conj)) .expect("Should have found the job in the list"); // Try getting the next job and check its decorator. @@ -1601,7 +1601,7 @@ fn detect_errors_in_decorated_statement( // Check our pipeline position. let pipe_pos = if job.continuation.is_empty() { PipelinePosition::none - } else if job.statement.pointer_eq(st) { + } else if is_same_node(&job.statement, st) { PipelinePosition::first } else { PipelinePosition::subsequent diff --git a/src/reader.rs b/src/reader.rs index 50a26333d..56906c1eb 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -42,7 +42,7 @@ use errno::{errno, Errno}; use crate::abbrs::abbrs_match; -use crate::ast::{self, Ast, Category, Traversal}; +use crate::ast::{self, is_same_node, Ast, Category, Traversal}; use crate::builtins::shared::ErrorCode; use crate::builtins::shared::STATUS_CMD_ERROR; use crate::builtins::shared::STATUS_CMD_OK; @@ -5357,7 +5357,7 @@ fn extract_tokens(s: &wstr) -> Vec { let mut cursor = Some(node); while let Some(cur) = cursor { if let Some(stmt) = cur.as_decorated_statement() { - if node.pointer_eq(&stmt.command) { + if is_same_node(node, &stmt.command) { return true; } } diff --git a/src/tests/ast.rs b/src/tests/ast.rs new file mode 100644 index 000000000..e43f4c2eb --- /dev/null +++ b/src/tests/ast.rs @@ -0,0 +1,46 @@ +use crate::ast::{is_same_node, Ast, Node}; +use crate::wchar::prelude::*; + +const FISH_FUNC: &str = r#" +function stuff --description 'Stuff' + set -l log "/tmp/chaos_log.(random)" + set -x PATH /custom/bin $PATH + + echo "[$USER] Hooray" | tee -a $log 2>/dev/null + + time if test (count $argv) -eq 0 + echo "No targets specified" >> $log 2>&1 + return 1 + end + + for target in $argv + command bash -c "echo" >> $log 2> /dev/null + switch $status + case 0 + echo "Success" | tee -a $log + case '*' + echo "Failure" >> $log + end + end + set_color green +end +"#; + +#[test] +fn test_is_same_node() { + // is_same_node is pretty subtle! Let's check it. + let src = WString::from_str(FISH_FUNC); + let ast = Ast::parse(&src, Default::default(), None); + assert!(!ast.errored()); + let all_nodes: Vec<&dyn Node> = ast.walk().collect(); + for i in 0..all_nodes.len() { + for j in 0..all_nodes.len() { + let same = is_same_node(all_nodes[i], all_nodes[j]); + if i == j { + assert!(same, "Node {} should be the same as itself", i); + } else { + assert!(!same, "Node {} should not be the same as node {}", i, j); + } + } + } +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index dba77503f..61bd1945f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,4 +1,5 @@ mod abbrs; +mod ast; mod ast_bench; mod common; mod complete; From f7543dd447d1ed2bc2a9e5f46004db3df61813a3 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Mon, 14 Apr 2025 19:21:24 -0700 Subject: [PATCH 11/19] ast: remove certain as_node() function calls We can remove all of them once MSRV becomes 1.86, which adds support for trait upcasting coercion; we can remove a few today. --- src/highlight/highlight.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/highlight/highlight.rs b/src/highlight/highlight.rs index 4258b89ae..8cdcb15e9 100644 --- a/src/highlight/highlight.rs +++ b/src/highlight/highlight.rs @@ -888,7 +888,7 @@ fn visit_token(&mut self, tok: &dyn Token) { } // Visit an argument, perhaps knowing that our command is cd. fn visit_argument(&mut self, arg: &Argument, cmd_is_cd: bool, options_allowed: bool) { - self.color_as_argument(arg.as_node(), options_allowed); + self.color_as_argument(arg, options_allowed); if !self.io_still_ok() { return; } @@ -912,7 +912,7 @@ fn visit_argument(&mut self, arg: &Argument, cmd_is_cd: bool, options_allowed: b self.color_array[i].valid_path = true; } } - Err(..) => self.color_node(arg.as_node(), HighlightSpec::with_fg(HighlightRole::error)), + Err(..) => self.color_node(arg, HighlightSpec::with_fg(HighlightRole::error)), } } @@ -926,10 +926,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) // If so, color the whole thing invalid and stop. if !oper.is_valid() { - self.color_node( - redir.as_node(), - HighlightSpec::with_fg(HighlightRole::error), - ); + self.color_node(redir, HighlightSpec::with_fg(HighlightRole::error)); return; } @@ -943,7 +940,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { // Check if the argument contains a command substitution. If so, highlight it as a param // even though it's a command redirection, and don't try to do any other validation. if has_cmdsub(&target) { - self.color_as_argument(redir.target.as_node(), true); + self.color_as_argument(&redir.target, true); return; } // No command substitution, so we can highlight the target file or fd. For example, @@ -959,7 +956,7 @@ fn visit_redirection(&mut self, redir: &Redirection) { self.file_tester.test_redirection_target(&target, oper.mode) }; self.color_node( - redir.target.as_node(), + &redir.target, HighlightSpec::with_fg(if target_is_valid { HighlightRole::redirection } else { From 8f46b617dbdf5e2ece4862cf331ece61ef356703 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 20 Apr 2025 11:04:04 -0700 Subject: [PATCH 12/19] ast: make Traversal more powerful Prior to this commit, Traversal was a convenient way to walk the nodes of an ast in pre-order. This worked simply: it kept a stack of Nodes, and then when a Node was visited, it was popped off and its children added. Enhance Traversal to track whether each node on the Stack is `NeedsVisit` or `Visited`. The idea is that, when a Node is yielded from next(), we can reconstruct its parents as those Visited nodes on the Traversal stack. This will allow clients to get the parents of Nodes as they are traversed without each Node needing to explicitly save its parent. --- src/ast.rs | 74 ++++++++++++++++++-- src/tests/parser.rs | 167 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 234 insertions(+), 7 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index a8c82da8c..24ad99982 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2496,35 +2496,99 @@ pub fn ast_type_to_string(t: Type) -> &'static wstr { } } +// An entry in the traversal stack. We retain nodes after visiting them, so that we can reconstruct the parent path +// as the list of visited nodes remaining on the stack. Once we've visited a node twice, then it's popped, as that +// means all of its children have been visited (or skipped). +enum TraversalEntry<'a> { + NeedsVisit(&'a dyn Node), + Visited(&'a dyn Node), +} + // A way to visit nodes iteratively. // This is pre-order. Each node is visited before its children. // Example: // let tv = Traversal::new(start); // while let Some(node) = tv.next() {...} pub struct Traversal<'a> { - stack: Vec<&'a dyn Node>, + stack: Vec>, } impl<'a> Traversal<'a> { // Construct starting with a node pub fn new(n: &'a dyn Node) -> Self { - Self { stack: vec![n] } + Self { + stack: vec![TraversalEntry::NeedsVisit(n)], + } + } + + // Return an iterator over the parent nodes - those which have been visited but not yet popped. + // Parents are returned in reverse order (immediate parent, grandparent, etc). + // The most recently visited node is first; that is first parent node will be the node most recently + // returned by next(); + pub fn parent_nodes(&self) -> impl Iterator + '_ { + self.stack.iter().rev().filter_map(|entry| match entry { + TraversalEntry::Visited(node) => Some(*node), + _ => None, + }) + } + + // Return the parent node of the given node, asserting it is on the stack + // (i.e. we are processing it or one of its children, transitively). + // Note this does NOT return parents of nodes that have not yet been yielded by the iterator; + // for example `traversal.parent(¤t.child)` will panic because `current.child` has not been + // yielded yet. + pub fn parent(&self, node: &dyn Node) -> &'a dyn Node { + let mut iter = self.parent_nodes(); + while let Some(n) = iter.next() { + if is_same_node(node, n) { + return iter.next().expect("Node is root and has no parent"); + } + } + panic!("Node {:?} has either been popped off of the stack or not yet visited. Cannot find parent.", node.describe()); + } + + // Skip the children of the last visited node, which must be passed + // as a sanity check. This node must be the last visited node on the stack. + // For convenience, also remove the (visited) node itself. + pub fn skip_children(&mut self, node: &dyn Node) { + for idx in (0..self.stack.len()).rev() { + if let TraversalEntry::Visited(n) = self.stack[idx] { + assert!( + is_same_node(node, n), + "Passed node is not the last visited node" + ); + self.stack.truncate(idx); + return; + } + } + panic!("Passed node is not on the stack"); } } impl<'a> Iterator for Traversal<'a> { type Item = &'a dyn Node; + // Return the next node. fn next(&mut self) -> Option<&'a dyn Node> { - let node = self.stack.pop()?; - // We want to visit in reverse order so the first child ends up on top of the stack. + let node = loop { + match self.stack.pop()? { + TraversalEntry::NeedsVisit(n) => { + // Leave a marker for the node we just visited. + self.stack.push(TraversalEntry::Visited(n)); + break n; + } + TraversalEntry::Visited(_) => {} + } + }; + // Append this node's children to our stack. node.accept(self, true /* reverse */); Some(node) } } impl<'a, 'v: 'a> NodeVisitor<'v> for Traversal<'a> { + // Record that a child of a node needs to be visited. fn visit(&mut self, node: &'a dyn Node) { - self.stack.push(node) + self.stack.push(TraversalEntry::NeedsVisit(node)); } } diff --git a/src/tests/parser.rs b/src/tests/parser.rs index 9f4f826fa..c3b32f3e3 100644 --- a/src/tests/parser.rs +++ b/src/tests/parser.rs @@ -1,10 +1,10 @@ -use crate::ast::{self, Ast, JobPipeline, List, Node, Traversal}; +use crate::ast::{self, is_same_node, Ast, JobPipeline, List, Node, Traversal}; use crate::common::ScopeGuard; use crate::env::EnvStack; use crate::expand::ExpandFlags; use crate::io::{IoBufferfill, IoChain}; use crate::parse_constants::{ - ParseErrorCode, ParseTreeFlags, ParserTestErrorBits, StatementDecoration, + ParseErrorCode, ParseTokenType, ParseTreeFlags, ParserTestErrorBits, StatementDecoration, }; use crate::parse_tree::{parse_source, LineCounter}; use crate::parse_util::{parse_util_detect_errors, parse_util_detect_errors_in_argument}; @@ -818,3 +818,166 @@ fn test_line_counter_empty() { assert_eq!(line_counter.line_offset_of_node(), None); assert_eq!(line_counter.source_offset_of_node(), None); } + +// Helper for testing a simple ast traversal. +// The ast is always for the command 'true;'. +struct TrueSemiAstTester<'a> { + // The AST we are testing. + ast: &'a Ast, + + // Expected parent-child relationships, in the order we expect to encounter them. + parent_child: Box<[(&'a dyn Node, &'a dyn Node)]>, +} + +impl<'a> TrueSemiAstTester<'a> { + const TRUE_SEMI: &'static wstr = L!("true;"); + fn new(ast: &'a Ast) -> Self { + let job_list = ast.top().as_job_list().expect("Expected job_list"); + let job_conjunction = &job_list[0]; + let job_pipeline = &job_conjunction.job; + let variable_assignment_list = &job_pipeline.variables; + let statement = &job_pipeline.statement; + + let decorated_statement = statement + .contents + .as_decorated_statement() + .expect("Expected decorated_statement"); + let command = &decorated_statement.command; + let args_or_redirs = &decorated_statement.args_or_redirs; + let job_continuation = &job_pipeline.continuation; + let job_conjunction_continuation = &job_conjunction.continuations; + let semi_nl = job_conjunction.semi_nl.as_ref().expect("Expected semi_nl"); + + // Helpful parent-child map, such that the children are in the order that we expect to encounter them + // in the AST. + let parent_child: &[(&'a dyn Node, &'a dyn Node)] = &[ + (job_list, job_conjunction), + (job_conjunction, job_pipeline), + (job_pipeline, variable_assignment_list), + (job_pipeline, statement), + (statement, decorated_statement), + (decorated_statement, command), + (decorated_statement, args_or_redirs), + (job_pipeline, job_continuation), + (job_conjunction, job_conjunction_continuation), + (job_conjunction, semi_nl), + ]; + Self { + ast, + parent_child: Box::from(parent_child), + } + } + + // Expected nodes, in-order. + fn expected_nodes(&self) -> Vec<&'a dyn Node> { + let mut expected: Vec<&dyn Node> = vec![self.ast.top()]; + expected.extend(self.parent_child.iter().map(|&(_p, c)| c)); + expected + } + + // Helper function to construct the parent list of a given node, such at the first entry is + // the node itself, and the last entry is the root node. + fn get_parents<'s>(&'s self, node: &'a dyn Node) -> impl Iterator + 's { + let mut next = Some(node); + std::iter::from_fn(move || { + let out = next?; + next = self + .parent_child + .iter() + .find_map(|&(p, c)| is_same_node(c, out).then_some(p)); + Some(out) + }) + } +} + +#[test] +fn test_ast() { + // Light testing of the AST and traversals. + let ast = Ast::parse(TrueSemiAstTester::TRUE_SEMI, ParseTreeFlags::empty(), None); + let tester = TrueSemiAstTester::new(&ast); + assert!(ast.top().as_job_list().is_some(), "Expected job_list"); + + // Walk the AST and collect all nodes. + // See is_same_node comments for why we can't use assert_eq! here. + let found = ast.walk().collect::>(); + let expected = tester.expected_nodes(); + assert_eq!(found.len(), expected.len()); + for idx in 0..found.len() { + assert!(is_same_node(found[idx], expected[idx])); + } + + // Walk and check parents. + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + let expected_parents = tester.get_parents(node).collect::>(); + let found_parents = traversal.parent_nodes().collect::>(); + assert_eq!(found_parents.len(), expected_parents.len()); + for idx in 0..found_parents.len() { + assert!(is_same_node(found_parents[idx], expected_parents[idx])); + } + } + + // Find the decorated statement. + let decorated_statement = ast + .walk() + .find(|n| n.typ() == ast::Type::decorated_statement) + .expect("Expected decorated statement"); + + // Test the skip feature. Don't descend into the decorated_statement. + let expected_skip: Vec<&dyn Node> = expected + .iter() + .copied() + .filter(|&n| { + // Discard nodes who have the decorated_statement as a parent, + // excepting the decorated_statement itself. + tester + .get_parents(n) + .skip(1) + .all(|p| !is_same_node(p, decorated_statement)) + }) + .collect(); + + let mut found = vec![]; + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + if is_same_node(node, decorated_statement) { + traversal.skip_children(node); + } + found.push(node); + } + assert_eq!(found.len(), expected_skip.len()); + for idx in 0..found.len() { + assert!(is_same_node(found[idx], expected_skip[idx])); + } +} + +#[test] +#[should_panic] +fn test_traversal_skip_children_panics() { + // Test that we panic if we try to skip children of a node that is not the current node. + let ast = Ast::parse(L!("true;"), ParseTreeFlags::empty(), None); + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { + if node.typ() == ast::Type::decorated_statement { + // Should panic as we can only skip the current node. + traversal.skip_children(ast.top()); + } + } +} + +#[test] +#[should_panic] +fn test_traversal_parent_panics() { + // Can only get the parent of nodes still on the stack. + let ast = Ast::parse(L!("true;"), ParseTreeFlags::empty(), None); + let mut traversal = ast.walk(); + let mut decorated_statement = None; + while let Some(node) = traversal.next() { + if node.as_decorated_statement().is_some() { + decorated_statement = Some(node); + } else if node.as_token().map(|t| t.token_type()) == Some(ParseTokenType::end) { + // should panic as the decorated_statement is not on the stack. + let _ = traversal.parent(decorated_statement.unwrap()); + } + } +} From 996b34b5cb242d48f79c9cb5a3ad791fb4792218 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 20 Apr 2025 11:04:33 -0700 Subject: [PATCH 13/19] ast: pretty-print to stop using ast parent pointers These depths are used in calculating indents for ast pretty-printing. This moves away from parent back-pointers, so that we can ultimately remove them. --- src/ast.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 24ad99982..5c6aab5f5 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2672,8 +2672,9 @@ pub fn errored(&self) -> bool { pub fn dump(&self, orig: &wstr) -> WString { let mut result = WString::new(); - for node in self.walk() { - let depth = get_depth(node); + let mut traversal = self.walk(); + while let Some(node) = traversal.next() { + let depth = traversal.parent_nodes().count() - 1; // dot-| padding result += &str::repeat("! ", depth)[..]; @@ -2720,17 +2721,6 @@ pub fn dump(&self, orig: &wstr) -> WString { } } -// Return the depth of a node, i.e. number of parent links. -fn get_depth(node: &dyn Node) -> usize { - let mut result = 0; - let mut cursor = node; - while let Some(parent) = cursor.parent() { - result += 1; - cursor = parent; - } - result -} - struct SourceRangeVisitor { /// Total range we have encountered. total: SourceRange, From 68a357be3d00b5520c7cdfeeda5828e30688b52e Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sat, 5 Apr 2025 21:37:18 -0700 Subject: [PATCH 14/19] fish_indent to stop using ast parent pointers Reimplement fish_indent to discover parents through traversals, not ast parent pointers. --- src/builtins/fish_indent.rs | 130 ++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/src/builtins/fish_indent.rs b/src/builtins/fish_indent.rs index 511d9b1c2..acc6f8578 100644 --- a/src/builtins/fish_indent.rs +++ b/src/builtins/fish_indent.rs @@ -48,17 +48,20 @@ /// certain runs, weight line breaks, have a cost model, etc. struct PrettyPrinter<'source, 'ast> { /// The parsed ast. - ast: Ast, + ast: &'ast Ast, state: PrettyPrinterState<'source, 'ast>, } struct PrettyPrinterState<'source, 'ast> { - /// Original source. + // Original source. source: &'source wstr, - /// The indents of our string. - /// This has the same length as 'source' and describes the indentation level. + // The traversal of the ast. + traversal: Traversal<'ast>, + + // The indents of our string. + // This has the same length as 'source' and describes the indentation level. indents: Vec, /// The prettifier output. @@ -159,11 +162,13 @@ struct GapFlags { } impl<'source, 'ast> PrettyPrinter<'source, 'ast> { - fn new(source: &'source wstr, do_indent: bool) -> Self { + fn new(source: &'source wstr, ast: &'ast Ast, do_indent: bool) -> Self { + let traversal = Traversal::new(ast.top()); let mut zelf = Self { - ast: Ast::parse(source, parse_flags(), None), + ast, state: PrettyPrinterState { source, + traversal, indents: if do_indent /* Whether to indent, or just insert spaces. */ { @@ -189,10 +194,10 @@ fn new(source: &'source wstr, do_indent: bool) -> Self { } // Entry point. Prettify our source code and return it. - fn prettify(&'ast mut self) -> WString { + fn prettify(&mut self) -> WString { self.state.output.clear(); self.state.errors = Some(&self.ast.extras.errors); - self.state.visit(self.ast.top()); + self.state.prettify_traversal(); // Trailing gap text. self.state.emit_gap_text_before( @@ -394,13 +399,13 @@ fn gap_text_flags_before_node(&self, node: &dyn Node) -> GapFlags { ParseTokenType::string => { // Allow escaped newlines before commands that follow a variable assignment // since both can be long (#7955). - let p = node.parent().unwrap(); + let p = self.traversal.parent(node); if p.typ() != Type::decorated_statement { return result; } - let p = p.parent().unwrap(); + let p = self.traversal.parent(p); assert_eq!(p.typ(), Type::statement); - let p = p.parent().unwrap(); + let p = self.traversal.parent(p); if let Some(job) = p.as_job_pipeline() { if !job.variables.is_empty() { result.allow_escaped_newlines = true; @@ -726,8 +731,8 @@ fn visit_semi_nl(&mut self, node: &dyn ast::Token) { } fn is_multi_line_brace(&self, node: &dyn ast::Token) -> bool { - node.parent() - .unwrap() + self.traversal + .parent(node.as_node()) .as_brace_statement() .is_some_and(|brace_statement| { self.multi_line_brace_statement_locations @@ -804,11 +809,61 @@ fn visit_maybe_newlines(&mut self, node: &ast::MaybeNewlines) { self.emit_gap_text(gap_range, flags); } - fn visit_begin_header(&mut self) { + fn visit_begin_header(&mut self, node: &ast::BeginHeader) { + self.emit_node_text(&node.kw_begin); + if let Some(semi_nl) = &node.semi_nl { + self.visit_semi_nl(semi_nl); + } + // 'begin' does not require a newline after it, but we insert one. if !self.at_line_start() { self.emit_newline(); } } + + // Prettify our ast traversal, populating the output. + fn prettify_traversal(&mut self) { + while let Some(node) = self.traversal.next() { + // Leaf nodes we just visit their text. + if node.as_keyword().is_some() { + self.emit_node_text(node); + continue; + } + if let Some(token) = node.as_token() { + match token.token_type() { + ParseTokenType::end => self.visit_semi_nl(token), + ParseTokenType::left_brace => self.visit_left_brace(token), + ParseTokenType::right_brace => self.visit_right_brace(token), + _ => self.emit_node_text(node), + } + continue; + } + match node.typ() { + Type::argument | Type::variable_assignment => { + self.emit_node_text(node); + self.traversal.skip_children(node); + } + Type::redirection => { + self.visit_redirection(node.as_redirection().unwrap()); + self.traversal.skip_children(node); + } + Type::maybe_newlines => { + self.visit_maybe_newlines(node.as_maybe_newlines().unwrap()); + self.traversal.skip_children(node); + } + Type::begin_header => { + self.visit_begin_header(node.as_begin_header().unwrap()); + self.traversal.skip_children(node); + } + _ => { + // For branch and list nodes, default is to visit their children. + if [Category::branch, Category::list].contains(&node.category()) { + continue; + } + panic!("unexpected node type"); + } + } + } + } } // The flags we use to parse. @@ -819,50 +874,6 @@ fn parse_flags() -> ParseTreeFlags { | ParseTreeFlags::SHOW_BLANK_LINES } -impl<'source, 'ast> NodeVisitor<'_> for PrettyPrinterState<'source, 'ast> { - // Default implementation is to just visit children. - fn visit(&mut self, node: &'_ dyn Node) { - // Leaf nodes we just visit their text. - if node.as_keyword().is_some() { - self.emit_node_text(node); - return; - } - if let Some(token) = node.as_token() { - match token.token_type() { - ParseTokenType::end => self.visit_semi_nl(token), - ParseTokenType::left_brace => self.visit_left_brace(token), - ParseTokenType::right_brace => self.visit_right_brace(token), - _ => self.emit_node_text(node), - } - return; - } - match node.typ() { - Type::argument | Type::variable_assignment => { - self.emit_node_text(node); - } - Type::redirection => { - self.visit_redirection(node.as_redirection().unwrap()); - } - Type::maybe_newlines => { - self.visit_maybe_newlines(node.as_maybe_newlines().unwrap()); - } - Type::begin_header => { - // 'begin' does not require a newline after it, but we insert one. - node.accept(self, false); - self.visit_begin_header(); - } - _ => { - // For branch and list nodes, default is to visit their children. - if [Category::branch, Category::list].contains(&node.category()) { - node.accept(self, false); - return; - } - panic!("unexpected node type"); - } - } - } -} - /// Return whether a character at a given index is escaped. /// A character is escaped if it has an odd number of backslashes. fn char_is_escaped(text: &wstr, idx: usize) -> bool { @@ -1250,7 +1261,8 @@ fn prettify(streams: &mut IoStreams, src: &wstr, do_indent: bool) -> WString { metrics.visit(ast.top()); streams.err.appendln(format!("{}", metrics)); } - let mut printer = PrettyPrinter::new(src, do_indent); + let ast = Ast::parse(src, parse_flags(), None); + let mut printer = PrettyPrinter::new(src, &ast, do_indent); printer.prettify() } From 119716fbf26b3feb47627db488f6be2cac01a5aa Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sat, 5 Apr 2025 18:43:01 -0700 Subject: [PATCH 15/19] extract_tokens::is_command to stop using ast parent pointers Simple change to remove a user of parent back-references. --- src/reader.rs | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 56906c1eb..1b7f321ad 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -42,7 +42,7 @@ use errno::{errno, Errno}; use crate::abbrs::abbrs_match; -use crate::ast::{self, is_same_node, Ast, Category, Traversal}; +use crate::ast::{is_same_node, Ast, Category}; use crate::builtins::shared::ErrorCode; use crate::builtins::shared::STATUS_CMD_ERROR; use crate::builtins::shared::STATUS_CMD_OK; @@ -5352,22 +5352,9 @@ fn extract_tokens(s: &wstr) -> Vec { | ParseTreeFlags::LEAVE_UNTERMINATED; let ast = Ast::parse(s, ast_flags, None); - // Helper to check if a node is the command portion of a decorated statement. - let is_command = |node: &dyn ast::Node| { - let mut cursor = Some(node); - while let Some(cur) = cursor { - if let Some(stmt) = cur.as_decorated_statement() { - if is_same_node(node, &stmt.command) { - return true; - } - } - cursor = cur.parent(); - } - false - }; - let mut result = vec![]; - for node in Traversal::new(ast.top()) { + let mut traversal = ast.walk(); + while let Some(node) = traversal.next() { // We are only interested in leaf nodes with source. if node.category() != Category::leaf { continue; @@ -5404,10 +5391,12 @@ fn extract_tokens(s: &wstr) -> Vec { if !has_cmd_subs { // Common case of no command substitutions in this leaf node. - result.push(PositionedToken { - range, - is_cmd: is_command(node), - }) + // Check if a node is the command portion of a decorated statement. + let is_cmd = traversal + .parent(node) + .as_decorated_statement() + .is_some_and(|stmt| is_same_node(node, &stmt.command)); + result.push(PositionedToken { range, is_cmd }) } } From 374b504eeb1f67a58cc295c04abc35e57203297b Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sat, 12 Apr 2025 09:56:05 -0700 Subject: [PATCH 16/19] parse_util_detect_errors_in_ast to stop using ast parent pointers Continue to move away from parent pointers. --- src/parse_util.rs | 106 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/src/parse_util.rs b/src/parse_util.rs index 6a2938fef..d17adfe73 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -1,5 +1,7 @@ //! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code. -use crate::ast::{self, is_same_node, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token}; +use crate::ast::{ + self, is_same_node, Ast, Keyword, Leaf, List, Node, NodeVisitor, Token, Traversal, +}; use crate::builtins::shared::builtin_exists; use crate::common::{ escape_string, unescape_string, valid_var_name, valid_var_name_char, EscapeFlags, @@ -1185,7 +1187,8 @@ pub fn parse_util_detect_errors_in_ast( // Verify return only within a function. // Verify no variable expansions. - for node in ast::Traversal::new(ast.top()) { + let mut traversal = ast::Traversal::new(ast.top()); + while let Some(node) = traversal.next() { if let Some(jc) = node.as_job_continuation() { // Somewhat clumsy way of checking for a statement without source in a pipeline. // See if our pipe has source but our statement does not. @@ -1214,23 +1217,28 @@ pub fn parse_util_detect_errors_in_ast( // while foo & ; end // If it's not a background job, nothing to do. if job.bg.is_some() { - errored |= detect_errors_in_backgrounded_job(job, &mut out_errors); + errored |= detect_errors_in_backgrounded_job(&traversal, job, &mut out_errors); } } else if let Some(stmt) = node.as_decorated_statement() { - errored |= detect_errors_in_decorated_statement(buff_src, stmt, &mut out_errors); + errored |= + detect_errors_in_decorated_statement(buff_src, &traversal, stmt, &mut out_errors); } else if let Some(block) = node.as_block_statement() { // If our 'end' had no source, we are unsourced. if !block.end.has_source() { has_unclosed_block = true; } - errored |= - detect_errors_in_block_redirection_list(&block.args_or_redirs, &mut out_errors); + errored |= detect_errors_in_block_redirection_list( + node, + &block.args_or_redirs, + &mut out_errors, + ); } else if let Some(brace_statement) = node.as_brace_statement() { // If our closing brace had no source, we are unsourced. if !brace_statement.right_brace.has_source() { has_unclosed_block = true; } errored |= detect_errors_in_block_redirection_list( + node, &brace_statement.args_or_redirs, &mut out_errors, ); @@ -1240,14 +1248,17 @@ pub fn parse_util_detect_errors_in_ast( has_unclosed_block = true; } errored |= - detect_errors_in_block_redirection_list(&ifs.args_or_redirs, &mut out_errors); + detect_errors_in_block_redirection_list(node, &ifs.args_or_redirs, &mut out_errors); } else if let Some(switchs) = node.as_switch_statement() { // If our 'end' had no source, we are unsourced. if !switchs.end.has_source() { has_unclosed_block = true; } - errored |= - detect_errors_in_block_redirection_list(&switchs.args_or_redirs, &mut out_errors); + errored |= detect_errors_in_block_redirection_list( + node, + &switchs.args_or_redirs, + &mut out_errors, + ); } } @@ -1505,8 +1516,9 @@ fn detect_errors_in_job_conjunction( false } -/// Given that the job given by node should be backgrounded, return true if we detect any errors. +/// Given that the job should be backgrounded, return true if we detect any errors. fn detect_errors_in_backgrounded_job( + traversal: &Traversal, job: &ast::JobPipeline, parse_errors: &mut Option<&mut ParseErrorList>, ) -> bool { @@ -1520,20 +1532,19 @@ fn detect_errors_in_backgrounded_job( // foo & ; or bar // if foo & ; end // while foo & ; end - let Some(job_conj) = job.parent().unwrap().as_job_conjunction() else { + let Some(job_conj) = traversal.parent(job).as_job_conjunction() else { return false; }; - if job_conj.parent().unwrap().as_if_clause().is_some() - || job_conj.parent().unwrap().as_while_header().is_some() - { + let job_conj_parent = traversal.parent(job_conj); + if job_conj_parent.as_if_clause().is_some() || job_conj_parent.as_while_header().is_some() { errored = append_syntax_error!( parse_errors, source_range.start(), source_range.length(), BACKGROUND_IN_CONDITIONAL_ERROR_MSG ); - } else if let Some(jlist) = job_conj.parent().unwrap().as_job_list() { + } else if let Some(jlist) = job_conj_parent.as_job_list() { // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. // Find the index of ourselves in the job list. let index = jlist @@ -1567,9 +1578,10 @@ fn detect_errors_in_backgrounded_job( } /// Given a source buffer `buff_src` and decorated statement `dst` within it, return true if there -/// is an error and false if not. `storage` may be used to reduce allocations. +/// is an error and false if not. fn detect_errors_in_decorated_statement( buff_src: &wstr, + traversal: &ast::Traversal, dst: &ast::DecoratedStatement, parse_errors: &mut Option<&mut ParseErrorList>, ) -> bool { @@ -1586,17 +1598,13 @@ fn detect_errors_in_decorated_statement( } // Get the statement we are part of. - let st = dst.parent().unwrap().as_statement().unwrap(); + let st = traversal.parent(dst).as_statement().unwrap(); // Walk up to the job. - let mut job = None; - let mut cursor = dst.parent(); - while job.is_none() { - let c = cursor.expect("Reached root without finding a job"); - job = c.as_job_pipeline(); - cursor = c.parent(); - } - let job = job.expect("Should have found the job"); + let job = traversal + .parent_nodes() + .find_map(|n| n.as_job_pipeline()) + .expect("Should have found the job"); // Check our pipeline position. let pipe_pos = if job.continuation.is_empty() { @@ -1700,30 +1708,31 @@ fn detect_errors_in_decorated_statement( } // Check that we don't break or continue from outside a loop. - if !errored && [L!("break"), L!("continue")].contains(&&command[..]) && !first_arg_is_help { + if !errored && (command == "break" || command == "continue") && !first_arg_is_help { // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, // stop the search; we can't break an outer loop from inside a function. // This is a little funny because we can't tell if it's a 'for' or 'while' // loop from the ancestor alone; we need the header. That is, we hit a // block_statement, and have to check its header. let mut found_loop = false; - let mut ancestor: Option<&dyn Node> = Some(dst); - while let Some(anc) = ancestor { - if let Some(block) = anc.as_block_statement() { - if [ast::Type::for_header, ast::Type::while_header] - .contains(&block.header.typ()) - { + for block in traversal + .parent_nodes() + .filter_map(|anc| anc.as_block_statement()) + { + match block.header.typ() { + ast::Type::for_header | ast::Type::while_header => { // This is a loop header, so we can break or continue. found_loop = true; break; - } else if block.header.typ() == ast::Type::function_header { + } + ast::Type::function_header => { // This is a function header, so we cannot break or // continue. We stop our search here. found_loop = false; break; } + _ => {} } - ancestor = anc.parent(); } if !found_loop { @@ -1779,34 +1788,23 @@ fn detect_errors_in_decorated_statement( errored } -// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that -// there are no arguments in the list. +// Given we have a trailing ArgumentOrRedirectionList, like `begin; end > /dev/null`, verify that +// there are no arguments in the list. The parent of the list is provided. fn detect_errors_in_block_redirection_list( + parent: &dyn Node, args_or_redirs: &ast::ArgumentOrRedirectionList, out_errors: &mut Option<&mut ParseErrorList>, ) -> bool { let Some(first_arg) = get_first_arg(args_or_redirs) else { return false; }; - if args_or_redirs - .parent() - .unwrap() - .as_brace_statement() - .is_some() - { - return append_syntax_error!( - out_errors, - first_arg.source_range().start(), - first_arg.source_range().length(), - RIGHT_BRACE_ARG_ERR_MSG - ); + let r = first_arg.source_range(); + if parent.as_brace_statement().is_some() { + append_syntax_error!(out_errors, r.start(), r.length(), RIGHT_BRACE_ARG_ERR_MSG); + } else { + append_syntax_error!(out_errors, r.start(), r.length(), END_ARG_ERR_MSG); } - append_syntax_error!( - out_errors, - first_arg.source_range().start(), - first_arg.source_range().length(), - END_ARG_ERR_MSG - ) + true } /// Given a string containing a variable expansion error, append an appropriate error to the errors From a528567d5c0919b245d12d4761acc0eb134e1681 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sat, 19 Apr 2025 18:16:26 -0700 Subject: [PATCH 17/19] IndentVisitor to stop using parent pointers We can just store these ourselves; don't have to be baked into the AST. --- src/parse_util.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/parse_util.rs b/src/parse_util.rs index d17adfe73..eb3ec9b6d 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -827,7 +827,10 @@ pub fn apply_indents(src: &wstr, indents: &[i32]) -> WString { // Visit all of our nodes. When we get a job_list or case_item_list, increment indent while // visiting its children. struct IndentVisitor<'a> { - // companion: Pin<&'a mut indent_visitor_t>, + // The parent node of the node we are currently visiting, or None if we are the root. + parent: Option<&'a dyn ast::Node>, + + // companion: Pin<&'a mut IndentVisitor>, // The one-past-the-last index of the most recently encountered leaf node. // We use this to populate the indents even if there's no tokens in the range. last_leaf_end: usize, @@ -851,9 +854,11 @@ struct IndentVisitor<'a> { // List of locations of escaped newline characters. line_continuations: Vec, } + impl<'a> IndentVisitor<'a> { fn new(src: &'a wstr, indents: &'a mut Vec, initial_indent: i32) -> Self { Self { + parent: None, last_leaf_end: 0, last_indent: initial_indent - 1, unclosed: false, @@ -974,6 +979,7 @@ fn indent_string_part(&mut self, range: Range, is_double_quoted: bool) { } } } + impl<'a> NodeVisitor<'a> for IndentVisitor<'a> { // Default implementation is to just visit children. fn visit(&mut self, node: &'a dyn Node) { @@ -989,13 +995,14 @@ fn visit(&mut self, node: &'a dyn Node) { // Increment indents for conditions in headers (#1665). Type::job_conjunction => { - if [Type::while_header, Type::if_clause].contains(&node.parent().unwrap().typ()) { + let typ = self.parent.unwrap().typ(); + if matches!(typ, Type::if_clause | Type::while_header) { inc = 1; dec = 1; } } - // Increment indents for job_continuation_t if it contains a newline. + // Increment indents for JobContinuation if it contains a newline. // This is a bit of a hack - it indents cases like: // cmd1 | // ....cmd2 @@ -1038,12 +1045,12 @@ fn visit(&mut self, node: &'a dyn Node) { // To address this, if we see that the switch statement was not closed, do not // decrement the indent afterwards. inc = 1; - let switchs = node.parent().unwrap().as_switch_statement().unwrap(); + let switchs = self.parent.unwrap().as_switch_statement().unwrap(); dec = if switchs.end.has_source() { 1 } else { 0 }; } Type::token_base => { let token_type = node.as_token().unwrap().token_type(); - let parent_type = node.parent().unwrap().typ(); + let parent_type = self.parent.unwrap().typ(); if parent_type == Type::begin_header && token_type == ParseTokenType::end { // The newline after "begin" is optional, so it is part of the header. // The header is not in the indented block, so indent the newline here. @@ -1093,7 +1100,9 @@ fn visit(&mut self, node: &'a dyn Node) { self.last_indent = self.indent; } + let saved = self.parent.replace(node); node.accept(self, false); + self.parent = saved; self.indent -= dec; } } From ff87e2cf0af9d394cf31ed50f53530b6ef4e4a12 Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sat, 5 Apr 2025 14:46:41 -0700 Subject: [PATCH 18/19] ast: remove parent pointers This removes parent back-pointers from ast nodes. Nodes no longer store references to their parents, resulting in memory size reductions for the ast (__fish_complete_gpg.fish goes from 508 KB to 198 KB in memory usage). --- src/ast.rs | 175 ----------------------------------------------------- 1 file changed, 175 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 5c6aab5f5..0b46b15d4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -98,9 +98,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { /// Node is the base trait of all AST nodes. pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug { - /// The parent node, or None if this is root. - fn parent(&self) -> Option<&dyn Node>; - /// The type of this node. fn typ(&self) -> Type; @@ -509,9 +506,6 @@ impl Node for $name { fn typ(&self) -> Type { Type::$type } - fn parent(&self) -> Option<&dyn Node> { - self.parent.map(|p| unsafe { &*p }) - } fn category(&self) -> Category { Category::$category } @@ -557,10 +551,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, VisitResult::Continue(())); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) {} - } }; } @@ -569,7 +559,6 @@ macro_rules! define_keyword_node { ( $name:ident, $($allowed:ident),* $(,)? ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, range: Option, keyword: ParseKeyword, } @@ -610,7 +599,6 @@ macro_rules! define_token_node { ( $name:ident, $($allowed:ident),* $(,)? ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, range: Option, parse_token_type: ParseTokenType, } @@ -664,7 +652,6 @@ macro_rules! define_list_node { ) => { #[derive(Default, Debug)] pub struct $name { - parent: Option<*const dyn Node>, list_contents: Box<[$contents]>, } implement_node!($name, list, $type); @@ -712,15 +699,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, flow); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) { - for i in 0..self.count() { - self[i].parent = Some(self); - self[i].set_parents(); - } - } - } }; } @@ -811,14 +789,6 @@ fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { visitor.did_visit_fields_of(self, flow); } } - impl $name { - /// Set the parent fields of all nodes in the tree rooted at `self`. - fn set_parents(&mut self) { - $( - set_parent_of_field!(self, $field_name, $field_type); - )* - } - } } } @@ -1017,108 +987,10 @@ macro_rules! visit_result { }; } -macro_rules! set_parent_of_field { - ( - $self:ident, - $field_name:ident, - (variant<$field_type:ident>) - ) => { - set_parent_of_union_field!($self, $field_name, $field_type); - }; - ( - $self:ident, - $field_name:ident, - (Option<$field_type:ident>) - ) => { - if $self.$field_name.is_some() { - $self.$field_name.as_mut().unwrap().parent = Some($self); - $self.$field_name.as_mut().unwrap().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - $field_type:tt - ) => { - $self.$field_name.parent = Some($self); - $self.$field_name.set_parents(); - }; -} - -macro_rules! set_parent_of_union_field { - ( - $self:ident, - $field_name:ident, - ArgumentOrRedirectionVariant - ) => { - if matches!($self.$field_name, ArgumentOrRedirectionVariant::Argument(_)) { - $self.$field_name.as_mut_argument().parent = Some($self); - $self.$field_name.as_mut_argument().set_parents(); - } else { - $self.$field_name.as_mut_redirection().parent = Some($self); - $self.$field_name.as_mut_redirection().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - StatementVariant - ) => { - if matches!($self.$field_name, StatementVariant::NotStatement(_)) { - $self.$field_name.as_mut_not_statement().parent = Some($self); - $self.$field_name.as_mut_not_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::BlockStatement(_)) { - $self.$field_name.as_mut_block_statement().parent = Some($self); - $self.$field_name.as_mut_block_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::BraceStatement(_)) { - $self.$field_name.as_mut_brace_statement().parent = Some($self); - $self.$field_name.as_mut_brace_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::IfStatement(_)) { - $self.$field_name.as_mut_if_statement().parent = Some($self); - $self.$field_name.as_mut_if_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::SwitchStatement(_)) { - $self.$field_name.as_mut_switch_statement().parent = Some($self); - $self.$field_name.as_mut_switch_statement().set_parents(); - } else if matches!($self.$field_name, StatementVariant::DecoratedStatement(_)) { - $self.$field_name.as_mut_decorated_statement().parent = Some($self); - $self.$field_name.as_mut_decorated_statement().set_parents(); - } - }; - ( - $self:ident, - $field_name:ident, - BlockStatementHeaderVariant - ) => { - if matches!($self.$field_name, BlockStatementHeaderVariant::ForHeader(_)) { - $self.$field_name.as_mut_for_header().parent = Some($self); - $self.$field_name.as_mut_for_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::WhileHeader(_) - ) { - $self.$field_name.as_mut_while_header().parent = Some($self); - $self.$field_name.as_mut_while_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::FunctionHeader(_) - ) { - $self.$field_name.as_mut_function_header().parent = Some($self); - $self.$field_name.as_mut_function_header().set_parents(); - } else if matches!( - $self.$field_name, - BlockStatementHeaderVariant::BeginHeader(_) - ) { - $self.$field_name.as_mut_begin_header().parent = Some($self); - $self.$field_name.as_mut_begin_header().set_parents(); - } - }; -} - /// A redirection has an operator like > or 2>, and a target like /dev/null or &1. /// Note that pipes are not redirections. #[derive(Default, Debug)] pub struct Redirection { - parent: Option<*const dyn Node>, pub oper: TokenRedirection, pub target: String_, } @@ -1159,7 +1031,6 @@ fn as_mut_variable_assignment_list(&mut self) -> Option<&mut VariableAssignmentL /// An argument or redirection holds either an argument or redirection. #[derive(Default, Debug)] pub struct ArgumentOrRedirection { - parent: Option<*const dyn Node>, pub contents: ArgumentOrRedirectionVariant, } implement_node!(ArgumentOrRedirection, branch, argument_or_redirection); @@ -1203,7 +1074,6 @@ fn as_mut_argument_or_redirection_list(&mut self) -> Option<&mut ArgumentOrRedir /// A statement is a normal command, or an if / while / etc #[derive(Default, Debug)] pub struct Statement { - parent: Option<*const dyn Node>, pub contents: StatementVariant, } implement_node!(Statement, branch, statement); @@ -1223,7 +1093,6 @@ fn as_mut_statement(&mut self) -> Option<&mut Statement> { /// like if statements, where we require a command). #[derive(Default, Debug)] pub struct JobPipeline { - parent: Option<*const dyn Node>, /// Maybe the time keyword. pub time: Option, /// A (possibly empty) list of variable assignments. @@ -1258,7 +1127,6 @@ fn as_mut_job_pipeline(&mut self) -> Option<&mut JobPipeline> { /// A job_conjunction is a job followed by a && or || continuations. #[derive(Default, Debug)] pub struct JobConjunction { - parent: Option<*const dyn Node>, /// The job conjunction decorator. pub decorator: Option, /// The job itself. @@ -1303,7 +1171,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { #[derive(Default, Debug)] pub struct ForHeader { - parent: Option<*const dyn Node>, /// 'for' pub kw_for: KeywordFor, /// var_name @@ -1337,7 +1204,6 @@ fn as_mut_for_header(&mut self) -> Option<&mut ForHeader> { #[derive(Default, Debug)] pub struct WhileHeader { - parent: Option<*const dyn Node>, /// 'while' pub kw_while: KeywordWhile, pub condition: JobConjunction, @@ -1363,7 +1229,6 @@ fn as_mut_while_header(&mut self) -> Option<&mut WhileHeader> { #[derive(Default, Debug)] pub struct FunctionHeader { - parent: Option<*const dyn Node>, pub kw_function: KeywordFunction, /// functions require at least one argument. pub first_arg: Argument, @@ -1391,7 +1256,6 @@ fn as_mut_function_header(&mut self) -> Option<&mut FunctionHeader> { #[derive(Default, Debug)] pub struct BeginHeader { - parent: Option<*const dyn Node>, pub kw_begin: KeywordBegin, /// Note that 'begin' does NOT require a semi or nl afterwards. /// This is valid: begin echo hi; end @@ -1416,7 +1280,6 @@ fn as_mut_begin_header(&mut self) -> Option<&mut BeginHeader> { #[derive(Default, Debug)] pub struct BlockStatement { - parent: Option<*const dyn Node>, /// A header like for, while, etc. pub header: BlockStatementHeaderVariant, /// List of jobs in this block. @@ -1447,7 +1310,6 @@ fn as_mut_block_statement(&mut self) -> Option<&mut BlockStatement> { #[derive(Default, Debug)] pub struct BraceStatement { - parent: Option<*const dyn Node>, /// The opening brace, in command position. pub left_brace: TokenLeftBrace, /// List of jobs in this block. @@ -1478,7 +1340,6 @@ fn as_mut_brace_statement(&mut self) -> Option<&mut BraceStatement> { #[derive(Default, Debug)] pub struct IfClause { - parent: Option<*const dyn Node>, /// The 'if' keyword. pub kw_if: KeywordIf, /// The 'if' condition. @@ -1509,7 +1370,6 @@ fn as_mut_if_clause(&mut self) -> Option<&mut IfClause> { #[derive(Default, Debug)] pub struct ElseifClause { - parent: Option<*const dyn Node>, /// The 'else' keyword. pub kw_else: KeywordElse, /// The 'if' clause following it. @@ -1552,7 +1412,6 @@ fn as_mut_elseif_clause_list(&mut self) -> Option<&mut ElseifClauseList> { #[derive(Default, Debug)] pub struct ElseClause { - parent: Option<*const dyn Node>, /// else ; body pub kw_else: KeywordElse, pub semi_nl: Option, @@ -1583,7 +1442,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { #[derive(Default, Debug)] pub struct IfStatement { - parent: Option<*const dyn Node>, /// if part pub if_clause: IfClause, /// else if list @@ -1617,7 +1475,6 @@ fn as_mut_if_statement(&mut self) -> Option<&mut IfStatement> { #[derive(Default, Debug)] pub struct CaseItem { - parent: Option<*const dyn Node>, /// case \ ; body pub kw_case: KeywordCase, pub arguments: ArgumentList, @@ -1650,7 +1507,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { #[derive(Default, Debug)] pub struct SwitchStatement { - parent: Option<*const dyn Node>, /// switch \ ; body ; end args_redirs pub kw_switch: KeywordSwitch, pub argument: Argument, @@ -1684,7 +1540,6 @@ fn as_mut_switch_statement(&mut self) -> Option<&mut SwitchStatement> { /// "builtin" or "command" or "exec" #[derive(Default, Debug)] pub struct DecoratedStatement { - parent: Option<*const dyn Node>, /// An optional decoration (command, builtin, exec, etc). pub opt_decoration: Option, /// Command to run. @@ -1713,7 +1568,6 @@ fn as_mut_decorated_statement(&mut self) -> Option<&mut DecoratedStatement> { /// A not statement like `not true` or `! true` #[derive(Default, Debug)] pub struct NotStatement { - parent: Option<*const dyn Node>, /// Keyword, either not or exclam. pub kw: KeywordNot, pub time: Option, @@ -1741,7 +1595,6 @@ fn as_mut_not_statement(&mut self) -> Option<&mut NotStatement> { #[derive(Default, Debug)] pub struct JobContinuation { - parent: Option<*const dyn Node>, pub pipe: TokenPipe, pub newlines: MaybeNewlines, pub variables: VariableAssignmentList, @@ -1785,7 +1638,6 @@ fn as_mut_job_continuation_list(&mut self) -> Option<&mut JobContinuationList> { #[derive(Default, Debug)] pub struct JobConjunctionContinuation { - parent: Option<*const dyn Node>, /// The && or || token. pub conjunction: TokenConjunction, pub newlines: MaybeNewlines, @@ -1825,7 +1677,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { /// instances of this. #[derive(Default, Debug)] pub struct AndorJob { - parent: Option<*const dyn Node>, pub job: JobConjunction, } implement_node!(AndorJob, branch, andor_job); @@ -1873,7 +1724,6 @@ fn as_mut_andor_job_list(&mut self) -> Option<&mut AndorJobList> { /// In practice the tok_ends are ignored by fish code so we do not bother to store them. #[derive(Default, Debug)] pub struct FreestandingArgumentList { - parent: Option<*const dyn Node>, pub arguments: ArgumentList, } implement_node!(FreestandingArgumentList, branch, freestanding_argument_list); @@ -1947,7 +1797,6 @@ fn as_mut_case_item_list(&mut self) -> Option<&mut CaseItemList> { /// A variable_assignment contains a source range like FOO=bar. #[derive(Default, Debug)] pub struct VariableAssignment { - parent: Option<*const dyn Node>, range: Option, } implement_node!(VariableAssignment, leaf, variable_assignment); @@ -1988,7 +1837,6 @@ fn can_be_parsed(pop: &mut Populator<'_>) -> bool { /// Zero or more newlines. #[derive(Default, Debug)] pub struct MaybeNewlines { - parent: Option<*const dyn Node>, range: Option, } implement_node!(MaybeNewlines, leaf, maybe_newlines); @@ -2014,7 +1862,6 @@ fn as_mut_maybe_newlines(&mut self) -> Option<&mut MaybeNewlines> { /// This is a separate type because it is sometimes useful to find all arguments. #[derive(Default, Debug)] pub struct Argument { - parent: Option<*const dyn Node>, range: Option, } implement_node!(Argument, leaf, argument); @@ -2659,9 +2506,6 @@ pub fn walk(&'_ self) -> Traversal<'_> { pub fn top(&self) -> &dyn Node { self.top.as_node() } - fn top_mut(&mut self) -> &mut dyn NodeMut { - &mut *self.top - } /// Return whether any errors were encountered during parsing. pub fn errored(&self) -> bool { self.any_error @@ -4104,25 +3948,6 @@ fn parse_from_top( semis: pops.semis, errors: pops.errors, }; - - if top_type == Type::job_list { - // Set all parent nodes. - // It turns out to be more convenient to do this after the parse phase. - // Note: parent nodes are implemented as raw pointers! This means that the contents Ast must not - // change or move after construction. - ast.top_mut() - .as_mut_job_list() - .as_mut() - .unwrap() - .set_parents(); - } else { - ast.top_mut() - .as_mut_freestanding_argument_list() - .as_mut() - .unwrap() - .set_parents(); - } - ast } From e6bc8ffa137f52cef0d683fc2fec2013453c2abf Mon Sep 17 00:00:00 2001 From: Peter Ammon Date: Sun, 20 Apr 2025 10:47:24 -0700 Subject: [PATCH 19/19] ast: remove dead code Having removed parent pointers, a bunch of existing ast "as_" functions have become unused; remove them. --- src/ast.rs | 72 ------------------------------------------------------ 1 file changed, 72 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 0b46b15d4..2a57de0b2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2012,18 +2012,6 @@ fn embedded_node(&self) -> &dyn NodeMut { ArgumentOrRedirectionVariant::Redirection(node) => node, } } - fn as_mut_argument(&mut self) -> &mut Argument { - match self { - ArgumentOrRedirectionVariant::Argument(node) => node, - _ => panic!(), - } - } - fn as_mut_redirection(&mut self) -> &mut Redirection { - match self { - ArgumentOrRedirectionVariant::Redirection(redirection) => redirection, - _ => panic!(), - } - } } impl ArgumentOrRedirection { @@ -2134,30 +2122,6 @@ fn embedded_node(&self) -> &dyn NodeMut { BlockStatementHeaderVariant::BeginHeader(node) => node, } } - fn as_mut_for_header(&mut self) -> &mut ForHeader { - match self { - BlockStatementHeaderVariant::ForHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_while_header(&mut self) -> &mut WhileHeader { - match self { - BlockStatementHeaderVariant::WhileHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_function_header(&mut self) -> &mut FunctionHeader { - match self { - BlockStatementHeaderVariant::FunctionHeader(node) => node, - _ => panic!(), - } - } - fn as_mut_begin_header(&mut self) -> &mut BeginHeader { - match self { - BlockStatementHeaderVariant::BeginHeader(node) => node, - _ => panic!(), - } - } } #[derive(Debug)] @@ -2260,42 +2224,6 @@ fn embedded_node(&self) -> &dyn NodeMut { StatementVariant::DecoratedStatement(node) => node, } } - fn as_mut_not_statement(&mut self) -> &mut NotStatement { - match self { - StatementVariant::NotStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_block_statement(&mut self) -> &mut BlockStatement { - match self { - StatementVariant::BlockStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_brace_statement(&mut self) -> &mut BraceStatement { - match self { - StatementVariant::BraceStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_if_statement(&mut self) -> &mut IfStatement { - match self { - StatementVariant::IfStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_switch_statement(&mut self) -> &mut SwitchStatement { - match self { - StatementVariant::SwitchStatement(node) => node, - _ => panic!(), - } - } - fn as_mut_decorated_statement(&mut self) -> &mut DecoratedStatement { - match self { - StatementVariant::DecoratedStatement(node) => node, - _ => panic!(), - } - } } /// Return a string literal name for an ast type.