diff --git a/src/function.rs b/src/function.rs index da25d529b..a2598be65 100644 --- a/src/function.rs +++ b/src/function.rs @@ -385,7 +385,6 @@ pub fn copy_definition_file(&self) -> Option<&wstr> { pub fn definition_lineno(&self) -> i32 { // Return one plus the number of newlines at offsets less than the start of our function's // statement (which includes the header). - // TODO: merge with line_offset_of_character_at_offset? let Some(source_range) = self.func_node.try_source_range() else { panic!("Function has no source range"); }; diff --git a/src/parse_tree.rs b/src/parse_tree.rs index 27f7ad6f1..38f063baa 100644 --- a/src/parse_tree.rs +++ b/src/parse_tree.rs @@ -110,6 +110,38 @@ unsafe impl Sync for ParsedSource {} const _: () = assert_send::(); const _: () = assert_sync::(); +/// Cache for efficient line number computation when processing multiple nodes. +/// Caches the last source's offset and newline count. +#[derive(Default)] +pub struct SourceLineCache { + last_source: *const ParsedSource, // Pointer to last source used + offset: usize, // Exclusive offset of the number of newlines counted + count: usize, // Count of newlines up to offset +} + +impl ParsedSource { + /// Compute the 1-based line number for a given offset, using and updating the cache. + pub fn lineno_for_offset(&self, offset: usize, cache: &mut SourceLineCache) -> u32 { + let self_ptr = std::ptr::from_ref(self); + + // If source changed, reset cache. + if cache.last_source != self_ptr { + cache.last_source = self_ptr; + cache.offset = 0; + cache.count = 0; + } + + if offset > cache.offset { + cache.count += count_newlines(&self.src[cache.offset..offset]); + } else if offset < cache.offset { + cache.count -= count_newlines(&self.src[offset..cache.offset]); + } + cache.offset = offset; + + cache.count as u32 + 1 + } +} + impl ParsedSource { pub fn new(src: WString, ast: Ast) -> Self { ParsedSource { src, ast } @@ -120,8 +152,6 @@ pub fn line_counter(self: &Arc) -> LineCounter { LineCounter { parsed_source: Pin::new(Arc::clone(self)), node: std::ptr::null(), - cached_offset: 0, - cached_count: 0, } } @@ -190,6 +220,18 @@ pub fn parsed_source(&self) -> &ParsedSource { pub fn parsed_source_ref(&self) -> ParsedSourceRef { Pin::into_inner(self.parsed_source.clone()) } + + /// Return the 1-based line number of this node, or None if unsourced. + pub fn lineno(&self) -> Option { + self.lineno_with_cache(&mut SourceLineCache::default()) + } + + /// Return the 1-based line number of this node using a cache, or None if unsourced. + pub fn lineno_with_cache(&self, cache: &mut SourceLineCache) -> Option { + let range = self.try_source_range()?; + let lineno = self.parsed_source.lineno_for_offset(range.start(), cache); + Some(std::num::NonZeroU32::new(lineno).unwrap()) + } } // Safety: NodeRef is Send and Sync because it's just a pointer into a parse tree, which is pinned. @@ -221,10 +263,6 @@ pub struct LineCounter { /// The node itself. This points into the parsed source, or it may be null. pub node: *const NodeType, - - // Cached line number information: the line number of the start of the node, and the number of newlines. - cached_offset: usize, - cached_count: usize, } impl LineCounter { @@ -235,42 +273,9 @@ pub fn empty() -> Self { LineCounter { parsed_source, node: std::ptr::null(), - cached_offset: 0, - cached_count: 0, } } - // Count the number of newlines, leveraging our cache. - pub fn line_offset_of_character_at_offset(&mut self, offset: usize) -> usize { - let src = &self.parsed_source.src; - assert!(offset <= src.len()); - - // Easy hack to handle 0. - if offset == 0 { - return 0; - } - - // We want to return the number of newlines at offsets less than the given offset. - #[allow(clippy::comparison_chain)] // TODO(MSRV>=1.90) for old clippy - if offset > self.cached_offset { - // Add one for every newline we find in the range [cached_offset, offset). - // The codegen is substantially better when using a char slice than the char iterator. - self.cached_count += count_newlines(&src[self.cached_offset..offset]); - } else if offset < self.cached_offset { - // Subtract one for every newline we find in the range [offset, cached_range.start). - self.cached_count -= count_newlines(&src[offset..self.cached_offset]); - } - self.cached_offset = offset; - self.cached_count - } - - // Returns the 0-based line number of the node. - pub fn line_offset_of_node(&mut self) -> Option { - let src_offset = self.source_offset_of_node()?; - // Safe to cast/truncate to u32 since SourceRange stores it as a u32 internally - Some(self.line_offset_of_character_at_offset(src_offset) as u32) - } - // Return the 0 based character offset of the node. pub fn source_offset_of_node(&mut self) -> Option { // Safety: any node is valid for the lifetime of the source. @@ -283,4 +288,15 @@ pub fn source_offset_of_node(&mut self) -> Option { pub fn get_source(&self) -> &wstr { &self.parsed_source.src } + + /// Return a NodeRef for the current node, if any. + pub fn node_ref(&self) -> Option> { + if self.node.is_null() { + return None; + } + Some(NodeRef::new( + Pin::into_inner(self.parsed_source.clone()), + self.node, + )) + } } diff --git a/src/parser.rs b/src/parser.rs index c2a30adca..ff0bddf2e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -26,8 +26,7 @@ SOURCE_LOCATION_UNKNOWN, }; use crate::parse_execution::{EndExecutionReason, ExecutionContext}; -use crate::parse_tree::NodeRef; -use crate::parse_tree::{LineCounter, ParsedSourceRef, parse_source}; +use crate::parse_tree::{LineCounter, NodeRef, ParsedSourceRef, SourceLineCache, parse_source}; use crate::portable_atomic::AtomicU64; use crate::prelude::*; use crate::proc::{JobGroupRef, JobList, JobRef, Pid, ProcStatus, job_reap}; @@ -81,8 +80,8 @@ pub struct Block { /// Name of the file that created this block pub src_filename: Option>, - /// Line number where this block was created, starting from 1. - pub src_lineno: Option, + /// The node containing this block, for lazy line number computation. + src_node: Option>, } impl Block { @@ -95,6 +94,11 @@ pub fn data(&self) -> Option<&BlockData> { pub fn wants_pop_env(&self) -> bool { self.typ() != BlockType::top } + + /// Return the 1-based line number of this block, using a cache. + pub fn src_lineno(&self, cache: &mut SourceLineCache) -> Option { + self.src_node.as_ref()?.lineno_with_cache(cache) + } } impl Block { @@ -124,7 +128,7 @@ pub fn description(&self) -> WString { } .to_owned(); - if let Some(src_lineno) = self.src_lineno { + if let Some(src_lineno) = self.src_lineno(&mut SourceLineCache::default()) { result.push_utfstr(&sprintf!(" (line %d)", src_lineno.get())); } if let Some(src_filename) = &self.src_filename { @@ -821,16 +825,21 @@ pub fn current_line(&self) -> WString { /// Returns the current line number, indexed from 1. pub fn get_lineno(&self) -> Option { - // The offset is 0 based; the number is 1 based. self.line_counter - .borrow_mut() - .line_offset_of_node() - .map(|offset| NonZeroU32::new(offset.saturating_add(1)).unwrap()) + .borrow() + .node_ref() + .and_then(|n| n.lineno()) } /// Returns the current line number, indexed from 1, or zero if not sourced. pub fn get_lineno_for_display(&self) -> u32 { - self.get_lineno().map_or(0, |val| val.get()) + self.get_lineno().map_or(0, |n| n.get()) + } + + /// Returns a NodeRef to the current node being executed, if any. + /// This can be used for lazy line number computation. + pub fn current_node_ref(&self) -> Option> { + self.line_counter.borrow().node_ref() } /// Return whether we are currently evaluating a "block" such as an if statement. @@ -1021,8 +1030,8 @@ pub fn sync_uvars_and_fire(&self, always: bool) { /// Pushes a new block. Returns an id (index) of the block, which is stored in the parser. pub fn push_block(&self, mut block: Block) -> BlockId { - block.src_lineno = self.get_lineno(); block.src_filename = self.current_filename(); + block.src_node = self.line_counter.borrow().node_ref(); if block.typ() != BlockType::top { let new_scope = block.typ() == BlockType::function_call { shadows: true }; self.vars().push(new_scope); @@ -1212,7 +1221,7 @@ pub fn current_filename(&self) -> Option { Some(BlockData::Function { name, .. }) => { function::get_props(name).and_then(|props| props.definition_file.clone()) } - Some(BlockData::Source { file }) => Some(file.clone()), + Some(BlockData::Source { file, .. }) => Some(file.clone()), _ => None, }) .or_else(|| self.libdata().current_filename.clone()) @@ -1226,6 +1235,7 @@ pub fn is_interactive(&self) -> bool { /// Return a string representing the current stack trace. pub fn stack_trace(&self) -> WString { + let mut line_cache = SourceLineCache::default(); self.blocks_iter_rev() // Stop at event handler. No reason to believe that any other code is relevant. // It might make sense in the future to continue printing the stack trace of the code @@ -1233,7 +1243,7 @@ pub fn stack_trace(&self) -> WString { // detect that. .take_while(|b| b.typ() != BlockType::event) .fold(WString::new(), |mut trace, b| { - append_block_description_to_stack_trace(self, &b, &mut trace); + append_block_description_to_stack_trace(self, &b, &mut trace, &mut line_cache); trace }) } @@ -1360,8 +1370,13 @@ fn print_profile(items: &[ProfileItem], out: &mut File) { } /// Append stack trace info for the block `b` to `trace`. -fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &mut WString) { - let mut print_call_site = false; +fn append_block_description_to_stack_trace( + parser: &Parser, + b: &Block, + trace: &mut WString, + line_cache: &mut SourceLineCache, +) { + let mut print_source_location = false; match b.typ() { BlockType::function_call { .. } => { let Some(BlockData::Function { name, args, .. }) = b.data() else { @@ -1390,11 +1405,11 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m trace.push_utfstr(&wgettext_fmt!(" with arguments '%s'", args_str)); } trace.push('\n'); - print_call_site = true; + print_source_location = true; } BlockType::subst => { trace.push_utfstr(&wgettext!("in command substitution\n")); - print_call_site = true; + print_source_location = true; } BlockType::source => { let Some(BlockData::Source { file, .. }) = b.data() else { @@ -1405,7 +1420,7 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m "from sourcing file %s\n", &user_presentable_path(source_dest, parser.vars()) )); - print_call_site = true; + print_source_location = true; } BlockType::event => { let Some(BlockData::Event(event)) = b.data() else { @@ -1413,7 +1428,7 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m }; let description = event::get_desc(parser, event); trace.push_utfstr(&wgettext_fmt!("in event handler: %s\n", &description)); - print_call_site = true; + print_source_location = true; } BlockType::top | BlockType::begin @@ -1425,12 +1440,11 @@ fn append_block_description_to_stack_trace(parser: &Parser, b: &Block, trace: &m | BlockType::variable_assignment => {} } - if print_call_site { - // Print where the function is called. + if print_source_location { if let Some(file) = b.src_filename.as_ref() { trace.push_utfstr(&sprintf!( "\tcalled on line %d of file %s\n", - b.src_lineno.map_or(0, |n| n.get()), + b.src_lineno(line_cache).map_or(0, |n| n.get()), user_presentable_path(file, parser.vars()) )); } else if parser.libdata().within_fish_init { @@ -1484,9 +1498,7 @@ pub enum LoopStatus { #[cfg(test)] mod tests { use super::{CancelBehavior, Parser}; - use crate::ast::{ - self, Ast, Castable, JobList, JobPipeline, Kind, Node, Traversal, is_same_node, - }; + use crate::ast::{self, Ast, JobList, Kind, Node, Traversal, is_same_node}; use crate::common::str2wcstring; use crate::env::EnvStack; use crate::expand::ExpandFlags; @@ -1494,7 +1506,6 @@ mod tests { use crate::parse_constants::{ ParseErrorCode, ParseTokenType, ParseTreeFlags, ParserTestErrorBits, StatementDecoration, }; - use crate::parse_tree::{LineCounter, parse_source}; use crate::parse_util::{detect_errors_in_argument, detect_parse_errors}; use crate::prelude::*; use crate::reader::{fake_scoped_reader, reader_reset_interrupted}; @@ -2257,64 +2268,6 @@ fn test_cancellation() { signal_clear_cancel(); } - #[test] - fn test_line_counter() { - let src = L!("echo line1; echo still_line_1;\n\necho line3"); - let ps = parse_source(src.to_owned(), ParseTreeFlags::default(), None) - .expect("Failed to parse source"); - assert!(!ps.ast.errored()); - let mut line_counter = ps.line_counter(); - - // Test line_offset_of_character_at_offset, both forwards and backwards to exercise the cache. - let mut expected = 0; - for (idx, c) in src.chars().enumerate() { - let line_offset = line_counter.line_offset_of_character_at_offset(idx); - assert_eq!(line_offset, expected); - if c == '\n' { - expected += 1; - } - } - for (idx, c) in src.chars().enumerate().rev() { - if c == '\n' { - expected -= 1; - } - let line_offset = line_counter.line_offset_of_character_at_offset(idx); - assert_eq!(line_offset, expected); - } - - let pipelines: Vec<_> = ps.ast.walk().filter_map(ast::JobPipeline::cast).collect(); - assert_eq!(pipelines.len(), 3); - let src_offsets = [0, 0, 2]; - assert_eq!(line_counter.source_offset_of_node(), None); - assert_eq!(line_counter.line_offset_of_node(), None); - - for (idx, &node) in pipelines.iter().enumerate() { - line_counter.node = std::ptr::from_ref(node); - assert_eq!( - line_counter.source_offset_of_node(), - Some(node.source_range().start()) - ); - assert_eq!(line_counter.line_offset_of_node(), Some(src_offsets[idx])); - } - - for (idx, &node) in pipelines.iter().enumerate().rev() { - line_counter.node = std::ptr::from_ref(node); - assert_eq!( - line_counter.source_offset_of_node(), - Some(node.source_range().start()) - ); - assert_eq!(line_counter.line_offset_of_node(), Some(src_offsets[idx])); - } - } - - #[test] - fn test_line_counter_empty() { - let mut line_counter = LineCounter::::empty(); - assert_eq!(line_counter.line_offset_of_character_at_offset(0), 0); - assert_eq!(line_counter.line_offset_of_node(), None); - assert_eq!(line_counter.source_offset_of_node(), None); - } - // Helper for testing a simple ast traversal. // The ast is always for the command 'true;'. struct TrueSemiAstTester<'a> {