From 4ea222cd3458e985661b1e2bff6a078f179f6375 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 13 Jan 2024 17:06:35 -0800 Subject: [PATCH] Improve codegen of line_offset_of_character_at_offset This function is a hotspot, but it has inefficient codegen: 1. For whatever reason, the chars() iterator of wstr is slower than that of a slice. Use the slice. 2. Unnecessary overflow checks were preventing vectorization. Switch to a more optimized implementation. This improves aliases benchmark time by about 9%. --- src/parse_execution.rs | 13 ++++--------- src/wcstringutil.rs | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/parse_execution.rs b/src/parse_execution.rs index d4f963251..e03db1377 100644 --- a/src/parse_execution.rs +++ b/src/parse_execution.rs @@ -48,6 +48,7 @@ use crate::trace::{trace_if_enabled, trace_if_enabled_with_args}; use crate::wchar::{wstr, WString, L}; use crate::wchar_ext::WExt; +use crate::wcstringutil::count_newlines; use crate::wildcard::wildcard_match; use crate::wutil::{wgettext, wgettext_maybe_fmt}; use libc::{c_int, ENOTDIR, EXIT_SUCCESS, STDERR_FILENO, STDOUT_FILENO}; @@ -1946,19 +1947,13 @@ fn line_offset_of_character_at_offset(&self, offset: usize) -> usize { let mut cached_lineno = self.cached_lineno.borrow_mut(); if offset > cached_lineno.offset { // Add one for every newline we find in the range [cached_lineno.offset, offset). + // The codegen is substantially better when using a char slice than the char iterator. let offset = std::cmp::min(offset, src.len()); - let i = src[cached_lineno.offset..offset] - .chars() - .filter(|c| *c == '\n') - .count(); - cached_lineno.count += i; + cached_lineno.count += count_newlines(&src[cached_lineno.offset..offset]); cached_lineno.offset = offset; } else if offset < cached_lineno.offset { // Subtract one for every newline we find in the range [offset, cached_range.start). - cached_lineno.count -= src[offset..cached_lineno.offset] - .chars() - .filter(|c| *c == '\n') - .count(); + cached_lineno.count -= count_newlines(&src[offset..cached_lineno.offset]); cached_lineno.offset = offset; } cached_lineno.count diff --git a/src/wcstringutil.rs b/src/wcstringutil.rs index e08a89b4b..14e8489e8 100644 --- a/src/wcstringutil.rs +++ b/src/wcstringutil.rs @@ -8,6 +8,20 @@ use crate::wchar::{decode_byte_from_char, prelude::*}; use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX}; +/// Return the number of newlines in a string. +pub fn count_newlines(s: &wstr) -> usize { + // This is a performance-sensitive function. + // The native filter().count() produces sub-optimal codegen because of overflow checks, + // which we currently enable in release mode. Implement it more efficiently. + let mut count: usize = 0; + for c in s.as_char_slice() { + if *c == '\n' { + count = count.wrapping_add(1); + } + } + count +} + /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b. pub fn string_prefixes_string_case_insensitive(proposed_prefix: &wstr, value: &wstr) -> bool { let prefix_size = proposed_prefix.len(); @@ -643,3 +657,13 @@ fn test_line_iterator() { ] ); } + +#[test] +fn test_count_newlines() { + assert_eq!(count_newlines(L!("")), 0); + assert_eq!(count_newlines(L!("foo")), 0); + assert_eq!(count_newlines(L!("foo\nbar")), 1); + assert_eq!(count_newlines(L!("foo\nbar\nbaz")), 2); + assert_eq!(count_newlines(L!("\n")), 1); + assert_eq!(count_newlines(L!("\n\n")), 2); +}