From 4ea222cd3458e985661b1e2bff6a078f179f6375 Mon Sep 17 00:00:00 2001
From: ridiculousfish <rf@fishshell.com>
Date: Sat, 13 Jan 2024 17:06:35 -0800
Subject: [PATCH] Improve codegen of line_offset_of_character_at_offset

This function is a hotspot, but it has inefficient codegen:

1. For whatever reason, the chars() iterator of wstr is slower
   than that of a slice. Use the slice.

2. Unnecessary overflow checks were preventing vectorization.

Switch to a more optimized implementation.

This improves aliases benchmark time by about 9%.
---
 src/parse_execution.rs | 13 ++++---------
 src/wcstringutil.rs    | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/parse_execution.rs b/src/parse_execution.rs
index d4f963251..e03db1377 100644
--- a/src/parse_execution.rs
+++ b/src/parse_execution.rs
@@ -48,6 +48,7 @@
 use crate::trace::{trace_if_enabled, trace_if_enabled_with_args};
 use crate::wchar::{wstr, WString, L};
 use crate::wchar_ext::WExt;
+use crate::wcstringutil::count_newlines;
 use crate::wildcard::wildcard_match;
 use crate::wutil::{wgettext, wgettext_maybe_fmt};
 use libc::{c_int, ENOTDIR, EXIT_SUCCESS, STDERR_FILENO, STDOUT_FILENO};
@@ -1946,19 +1947,13 @@ fn line_offset_of_character_at_offset(&self, offset: usize) -> usize {
         let mut cached_lineno = self.cached_lineno.borrow_mut();
         if offset > cached_lineno.offset {
             // Add one for every newline we find in the range [cached_lineno.offset, offset).
+            // The codegen is substantially better when using a char slice than the char iterator.
             let offset = std::cmp::min(offset, src.len());
-            let i = src[cached_lineno.offset..offset]
-                .chars()
-                .filter(|c| *c == '\n')
-                .count();
-            cached_lineno.count += i;
+            cached_lineno.count += count_newlines(&src[cached_lineno.offset..offset]);
             cached_lineno.offset = offset;
         } else if offset < cached_lineno.offset {
             // Subtract one for every newline we find in the range [offset, cached_range.start).
-            cached_lineno.count -= src[offset..cached_lineno.offset]
-                .chars()
-                .filter(|c| *c == '\n')
-                .count();
+            cached_lineno.count -= count_newlines(&src[offset..cached_lineno.offset]);
             cached_lineno.offset = offset;
         }
         cached_lineno.count
diff --git a/src/wcstringutil.rs b/src/wcstringutil.rs
index e08a89b4b..14e8489e8 100644
--- a/src/wcstringutil.rs
+++ b/src/wcstringutil.rs
@@ -8,6 +8,20 @@
 use crate::wchar::{decode_byte_from_char, prelude::*};
 use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
 
+/// Return the number of newlines in a string.
+pub fn count_newlines(s: &wstr) -> usize {
+    // This is a performance-sensitive function.
+    // The native filter().count() produces sub-optimal codegen because of overflow checks,
+    // which we currently enable in release mode. Implement it more efficiently.
+    let mut count: usize = 0;
+    for c in s.as_char_slice() {
+        if *c == '\n' {
+            count = count.wrapping_add(1);
+        }
+    }
+    count
+}
+
 /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
 pub fn string_prefixes_string_case_insensitive(proposed_prefix: &wstr, value: &wstr) -> bool {
     let prefix_size = proposed_prefix.len();
@@ -643,3 +657,13 @@ fn test_line_iterator() {
         ]
     );
 }
+
+#[test]
+fn test_count_newlines() {
+    assert_eq!(count_newlines(L!("")), 0);
+    assert_eq!(count_newlines(L!("foo")), 0);
+    assert_eq!(count_newlines(L!("foo\nbar")), 1);
+    assert_eq!(count_newlines(L!("foo\nbar\nbaz")), 2);
+    assert_eq!(count_newlines(L!("\n")), 1);
+    assert_eq!(count_newlines(L!("\n\n")), 2);
+}