From a1728d61afd05c809118d7bce1d78f01c2e05e9b Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Thu, 27 Sep 2018 22:28:39 -0400
Subject: [PATCH] Report errors on invalid replacements in string replace

If the replacement in `string replace` is invalid, prior to this fix we would
enter into an infinite loop trying to parse it. Instead report errors correctly.

Fixes #3381
---
 src/builtin_string.cpp | 60 ++++++++++++++++++++----------------------
 src/common.cpp         | 46 ++++++++++++++++----------------
 src/common.h           | 21 ++++++++-------
 src/fish_tests.cpp     |  2 +-
 4 files changed, 64 insertions(+), 65 deletions(-)

diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
index 465f4124f..2dc57eb3b 100644
--- a/src/builtin_string.cpp
+++ b/src/builtin_string.cpp
@@ -133,7 +133,7 @@ class arg_iterator_t {
         }
     }
 };
-}
+}  // namespace
 
 // This is used by the string subcommands to communicate with the option parser which flags are
 // valid and get the result of parsing the command for flags.
@@ -411,32 +411,23 @@ static wcstring construct_short_opts(options_t *opts) {  //!OCLINT(high npath co
 
 // Note that several long flags share the same short flag. That is okay. The caller is expected
 // to indicate that a max of one of the long flags sharing a short flag is valid.
-static const struct woption long_options[] = {{L"all", no_argument, NULL, 'a'},
-                                              {L"chars", required_argument, NULL, 'c'},
-                                              {L"count", required_argument, NULL, 'n'},
-                                              {L"entire", no_argument, NULL, 'e'},
-                                              {L"filter", no_argument, NULL, 'f'},
-                                              {L"ignore-case", no_argument, NULL, 'i'},
-                                              {L"index", no_argument, NULL, 'n'},
-                                              {L"invert", no_argument, NULL, 'v'},
-                                              {L"left", no_argument, NULL, 'l'},
-                                              {L"length", required_argument, NULL, 'l'},
-                                              {L"max", required_argument, NULL, 'm'},
-                                              {L"no-empty", no_argument, NULL, 'n'},
-                                              {L"no-newline", no_argument, NULL, 'N'},
-                                              {L"no-quoted", no_argument, NULL, 'n'},
-                                              {L"quiet", no_argument, NULL, 'q'},
-                                              {L"regex", no_argument, NULL, 'r'},
-                                              {L"right", no_argument, NULL, 'r'},
-                                              {L"start", required_argument, NULL, 's'},
-                                              {L"style", required_argument, NULL, 1},
-                                              {NULL, 0, NULL, 0}};
+static const struct woption long_options[] = {
+    {L"all", no_argument, NULL, 'a'},         {L"chars", required_argument, NULL, 'c'},
+    {L"count", required_argument, NULL, 'n'}, {L"entire", no_argument, NULL, 'e'},
+    {L"filter", no_argument, NULL, 'f'},      {L"ignore-case", no_argument, NULL, 'i'},
+    {L"index", no_argument, NULL, 'n'},       {L"invert", no_argument, NULL, 'v'},
+    {L"left", no_argument, NULL, 'l'},        {L"length", required_argument, NULL, 'l'},
+    {L"max", required_argument, NULL, 'm'},   {L"no-empty", no_argument, NULL, 'n'},
+    {L"no-newline", no_argument, NULL, 'N'},  {L"no-quoted", no_argument, NULL, 'n'},
+    {L"quiet", no_argument, NULL, 'q'},       {L"regex", no_argument, NULL, 'r'},
+    {L"right", no_argument, NULL, 'r'},       {L"start", required_argument, NULL, 's'},
+    {L"style", required_argument, NULL, 1},   {NULL, 0, NULL, 0}};
 
 static std::unordered_map<char, decltype(*handle_flag_N)> flag_to_function = {
     {'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
     {'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
     {'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s},
-    {'v', handle_flag_v}, {1, handle_flag_1} };
+    {'v', handle_flag_v}, {1, handle_flag_1}};
 
 /// Parse the arguments for flags recognized by a specific string subcommand.
 static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wchar_t **argv,
@@ -689,8 +680,9 @@ struct compiled_regex_t {
         int err_code = 0;
         PCRE2_SIZE err_offset = 0;
 
-        code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
-                             options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
+        code =
+            pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
+                          options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
         if (code == 0) {
             string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
                          pcre2_strerror(err_code).c_str());
@@ -789,7 +781,7 @@ class pcre2_matcher_t : public string_matcher_t {
         // See pcre2demo.c for an explanation of this logic.
         PCRE2_SIZE arglen = arg.length();
         int rc = report_match(
-                              arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
+            arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
         if (rc < 0) {  // pcre2 match error.
             return false;
         } else if (rc == 0) {  // no match
@@ -815,8 +807,8 @@ class pcre2_matcher_t : public string_matcher_t {
                 options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
             }
 
-            rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset, options,
-                                               regex.match, 0));
+            rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
+                                               options, regex.match, 0));
             if (rc < 0) {
                 return false;
             }
@@ -955,7 +947,8 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
         const wchar_t *cur = arg.c_str();
         const wchar_t *end = cur + arg.size();
         while (cur < end) {
-            if ((opts.all || !replacement_occurred) && cmp_func(cur, pattern.c_str(), patlen) == 0) {
+            if ((opts.all || !replacement_occurred) &&
+                cmp_func(cur, pattern.c_str(), patlen) == 0) {
                 result += replacement;
                 cur += patlen;
                 replacement_occurred = true;
@@ -978,7 +971,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
 /// A return value of true means all is well (even if no replacements were performed), false
 /// indicates an unrecoverable error.
 bool regex_replacer_t::replace_matches(const wcstring &arg) {
-    if (!regex.code) return false;  // pcre2_compile() failed
+    if (!regex.code) return false;   // pcre2_compile() failed
     if (!replacement) return false;  // replacement was an invalid string
 
     uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
@@ -1076,9 +1069,11 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
     arg_iterator_t aiter(argv, optind, streams, is_split0);
     while (const wcstring *arg = aiter.nextstr()) {
         if (opts.right) {
-            split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, opts.no_empty);
+            split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max,
+                        opts.no_empty);
         } else {
-            split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, opts.no_empty);
+            split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max,
+                        opts.no_empty);
         }
         arg_count++;
     }
@@ -1247,7 +1242,8 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_
 }
 
 // A helper function for lower and upper.
-static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv, std::wint_t (*func)(std::wint_t)) {
+static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv,
+                            std::wint_t (*func)(std::wint_t)) {
     options_t opts;
     opts.quiet_valid = true;
     int optind;
diff --git a/src/common.cpp b/src/common.cpp
index 3211e313f..b5d2a8bde 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -180,8 +180,7 @@ show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) {
 
 #else   // HAVE_BACKTRACE_SYMBOLS
 
-void __attribute__((noinline))
-show_stackframe(const wchar_t msg_level, int, int) {
+void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int, int) {
     debug_shared(msg_level, L"Sorry, but your system does not support backtraces");
 }
 #endif  // HAVE_BACKTRACE_SYMBOLS
@@ -331,8 +330,8 @@ char *wcs2str(const wchar_t *in) {
     // Here we probably allocate a buffer probably much larger than necessary.
     char *out = (char *)malloc(MAX_UTF8_BYTES * wcslen(in) + 1);
     assert(out);
-    //Instead of returning the return value of wcs2str_internal, return `out` directly.
-    //This eliminates false warnings in coverity about resource leaks.
+    // Instead of returning the return value of wcs2str_internal, return `out` directly.
+    // This eliminates false warnings in coverity about resource leaks.
     wcs2str_internal(in, out);
     return out;
 }
@@ -521,18 +520,16 @@ void fish_setlocale() {
     if (can_be_encoded(L'\u2026')) {
         ellipsis_char = L'\u2026';
         ellipsis_str = L"\u2026";
-    }
-    else {
-        ellipsis_char = L'$'; // "horizontal ellipsis"
+    } else {
+        ellipsis_char = L'$';  // "horizontal ellipsis"
         ellipsis_str = L"...";
     }
     if (is_windows_subsystem_for_linux()) {
-        //neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
-        //they can be *encoded* just fine. Use alternative glyphs.
+        // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
+        // they can be *encoded* just fine. Use alternative glyphs.
         omitted_newline_char = can_be_encoded(L'\u00b6') ? L'\u00b6' : L'~';   // "pilcrow"
         obfuscation_read_char = can_be_encoded(L'\u2022') ? L'\u2022' : L'*';  // "bullet"
-    }
-    else {
+    } else {
         omitted_newline_char = can_be_encoded(L'\u23CE') ? L'\u23CE' : L'~';   // "return"
         obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#';  // "black circle"
     }
@@ -590,7 +587,6 @@ bool should_suppress_stderr_for_tests() {
     return program_name && !wcscmp(program_name, TESTS_PROGRAM_NAME);
 }
 
-
 static void debug_shared(const wchar_t level, const wcstring &msg) {
     pid_t current_pid = getpid();
 
@@ -805,7 +801,7 @@ wcstring reformat_for_screen(const wcstring &msg) {
 
 /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
 static void escape_string_url(const wcstring &in, wcstring &out) {
-    for (auto& c1 : in) {
+    for (auto &c1 : in) {
         // This silliness is so we get the correct result whether chars are signed or unsigned.
         unsigned int c2 = (unsigned int)c1 & 0xFF;
         if (!(c2 & 0x80) &&
@@ -1027,7 +1023,8 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring
                 case L';':
                 case L'"':
                 case L'~': {
-                    bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) || (c == L'?' && no_qmark);
+                    bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) ||
+                                          (c == L'?' && no_qmark);
                     if (!char_is_normal) {
                         need_escape = 1;
                         if (escape_all) out += L'\\';
@@ -1346,7 +1343,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
     int brace_count = 0;
 
     bool errored = false;
-    enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted;
+    enum {
+        mode_unquoted,
+        mode_single_quotes,
+        mode_double_quotes,
+        mode_braces
+    } mode = mode_unquoted;
 
     for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
         const wchar_t c = input[input_position];
@@ -1418,7 +1420,8 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                         // We can't parse them properly, but it shouldn't hurt,
                         // so we don't assert here.
                         // See #4954.
-                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
+                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
+                        // shouldn't be able to get here");
                         brace_count--;
                         brace_text_start = brace_text_start && brace_count > 0;
                         to_append_or_none = BRACE_END;
@@ -1748,10 +1751,11 @@ bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &valu
            value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0;
 }
 
-bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value) {
+bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
+                                             const wcstring &value) {
     size_t suffix_size = proposed_suffix.size();
-    return suffix_size <= value.size() &&
-           wcsncasecmp(value.c_str() + (value.size() - suffix_size), proposed_suffix.c_str(), suffix_size) == 0;
+    return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size),
+                                                      proposed_suffix.c_str(), suffix_size) == 0;
 }
 
 /// Returns true if seq, represented as a subsequence, is contained within string.
@@ -2061,9 +2065,7 @@ void setup_fork_guards() {
     initial_pid = getpid();
 }
 
-void save_term_foreground_process_group() {
-    initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
-}
+void save_term_foreground_process_group() { initial_fg_process_group = tcgetpgrp(STDIN_FILENO); }
 
 void restore_term_foreground_process_group() {
     if (initial_fg_process_group == -1) return;
diff --git a/src/common.h b/src/common.h
index d08ca7556..f3832bb8b 100644
--- a/src/common.h
+++ b/src/common.h
@@ -22,9 +22,9 @@
 #include <mutex>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <type_traits>
 #include <unordered_map>
-#include <tuple>
 #include <vector>
 
 #include "fallback.h"  // IWYU pragma: keep
@@ -187,7 +187,8 @@ extern struct termios shell_modes;
 /// The character to use where the text has been truncated. Is an ellipsis on unicode system and a $
 /// on other systems.
 extern wchar_t ellipsis_char;
-/// The character or string to use where text has been truncated (ellipsis if possible, otherwise ...)
+/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
+/// ...)
 extern const wchar_t *ellipsis_str;
 
 /// Character representing an omitted newline at the end of text.
@@ -328,7 +329,8 @@ bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value
 /// Test if a string is a suffix of another.
 bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value);
 bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value);
-bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value);
+bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
+                                             const wcstring &value);
 
 /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
 bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,
@@ -534,7 +536,7 @@ class null_terminated_array_t {
     void operator=(null_terminated_array_t rhs);
     null_terminated_array_t(const null_terminated_array_t &);
 
-    typedef std::vector<std::basic_string<CharType_t> > string_list_t;
+    typedef std::vector<std::basic_string<CharType_t>> string_list_t;
 
     size_t size() const {
         size_t len = 0;
@@ -882,7 +884,6 @@ struct enum_map {
     const wchar_t *const str;
 };
 
-
 /// Given a string return the matching enum. Return the sentinal enum if no match is made. The map
 /// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16
 /// elements in the map.
@@ -916,13 +917,13 @@ static const wchar_t *enum_to_str(T enum_val, const enum_map<T> map[]) {
     return NULL;
 };
 
-template<typename... Args>
+template <typename... Args>
 using tuple_list = std::vector<std::tuple<Args...>>;
 
-//Given a container mapping one X to many Y, return a list of {X,Y}
-template<typename X, typename Y>
+// Given a container mapping one X to many Y, return a list of {X,Y}
+template <typename X, typename Y>
 inline tuple_list<X, Y> flatten(const std::unordered_map<X, std::vector<Y>> &list) {
-    tuple_list<X, Y> results(list.size() * 1.5); //just a guess as to the initial size
+    tuple_list<X, Y> results(list.size() * 1.5);  // just a guess as to the initial size
     for (auto &kv : list) {
         for (auto &v : kv.second) {
             results.emplace_back(std::make_tuple(kv.first, v));
@@ -1004,7 +1005,7 @@ struct hash<const wcstring> {
         return hasher((wcstring)w);
     }
 };
-}
+}  // namespace std
 #endif
 
 #endif
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index b2dfc7185..43dbdb753 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4465,7 +4465,7 @@ static void test_string() {
         {{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"},
         {{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"},
         {{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"}};
-    
+
     for (const auto &t : string_tests) {
         run_one_string_test(t.argv, t.expected_rc, t.expected_out);
     }