diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index 4f0834038..465f4124f 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -906,14 +906,19 @@ class literal_replacer_t : public string_replacer_t { bool replace_matches(const wcstring &arg) override; }; -static wcstring interpret_escapes(const wcstring &arg) { +static maybe_t interpret_escapes(const wcstring &arg) { wcstring result; result.reserve(arg.size()); const wchar_t *cursor = arg.c_str(); const wchar_t *end = cursor + arg.size(); while (cursor < end) { if (*cursor == L'\\') { - cursor += read_unquoted_escape(cursor, &result, true, false); + if (auto escape_len = read_unquoted_escape(cursor, &result, true, false)) { + cursor += *escape_len; + } else { + // Invalid escape. + return none(); + } } else { result.push_back(*cursor); cursor++; @@ -924,7 +929,7 @@ static wcstring interpret_escapes(const wcstring &arg) { class regex_replacer_t : public string_replacer_t { compiled_regex_t regex; - wcstring replacement; + maybe_t replacement; public: regex_replacer_t(const wchar_t *argv0, const wcstring &pattern, const wcstring &replacement_, @@ -974,6 +979,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) { /// indicates an unrecoverable error. bool regex_replacer_t::replace_matches(const wcstring &arg) { if (!regex.code) return false; // pcre2_compile() failed + if (!replacement) return false; // replacement was an invalid string uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED | (opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0); @@ -991,7 +997,7 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) { 0, // start offset options, regex.match, 0, // match context - PCRE2_SPTR(replacement.c_str()), replacement.length(), + PCRE2_SPTR(replacement->c_str()), replacement->length(), (PCRE2_UCHAR *)output, &outlen); if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) { diff --git a/src/common.cpp b/src/common.cpp index 4b0d54436..3211e313f 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1161,11 +1161,9 @@ static wint_t string_last_char(const wcstring &str) { /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted, /// appending to result. Return the number of characters consumed, or 0 on error. -size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, - bool unescape_special) { - if (input[0] != L'\\') { - return 0; // not an escape - } +maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, + bool unescape_special) { + assert(input[0] == L'\\' && "Not an escape"); // Here's the character we'll ultimately append, or NOT_A_WCHAR for none. Note that L'\0' is a // valid thing to append. @@ -1328,7 +1326,9 @@ size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_i assert((wint_t)result_char == result_char_or_none); result->push_back(result_char); } - return errored ? 0 : in_pos; + if (errored) return none(); + + return in_pos; } /// Returns the unescaped version of input_str into output_str (by reference). Returns true if @@ -1357,16 +1357,16 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in case L'\\': { // Backslashes (escapes) are complicated and may result in errors, or appending // INTERNAL_SEPARATORs, so we have to handle them specially. - size_t escape_chars = read_unquoted_escape(input + input_position, &result, - allow_incomplete, unescape_special); - if (escape_chars == 0) { - // A 0 return indicates an error. + auto escape_chars = read_unquoted_escape(input + input_position, &result, + allow_incomplete, unescape_special); + if (!escape_chars) { + // A none() return indicates an error. errored = true; } else { // Skip over the characters we read, minus one because the outer loop will // increment it. - assert(escape_chars > 0); - input_position += escape_chars - 1; + assert(*escape_chars > 0); + input_position += *escape_chars - 1; } // We've already appended, don't append anything else. to_append_or_none = NOT_A_WCHAR; diff --git a/src/common.h b/src/common.h index b8fabb804..d08ca7556 100644 --- a/src/common.h +++ b/src/common.h @@ -28,7 +28,8 @@ #include #include "fallback.h" // IWYU pragma: keep -#include "signal.h" // IWYU pragma: keep +#include "maybe.h" +#include "signal.h" // IWYU pragma: keep // Define a symbol we can use elsewhere in our code to determine if we're being built on MS Windows // under Cygwin. @@ -771,9 +772,9 @@ wcstring debug_escape(const wcstring &in); /// defined in a private use area of Unicode. This assumes wchar_t is a unicode character set. /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted, -/// appending to result. Return the number of characters consumed, or 0 on error. -size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, - bool unescape_special); +/// appending to result. Return the number of characters consumed, or none() on error. +maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, + bool unescape_special); /// Unescapes a string in-place. A true result indicates the string was unescaped, a false result /// indicates the string was unmodified. diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index fd77d4cee..b2dfc7185 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4180,6 +4180,7 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc, } static void test_string() { + say(L"Testing builtin_string"); const struct string_test { const wchar_t *argv[15]; int expected_rc; @@ -4369,6 +4370,7 @@ static void test_string() { {{L"string", L"replace", L"-r", L"a", L"$1", L"a", 0}, STATUS_INVALID_ARGS, L""}, {{L"string", L"replace", L"-r", L"(a)", L"$2", L"a", 0}, STATUS_INVALID_ARGS, L""}, {{L"string", L"replace", L"-r", L"*", L".", L"a", 0}, STATUS_INVALID_ARGS, L""}, + {{L"string", L"replace", L"-ra", L"x", L"\\c", 0}, STATUS_CMD_ERROR, L""}, {{L"string", L"replace", L"-r", L"^(.)", L"\t$1", L"abc", L"x", 0}, STATUS_CMD_OK, L"\tabc\n\tx\n"}, @@ -4463,6 +4465,7 @@ static void test_string() { {{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"}, {{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"}, {{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"}}; + for (const auto &t : string_tests) { run_one_string_test(t.argv, t.expected_rc, t.expected_out); }