From a1728d61afd05c809118d7bce1d78f01c2e05e9b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 27 Sep 2018 22:28:39 -0400 Subject: [PATCH] Report errors on invalid replacements in string replace If the replacement in `string replace` is invalid, prior to this fix we would enter into an infinite loop trying to parse it. Instead report errors correctly. Fixes #3381 --- src/builtin_string.cpp | 60 ++++++++++++++++++++---------------------- src/common.cpp | 46 ++++++++++++++++---------------- src/common.h | 21 ++++++++------- src/fish_tests.cpp | 2 +- 4 files changed, 64 insertions(+), 65 deletions(-) diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index 465f4124f..2dc57eb3b 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -133,7 +133,7 @@ class arg_iterator_t { } } }; -} +} // namespace // This is used by the string subcommands to communicate with the option parser which flags are // valid and get the result of parsing the command for flags. @@ -411,32 +411,23 @@ static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath co // Note that several long flags share the same short flag. That is okay. The caller is expected // to indicate that a max of one of the long flags sharing a short flag is valid. -static const struct woption long_options[] = {{L"all", no_argument, NULL, 'a'}, - {L"chars", required_argument, NULL, 'c'}, - {L"count", required_argument, NULL, 'n'}, - {L"entire", no_argument, NULL, 'e'}, - {L"filter", no_argument, NULL, 'f'}, - {L"ignore-case", no_argument, NULL, 'i'}, - {L"index", no_argument, NULL, 'n'}, - {L"invert", no_argument, NULL, 'v'}, - {L"left", no_argument, NULL, 'l'}, - {L"length", required_argument, NULL, 'l'}, - {L"max", required_argument, NULL, 'm'}, - {L"no-empty", no_argument, NULL, 'n'}, - {L"no-newline", no_argument, NULL, 'N'}, - {L"no-quoted", no_argument, NULL, 'n'}, - {L"quiet", no_argument, NULL, 'q'}, - {L"regex", no_argument, NULL, 'r'}, - {L"right", no_argument, NULL, 'r'}, - {L"start", required_argument, NULL, 's'}, - {L"style", required_argument, NULL, 1}, - {NULL, 0, NULL, 0}}; +static const struct woption long_options[] = { + {L"all", no_argument, NULL, 'a'}, {L"chars", required_argument, NULL, 'c'}, + {L"count", required_argument, NULL, 'n'}, {L"entire", no_argument, NULL, 'e'}, + {L"filter", no_argument, NULL, 'f'}, {L"ignore-case", no_argument, NULL, 'i'}, + {L"index", no_argument, NULL, 'n'}, {L"invert", no_argument, NULL, 'v'}, + {L"left", no_argument, NULL, 'l'}, {L"length", required_argument, NULL, 'l'}, + {L"max", required_argument, NULL, 'm'}, {L"no-empty", no_argument, NULL, 'n'}, + {L"no-newline", no_argument, NULL, 'N'}, {L"no-quoted", no_argument, NULL, 'n'}, + {L"quiet", no_argument, NULL, 'q'}, {L"regex", no_argument, NULL, 'r'}, + {L"right", no_argument, NULL, 'r'}, {L"start", required_argument, NULL, 's'}, + {L"style", required_argument, NULL, 1}, {NULL, 0, NULL, 0}}; static std::unordered_map flag_to_function = { {'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e}, {'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m}, {'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s}, - {'v', handle_flag_v}, {1, handle_flag_1} }; + {'v', handle_flag_v}, {1, handle_flag_1}}; /// Parse the arguments for flags recognized by a specific string subcommand. static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wchar_t **argv, @@ -689,8 +680,9 @@ struct compiled_regex_t { int err_code = 0; PCRE2_SIZE err_offset = 0; - code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(), - options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0); + code = + pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(), + options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0); if (code == 0) { string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0, pcre2_strerror(err_code).c_str()); @@ -789,7 +781,7 @@ class pcre2_matcher_t : public string_matcher_t { // See pcre2demo.c for an explanation of this logic. PCRE2_SIZE arglen = arg.length(); int rc = report_match( - arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0)); + arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0)); if (rc < 0) { // pcre2 match error. return false; } else if (rc == 0) { // no match @@ -815,8 +807,8 @@ class pcre2_matcher_t : public string_matcher_t { options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; } - rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset, options, - regex.match, 0)); + rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset, + options, regex.match, 0)); if (rc < 0) { return false; } @@ -955,7 +947,8 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) { const wchar_t *cur = arg.c_str(); const wchar_t *end = cur + arg.size(); while (cur < end) { - if ((opts.all || !replacement_occurred) && cmp_func(cur, pattern.c_str(), patlen) == 0) { + if ((opts.all || !replacement_occurred) && + cmp_func(cur, pattern.c_str(), patlen) == 0) { result += replacement; cur += patlen; replacement_occurred = true; @@ -978,7 +971,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) { /// A return value of true means all is well (even if no replacements were performed), false /// indicates an unrecoverable error. bool regex_replacer_t::replace_matches(const wcstring &arg) { - if (!regex.code) return false; // pcre2_compile() failed + if (!regex.code) return false; // pcre2_compile() failed if (!replacement) return false; // replacement was an invalid string uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED | @@ -1076,9 +1069,11 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc arg_iterator_t aiter(argv, optind, streams, is_split0); while (const wcstring *arg = aiter.nextstr()) { if (opts.right) { - split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, opts.no_empty); + split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, + opts.no_empty); } else { - split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, opts.no_empty); + split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, + opts.no_empty); } arg_count++; } @@ -1247,7 +1242,8 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_ } // A helper function for lower and upper. -static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv, std::wint_t (*func)(std::wint_t)) { +static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv, + std::wint_t (*func)(std::wint_t)) { options_t opts; opts.quiet_valid = true; int optind; diff --git a/src/common.cpp b/src/common.cpp index 3211e313f..b5d2a8bde 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -180,8 +180,7 @@ show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) { #else // HAVE_BACKTRACE_SYMBOLS -void __attribute__((noinline)) -show_stackframe(const wchar_t msg_level, int, int) { +void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int, int) { debug_shared(msg_level, L"Sorry, but your system does not support backtraces"); } #endif // HAVE_BACKTRACE_SYMBOLS @@ -331,8 +330,8 @@ char *wcs2str(const wchar_t *in) { // Here we probably allocate a buffer probably much larger than necessary. char *out = (char *)malloc(MAX_UTF8_BYTES * wcslen(in) + 1); assert(out); - //Instead of returning the return value of wcs2str_internal, return `out` directly. - //This eliminates false warnings in coverity about resource leaks. + // Instead of returning the return value of wcs2str_internal, return `out` directly. + // This eliminates false warnings in coverity about resource leaks. wcs2str_internal(in, out); return out; } @@ -521,18 +520,16 @@ void fish_setlocale() { if (can_be_encoded(L'\u2026')) { ellipsis_char = L'\u2026'; ellipsis_str = L"\u2026"; - } - else { - ellipsis_char = L'$'; // "horizontal ellipsis" + } else { + ellipsis_char = L'$'; // "horizontal ellipsis" ellipsis_str = L"..."; } if (is_windows_subsystem_for_linux()) { - //neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though - //they can be *encoded* just fine. Use alternative glyphs. + // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though + // they can be *encoded* just fine. Use alternative glyphs. omitted_newline_char = can_be_encoded(L'\u00b6') ? L'\u00b6' : L'~'; // "pilcrow" obfuscation_read_char = can_be_encoded(L'\u2022') ? L'\u2022' : L'*'; // "bullet" - } - else { + } else { omitted_newline_char = can_be_encoded(L'\u23CE') ? L'\u23CE' : L'~'; // "return" obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#'; // "black circle" } @@ -590,7 +587,6 @@ bool should_suppress_stderr_for_tests() { return program_name && !wcscmp(program_name, TESTS_PROGRAM_NAME); } - static void debug_shared(const wchar_t level, const wcstring &msg) { pid_t current_pid = getpid(); @@ -805,7 +801,7 @@ wcstring reformat_for_screen(const wcstring &msg) { /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str. static void escape_string_url(const wcstring &in, wcstring &out) { - for (auto& c1 : in) { + for (auto &c1 : in) { // This silliness is so we get the correct result whether chars are signed or unsigned. unsigned int c2 = (unsigned int)c1 & 0xFF; if (!(c2 & 0x80) && @@ -1027,7 +1023,8 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring case L';': case L'"': case L'~': { - bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) || (c == L'?' && no_qmark); + bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) || + (c == L'?' && no_qmark); if (!char_is_normal) { need_escape = 1; if (escape_all) out += L'\\'; @@ -1346,7 +1343,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in int brace_count = 0; bool errored = false; - enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted; + enum { + mode_unquoted, + mode_single_quotes, + mode_double_quotes, + mode_braces + } mode = mode_unquoted; for (size_t input_position = 0; input_position < input_len && !errored; input_position++) { const wchar_t c = input[input_position]; @@ -1418,7 +1420,8 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in // We can't parse them properly, but it shouldn't hurt, // so we don't assert here. // See #4954. - // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here"); + // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we + // shouldn't be able to get here"); brace_count--; brace_text_start = brace_text_start && brace_count > 0; to_append_or_none = BRACE_END; @@ -1748,10 +1751,11 @@ bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &valu value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0; } -bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value) { +bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, + const wcstring &value) { size_t suffix_size = proposed_suffix.size(); - return suffix_size <= value.size() && - wcsncasecmp(value.c_str() + (value.size() - suffix_size), proposed_suffix.c_str(), suffix_size) == 0; + return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size), + proposed_suffix.c_str(), suffix_size) == 0; } /// Returns true if seq, represented as a subsequence, is contained within string. @@ -2061,9 +2065,7 @@ void setup_fork_guards() { initial_pid = getpid(); } -void save_term_foreground_process_group() { - initial_fg_process_group = tcgetpgrp(STDIN_FILENO); -} +void save_term_foreground_process_group() { initial_fg_process_group = tcgetpgrp(STDIN_FILENO); } void restore_term_foreground_process_group() { if (initial_fg_process_group == -1) return; diff --git a/src/common.h b/src/common.h index d08ca7556..f3832bb8b 100644 --- a/src/common.h +++ b/src/common.h @@ -22,9 +22,9 @@ #include #include #include +#include #include #include -#include #include #include "fallback.h" // IWYU pragma: keep @@ -187,7 +187,8 @@ extern struct termios shell_modes; /// The character to use where the text has been truncated. Is an ellipsis on unicode system and a $ /// on other systems. extern wchar_t ellipsis_char; -/// The character or string to use where text has been truncated (ellipsis if possible, otherwise ...) +/// The character or string to use where text has been truncated (ellipsis if possible, otherwise +/// ...) extern const wchar_t *ellipsis_str; /// Character representing an omitted newline at the end of text. @@ -328,7 +329,8 @@ bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value /// Test if a string is a suffix of another. bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value); bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value); -bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value); +bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, + const wcstring &value); /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b. bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, @@ -534,7 +536,7 @@ class null_terminated_array_t { void operator=(null_terminated_array_t rhs); null_terminated_array_t(const null_terminated_array_t &); - typedef std::vector > string_list_t; + typedef std::vector> string_list_t; size_t size() const { size_t len = 0; @@ -882,7 +884,6 @@ struct enum_map { const wchar_t *const str; }; - /// Given a string return the matching enum. Return the sentinal enum if no match is made. The map /// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16 /// elements in the map. @@ -916,13 +917,13 @@ static const wchar_t *enum_to_str(T enum_val, const enum_map map[]) { return NULL; }; -template +template using tuple_list = std::vector>; -//Given a container mapping one X to many Y, return a list of {X,Y} -template +// Given a container mapping one X to many Y, return a list of {X,Y} +template inline tuple_list flatten(const std::unordered_map> &list) { - tuple_list results(list.size() * 1.5); //just a guess as to the initial size + tuple_list results(list.size() * 1.5); // just a guess as to the initial size for (auto &kv : list) { for (auto &v : kv.second) { results.emplace_back(std::make_tuple(kv.first, v)); @@ -1004,7 +1005,7 @@ struct hash { return hasher((wcstring)w); } }; -} +} // namespace std #endif #endif diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index b2dfc7185..43dbdb753 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4465,7 +4465,7 @@ static void test_string() { {{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"}, {{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"}, {{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"}}; - + for (const auto &t : string_tests) { run_one_string_test(t.argv, t.expected_rc, t.expected_out); }