Fix string

This commit is contained in:
ridiculousfish
2018-09-27 22:22:55 -04:00
parent cc99e8d510
commit ae54b66799
4 changed files with 30 additions and 20 deletions

View File

@@ -906,14 +906,19 @@ class literal_replacer_t : public string_replacer_t {
bool replace_matches(const wcstring &arg) override;
};
static wcstring interpret_escapes(const wcstring &arg) {
static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
wcstring result;
result.reserve(arg.size());
const wchar_t *cursor = arg.c_str();
const wchar_t *end = cursor + arg.size();
while (cursor < end) {
if (*cursor == L'\\') {
cursor += read_unquoted_escape(cursor, &result, true, false);
if (auto escape_len = read_unquoted_escape(cursor, &result, true, false)) {
cursor += *escape_len;
} else {
// Invalid escape.
return none();
}
} else {
result.push_back(*cursor);
cursor++;
@@ -924,7 +929,7 @@ static wcstring interpret_escapes(const wcstring &arg) {
class regex_replacer_t : public string_replacer_t {
compiled_regex_t regex;
wcstring replacement;
maybe_t<wcstring> replacement;
public:
regex_replacer_t(const wchar_t *argv0, const wcstring &pattern, const wcstring &replacement_,
@@ -974,6 +979,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
/// indicates an unrecoverable error.
bool regex_replacer_t::replace_matches(const wcstring &arg) {
if (!regex.code) return false; // pcre2_compile() failed
if (!replacement) return false; // replacement was an invalid string
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
(opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0);
@@ -991,7 +997,7 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) {
0, // start offset
options, regex.match,
0, // match context
PCRE2_SPTR(replacement.c_str()), replacement.length(),
PCRE2_SPTR(replacement->c_str()), replacement->length(),
(PCRE2_UCHAR *)output, &outlen);
if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {

View File

@@ -1161,11 +1161,9 @@ static wint_t string_last_char(const wcstring &str) {
/// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
/// appending to result. Return the number of characters consumed, or 0 on error.
size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
bool unescape_special) {
if (input[0] != L'\\') {
return 0; // not an escape
}
maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
bool unescape_special) {
assert(input[0] == L'\\' && "Not an escape");
// Here's the character we'll ultimately append, or NOT_A_WCHAR for none. Note that L'\0' is a
// valid thing to append.
@@ -1328,7 +1326,9 @@ size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_i
assert((wint_t)result_char == result_char_or_none);
result->push_back(result_char);
}
return errored ? 0 : in_pos;
if (errored) return none();
return in_pos;
}
/// Returns the unescaped version of input_str into output_str (by reference). Returns true if
@@ -1357,16 +1357,16 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
case L'\\': {
// Backslashes (escapes) are complicated and may result in errors, or appending
// INTERNAL_SEPARATORs, so we have to handle them specially.
size_t escape_chars = read_unquoted_escape(input + input_position, &result,
allow_incomplete, unescape_special);
if (escape_chars == 0) {
// A 0 return indicates an error.
auto escape_chars = read_unquoted_escape(input + input_position, &result,
allow_incomplete, unescape_special);
if (!escape_chars) {
// A none() return indicates an error.
errored = true;
} else {
// Skip over the characters we read, minus one because the outer loop will
// increment it.
assert(escape_chars > 0);
input_position += escape_chars - 1;
assert(*escape_chars > 0);
input_position += *escape_chars - 1;
}
// We've already appended, don't append anything else.
to_append_or_none = NOT_A_WCHAR;

View File

@@ -28,7 +28,8 @@
#include <vector>
#include "fallback.h" // IWYU pragma: keep
#include "signal.h" // IWYU pragma: keep
#include "maybe.h"
#include "signal.h" // IWYU pragma: keep
// Define a symbol we can use elsewhere in our code to determine if we're being built on MS Windows
// under Cygwin.
@@ -771,9 +772,9 @@ wcstring debug_escape(const wcstring &in);
/// defined in a private use area of Unicode. This assumes wchar_t is a unicode character set.
/// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
/// appending to result. Return the number of characters consumed, or 0 on error.
size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
bool unescape_special);
/// appending to result. Return the number of characters consumed, or none() on error.
maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
bool unescape_special);
/// Unescapes a string in-place. A true result indicates the string was unescaped, a false result
/// indicates the string was unmodified.

View File

@@ -4180,6 +4180,7 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc,
}
static void test_string() {
say(L"Testing builtin_string");
const struct string_test {
const wchar_t *argv[15];
int expected_rc;
@@ -4369,6 +4370,7 @@ static void test_string() {
{{L"string", L"replace", L"-r", L"a", L"$1", L"a", 0}, STATUS_INVALID_ARGS, L""},
{{L"string", L"replace", L"-r", L"(a)", L"$2", L"a", 0}, STATUS_INVALID_ARGS, L""},
{{L"string", L"replace", L"-r", L"*", L".", L"a", 0}, STATUS_INVALID_ARGS, L""},
{{L"string", L"replace", L"-ra", L"x", L"\\c", 0}, STATUS_CMD_ERROR, L""},
{{L"string", L"replace", L"-r", L"^(.)", L"\t$1", L"abc", L"x", 0},
STATUS_CMD_OK,
L"\tabc\n\tx\n"},
@@ -4463,6 +4465,7 @@ static void test_string() {
{{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"},
{{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"},
{{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"}};
for (const auto &t : string_tests) {
run_one_string_test(t.argv, t.expected_rc, t.expected_out);
}