mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-23 13:11:15 -03:00
Adopt the new re in builtin_string
This switches builtin_string from using PCRE2 directly, to using the new re component. This simplifies some code and removes redundancy. No user-visible behavior change expected here.
This commit is contained in:
@@ -1,10 +1,6 @@
|
||||
// Implementation of the string builtin.
|
||||
#include "config.h" // IWYU pragma: keep
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
|
||||
#ifdef _WIN32
|
||||
#define PCRE2_STATIC
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
@@ -29,12 +25,12 @@
|
||||
#include "../future_feature_flags.h"
|
||||
#include "../parse_util.h"
|
||||
#include "../parser.h"
|
||||
#include "../re.h"
|
||||
#include "../screen.h"
|
||||
#include "../wcstringutil.h"
|
||||
#include "../wgetopt.h"
|
||||
#include "../wildcard.h"
|
||||
#include "../wutil.h" // IWYU pragma: keep
|
||||
#include "pcre2.h"
|
||||
|
||||
// How many bytes we read() at once.
|
||||
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
|
||||
@@ -852,18 +848,15 @@ namespace {
|
||||
class string_matcher_t {
|
||||
protected:
|
||||
const options_t opts;
|
||||
io_streams_t &streams;
|
||||
int total_matched{0};
|
||||
|
||||
public:
|
||||
string_matcher_t(options_t opts_, io_streams_t &streams_)
|
||||
: opts(std::move(opts_)), streams(streams_) {}
|
||||
explicit string_matcher_t(const options_t &opts_) : opts(opts_) {}
|
||||
|
||||
virtual ~string_matcher_t() = default;
|
||||
virtual bool report_matches(const wcstring &arg) = 0;
|
||||
virtual bool report_matches(const wcstring &arg, io_streams_t &streams) = 0;
|
||||
int match_count() const { return total_matched; }
|
||||
|
||||
virtual bool is_valid() const = 0;
|
||||
virtual void clear_capture_vars() {}
|
||||
};
|
||||
|
||||
@@ -872,9 +865,8 @@ class wildcard_matcher_t final : public string_matcher_t {
|
||||
wcstring wcpattern;
|
||||
|
||||
public:
|
||||
wildcard_matcher_t(const wchar_t * /*argv0*/, const wcstring &pattern, const options_t &opts,
|
||||
io_streams_t &streams)
|
||||
: string_matcher_t(opts, streams), wcpattern(parse_util_unescape_wildcards(pattern)) {
|
||||
wildcard_matcher_t(const wcstring &pattern, const options_t &opts)
|
||||
: string_matcher_t(opts), wcpattern(parse_util_unescape_wildcards(pattern)) {
|
||||
if (opts.ignore_case) {
|
||||
wcpattern = wcstolower(std::move(wcpattern));
|
||||
}
|
||||
@@ -891,7 +883,7 @@ class wildcard_matcher_t final : public string_matcher_t {
|
||||
|
||||
~wildcard_matcher_t() override = default;
|
||||
|
||||
bool report_matches(const wcstring &arg) override {
|
||||
bool report_matches(const wcstring &arg, io_streams_t &streams) override {
|
||||
// Note: --all is a no-op for glob matching since the pattern is always matched
|
||||
// against the entire argument.
|
||||
bool match;
|
||||
@@ -915,141 +907,53 @@ class wildcard_matcher_t final : public string_matcher_t {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_valid() const override { return true; }
|
||||
};
|
||||
|
||||
static wcstring pcre2_strerror(int err_code) {
|
||||
wchar_t buf[128];
|
||||
pcre2_get_error_message(err_code, reinterpret_cast<PCRE2_UCHAR *>(buf),
|
||||
sizeof(buf) / sizeof(wchar_t));
|
||||
return buf;
|
||||
// Compile a regex, printing an error on failure.
|
||||
static maybe_t<re::regex_t> try_compile_regex(const wcstring &pattern, const options_t &opts,
|
||||
const wchar_t *cmd, io_streams_t &streams) {
|
||||
re::re_error_t error{};
|
||||
re::flags_t flags{};
|
||||
flags.icase = opts.ignore_case;
|
||||
auto re = re::regex_t::try_compile(pattern, flags, &error);
|
||||
if (!re) {
|
||||
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), cmd,
|
||||
error.message().c_str());
|
||||
string_error(streams, L"%ls: %ls\n", cmd, pattern.c_str());
|
||||
string_error(streams, L"%ls: %*ls\n", cmd, static_cast<int>(error.offset), L"^");
|
||||
}
|
||||
return re;
|
||||
}
|
||||
|
||||
struct compiled_regex_t : noncopyable_t {
|
||||
pcre2_code *code{nullptr};
|
||||
pcre2_match_data *match{nullptr};
|
||||
|
||||
// The list of named capture groups.
|
||||
wcstring_list_t capture_group_names;
|
||||
|
||||
compiled_regex_t(const wchar_t *argv0, const wcstring &pattern, bool ignore_case,
|
||||
io_streams_t &streams) {
|
||||
// Disable some sequences that can lead to security problems.
|
||||
uint32_t options = PCRE2_NEVER_UTF;
|
||||
#if PCRE2_CODE_UNIT_WIDTH < 32
|
||||
options |= PCRE2_NEVER_BACKSLASH_C;
|
||||
#endif
|
||||
|
||||
int err_code = 0;
|
||||
PCRE2_SIZE err_offset = 0;
|
||||
|
||||
code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
|
||||
options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset,
|
||||
nullptr);
|
||||
if (code == nullptr) {
|
||||
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
|
||||
pcre2_strerror(err_code).c_str());
|
||||
string_error(streams, L"%ls: %ls\n", argv0, pattern.c_str());
|
||||
string_error(streams, L"%ls: %*ls\n", argv0, static_cast<int>(err_offset), L"^");
|
||||
return;
|
||||
/// Check if a list of capture group names is valid for variables. If any are invalid then report an
|
||||
/// error to \p streams. \return true if all names are valid.
|
||||
static bool validate_capture_group_names(const wcstring_list_t &capture_group_names,
|
||||
io_streams_t &streams) {
|
||||
for (const wcstring &name : capture_group_names) {
|
||||
if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) {
|
||||
streams.err.append_format(
|
||||
L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
this->capture_group_names = get_capture_group_names(code);
|
||||
if (!validate_capture_group_names(streams)) {
|
||||
return;
|
||||
}
|
||||
|
||||
match = pcre2_match_data_create_from_pattern(code, nullptr);
|
||||
assert(match);
|
||||
this->valid_ = true;
|
||||
}
|
||||
|
||||
/// \return the list of capture group names from \p code.
|
||||
static wcstring_list_t get_capture_group_names(const pcre2_code *code) {
|
||||
PCRE2_SPTR name_table;
|
||||
uint32_t name_entry_size;
|
||||
uint32_t name_count;
|
||||
|
||||
pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, &name_table);
|
||||
pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
||||
pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_count);
|
||||
|
||||
struct name_table_entry_t {
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
uint8_t match_index_msb;
|
||||
uint8_t match_index_lsb;
|
||||
#if CHAR_BIT == PCRE2_CODE_UNIT_WIDTH
|
||||
char name[];
|
||||
#else
|
||||
char8_t name[];
|
||||
#endif
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
uint16_t match_index;
|
||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
||||
wchar_t name[];
|
||||
#else
|
||||
char16_t name[];
|
||||
#endif
|
||||
#else
|
||||
uint32_t match_index;
|
||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
||||
wchar_t name[];
|
||||
#else
|
||||
char32_t name[];
|
||||
#endif // WCHAR_T_BITS
|
||||
#endif // PCRE2_CODE_UNIT_WIDTH
|
||||
};
|
||||
|
||||
const auto *names = reinterpret_cast<const name_table_entry_t *>(name_table);
|
||||
wcstring_list_t result;
|
||||
result.reserve(name_count);
|
||||
for (uint32_t i = 0; i < name_count; ++i) {
|
||||
const auto &name_entry = names[i * name_entry_size];
|
||||
result.emplace_back(name_entry.name);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Check if our capture group names are valid. If any are invalid then report an error to \p
|
||||
/// streams. \return true if all names are valid.
|
||||
bool validate_capture_group_names(io_streams_t &streams) {
|
||||
for (const wcstring &name : this->capture_group_names) {
|
||||
if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) {
|
||||
// Modification of read-only variables is not allowed
|
||||
streams.err.append_format(
|
||||
L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
~compiled_regex_t() {
|
||||
pcre2_match_data_free(match);
|
||||
pcre2_code_free(code);
|
||||
}
|
||||
|
||||
bool is_valid() const { return this->valid_; }
|
||||
|
||||
private:
|
||||
bool valid_{false};
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
class pcre2_matcher_t final : public string_matcher_t {
|
||||
const wchar_t *argv0;
|
||||
compiled_regex_t regex;
|
||||
parser_t &parser;
|
||||
using regex_t = re::regex_t;
|
||||
using match_range_t = re::match_range_t;
|
||||
regex_t regex;
|
||||
env_stack_t &vars;
|
||||
bool imported_vars = false;
|
||||
|
||||
enum class match_result_t {
|
||||
pcre2_error = -1,
|
||||
no_match = 0,
|
||||
match = 1,
|
||||
};
|
||||
|
||||
match_result_t report_match(const wcstring &arg, int pcre2_rc) {
|
||||
if (pcre2_rc == PCRE2_ERROR_NOMATCH) {
|
||||
match_result_t report_match(const wcstring &arg, const re::match_data_t &md,
|
||||
maybe_t<match_range_t> mrange, io_streams_t &streams) const {
|
||||
if (!mrange.has_value()) {
|
||||
if (opts.invert_match && !opts.quiet) {
|
||||
if (opts.index) {
|
||||
streams.out.append_format(L"1 %lu\n", arg.length());
|
||||
@@ -1060,14 +964,6 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
}
|
||||
|
||||
return opts.invert_match ? match_result_t::match : match_result_t::no_match;
|
||||
} else if (pcre2_rc < 0) {
|
||||
string_error(streams, _(L"%ls: Regular expression match error: %ls\n"), argv0,
|
||||
pcre2_strerror(pcre2_rc).c_str());
|
||||
return match_result_t::pcre2_error;
|
||||
} else if (pcre2_rc == 0) {
|
||||
// The output vector wasn't big enough. Should not happen.
|
||||
string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0);
|
||||
return match_result_t::pcre2_error;
|
||||
} else if (opts.invert_match) {
|
||||
return match_result_t::no_match;
|
||||
}
|
||||
@@ -1077,18 +973,15 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
streams.out.push_back(L'\n');
|
||||
}
|
||||
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
|
||||
// If we have groups-only, we skip the first match, which is the full one.
|
||||
for (int j = (opts.entire || opts.groups_only ? 1 : 0); j < pcre2_rc; j++) {
|
||||
PCRE2_SIZE begin = ovector[2 * j];
|
||||
PCRE2_SIZE end = ovector[2 * j + 1];
|
||||
|
||||
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && !opts.quiet) {
|
||||
size_t group_count = md.matched_capture_group_count();
|
||||
for (size_t j = (opts.entire || opts.groups_only ? 1 : 0); j < group_count; j++) {
|
||||
maybe_t<match_range_t> cg = this->regex.group(md, j);
|
||||
if (cg.has_value() && !opts.quiet) {
|
||||
if (opts.index) {
|
||||
streams.out.append_format(L"%lu %lu", (begin + 1), (end - begin));
|
||||
} else if (end > begin) {
|
||||
// May have end < begin if \K is used.
|
||||
streams.out.append(arg.substr(begin, end - begin));
|
||||
streams.out.append_format(L"%lu %lu", cg->begin + 1, cg->end - cg->begin);
|
||||
} else {
|
||||
streams.out.append(arg.substr(cg->begin, cg->end - cg->begin));
|
||||
}
|
||||
streams.out.push_back(L'\n');
|
||||
}
|
||||
@@ -1099,62 +992,37 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
|
||||
class regex_importer_t {
|
||||
private:
|
||||
// map from group name to its matched substrings.
|
||||
std::map<wcstring, wcstring_list_t> matches_;
|
||||
env_stack_t &vars_;
|
||||
const wcstring &haystack_;
|
||||
const compiled_regex_t ®ex_;
|
||||
const re::regex_t ®ex_;
|
||||
const bool all_flag_;
|
||||
bool do_import_{false};
|
||||
|
||||
public:
|
||||
regex_importer_t(env_stack_t &vars, const wcstring &haystack, const compiled_regex_t ®ex,
|
||||
bool all_flag)
|
||||
: vars_(vars), haystack_(haystack), regex_(regex), all_flag_(all_flag) {
|
||||
for (const wcstring &name : regex_.capture_group_names) {
|
||||
regex_importer_t(env_stack_t &vars, const re::regex_t ®ex, bool all_flag)
|
||||
: vars_(vars), regex_(regex), all_flag_(all_flag) {
|
||||
for (const wcstring &name : regex_.capture_group_names()) {
|
||||
matches_.emplace(name, wcstring_list_t{});
|
||||
}
|
||||
}
|
||||
|
||||
/// This member function should be called each time a match is found
|
||||
void import_vars() {
|
||||
void import_vars(const re::match_data_t &md, const wcstring &subject) {
|
||||
do_import_ = true;
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex_.match);
|
||||
for (auto &kv : matches_) {
|
||||
const auto &name = kv.first;
|
||||
const wcstring &name = kv.first;
|
||||
wcstring_list_t &vals = kv.second;
|
||||
|
||||
// A named group may actually correspond to multiple group numbers, each of which
|
||||
// might have to be enumerated.
|
||||
PCRE2_SPTR first = nullptr;
|
||||
PCRE2_SPTR last = nullptr;
|
||||
int entry_size = pcre2_substring_nametable_scan(
|
||||
regex_.code, (PCRE2_SPTR)(name.c_str()), &first, &last);
|
||||
if (entry_size <= 0) {
|
||||
FLOGF(warning, L"PCRE2 failure retrieving named matches");
|
||||
continue;
|
||||
}
|
||||
|
||||
bool value_found = false;
|
||||
for (const auto *group_ptr = first; group_ptr <= last; group_ptr += entry_size) {
|
||||
int group_num = group_ptr[0];
|
||||
|
||||
PCRE2_SIZE *capture = ovector + (2 * group_num);
|
||||
PCRE2_SIZE begin = capture[0];
|
||||
PCRE2_SIZE end = capture[1];
|
||||
|
||||
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && end >= begin) {
|
||||
vals.push_back(haystack_.substr(begin, end - begin));
|
||||
value_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If there are multiple named groups and --all was used, we need to ensure that the
|
||||
// indexes are always in sync between the variables. If an optional named group
|
||||
// didn't match but its brethren did, we need to make sure to put *something* in the
|
||||
// resulting array, and unfortunately fish doesn't support empty/null members so
|
||||
// we're going to have to use an empty string as the sentinel value.
|
||||
if (!value_found && all_flag_) {
|
||||
// If there are multiple named groups and --all was used, we need to ensure that
|
||||
// the indexes are always in sync between the variables. If an optional named
|
||||
// group didn't match but its brethren did, we need to make sure to put
|
||||
// *something* in the resulting array, and unfortunately fish doesn't support
|
||||
// empty/null members so we're going to have to use an empty string as the
|
||||
// sentinel value.
|
||||
if (maybe_t<wcstring> capture = regex_.substring_for_group(md, name, subject)) {
|
||||
vals.push_back(capture.acquire());
|
||||
} else if (all_flag_) {
|
||||
vals.emplace_back();
|
||||
}
|
||||
}
|
||||
@@ -1171,37 +1039,26 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
};
|
||||
|
||||
public:
|
||||
pcre2_matcher_t(const wchar_t *argv0_, const wcstring &pattern, const options_t &opts,
|
||||
io_streams_t &streams, parser_t &parser_)
|
||||
: string_matcher_t(opts, streams),
|
||||
argv0(argv0_),
|
||||
regex(argv0_, pattern, opts.ignore_case, streams),
|
||||
parser(parser_) {}
|
||||
pcre2_matcher_t(regex_t regex, const options_t &opts, env_stack_t &vars)
|
||||
: string_matcher_t(opts), regex(std::move(regex)), vars(vars) {}
|
||||
|
||||
~pcre2_matcher_t() override = default;
|
||||
|
||||
bool report_matches(const wcstring &arg) override {
|
||||
// A return value of true means all is well (even if no matches were found), false indicates
|
||||
// an unrecoverable error.
|
||||
assert(regex.code && "report_matches should only be called if the regex was valid");
|
||||
bool report_matches(const wcstring &arg, io_streams_t &streams) override {
|
||||
using namespace re;
|
||||
regex_importer_t var_importer(vars, this->regex, opts.all);
|
||||
|
||||
regex_importer_t var_importer(this->parser.vars(), arg, this->regex, opts.all);
|
||||
|
||||
// See pcre2demo.c for an explanation of this logic.
|
||||
PCRE2_SIZE arglen = arg.length();
|
||||
auto rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0,
|
||||
regex.match, nullptr));
|
||||
match_data_t md = this->regex.prepare();
|
||||
auto rc = report_match(arg, md, this->regex.match(md, arg), streams);
|
||||
|
||||
// We only import variables for the *first matching argument*
|
||||
bool do_var_import = (rc == match_result_t::match && !imported_vars);
|
||||
if (do_var_import) {
|
||||
var_importer.import_vars();
|
||||
var_importer.import_vars(md, arg);
|
||||
imported_vars = true;
|
||||
}
|
||||
|
||||
switch (rc) {
|
||||
case match_result_t::pcre2_error:
|
||||
return false;
|
||||
case match_result_t::no_match:
|
||||
return true;
|
||||
case match_result_t::match:
|
||||
@@ -1211,31 +1068,12 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
if (opts.invert_match) return true;
|
||||
|
||||
// Report any additional matches.
|
||||
for (auto *ovector = pcre2_get_ovector_pointer(regex.match); opts.all; total_matched++) {
|
||||
uint32_t options = 0;
|
||||
PCRE2_SIZE offset = ovector[1]; // start at end of previous match
|
||||
|
||||
if (ovector[0] == ovector[1]) {
|
||||
if (ovector[0] == arglen) break;
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
|
||||
rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
|
||||
options, regex.match, nullptr));
|
||||
|
||||
if (rc == match_result_t::pcre2_error) {
|
||||
// This shouldn't happen as we've already validated the regex above
|
||||
return false;
|
||||
}
|
||||
|
||||
// Call import_vars() before modifying the ovector
|
||||
if (rc == match_result_t::match && do_var_import) {
|
||||
var_importer.import_vars();
|
||||
}
|
||||
|
||||
if (rc == match_result_t::no_match) {
|
||||
if (options == 0 /* all matches found now */) break;
|
||||
ovector[1] = offset + 1;
|
||||
if (opts.all) {
|
||||
while (auto mr = this->regex.match(md, arg)) {
|
||||
auto rc = this->report_match(arg, md, mr, streams);
|
||||
if (rc == match_result_t::match && do_var_import) {
|
||||
var_importer.import_vars(md, arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -1244,12 +1082,10 @@ class pcre2_matcher_t final : public string_matcher_t {
|
||||
/// Override to clear our capture variables if we had no match.
|
||||
void clear_capture_vars() override {
|
||||
assert(!imported_vars && "Should not already have imported variables");
|
||||
for (const wcstring &name : regex.capture_group_names) {
|
||||
parser.vars().set_empty(name, ENV_DEFAULT);
|
||||
for (const wcstring &name : regex.capture_group_names()) {
|
||||
vars.set_empty(name, ENV_DEFAULT);
|
||||
}
|
||||
}
|
||||
|
||||
bool is_valid() const override { return regex.is_valid(); }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@@ -1289,19 +1125,24 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const
|
||||
}
|
||||
|
||||
std::unique_ptr<string_matcher_t> matcher;
|
||||
if (opts.regex) {
|
||||
matcher = make_unique<pcre2_matcher_t>(cmd, pattern, opts, streams, parser);
|
||||
if (!opts.regex) {
|
||||
// Globs cannot fail.
|
||||
matcher = make_unique<wildcard_matcher_t>(pattern, opts);
|
||||
} else {
|
||||
matcher = make_unique<wildcard_matcher_t>(cmd, pattern, opts, streams);
|
||||
}
|
||||
if (!matcher->is_valid()) {
|
||||
// An error will have been printed by the constructor.
|
||||
return STATUS_INVALID_ARGS;
|
||||
// Compile the pattern as regex and validate capture group names as variables; both may
|
||||
// fail. Note both try_compile_regex and validate_capture_group_names print an error on
|
||||
// failure.
|
||||
auto re = try_compile_regex(pattern, opts, cmd, streams);
|
||||
if (!re || !validate_capture_group_names(re->capture_group_names(), streams)) {
|
||||
return STATUS_INVALID_ARGS;
|
||||
}
|
||||
matcher = make_unique<pcre2_matcher_t>(re.acquire(), opts, parser.vars());
|
||||
}
|
||||
|
||||
assert(matcher && "Should have a matcher");
|
||||
arg_iterator_t aiter(argv, optind, streams);
|
||||
while (const wcstring *arg = aiter.nextstr()) {
|
||||
if (!matcher->report_matches(*arg)) {
|
||||
if (!matcher->report_matches(*arg, streams)) {
|
||||
return STATUS_INVALID_ARGS;
|
||||
}
|
||||
if (opts.quiet && matcher->match_count() > 0) return STATUS_CMD_OK;
|
||||
@@ -1428,14 +1269,13 @@ static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
|
||||
}
|
||||
|
||||
class regex_replacer_t : public string_replacer_t {
|
||||
compiled_regex_t regex;
|
||||
re::regex_t regex;
|
||||
maybe_t<wcstring> replacement;
|
||||
|
||||
public:
|
||||
regex_replacer_t(const wchar_t *argv0, const wcstring &pattern, const wcstring &replacement_,
|
||||
regex_replacer_t(const wchar_t *argv0, re::regex_t regex, const wcstring &replacement_,
|
||||
const options_t &opts, io_streams_t &streams)
|
||||
: string_replacer_t(argv0, opts, streams),
|
||||
regex(argv0, pattern, opts.ignore_case, streams) {
|
||||
: string_replacer_t(argv0, opts, streams), regex(std::move(regex)) {
|
||||
if (feature_test(features_t::string_replace_backslash)) {
|
||||
replacement = replacement_;
|
||||
} else {
|
||||
@@ -1486,62 +1326,32 @@ bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline)
|
||||
/// A return value of true means all is well (even if no replacements were performed), false
|
||||
/// indicates an unrecoverable error.
|
||||
bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
|
||||
if (!regex.code) return false; // pcre2_compile() failed
|
||||
using namespace re;
|
||||
if (!replacement) return false; // replacement was an invalid string
|
||||
|
||||
// clang-format off
|
||||
// SUBSTITUTE_OVERFLOW_LENGTH causes pcre to return the needed buffer length if the passed one is to small
|
||||
// SUBSTITUTE_EXTENDED changes how substitution expressions are interpreted (`$` as the special character)
|
||||
// SUBSTITUTE_UNSET_EMPTY treats unmatched capturing groups as empty instead of erroring.
|
||||
// SUBSTITUTE_GLOBAL means more than one substitution happens.
|
||||
// clang-format on
|
||||
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY | (opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0);
|
||||
size_t arglen = arg.length();
|
||||
PCRE2_SIZE bufsize = (arglen == 0) ? 16 : 2 * arglen;
|
||||
auto output = static_cast<wchar_t *>(malloc(sizeof(wchar_t) * bufsize));
|
||||
int pcre2_rc;
|
||||
PCRE2_SIZE outlen = bufsize;
|
||||
sub_flags_t sflags{};
|
||||
sflags.global = opts.all;
|
||||
sflags.extended = true;
|
||||
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
assert(output);
|
||||
re_error_t error{};
|
||||
int repl_count{};
|
||||
maybe_t<wcstring> result =
|
||||
this->regex.substitute(arg, *replacement, sflags, 0, &error, &repl_count);
|
||||
|
||||
pcre2_rc = pcre2_substitute(regex.code, PCRE2_SPTR(arg.c_str()), arglen,
|
||||
0, // start offset
|
||||
options, regex.match,
|
||||
nullptr, // match_data
|
||||
PCRE2_SPTR(replacement->c_str()), replacement->length(),
|
||||
reinterpret_cast<PCRE2_UCHAR *>(output), &outlen);
|
||||
|
||||
if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {
|
||||
done = true;
|
||||
} else {
|
||||
bufsize = outlen;
|
||||
auto new_output = static_cast<wchar_t *>(realloc(output, sizeof(wchar_t) * bufsize));
|
||||
if (new_output) output = new_output;
|
||||
}
|
||||
}
|
||||
|
||||
bool rc = true;
|
||||
if (pcre2_rc < 0) {
|
||||
if (!result) {
|
||||
string_error(streams, _(L"%ls: Regular expression substitute error: %ls\n"), argv0,
|
||||
pcre2_strerror(pcre2_rc).c_str());
|
||||
rc = false;
|
||||
error.message().c_str());
|
||||
} else {
|
||||
wcstring outstr(output, outlen);
|
||||
bool replacement_occurred = pcre2_rc > 0;
|
||||
bool replacement_occurred = repl_count > 0;
|
||||
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
|
||||
streams.out.append(outstr);
|
||||
streams.out.append(*result);
|
||||
if (want_newline) {
|
||||
streams.out.append(L'\n');
|
||||
}
|
||||
}
|
||||
total_replaced += pcre2_rc;
|
||||
total_replaced += repl_count;
|
||||
}
|
||||
|
||||
free(output);
|
||||
return rc;
|
||||
return result.has_value();
|
||||
}
|
||||
|
||||
static int string_replace(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
||||
@@ -1560,7 +1370,13 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con
|
||||
|
||||
std::unique_ptr<string_replacer_t> replacer;
|
||||
if (opts.regex) {
|
||||
replacer = make_unique<regex_replacer_t>(argv[0], pattern, replacement, opts, streams);
|
||||
if (auto re = try_compile_regex(pattern, opts, argv[0], streams)) {
|
||||
replacer =
|
||||
make_unique<regex_replacer_t>(argv[0], re.acquire(), replacement, opts, streams);
|
||||
} else {
|
||||
// try_compile_regex prints an error.
|
||||
return STATUS_INVALID_ARGS;
|
||||
}
|
||||
} else {
|
||||
replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user