mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-06-08 02:31:18 -03:00
Adopt the new re in builtin_string
This switches builtin_string from using PCRE2 directly to using the new re component. This simplifies some code and removes redundancy. No user-visible behavior change expected here.
This commit is contained in:
@@ -1,10 +1,6 @@
|
|||||||
// Implementation of the string builtin.
|
// Implementation of the string builtin.
|
||||||
#include "config.h" // IWYU pragma: keep
|
#include "config.h" // IWYU pragma: keep
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
|
|
||||||
#ifdef _WIN32
|
|
||||||
#define PCRE2_STATIC
|
|
||||||
#endif
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
@@ -29,12 +25,12 @@
|
|||||||
#include "../future_feature_flags.h"
|
#include "../future_feature_flags.h"
|
||||||
#include "../parse_util.h"
|
#include "../parse_util.h"
|
||||||
#include "../parser.h"
|
#include "../parser.h"
|
||||||
|
#include "../re.h"
|
||||||
#include "../screen.h"
|
#include "../screen.h"
|
||||||
#include "../wcstringutil.h"
|
#include "../wcstringutil.h"
|
||||||
#include "../wgetopt.h"
|
#include "../wgetopt.h"
|
||||||
#include "../wildcard.h"
|
#include "../wildcard.h"
|
||||||
#include "../wutil.h" // IWYU pragma: keep
|
#include "../wutil.h" // IWYU pragma: keep
|
||||||
#include "pcre2.h"
|
|
||||||
|
|
||||||
// How many bytes we read() at once.
|
// How many bytes we read() at once.
|
||||||
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
|
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
|
||||||
@@ -852,18 +848,15 @@ namespace {
|
|||||||
class string_matcher_t {
|
class string_matcher_t {
|
||||||
protected:
|
protected:
|
||||||
const options_t opts;
|
const options_t opts;
|
||||||
io_streams_t &streams;
|
|
||||||
int total_matched{0};
|
int total_matched{0};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
string_matcher_t(options_t opts_, io_streams_t &streams_)
|
explicit string_matcher_t(const options_t &opts_) : opts(opts_) {}
|
||||||
: opts(std::move(opts_)), streams(streams_) {}
|
|
||||||
|
|
||||||
virtual ~string_matcher_t() = default;
|
virtual ~string_matcher_t() = default;
|
||||||
virtual bool report_matches(const wcstring &arg) = 0;
|
virtual bool report_matches(const wcstring &arg, io_streams_t &streams) = 0;
|
||||||
int match_count() const { return total_matched; }
|
int match_count() const { return total_matched; }
|
||||||
|
|
||||||
virtual bool is_valid() const = 0;
|
|
||||||
virtual void clear_capture_vars() {}
|
virtual void clear_capture_vars() {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -872,9 +865,8 @@ class wildcard_matcher_t final : public string_matcher_t {
|
|||||||
wcstring wcpattern;
|
wcstring wcpattern;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
wildcard_matcher_t(const wchar_t * /*argv0*/, const wcstring &pattern, const options_t &opts,
|
wildcard_matcher_t(const wcstring &pattern, const options_t &opts)
|
||||||
io_streams_t &streams)
|
: string_matcher_t(opts), wcpattern(parse_util_unescape_wildcards(pattern)) {
|
||||||
: string_matcher_t(opts, streams), wcpattern(parse_util_unescape_wildcards(pattern)) {
|
|
||||||
if (opts.ignore_case) {
|
if (opts.ignore_case) {
|
||||||
wcpattern = wcstolower(std::move(wcpattern));
|
wcpattern = wcstolower(std::move(wcpattern));
|
||||||
}
|
}
|
||||||
@@ -891,7 +883,7 @@ class wildcard_matcher_t final : public string_matcher_t {
|
|||||||
|
|
||||||
~wildcard_matcher_t() override = default;
|
~wildcard_matcher_t() override = default;
|
||||||
|
|
||||||
bool report_matches(const wcstring &arg) override {
|
bool report_matches(const wcstring &arg, io_streams_t &streams) override {
|
||||||
// Note: --all is a no-op for glob matching since the pattern is always matched
|
// Note: --all is a no-op for glob matching since the pattern is always matched
|
||||||
// against the entire argument.
|
// against the entire argument.
|
||||||
bool match;
|
bool match;
|
||||||
@@ -915,141 +907,53 @@ class wildcard_matcher_t final : public string_matcher_t {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_valid() const override { return true; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static wcstring pcre2_strerror(int err_code) {
|
// Compile a regex, printing an error on failure.
|
||||||
wchar_t buf[128];
|
static maybe_t<re::regex_t> try_compile_regex(const wcstring &pattern, const options_t &opts,
|
||||||
pcre2_get_error_message(err_code, reinterpret_cast<PCRE2_UCHAR *>(buf),
|
const wchar_t *cmd, io_streams_t &streams) {
|
||||||
sizeof(buf) / sizeof(wchar_t));
|
re::re_error_t error{};
|
||||||
return buf;
|
re::flags_t flags{};
|
||||||
|
flags.icase = opts.ignore_case;
|
||||||
|
auto re = re::regex_t::try_compile(pattern, flags, &error);
|
||||||
|
if (!re) {
|
||||||
|
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), cmd,
|
||||||
|
error.message().c_str());
|
||||||
|
string_error(streams, L"%ls: %ls\n", cmd, pattern.c_str());
|
||||||
|
string_error(streams, L"%ls: %*ls\n", cmd, static_cast<int>(error.offset), L"^");
|
||||||
|
}
|
||||||
|
return re;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct compiled_regex_t : noncopyable_t {
|
/// Check if a list of capture group names is valid for variables. If any are invalid then report an
|
||||||
pcre2_code *code{nullptr};
|
/// error to \p streams. \return true if all names are valid.
|
||||||
pcre2_match_data *match{nullptr};
|
static bool validate_capture_group_names(const wcstring_list_t &capture_group_names,
|
||||||
|
io_streams_t &streams) {
|
||||||
// The list of named capture groups.
|
for (const wcstring &name : capture_group_names) {
|
||||||
wcstring_list_t capture_group_names;
|
if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) {
|
||||||
|
streams.err.append_format(
|
||||||
compiled_regex_t(const wchar_t *argv0, const wcstring &pattern, bool ignore_case,
|
L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str());
|
||||||
io_streams_t &streams) {
|
return false;
|
||||||
// Disable some sequences that can lead to security problems.
|
|
||||||
uint32_t options = PCRE2_NEVER_UTF;
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH < 32
|
|
||||||
options |= PCRE2_NEVER_BACKSLASH_C;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int err_code = 0;
|
|
||||||
PCRE2_SIZE err_offset = 0;
|
|
||||||
|
|
||||||
code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
|
|
||||||
options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset,
|
|
||||||
nullptr);
|
|
||||||
if (code == nullptr) {
|
|
||||||
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
|
|
||||||
pcre2_strerror(err_code).c_str());
|
|
||||||
string_error(streams, L"%ls: %ls\n", argv0, pattern.c_str());
|
|
||||||
string_error(streams, L"%ls: %*ls\n", argv0, static_cast<int>(err_offset), L"^");
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this->capture_group_names = get_capture_group_names(code);
|
|
||||||
if (!validate_capture_group_names(streams)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
match = pcre2_match_data_create_from_pattern(code, nullptr);
|
|
||||||
assert(match);
|
|
||||||
this->valid_ = true;
|
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
/// \return the list of capture group names from \p code.
|
}
|
||||||
static wcstring_list_t get_capture_group_names(const pcre2_code *code) {
|
|
||||||
PCRE2_SPTR name_table;
|
|
||||||
uint32_t name_entry_size;
|
|
||||||
uint32_t name_count;
|
|
||||||
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, &name_table);
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_count);
|
|
||||||
|
|
||||||
struct name_table_entry_t {
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
||||||
uint8_t match_index_msb;
|
|
||||||
uint8_t match_index_lsb;
|
|
||||||
#if CHAR_BIT == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
char name[];
|
|
||||||
#else
|
|
||||||
char8_t name[];
|
|
||||||
#endif
|
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
||||||
uint16_t match_index;
|
|
||||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
wchar_t name[];
|
|
||||||
#else
|
|
||||||
char16_t name[];
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
uint32_t match_index;
|
|
||||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
wchar_t name[];
|
|
||||||
#else
|
|
||||||
char32_t name[];
|
|
||||||
#endif // WCHAR_T_BITS
|
|
||||||
#endif // PCRE2_CODE_UNIT_WIDTH
|
|
||||||
};
|
|
||||||
|
|
||||||
const auto *names = reinterpret_cast<const name_table_entry_t *>(name_table);
|
|
||||||
wcstring_list_t result;
|
|
||||||
result.reserve(name_count);
|
|
||||||
for (uint32_t i = 0; i < name_count; ++i) {
|
|
||||||
const auto &name_entry = names[i * name_entry_size];
|
|
||||||
result.emplace_back(name_entry.name);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if our capture group names are valid. If any are invalid then report an error to \p
|
|
||||||
/// streams. \return true if all names are valid.
|
|
||||||
bool validate_capture_group_names(io_streams_t &streams) {
|
|
||||||
for (const wcstring &name : this->capture_group_names) {
|
|
||||||
if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) {
|
|
||||||
// Modification of read-only variables is not allowed
|
|
||||||
streams.err.append_format(
|
|
||||||
L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
~compiled_regex_t() {
|
|
||||||
pcre2_match_data_free(match);
|
|
||||||
pcre2_code_free(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_valid() const { return this->valid_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool valid_{false};
|
|
||||||
};
|
|
||||||
|
|
||||||
class pcre2_matcher_t final : public string_matcher_t {
|
class pcre2_matcher_t final : public string_matcher_t {
|
||||||
const wchar_t *argv0;
|
using regex_t = re::regex_t;
|
||||||
compiled_regex_t regex;
|
using match_range_t = re::match_range_t;
|
||||||
parser_t &parser;
|
regex_t regex;
|
||||||
|
env_stack_t &vars;
|
||||||
bool imported_vars = false;
|
bool imported_vars = false;
|
||||||
|
|
||||||
enum class match_result_t {
|
enum class match_result_t {
|
||||||
pcre2_error = -1,
|
|
||||||
no_match = 0,
|
no_match = 0,
|
||||||
match = 1,
|
match = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
match_result_t report_match(const wcstring &arg, int pcre2_rc) {
|
match_result_t report_match(const wcstring &arg, const re::match_data_t &md,
|
||||||
if (pcre2_rc == PCRE2_ERROR_NOMATCH) {
|
maybe_t<match_range_t> mrange, io_streams_t &streams) const {
|
||||||
|
if (!mrange.has_value()) {
|
||||||
if (opts.invert_match && !opts.quiet) {
|
if (opts.invert_match && !opts.quiet) {
|
||||||
if (opts.index) {
|
if (opts.index) {
|
||||||
streams.out.append_format(L"1 %lu\n", arg.length());
|
streams.out.append_format(L"1 %lu\n", arg.length());
|
||||||
@@ -1060,14 +964,6 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return opts.invert_match ? match_result_t::match : match_result_t::no_match;
|
return opts.invert_match ? match_result_t::match : match_result_t::no_match;
|
||||||
} else if (pcre2_rc < 0) {
|
|
||||||
string_error(streams, _(L"%ls: Regular expression match error: %ls\n"), argv0,
|
|
||||||
pcre2_strerror(pcre2_rc).c_str());
|
|
||||||
return match_result_t::pcre2_error;
|
|
||||||
} else if (pcre2_rc == 0) {
|
|
||||||
// The output vector wasn't big enough. Should not happen.
|
|
||||||
string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0);
|
|
||||||
return match_result_t::pcre2_error;
|
|
||||||
} else if (opts.invert_match) {
|
} else if (opts.invert_match) {
|
||||||
return match_result_t::no_match;
|
return match_result_t::no_match;
|
||||||
}
|
}
|
||||||
@@ -1077,18 +973,15 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
streams.out.push_back(L'\n');
|
streams.out.push_back(L'\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
|
|
||||||
// If we have groups-only, we skip the first match, which is the full one.
|
// If we have groups-only, we skip the first match, which is the full one.
|
||||||
for (int j = (opts.entire || opts.groups_only ? 1 : 0); j < pcre2_rc; j++) {
|
size_t group_count = md.matched_capture_group_count();
|
||||||
PCRE2_SIZE begin = ovector[2 * j];
|
for (size_t j = (opts.entire || opts.groups_only ? 1 : 0); j < group_count; j++) {
|
||||||
PCRE2_SIZE end = ovector[2 * j + 1];
|
maybe_t<match_range_t> cg = this->regex.group(md, j);
|
||||||
|
if (cg.has_value() && !opts.quiet) {
|
||||||
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && !opts.quiet) {
|
|
||||||
if (opts.index) {
|
if (opts.index) {
|
||||||
streams.out.append_format(L"%lu %lu", (begin + 1), (end - begin));
|
streams.out.append_format(L"%lu %lu", cg->begin + 1, cg->end - cg->begin);
|
||||||
} else if (end > begin) {
|
} else {
|
||||||
// May have end < begin if \K is used.
|
streams.out.append(arg.substr(cg->begin, cg->end - cg->begin));
|
||||||
streams.out.append(arg.substr(begin, end - begin));
|
|
||||||
}
|
}
|
||||||
streams.out.push_back(L'\n');
|
streams.out.push_back(L'\n');
|
||||||
}
|
}
|
||||||
@@ -1099,62 +992,37 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
|
|
||||||
class regex_importer_t {
|
class regex_importer_t {
|
||||||
private:
|
private:
|
||||||
|
// map from group name to its matched substrings.
|
||||||
std::map<wcstring, wcstring_list_t> matches_;
|
std::map<wcstring, wcstring_list_t> matches_;
|
||||||
env_stack_t &vars_;
|
env_stack_t &vars_;
|
||||||
const wcstring &haystack_;
|
const re::regex_t ®ex_;
|
||||||
const compiled_regex_t ®ex_;
|
|
||||||
const bool all_flag_;
|
const bool all_flag_;
|
||||||
bool do_import_{false};
|
bool do_import_{false};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
regex_importer_t(env_stack_t &vars, const wcstring &haystack, const compiled_regex_t ®ex,
|
regex_importer_t(env_stack_t &vars, const re::regex_t ®ex, bool all_flag)
|
||||||
bool all_flag)
|
: vars_(vars), regex_(regex), all_flag_(all_flag) {
|
||||||
: vars_(vars), haystack_(haystack), regex_(regex), all_flag_(all_flag) {
|
for (const wcstring &name : regex_.capture_group_names()) {
|
||||||
for (const wcstring &name : regex_.capture_group_names) {
|
|
||||||
matches_.emplace(name, wcstring_list_t{});
|
matches_.emplace(name, wcstring_list_t{});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This member function should be called each time a match is found
|
/// This member function should be called each time a match is found
|
||||||
void import_vars() {
|
void import_vars(const re::match_data_t &md, const wcstring &subject) {
|
||||||
do_import_ = true;
|
do_import_ = true;
|
||||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex_.match);
|
|
||||||
for (auto &kv : matches_) {
|
for (auto &kv : matches_) {
|
||||||
const auto &name = kv.first;
|
const wcstring &name = kv.first;
|
||||||
wcstring_list_t &vals = kv.second;
|
wcstring_list_t &vals = kv.second;
|
||||||
|
|
||||||
// A named group may actually correspond to multiple group numbers, each of which
|
// If there are multiple named groups and --all was used, we need to ensure that
|
||||||
// might have to be enumerated.
|
// the indexes are always in sync between the variables. If an optional named
|
||||||
PCRE2_SPTR first = nullptr;
|
// group didn't match but its brethren did, we need to make sure to put
|
||||||
PCRE2_SPTR last = nullptr;
|
// *something* in the resulting array, and unfortunately fish doesn't support
|
||||||
int entry_size = pcre2_substring_nametable_scan(
|
// empty/null members so we're going to have to use an empty string as the
|
||||||
regex_.code, (PCRE2_SPTR)(name.c_str()), &first, &last);
|
// sentinel value.
|
||||||
if (entry_size <= 0) {
|
if (maybe_t<wcstring> capture = regex_.substring_for_group(md, name, subject)) {
|
||||||
FLOGF(warning, L"PCRE2 failure retrieving named matches");
|
vals.push_back(capture.acquire());
|
||||||
continue;
|
} else if (all_flag_) {
|
||||||
}
|
|
||||||
|
|
||||||
bool value_found = false;
|
|
||||||
for (const auto *group_ptr = first; group_ptr <= last; group_ptr += entry_size) {
|
|
||||||
int group_num = group_ptr[0];
|
|
||||||
|
|
||||||
PCRE2_SIZE *capture = ovector + (2 * group_num);
|
|
||||||
PCRE2_SIZE begin = capture[0];
|
|
||||||
PCRE2_SIZE end = capture[1];
|
|
||||||
|
|
||||||
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && end >= begin) {
|
|
||||||
vals.push_back(haystack_.substr(begin, end - begin));
|
|
||||||
value_found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there are multiple named groups and --all was used, we need to ensure that the
|
|
||||||
// indexes are always in sync between the variables. If an optional named group
|
|
||||||
// didn't match but its brethren did, we need to make sure to put *something* in the
|
|
||||||
// resulting array, and unfortunately fish doesn't support empty/null members so
|
|
||||||
// we're going to have to use an empty string as the sentinel value.
|
|
||||||
if (!value_found && all_flag_) {
|
|
||||||
vals.emplace_back();
|
vals.emplace_back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1171,37 +1039,26 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
pcre2_matcher_t(const wchar_t *argv0_, const wcstring &pattern, const options_t &opts,
|
pcre2_matcher_t(regex_t regex, const options_t &opts, env_stack_t &vars)
|
||||||
io_streams_t &streams, parser_t &parser_)
|
: string_matcher_t(opts), regex(std::move(regex)), vars(vars) {}
|
||||||
: string_matcher_t(opts, streams),
|
|
||||||
argv0(argv0_),
|
|
||||||
regex(argv0_, pattern, opts.ignore_case, streams),
|
|
||||||
parser(parser_) {}
|
|
||||||
|
|
||||||
~pcre2_matcher_t() override = default;
|
~pcre2_matcher_t() override = default;
|
||||||
|
|
||||||
bool report_matches(const wcstring &arg) override {
|
bool report_matches(const wcstring &arg, io_streams_t &streams) override {
|
||||||
// A return value of true means all is well (even if no matches were found), false indicates
|
using namespace re;
|
||||||
// an unrecoverable error.
|
regex_importer_t var_importer(vars, this->regex, opts.all);
|
||||||
assert(regex.code && "report_matches should only be called if the regex was valid");
|
|
||||||
|
|
||||||
regex_importer_t var_importer(this->parser.vars(), arg, this->regex, opts.all);
|
match_data_t md = this->regex.prepare();
|
||||||
|
auto rc = report_match(arg, md, this->regex.match(md, arg), streams);
|
||||||
// See pcre2demo.c for an explanation of this logic.
|
|
||||||
PCRE2_SIZE arglen = arg.length();
|
|
||||||
auto rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0,
|
|
||||||
regex.match, nullptr));
|
|
||||||
|
|
||||||
// We only import variables for the *first matching argument*
|
// We only import variables for the *first matching argument*
|
||||||
bool do_var_import = (rc == match_result_t::match && !imported_vars);
|
bool do_var_import = (rc == match_result_t::match && !imported_vars);
|
||||||
if (do_var_import) {
|
if (do_var_import) {
|
||||||
var_importer.import_vars();
|
var_importer.import_vars(md, arg);
|
||||||
imported_vars = true;
|
imported_vars = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (rc) {
|
switch (rc) {
|
||||||
case match_result_t::pcre2_error:
|
|
||||||
return false;
|
|
||||||
case match_result_t::no_match:
|
case match_result_t::no_match:
|
||||||
return true;
|
return true;
|
||||||
case match_result_t::match:
|
case match_result_t::match:
|
||||||
@@ -1211,31 +1068,12 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
if (opts.invert_match) return true;
|
if (opts.invert_match) return true;
|
||||||
|
|
||||||
// Report any additional matches.
|
// Report any additional matches.
|
||||||
for (auto *ovector = pcre2_get_ovector_pointer(regex.match); opts.all; total_matched++) {
|
if (opts.all) {
|
||||||
uint32_t options = 0;
|
while (auto mr = this->regex.match(md, arg)) {
|
||||||
PCRE2_SIZE offset = ovector[1]; // start at end of previous match
|
auto rc = this->report_match(arg, md, mr, streams);
|
||||||
|
if (rc == match_result_t::match && do_var_import) {
|
||||||
if (ovector[0] == ovector[1]) {
|
var_importer.import_vars(md, arg);
|
||||||
if (ovector[0] == arglen) break;
|
}
|
||||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
|
|
||||||
options, regex.match, nullptr));
|
|
||||||
|
|
||||||
if (rc == match_result_t::pcre2_error) {
|
|
||||||
// This shouldn't happen as we've already validated the regex above
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call import_vars() before modifying the ovector
|
|
||||||
if (rc == match_result_t::match && do_var_import) {
|
|
||||||
var_importer.import_vars();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rc == match_result_t::no_match) {
|
|
||||||
if (options == 0 /* all matches found now */) break;
|
|
||||||
ovector[1] = offset + 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@@ -1244,12 +1082,10 @@ class pcre2_matcher_t final : public string_matcher_t {
|
|||||||
/// Override to clear our capture variables if we had no match.
|
/// Override to clear our capture variables if we had no match.
|
||||||
void clear_capture_vars() override {
|
void clear_capture_vars() override {
|
||||||
assert(!imported_vars && "Should not already have imported variables");
|
assert(!imported_vars && "Should not already have imported variables");
|
||||||
for (const wcstring &name : regex.capture_group_names) {
|
for (const wcstring &name : regex.capture_group_names()) {
|
||||||
parser.vars().set_empty(name, ENV_DEFAULT);
|
vars.set_empty(name, ENV_DEFAULT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_valid() const override { return regex.is_valid(); }
|
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
@@ -1289,19 +1125,24 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<string_matcher_t> matcher;
|
std::unique_ptr<string_matcher_t> matcher;
|
||||||
if (opts.regex) {
|
if (!opts.regex) {
|
||||||
matcher = make_unique<pcre2_matcher_t>(cmd, pattern, opts, streams, parser);
|
// Globs cannot fail.
|
||||||
|
matcher = make_unique<wildcard_matcher_t>(pattern, opts);
|
||||||
} else {
|
} else {
|
||||||
matcher = make_unique<wildcard_matcher_t>(cmd, pattern, opts, streams);
|
// Compile the pattern as regex and validate capture group names as variables; both may
|
||||||
}
|
// fail. Note both try_compile_regex and validate_capture_group_names print an error on
|
||||||
if (!matcher->is_valid()) {
|
// failure.
|
||||||
// An error will have been printed by the constructor.
|
auto re = try_compile_regex(pattern, opts, cmd, streams);
|
||||||
return STATUS_INVALID_ARGS;
|
if (!re || !validate_capture_group_names(re->capture_group_names(), streams)) {
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
matcher = make_unique<pcre2_matcher_t>(re.acquire(), opts, parser.vars());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(matcher && "Should have a matcher");
|
||||||
arg_iterator_t aiter(argv, optind, streams);
|
arg_iterator_t aiter(argv, optind, streams);
|
||||||
while (const wcstring *arg = aiter.nextstr()) {
|
while (const wcstring *arg = aiter.nextstr()) {
|
||||||
if (!matcher->report_matches(*arg)) {
|
if (!matcher->report_matches(*arg, streams)) {
|
||||||
return STATUS_INVALID_ARGS;
|
return STATUS_INVALID_ARGS;
|
||||||
}
|
}
|
||||||
if (opts.quiet && matcher->match_count() > 0) return STATUS_CMD_OK;
|
if (opts.quiet && matcher->match_count() > 0) return STATUS_CMD_OK;
|
||||||
@@ -1428,14 +1269,13 @@ static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class regex_replacer_t : public string_replacer_t {
|
class regex_replacer_t : public string_replacer_t {
|
||||||
compiled_regex_t regex;
|
re::regex_t regex;
|
||||||
maybe_t<wcstring> replacement;
|
maybe_t<wcstring> replacement;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
regex_replacer_t(const wchar_t *argv0, const wcstring &pattern, const wcstring &replacement_,
|
regex_replacer_t(const wchar_t *argv0, re::regex_t regex, const wcstring &replacement_,
|
||||||
const options_t &opts, io_streams_t &streams)
|
const options_t &opts, io_streams_t &streams)
|
||||||
: string_replacer_t(argv0, opts, streams),
|
: string_replacer_t(argv0, opts, streams), regex(std::move(regex)) {
|
||||||
regex(argv0, pattern, opts.ignore_case, streams) {
|
|
||||||
if (feature_test(features_t::string_replace_backslash)) {
|
if (feature_test(features_t::string_replace_backslash)) {
|
||||||
replacement = replacement_;
|
replacement = replacement_;
|
||||||
} else {
|
} else {
|
||||||
@@ -1486,62 +1326,32 @@ bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline)
|
|||||||
/// A return value of true means all is well (even if no replacements were performed), false
|
/// A return value of true means all is well (even if no replacements were performed), false
|
||||||
/// indicates an unrecoverable error.
|
/// indicates an unrecoverable error.
|
||||||
bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
|
bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
|
||||||
if (!regex.code) return false; // pcre2_compile() failed
|
using namespace re;
|
||||||
if (!replacement) return false; // replacement was an invalid string
|
if (!replacement) return false; // replacement was an invalid string
|
||||||
|
|
||||||
// clang-format off
|
sub_flags_t sflags{};
|
||||||
// SUBSTITUTE_OVERFLOW_LENGTH causes pcre to return the needed buffer length if the passed one is too small
|
sflags.global = opts.all;
|
||||||
// SUBSTITUTE_EXTENDED changes how substitution expressions are interpreted (`$` as the special character)
|
sflags.extended = true;
|
||||||
// SUBSTITUTE_UNSET_EMPTY treats unmatched capturing groups as empty instead of erroring.
|
|
||||||
// SUBSTITUTE_GLOBAL means more than one substitution happens.
|
|
||||||
// clang-format on
|
|
||||||
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
|
|
||||||
PCRE2_SUBSTITUTE_UNSET_EMPTY | (opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0);
|
|
||||||
size_t arglen = arg.length();
|
|
||||||
PCRE2_SIZE bufsize = (arglen == 0) ? 16 : 2 * arglen;
|
|
||||||
auto output = static_cast<wchar_t *>(malloc(sizeof(wchar_t) * bufsize));
|
|
||||||
int pcre2_rc;
|
|
||||||
PCRE2_SIZE outlen = bufsize;
|
|
||||||
|
|
||||||
bool done = false;
|
re_error_t error{};
|
||||||
while (!done) {
|
int repl_count{};
|
||||||
assert(output);
|
maybe_t<wcstring> result =
|
||||||
|
this->regex.substitute(arg, *replacement, sflags, 0, &error, &repl_count);
|
||||||
|
|
||||||
pcre2_rc = pcre2_substitute(regex.code, PCRE2_SPTR(arg.c_str()), arglen,
|
if (!result) {
|
||||||
0, // start offset
|
|
||||||
options, regex.match,
|
|
||||||
nullptr, // match_data
|
|
||||||
PCRE2_SPTR(replacement->c_str()), replacement->length(),
|
|
||||||
reinterpret_cast<PCRE2_UCHAR *>(output), &outlen);
|
|
||||||
|
|
||||||
if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {
|
|
||||||
done = true;
|
|
||||||
} else {
|
|
||||||
bufsize = outlen;
|
|
||||||
auto new_output = static_cast<wchar_t *>(realloc(output, sizeof(wchar_t) * bufsize));
|
|
||||||
if (new_output) output = new_output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool rc = true;
|
|
||||||
if (pcre2_rc < 0) {
|
|
||||||
string_error(streams, _(L"%ls: Regular expression substitute error: %ls\n"), argv0,
|
string_error(streams, _(L"%ls: Regular expression substitute error: %ls\n"), argv0,
|
||||||
pcre2_strerror(pcre2_rc).c_str());
|
error.message().c_str());
|
||||||
rc = false;
|
|
||||||
} else {
|
} else {
|
||||||
wcstring outstr(output, outlen);
|
bool replacement_occurred = repl_count > 0;
|
||||||
bool replacement_occurred = pcre2_rc > 0;
|
|
||||||
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
|
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
|
||||||
streams.out.append(outstr);
|
streams.out.append(*result);
|
||||||
if (want_newline) {
|
if (want_newline) {
|
||||||
streams.out.append(L'\n');
|
streams.out.append(L'\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
total_replaced += pcre2_rc;
|
total_replaced += repl_count;
|
||||||
}
|
}
|
||||||
|
return result.has_value();
|
||||||
free(output);
|
|
||||||
return rc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int string_replace(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
static int string_replace(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
||||||
@@ -1560,7 +1370,13 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con
|
|||||||
|
|
||||||
std::unique_ptr<string_replacer_t> replacer;
|
std::unique_ptr<string_replacer_t> replacer;
|
||||||
if (opts.regex) {
|
if (opts.regex) {
|
||||||
replacer = make_unique<regex_replacer_t>(argv[0], pattern, replacement, opts, streams);
|
if (auto re = try_compile_regex(pattern, opts, argv[0], streams)) {
|
||||||
|
replacer =
|
||||||
|
make_unique<regex_replacer_t>(argv[0], re.acquire(), replacement, opts, streams);
|
||||||
|
} else {
|
||||||
|
// try_compile_regex prints an error.
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams);
|
replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user