mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-26 16:01:15 -03:00
Remove wcstring_tok
wcstring_tok was a funky function which was confusing and used only in one place. Replace it with split_string_tok, which is somewhat simpler.
This commit is contained in:
@@ -594,13 +594,8 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
|
||||
if (!opts.have_delimiter) {
|
||||
// We're using IFS, so tokenize the buffer using each IFS char. This is for backward
|
||||
// compatibility with old versions of fish.
|
||||
wcstring_list_t tokens;
|
||||
|
||||
for (wcstring_range loc = wcstring_tok(buff, opts.delimiter);
|
||||
loc.first != wcstring::npos; loc = wcstring_tok(buff, opts.delimiter, loc)) {
|
||||
tokens.emplace_back(wcstring(buff, loc.first, loc.second));
|
||||
}
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, tokens);
|
||||
wcstring_list_t tokens = split_string_tok(buff, opts.delimiter);
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, std::move(tokens));
|
||||
} else {
|
||||
// We're using a delimiter provided by the user so use the `string split` behavior.
|
||||
wcstring_list_t splits;
|
||||
@@ -614,14 +609,15 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
|
||||
if (!opts.have_delimiter) {
|
||||
// We're using IFS, so tokenize the buffer using each IFS char. This is for backward
|
||||
// compatibility with old versions of fish.
|
||||
wcstring_range loc = wcstring_range(0, 0);
|
||||
// Note the final variable gets any remaining text.
|
||||
wcstring_list_t var_vals = split_string_tok(buff, opts.delimiter, vars_left());
|
||||
size_t val_idx = 0;
|
||||
while (vars_left()) {
|
||||
wcstring substr;
|
||||
loc = wcstring_tok(buff, (vars_left() > 1) ? opts.delimiter : wcstring(), loc);
|
||||
if (loc.first != wcstring::npos) {
|
||||
substr = wcstring(buff, loc.first, loc.second);
|
||||
wcstring val;
|
||||
if (val_idx < var_vals.size()) {
|
||||
val = std::move(var_vals.at(val_idx++));
|
||||
}
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, substr);
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, std::move(val));
|
||||
}
|
||||
} else {
|
||||
// We're using a delimiter provided by the user so use the `string split` behavior.
|
||||
|
||||
@@ -5430,31 +5430,26 @@ static void test_highlighting() {
|
||||
vars.remove(L"VARIABLE_IN_COMMAND2", ENV_DEFAULT);
|
||||
}
|
||||
|
||||
static void test_wcstring_tok() {
|
||||
say(L"Testing wcstring_tok");
|
||||
wcstring buff = L"hello world";
|
||||
wcstring needle = L" \t\n";
|
||||
wcstring_range loc = wcstring_tok(buff, needle);
|
||||
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"hello") {
|
||||
err(L"Wrong results from first wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
|
||||
}
|
||||
loc = wcstring_tok(buff, needle, loc);
|
||||
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") {
|
||||
err(L"Wrong results from second wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
|
||||
}
|
||||
loc = wcstring_tok(buff, needle, loc);
|
||||
if (loc.first != wcstring::npos) {
|
||||
err(L"Wrong results from third wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
|
||||
}
|
||||
static void test_split_string_tok() {
|
||||
say(L"Testing split_string_tok");
|
||||
wcstring_list_t splits;
|
||||
splits = split_string_tok(L" hello \t world", L" \t\n");
|
||||
do_test((splits == wcstring_list_t{L"hello", L"world"}));
|
||||
|
||||
buff = L"hello world";
|
||||
loc = wcstring_tok(buff, needle);
|
||||
// loc is "hello" again
|
||||
loc = wcstring_tok(buff, L"", loc);
|
||||
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") {
|
||||
err(L"Wrong results from wcstring_tok with empty needle: {%zu, %zu}", loc.first,
|
||||
loc.second);
|
||||
}
|
||||
splits = split_string_tok(L" stuff ", wcstring(L" "), 0);
|
||||
do_test((splits == wcstring_list_t{}));
|
||||
|
||||
splits = split_string_tok(L" stuff ", wcstring(L" "), 1);
|
||||
do_test((splits == wcstring_list_t{L" stuff "}));
|
||||
|
||||
splits = split_string_tok(L" hello \t world andstuff ", L" \t\n", 3);
|
||||
do_test((splits == wcstring_list_t{L"hello", L"world", L" andstuff "}));
|
||||
|
||||
// NUL chars are OK.
|
||||
wcstring nullstr = L" hello X world";
|
||||
nullstr.at(nullstr.find(L'X')) = L'\0';
|
||||
splits = split_string_tok(nullstr, wcstring(L" \0", 2));
|
||||
do_test((splits == wcstring_list_t{L"hello", L"world"}));
|
||||
}
|
||||
|
||||
static void test_wwrite_to_fd() {
|
||||
@@ -6521,7 +6516,7 @@ int main(int argc, char **argv) {
|
||||
env_stack_t::principal().set_pwd_from_getcwd();
|
||||
|
||||
if (should_test_function("utility_functions")) test_utility_functions();
|
||||
if (should_test_function("wcstring_tok")) test_wcstring_tok();
|
||||
if (should_test_function("string_split")) test_split_string_tok();
|
||||
if (should_test_function("wwrite_to_fd")) test_wwrite_to_fd();
|
||||
if (should_test_function("env_vars")) test_env_vars();
|
||||
if (should_test_function("env")) test_env_snapshot();
|
||||
|
||||
@@ -10,31 +10,6 @@
|
||||
#include "common.h"
|
||||
#include "flog.h"
|
||||
|
||||
/// Locate the token in \p str that follows the token described by \p last.
/// The characters of \p needle are the separators; an empty \p needle means "the rest of the
/// string is the token".
/// A \p last of (0, 0) begins the scan at the start of the string.
/// \return (pos, count) for the token found, (pos, npos) when the token runs to the end of the
/// string, or (npos, npos) when there is nothing left.
/// Side effect: the separator character that terminates the returned token is overwritten with
/// NUL in \p str.
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) {
    using size_type = wcstring::size_type;
    const size_type none = wcstring::npos;
    const wcstring_range finished(none, none);

    // A previous result carrying npos in either slot means tokenizing already ended.
    if (last.first == none || last.second == none) return finished;

    // Resume just past the previous token.
    size_type start = last.first + last.second;
    if (start == none) return finished;
    // Step over the separator that ended the previous token. Offset 0 is the initial call, where
    // there is no previous separator to skip.
    if (start != 0) ++start;
    if (start == none || start >= str.size()) return finished;

    // No separators: everything from here on is a single, final token.
    if (needle.empty()) return wcstring_range(start, none);

    // Skip any run of separators in front of the token.
    const size_type tok_begin = str.find_first_not_of(needle, start);
    if (tok_begin == none) return finished;

    // The token extends to the next separator, or to the end of the string if there is none.
    const size_type sep_at = str.find_first_of(needle, tok_begin);
    if (sep_at == none) return wcstring_range(tok_begin, none);

    // Terminate the token in place, mirroring wcstok().
    str[sep_at] = L'\0';
    return wcstring_range(tok_begin, sep_at - tok_begin);
}
|
||||
|
||||
wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
|
||||
if (input.size() <= static_cast<size_t>(max_len)) {
|
||||
return input;
|
||||
@@ -282,6 +257,33 @@ wcstring_list_t split_string(const wcstring &val, wchar_t sep) {
|
||||
return out;
|
||||
}
|
||||
|
||||
/// Tokenize \p val on runs of any of the characters in \p seps, returning at most
/// \p max_results strings.
/// When the limit is reached while input remains, the final element is the untouched remainder
/// of \p val, starting one character past the separator that ended the previous token (so any
/// further leading separators are kept).
/// A \p max_results of 0 yields an empty list.
wcstring_list_t split_string_tok(const wcstring &val, const wcstring &seps, size_t max_results) {
    const size_t len = val.size();
    wcstring_list_t tokens;
    size_t cursor = 0;

    for (;;) {
        // Stop when the input is exhausted, or when only the slot reserved for the remainder
        // is left.
        if (cursor >= len) break;
        if (tokens.size() + 1 >= max_results) break;

        // Advance past any separators in front of the next token.
        cursor = val.find_first_not_of(seps, cursor);
        if (cursor == wcstring::npos) break;

        // The token ends at the next separator, or at the end of the input.
        const size_t found = val.find_first_of(seps, cursor);
        const size_t tok_end = (found == wcstring::npos) ? len : found;
        tokens.push_back(val.substr(cursor, tok_end - cursor));

        // Step over exactly one separator. If the next element is the remainder, it keeps
        // every leading separator except this one. This is historical.
        cursor = tok_end + 1;
    }

    // Whatever input is left becomes the last element verbatim.
    if (max_results > 0 && cursor < len) {
        assert(tokens.size() + 1 == max_results && "Should have split the max");
        tokens.push_back(val.substr(cursor));
    }
    assert(tokens.size() <= max_results && "Got too many results");
    return tokens;
}
|
||||
|
||||
wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) {
|
||||
if (vals.empty()) return wcstring{};
|
||||
|
||||
|
||||
@@ -120,6 +120,16 @@ inline maybe_t<string_fuzzy_match_t> string_fuzzy_match_string(const wcstring &s
|
||||
/// Split a string by a separator character.
|
||||
wcstring_list_t split_string(const wcstring &val, wchar_t sep);
|
||||
|
||||
/// Split a string by runs of any of the separator characters provided in \p seps.
|
||||
/// Note the delimiters are the characters in \p seps, not \p seps itself.
|
||||
/// \p seps may contain the NUL character.
|
||||
/// Do not output more than \p max_results results. If we are to output exactly that many,
|
||||
/// the last output is the remainder of the input, including leading delimiters,
|
||||
/// except for the first. This is historical behavior.
|
||||
/// Example: split_string_tok(" a b  c ", " ", 3) -> {"a", "b", " c "}
|
||||
wcstring_list_t split_string_tok(const wcstring &val, const wcstring &seps,
|
||||
size_t max_results = std::numeric_limits<size_t>::max());
|
||||
|
||||
/// Join a list of strings by a separator character.
|
||||
wcstring join_strings(const wcstring_list_t &vals, wchar_t sep);
|
||||
|
||||
@@ -157,19 +167,6 @@ inline bool bool_from_string(const wcstring &x) {
|
||||
return !x.empty() && std::wcschr(L"YTyt1", x.at(0));
|
||||
}
|
||||
|
||||
/// @typedef wcstring_range represents a range in a wcstring.
|
||||
/// The first element is the location, the second is the count.
|
||||
typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;
|
||||
|
||||
/// wcstring equivalent of wcstok(). Supports NUL. For convenience and wcstok() compatibility, the
|
||||
/// first character of each token separator is replaced with NUL.
|
||||
/// @return Returns a pair of (pos, count).
|
||||
/// This will be (npos, npos) when it's done. In the form of (pos, npos)
|
||||
/// when the token is already known to be the final token.
|
||||
/// @note The final token may not necessarily return (pos, npos).
|
||||
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle,
|
||||
wcstring_range last = wcstring_range(0, 0));
|
||||
|
||||
/// Given iterators into a string (forward or reverse), splits the haystack iterators
|
||||
/// about the needle sequence, up to max times. Inserts splits into the output array.
|
||||
/// If the iterators are forward, this does the normal thing.
|
||||
|
||||
Reference in New Issue
Block a user