Remove wcstring_tok

wcstring_tok was a funky function which was confusing and used only in
one place. Replace it with split_string_tok, which is somewhat simpler.
This commit is contained in:
ridiculousfish
2021-04-18 14:46:05 -07:00
parent 2fb0a703de
commit 092168485b
4 changed files with 66 additions and 76 deletions

View File

@@ -594,13 +594,8 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
if (!opts.have_delimiter) {
// We're using IFS, so tokenize the buffer using each IFS char. This is for backward
// compatibility with old versions of fish.
wcstring_list_t tokens;
for (wcstring_range loc = wcstring_tok(buff, opts.delimiter);
loc.first != wcstring::npos; loc = wcstring_tok(buff, opts.delimiter, loc)) {
tokens.emplace_back(wcstring(buff, loc.first, loc.second));
}
parser.set_var_and_fire(*var_ptr++, opts.place, tokens);
wcstring_list_t tokens = split_string_tok(buff, opts.delimiter);
parser.set_var_and_fire(*var_ptr++, opts.place, std::move(tokens));
} else {
// We're using a delimiter provided by the user so use the `string split` behavior.
wcstring_list_t splits;
@@ -614,14 +609,15 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
if (!opts.have_delimiter) {
// We're using IFS, so tokenize the buffer using each IFS char. This is for backward
// compatibility with old versions of fish.
wcstring_range loc = wcstring_range(0, 0);
// Note the final variable gets any remaining text.
wcstring_list_t var_vals = split_string_tok(buff, opts.delimiter, vars_left());
size_t val_idx = 0;
while (vars_left()) {
wcstring substr;
loc = wcstring_tok(buff, (vars_left() > 1) ? opts.delimiter : wcstring(), loc);
if (loc.first != wcstring::npos) {
substr = wcstring(buff, loc.first, loc.second);
wcstring val;
if (val_idx < var_vals.size()) {
val = std::move(var_vals.at(val_idx++));
}
parser.set_var_and_fire(*var_ptr++, opts.place, substr);
parser.set_var_and_fire(*var_ptr++, opts.place, std::move(val));
}
} else {
// We're using a delimiter provided by the user so use the `string split` behavior.

View File

@@ -5430,31 +5430,26 @@ static void test_highlighting() {
vars.remove(L"VARIABLE_IN_COMMAND2", ENV_DEFAULT);
}
static void test_wcstring_tok() {
say(L"Testing wcstring_tok");
wcstring buff = L"hello world";
wcstring needle = L" \t\n";
wcstring_range loc = wcstring_tok(buff, needle);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"hello") {
err(L"Wrong results from first wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
loc = wcstring_tok(buff, needle, loc);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") {
err(L"Wrong results from second wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
loc = wcstring_tok(buff, needle, loc);
if (loc.first != wcstring::npos) {
err(L"Wrong results from third wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
static void test_split_string_tok() {
say(L"Testing split_string_tok");
wcstring_list_t splits;
splits = split_string_tok(L" hello \t world", L" \t\n");
do_test((splits == wcstring_list_t{L"hello", L"world"}));
buff = L"hello world";
loc = wcstring_tok(buff, needle);
// loc is "hello" again
loc = wcstring_tok(buff, L"", loc);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") {
err(L"Wrong results from wcstring_tok with empty needle: {%zu, %zu}", loc.first,
loc.second);
}
splits = split_string_tok(L" stuff ", wcstring(L" "), 0);
do_test((splits == wcstring_list_t{}));
splits = split_string_tok(L" stuff ", wcstring(L" "), 1);
do_test((splits == wcstring_list_t{L" stuff "}));
splits = split_string_tok(L" hello \t world andstuff ", L" \t\n", 3);
do_test((splits == wcstring_list_t{L"hello", L"world", L" andstuff "}));
// NUL chars are OK.
wcstring nullstr = L" hello X world";
nullstr.at(nullstr.find(L'X')) = L'\0';
splits = split_string_tok(nullstr, wcstring(L" \0", 2));
do_test((splits == wcstring_list_t{L"hello", L"world"}));
}
static void test_wwrite_to_fd() {
@@ -6521,7 +6516,7 @@ int main(int argc, char **argv) {
env_stack_t::principal().set_pwd_from_getcwd();
if (should_test_function("utility_functions")) test_utility_functions();
if (should_test_function("wcstring_tok")) test_wcstring_tok();
if (should_test_function("string_split")) test_split_string_tok();
if (should_test_function("wwrite_to_fd")) test_wwrite_to_fd();
if (should_test_function("env_vars")) test_env_vars();
if (should_test_function("env")) test_env_snapshot();

View File

@@ -10,31 +10,6 @@
#include "common.h"
#include "flog.h"
/// Return the next token of \p str as a (position, count) pair, resuming after \p last.
/// Tokens are separated by any of the characters in \p needle.
/// \param str the string being tokenized. It is modified: the separator character that ends the
///        returned token is overwritten with NUL.
/// \param needle the separator characters. If empty, the rest of the string is the token.
/// \param last the pair returned by the previous call.
/// \return (pos, count) for a token followed by a separator, (pos, npos) for a token running to
///         the end of the string, or (npos, npos) when there are no more tokens.
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) {
using size_type = wcstring::size_type;
// A previous count of npos means the previous token ran to the end of the string, so we are done.
size_type pos = last.second == wcstring::npos ? wcstring::npos : last.first;
// Otherwise resume at the end of the previous token (its position plus its count).
if (pos != wcstring::npos && last.second != wcstring::npos) pos += last.second;
// Step over the character that ended the previous token (set to NUL below on the earlier call).
// A resume position of 0 is left alone, so tokenization can begin at the first character.
if (pos != wcstring::npos && pos != 0) ++pos;
// Nothing left to scan.
if (pos == wcstring::npos || pos >= str.size()) {
return std::make_pair(wcstring::npos, wcstring::npos);
}
// With no separators, everything from pos onwards is one final token.
if (needle.empty()) {
return std::make_pair(pos, wcstring::npos);
}
// Skip separators preceding the token; if only separators remain there is no token.
pos = str.find_first_not_of(needle, pos);
if (pos == wcstring::npos) return std::make_pair(wcstring::npos, wcstring::npos);
// Locate the separator that ends this token.
size_type next_pos = str.find_first_of(needle, pos);
if (next_pos == wcstring::npos) {
// No separator follows: this token extends to the end of the string.
return std::make_pair(pos, wcstring::npos);
}
// Terminate the token in place, as wcstok() would, and report its length.
str[next_pos] = L'\0';
return std::make_pair(pos, next_pos - pos);
}
wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
if (input.size() <= static_cast<size_t>(max_len)) {
return input;
@@ -282,6 +257,33 @@ wcstring_list_t split_string(const wcstring &val, wchar_t sep) {
return out;
}
/// Tokenize \p val on any of the characters in \p seps, producing at most \p max_results strings.
/// Separators preceding a token are skipped. Once only one result slot is left, the rest of the
/// input is appended verbatim as the final entry: it starts one character after the separator
/// that ended the previous token, or at the beginning of \p val if no token was produced.
wcstring_list_t split_string_tok(const wcstring &val, const wcstring &seps, size_t max_results) {
    const size_t len = val.size();
    wcstring_list_t tokens;
    size_t cursor = 0;

    for (;;) {
        // Stop when the input is exhausted, or when the only slot left is for the remainder.
        if (cursor >= len) break;
        if (tokens.size() + 1 >= max_results) break;

        // Advance past any separators in front of the next token.
        const size_t tok_start = val.find_first_not_of(seps, cursor);
        if (tok_start == wcstring::npos) {
            // Only separators were left, so there is no remainder to report either.
            cursor = wcstring::npos;
            break;
        }

        // The token runs up to the next separator, or to the end of the input.
        const size_t sep_at = val.find_first_of(seps, tok_start);
        const size_t tok_end = (sep_at == wcstring::npos) ? len : sep_at;
        tokens.emplace_back(val, tok_start, tok_end - tok_start);

        // Step over exactly one separator. Historically the final result keeps every leading
        // separator except the first, so we must not consume more than one here.
        cursor = tok_end + 1;
    }

    // Whatever is left becomes the last result, provided the caller asked for any results at all.
    const bool emit_rest = max_results > 0 && cursor < len;
    if (emit_rest) {
        assert(tokens.size() + 1 == max_results && "Should have split the max");
        tokens.emplace_back(val, cursor);
    }
    assert(tokens.size() <= max_results && "Got too many results");
    return tokens;
}
wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) {
if (vals.empty()) return wcstring{};

View File

@@ -120,6 +120,16 @@ inline maybe_t<string_fuzzy_match_t> string_fuzzy_match_string(const wcstring &s
/// Split a string by a separator character.
wcstring_list_t split_string(const wcstring &val, wchar_t sep);
/// Split a string by runs of any of the separator characters provided in \p seps.
/// Note the delimiters are the characters in \p seps, not \p seps itself.
/// \p seps may contain the NUL character.
/// Do not output more than \p max_results results; a \p max_results of 0 yields an empty list.
/// If we are to output exactly that many, the last output is the remainder of the input,
/// including leading delimiters, except for the first. This is historical behavior.
/// Example: split_string_tok(" a b  c ", " ", 3) -> {"a", "b", " c "}
/// (there are two spaces before "c": the first is consumed as the delimiter after "b", the
/// second is retained in the last output).
wcstring_list_t split_string_tok(const wcstring &val, const wcstring &seps,
size_t max_results = std::numeric_limits<size_t>::max());
/// Join a list of strings by a separator character.
wcstring join_strings(const wcstring_list_t &vals, wchar_t sep);
@@ -157,19 +167,6 @@ inline bool bool_from_string(const wcstring &x) {
return !x.empty() && std::wcschr(L"YTyt1", x.at(0));
}
/// @typedef wcstring_range represents a range in a wcstring.
/// The first element is the location, the second is the count.
typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;
/// wcstring equivalent of wcstok(). Supports NUL. For convenience and wcstok() compatibility, the
/// first character of each token separator is replaced with NUL, so \p str is modified in place.
/// @param str the string to tokenize.
/// @param needle the set of separator characters. If empty, the remainder of the string is
/// returned as a single token.
/// @param last the range returned by the previous call; the default of (0, 0) starts
/// tokenizing at the beginning of \p str.
/// @return Returns a pair of (pos, count).
/// This will be (npos, npos) when it's done. In the form of (pos, npos)
/// when the token is already known to be the final token.
/// @note The final token may not necessarily return (pos, npos): if a separator follows it, it is
/// returned as (pos, count) and only the next call returns (npos, npos).
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle,
wcstring_range last = wcstring_range(0, 0));
/// Given iterators into a string (forward or reverse), splits the haystack iterators
/// about the needle sequence, up to max times. Inserts splits into the output array.
/// If the iterators are forward, this does the normal thing.