From 6c46ea0ed21e0ac84441a4fab0936d5c8099d9f4 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 13 Feb 2021 16:52:31 -0800 Subject: [PATCH] Refactor builtin_set This cleans up builtin_set a bit, with the meat of the change being reworking `parse_index` into `split_var_and_indexes`. `parse_index` was a function that split a string like `foo[1 3..5]` into its variable name `foo` and the indexes (here `1 3 4 5`). It had a funny interface where it would modify a C string in-place. Switch it to return a `split_var_t` which is a little struct wrapping up the split operation. This simplifies memory management, and also avoids modifying the arguments to the builtin. --- src/builtin_set.cpp | 223 +++++++++++++++++++++----------------------- 1 file changed, 107 insertions(+), 116 deletions(-) diff --git a/src/builtin_set.cpp b/src/builtin_set.cpp index 5f64349b7..30fbf5c99 100644 --- a/src/builtin_set.cpp +++ b/src/builtin_set.cpp @@ -78,8 +78,7 @@ static const struct woption long_options[] = { _(L"%ls: Universal variable '%ls' is shadowed by the global variable of the same name.\n") // Test if the specified variable should be subject to path validation. -static const wchar_t *const path_variables[] = {L"PATH", L"CDPATH"}; -static int is_path_variable(const wchar_t *env) { return contains(path_variables, env); } +static int is_path_variable(const wcstring &env) { return env == L"PATH" || env == L"CDPATH"; } static int parse_cmd_opts(set_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { @@ -236,12 +235,12 @@ static int validate_cmd_opts(const wchar_t *cmd, // Check if we are setting a uvar and a global of the same name exists. See // https://github.com/fish-shell/fish-shell/issues/806 static int check_global_scope_exists(const wchar_t *cmd, const set_cmd_opts_t &opts, - const wchar_t *dest, io_streams_t &streams, + const wcstring &dest, io_streams_t &streams, const parser_t &parser) { if (opts.universal) { auto global_dest = parser.vars().get(dest, ENV_GLOBAL); if (global_dest && parser.is_interactive()) { - streams.err.append_format(BUILTIN_SET_UVAR_ERR, cmd, dest); + streams.err.append_format(BUILTIN_SET_UVAR_ERR, cmd, dest.c_str()); } } @@ -251,7 +250,7 @@ static int check_global_scope_exists(const wchar_t *cmd, const set_cmd_opts_t &o // Validate the given path `list`. If there are any entries referring to invalid directories which // contain a colon, then complain. Return true if any path element was valid, false if not. static bool validate_path_warning_on_colons(const wchar_t *cmd, - const wchar_t *key, //!OCLINT(npath complexity) + const wcstring &key, //!OCLINT(npath complexity) const wcstring_list_t &list, io_streams_t &streams, const environment_t &vars) { // Always allow setting an empty value. @@ -297,16 +296,16 @@ static bool validate_path_warning_on_colons(const wchar_t *cmd, if (valid) { any_success = true; } else if (looks_like_colon_sep) { - streams.err.append_format(BUILTIN_SET_PATH_ERROR, cmd, key, dir.c_str(), + streams.err.append_format(BUILTIN_SET_PATH_ERROR, cmd, key.c_str(), dir.c_str(), std::strerror(errno)); - streams.err.append_format(BUILTIN_SET_PATH_HINT, cmd, key, key, + streams.err.append_format(BUILTIN_SET_PATH_HINT, cmd, key.c_str(), key.c_str(), std::wcschr(dir.c_str(), L':') + 1); } } return any_success; } -static void handle_env_return(int retval, const wchar_t *cmd, const wchar_t *key, +static void handle_env_return(int retval, const wchar_t *cmd, const wcstring &key, io_streams_t &streams) { switch (retval) { case ENV_OK: { @@ -314,23 +313,24 @@ static void handle_env_return(int retval, const wchar_t *cmd, const wchar_t *key } case ENV_PERM: { streams.err.append_format(_(L"%ls: Tried to change the read-only variable '%ls'\n"), - cmd, key); + cmd, key.c_str()); break; } case ENV_SCOPE: { streams.err.append_format( _(L"%ls: Tried to modify the special variable '%ls' with the wrong scope\n"), cmd, - key); + key.c_str()); break; } case ENV_INVALID: { streams.err.append_format( _(L"%ls: Tried to modify the special variable '%ls' to an invalid value\n"), cmd, - key); + key.c_str()); break; } case ENV_NOT_FOUND: { - streams.err.append_format(_(L"%ls: The variable '%ls' does not exist\n"), cmd, key); + streams.err.append_format(_(L"%ls: The variable '%ls' does not exist\n"), cmd, + key.c_str()); break; } default: { @@ -341,7 +341,7 @@ static void handle_env_return(int retval, const wchar_t *cmd, const wchar_t *key /// Call vars.set. If this is a path variable, e.g. PATH, validate the elements. On error, print a /// description of the problem to stderr. -static int env_set_reporting_errors(const wchar_t *cmd, const wchar_t *key, int scope, +static int env_set_reporting_errors(const wchar_t *cmd, const wcstring &key, int scope, wcstring_list_t list, io_streams_t &streams, env_stack_t &vars, std::vector *evts) { if (is_path_variable(key) && !validate_path_warning_on_colons(cmd, key, list, streams, vars)) { @@ -354,61 +354,72 @@ static int env_set_reporting_errors(const wchar_t *cmd, const wchar_t *key, int return retval; } +/// A helper type returned by split_var_and_indexes. +struct split_var_t { + wcstring varname; // name of the variable + maybe_t var{}; // value of the variable, or none if missing + std::vector indexes{}; // list of requested indexes + + /// \return the number of elements in our variable, or 0 if missing. + long varsize() const { return var ? static_cast(var->as_list().size()) : 0L; } +}; + /// Extract indexes from an argument of the form `var_name[index1 index2...]`. -/// -/// Inputs: -/// indexes: the list to insert the new indexes into -/// src: The source string to parse. This must be a var spec of the form "var[...]" +/// The argument \p arg is split into a variable name and list of indexes, which is returned by +/// reference. Indexes are "expanded" in the sense that range expressions .. and negative values are +/// handled. /// /// Returns: -/// The total number of indexes parsed, or -1 on error. If any indexes were found the `src` string -/// is modified to omit the index expression leaving just the var name. -static int parse_index(std::vector &indexes, wchar_t *src, int scope, io_streams_t &streams, - const environment_t &vars) { - wchar_t *p = std::wcschr(src, L'['); - if (!p) return 0; // no slices so nothing for us to do - *p = L'\0'; // split the var name from the indexes/slices - p++; - - auto var_str = vars.get(src, scope); - size_t varsize = 0; - if (var_str) varsize = var_str->as_list().size(); - - int count = 0; +/// a split var on success, none() on error, in which case an error will have been printed. +/// If no index is found, this leaves indexes empty. +maybe_t split_var_and_indexes(const wchar_t *arg, env_mode_flags_t mode, + const environment_t &vars, io_streams_t &streams) { + split_var_t res{}; + const wchar_t *open_bracket = std::wcschr(arg, L'['); + size_t varname_len = open_bracket ? open_bracket - arg : wcslen(arg); + res.varname.assign(arg, varname_len); + res.var = vars.get(res.varname, mode); + if (!open_bracket) { + // Common case of no bracket. + return res; + } + long varsize = res.varsize(); + const wchar_t *p = open_bracket + 1; while (*p != L']') { const wchar_t *end; long l_ind; - if (indexes.empty() && *p == L'.' && p[1] == L'.') { + if (res.indexes.empty() && p[0] == L'.' && p[1] == L'.') { // If we are at the first index expression, a missing start-index means the range starts // at the first item. l_ind = 1; // first index } else { + const wchar_t *end = nullptr; l_ind = fish_wcstol(p, &end); if (errno > 0) { // ignore errno == -1 meaning the int did not end with a '\0' streams.err.append_format(_(L"%ls: Invalid index starting at '%ls'\n"), L"set", - src); - return -1; + res.varname.c_str()); + return none(); } - p = const_cast(end); + p = end; } // Convert negative index to a positive index. if (l_ind < 0) l_ind = varsize + l_ind + 1; - if (*p == L'.' && *(p + 1) == L'.') { + if (p[0] == L'.' && p[1] == L'.') { p += 2; long l_ind2; // If we are at the last index range expression, a missing end-index means the range // spans until the last item. - if (indexes.empty() && *p == L']') { + if (res.indexes.empty() && *p == L']') { l_ind2 = -1; } else { l_ind2 = fish_wcstol(p, &end); - if (errno > 0) { // ignore errno == -1 meaning the int did not end with a '\0' - return -1; + if (errno > 0) { // ignore errno == -1 meaning there was text after the int + return none(); } - p = const_cast(end); + p = end; } // Convert negative index to a positive index. @@ -416,16 +427,13 @@ static int parse_index(std::vector &indexes, wchar_t *src, int scope, io_s int direction = l_ind2 < l_ind ? -1 : 1; for (long jjj = l_ind; jjj * direction <= l_ind2 * direction; jjj += direction) { - indexes.push_back(jjj); - count++; + res.indexes.push_back(jjj); } } else { - indexes.push_back(l_ind); - count++; + res.indexes.push_back(l_ind); } } - - return count; + return res; } static int update_values(wcstring_list_t &list, std::vector &indexes, @@ -449,22 +457,23 @@ static int update_values(wcstring_list_t &list, std::vector &indexes, return 0; } -/// Erase from a list of wcstring values at specified indexes. -static void erase_values(wcstring_list_t &list, const std::vector &indexes) { - // Make a set of indexes. - // This both sorts them into ascending order and removes duplicates. - const std::set indexes_set(indexes.begin(), indexes.end()); +/// Given a list of values and 1-based indexes, return a new list, with those elements removed. +/// Note this deliberately accepts both args by value, as it modifies them both. +static wcstring_list_t erased_at_indexes(wcstring_list_t input, std::vector indexes) { + // Sort our indexes into *descending* order. + std::sort(indexes.begin(), indexes.end(), std::greater()); - // Now walk the set backwards, so we encounter larger indexes first, and remove elements at the - // given (1-based) indexes. - decltype(indexes_set)::const_reverse_iterator iter; - for (iter = indexes_set.rbegin(); iter != indexes_set.rend(); ++iter) { - long val = *iter; - if (val > 0 && static_cast(val) <= list.size()) { + // Remove duplicates. + indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end()); + + // Now when we walk indexes, we encounter larger indexes first. + for (long idx : indexes) { + if (idx > 0 && static_cast(idx) <= input.size()) { // One-based indexing! - list.erase(list.begin() + val - 1); + input.erase(input.begin() + idx - 1); } } + return input; } static env_mode_flags_t compute_scope(const set_cmd_opts_t &opts) { @@ -530,39 +539,29 @@ static int builtin_set_list(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, return STATUS_CMD_OK; } -// Query mode. Return the number of variables that do not exist out of the specified variables. +// Query mode. Return the number of variables that do NOT exist out of the specified variables. static int builtin_set_query(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { int retval = 0; - int scope = compute_scope(opts); + env_mode_flags_t scope = compute_scope(opts); for (int i = 0; i < argc; i++) { - wchar_t *arg = argv[i]; - wchar_t *dest = wcsdup(arg); - assert(dest); - - std::vector indexes; - int idx_count = parse_index(indexes, dest, scope, streams, parser.vars()); - if (idx_count == -1) { - free(dest); + auto split = split_var_and_indexes(argv[i], scope, parser.vars(), streams); + if (!split) { builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_CMD_ERROR; } - if (idx_count) { - wcstring_list_t result; - size_t varsize = 0; - auto dest_str = parser.vars().get(dest, scope); - if (dest_str) varsize = dest_str->as_list().size(); - - for (auto idx : indexes) { - if (idx < 1 || static_cast(idx) > varsize) retval++; - } + if (split->indexes.empty()) { + // No indexes, just increment if our variable is missing. + if (!split->var) retval++; } else { - if (!parser.vars().get(arg, scope)) retval++; + // Increment for every index out of range. + long varsize = split->varsize(); + for (long idx : split->indexes) { + if (idx < 1 || idx > varsize) retval++; + } } - - free(dest); } return retval; @@ -659,42 +658,35 @@ static int builtin_set_show(const wchar_t *cmd, const set_cmd_opts_t &opts, int static int builtin_set_erase(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { int ret = STATUS_CMD_OK; + env_mode_flags_t scope = compute_scope(opts); for (int i = 0; i < argc; i++) { - int scope = - compute_scope(opts); // calculate the variable scope based on the provided options - wchar_t *dest = argv[i]; - - std::vector indexes; - int idx_count = parse_index(indexes, dest, scope, streams, parser.vars()); - if (idx_count == -1) { + auto split = split_var_and_indexes(argv[i], scope, parser.vars(), streams); + if (!split) { builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_CMD_ERROR; } - int retval; - if (!valid_var_name(dest)) { - streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, dest); + if (!valid_var_name(split->varname)) { + streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, split->varname.c_str()); builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_INVALID_ARGS; } + int retval = STATUS_CMD_OK; std::vector evts; - if (idx_count == 0) { // unset the var - retval = parser.vars().remove(dest, scope, &evts); + if (split->indexes.empty()) { // unset the var + retval = parser.vars().remove(split->varname, scope, &evts); // When a non-existent-variable is unset, return ENV_NOT_FOUND as $status // but do not emit any errors at the console as a compromise between user // friendliness and correctness. if (retval != ENV_NOT_FOUND) { - handle_env_return(retval, cmd, dest, streams); + handle_env_return(retval, cmd, split->varname, streams); } } else { // remove just the specified indexes of the var - const auto dest_var = parser.vars().get(dest, scope); - if (!dest_var) return STATUS_CMD_ERROR; - wcstring_list_t result; - dest_var->to_list(result); - erase_values(result, indexes); - retval = env_set_reporting_errors(cmd, dest, scope, std::move(result), streams, - parser.vars(), &evts); + if (!split->var) return STATUS_CMD_ERROR; + wcstring_list_t result = erased_at_indexes(split->var->as_list(), split->indexes); + retval = env_set_reporting_errors(cmd, split->varname, scope, std::move(result), + streams, parser.vars(), &evts); } // Fire any events. @@ -707,7 +699,7 @@ static int builtin_set_erase(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, if (retval != STATUS_CMD_OK) { ret = retval; } else { - retval = check_global_scope_exists(cmd, opts, dest, streams, parser); + retval = check_global_scope_exists(cmd, opts, split->varname, streams, parser); if (retval != STATUS_CMD_OK) ret = retval; } } @@ -715,7 +707,7 @@ static int builtin_set_erase(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, } /// This handles the common case of setting the entire var to a set of values. -static int set_var_array(const wchar_t *cmd, const set_cmd_opts_t &opts, const wchar_t *varname, +static int set_var_array(const wchar_t *cmd, const set_cmd_opts_t &opts, const wcstring &varname, wcstring_list_t &new_values, int argc, wchar_t **argv, parser_t &parser, const io_streams_t &streams) { UNUSED(cmd); @@ -743,7 +735,7 @@ static int set_var_array(const wchar_t *cmd, const set_cmd_opts_t &opts, const w } /// This handles the more difficult case of setting individual slices of a var. -static int set_var_slices(const wchar_t *cmd, set_cmd_opts_t &opts, const wchar_t *varname, +static int set_var_slices(const wchar_t *cmd, set_cmd_opts_t &opts, const wcstring &varname, wcstring_list_t &new_values, std::vector &indexes, int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { UNUSED(parser); @@ -786,38 +778,37 @@ static int builtin_set_set(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, w return STATUS_INVALID_ARGS; } - int scope = compute_scope(opts); // calculate the variable scope based on the provided options - wchar_t *varname = argv[0]; + env_mode_flags_t scope = compute_scope(opts); + const wchar_t *var_expr = argv[0]; argv++; argc--; - std::vector indexes; - int idx_count = parse_index(indexes, varname, scope, streams, parser.vars()); - if (idx_count == -1) { + auto split = split_var_and_indexes(var_expr, scope, parser.vars(), streams); + if (!split) { builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_INVALID_ARGS; } - if (!valid_var_name(varname)) { - streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, varname); + if (!valid_var_name(split->varname)) { + streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, split->varname.c_str()); builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_INVALID_ARGS; } int retval; wcstring_list_t new_values; - if (idx_count == 0) { + if (split->indexes.empty()) { // Handle the simple, common, case. Set the var to the specified values. - retval = set_var_array(cmd, opts, varname, new_values, argc, argv, parser, streams); + retval = set_var_array(cmd, opts, split->varname, new_values, argc, argv, parser, streams); } else { // Handle the uncommon case of setting specific slices of a var. - retval = - set_var_slices(cmd, opts, varname, new_values, indexes, argc, argv, parser, streams); + retval = set_var_slices(cmd, opts, split->varname, new_values, split->indexes, argc, argv, + parser, streams); } if (retval != STATUS_CMD_OK) return retval; std::vector evts; - retval = env_set_reporting_errors(cmd, varname, scope, std::move(new_values), streams, + retval = env_set_reporting_errors(cmd, split->varname, scope, std::move(new_values), streams, parser.vars(), &evts); // Fire any events. for (const auto &evt : evts) { @@ -825,7 +816,7 @@ static int builtin_set_set(const wchar_t *cmd, set_cmd_opts_t &opts, int argc, w } if (retval != STATUS_CMD_OK) return retval; - return check_global_scope_exists(cmd, opts, varname, streams, parser); + return check_global_scope_exists(cmd, opts, split->varname, streams, parser); } /// The set builtin creates, updates, and erases (removes, deletes) variables.