Abbreviations to support functions

This adds support for the `--function` option of abbreviations, so that the expansion of an abbreviation may be generated dynamically via a fish function.
2026-06-07 18:21:16 -03:00 · 2022-08-06 16:57:26 -07:00
parent d15855d3e3
commit 8135c52c13
9 changed files with 261 additions and 70 deletions
--- a/src/abbrs.cpp
+++ b/src/abbrs.cpp
@@ -14,7 +14,11 @@ abbreviation_t::abbreviation_t(wcstring name, wcstring key, wcstring replacement
      position(position),
      from_universal(from_universal) {}

-bool abbreviation_t::matches(const wcstring &token) const {
+bool abbreviation_t::matches(const wcstring &token, abbrs_position_t position) const {
+    // We must either expands anywhere, or in the given position.
+    if (this->position != position && this->position != abbrs_position_t::anywhere) {
+        return false;
+    }
    if (this->is_regex()) {
        return this->regex->match(token).has_value();
    } else {
@@ -27,23 +31,25 @@ acquired_lock<abbrs_set_t> abbrs_get_set() {
    return abbrs.acquire();
 }

-maybe_t<wcstring> abbrs_set_t::expand(const wcstring &token, abbrs_position_t position) const {
+abbrs_replacer_list_t abbrs_set_t::match(const wcstring &token, abbrs_position_t position) const {
+    abbrs_replacer_list_t result{};
    // Later abbreviations take precedence so walk backwards.
    for (auto it = abbrs_.rbegin(); it != abbrs_.rend(); ++it) {
        const abbreviation_t &abbr = *it;
-        // Expand only if the abbreviation expands anywhere or in the given position.
-        if (!(abbr.position == position || abbr.position == abbrs_position_t::anywhere)) {
-            continue;
+        if (abbr.matches(token, position)) {
+            result.push_back(abbrs_replacer_t{abbr.replacement, abbr.replacement_is_function});
        }
-
-        // Expand only if the name matches.
-        if (!abbr.matches(token)) {
-            continue;
-        }
-
-        return abbr.replacement;
    }
-    return none();
+    return result;
+}
+
+bool abbrs_set_t::has_match(const wcstring &token, abbrs_position_t position) const {
+    for (const auto &abbr : abbrs_) {
+        if (abbr.matches(token, position)) {
+            return true;
+        }
+    }
+    return false;
 }

 void abbrs_set_t::add(abbreviation_t &&abbr) {
@@ -106,7 +112,3 @@ void abbrs_set_t::import_from_uvars(const std::unordered_map<wcstring, env_var_t
        }
    }
 }
-
-maybe_t<wcstring> abbrs_expand(const wcstring &token, abbrs_position_t position) {
-    return abbrs_get_set()->expand(token, position);
-}
--- a/src/abbrs.h
+++ b/src/abbrs.h
@@ -32,20 +32,29 @@ struct abbreviation_t {
    /// we accomplish this by surrounding the regex in ^ and $.
    maybe_t<re::regex_t> regex{};

-    // Replacement string.
+    /// Replacement string.
    wcstring replacement{};

+    /// If set, the replacement is a function name.
+    bool replacement_is_function{};
+
    /// Expansion position.
    abbrs_position_t position{abbrs_position_t::command};

    /// Mark if we came from a universal variable.
    bool from_universal{};

+    /// Whether this abbrevation expands after entry (before execution).
+    bool expand_on_entry{true};
+
+    /// Whether this abbrevation expands on execution.
+    bool expand_on_execute{true};
+
    // \return true if this is a regex abbreviation.
    bool is_regex() const { return this->regex.has_value(); }

-    // \return true if we match a token.
-    bool matches(const wcstring &token) const;
+    // \return true if we match a token at a given position.
+    bool matches(const wcstring &token, abbrs_position_t position) const;

    // Construct from a name, a key which matches a token, a replacement token, a position, and
    // whether we are derived from a universal variable.
@@ -56,11 +65,24 @@ struct abbreviation_t {
    abbreviation_t() = default;
 };

+/// The result of an abbreviation expansion.
+struct abbrs_replacer_t {
+    /// The string to use to replace the incoming token, either literal or as a function name.
+    wcstring replacement;
+
+    /// If true, treat 'replacement' as the name of a function.
+    bool is_function;
+};
+using abbrs_replacer_list_t = std::vector<abbrs_replacer_t>;
+
 class abbrs_set_t {
   public:
-    /// \return the replacement value for a abbreviation token, if any.
+    /// \return the list of replacers for an input token, in priority order.
    /// The \p position is given to describe where the token was found.
-    maybe_t<wcstring> expand(const wcstring &token, abbrs_position_t position) const;
+    abbrs_replacer_list_t match(const wcstring &token, abbrs_position_t position) const;
+
+    /// \return whether we would have at least one replacer for a given token.
+    bool has_match(const wcstring &token, abbrs_position_t position) const;

    /// Add an abbreviation. Any abbreviation with the same name is replaced.
    void add(abbreviation_t &&abbr);
@@ -94,8 +116,10 @@ class abbrs_set_t {
 /// \return the global mutable set of abbreviations.
 acquired_lock<abbrs_set_t> abbrs_get_set();

-/// \return the replacement value for a abbreviation token, if any, using the global set.
+/// \return the list of replacers for an input token, in priority order, using the global set.
 /// The \p position is given to describe where the token was found.
-maybe_t<wcstring> abbrs_expand(const wcstring &token, abbrs_position_t position);
+inline abbrs_replacer_list_t abbrs_match(const wcstring &token, abbrs_position_t position) {
+    return abbrs_get_set()->match(token, position);
+}

 #endif
--- a/src/builtins/abbr.cpp
+++ b/src/builtins/abbr.cpp
@@ -38,9 +38,13 @@ struct abbr_options_t {
    bool list{};
    bool erase{};
    bool query{};
+    bool function{};
    maybe_t<wcstring> regex_pattern;
    maybe_t<abbrs_position_t> position{};

+    bool expand_on_entry{};
+    bool expand_on_execute{};
+
    wcstring_list_t args;

    bool validate(io_streams_t &streams) {
@@ -72,6 +76,21 @@ struct abbr_options_t {
            streams.err.append_format(_(L"%ls: --regex option requires --add\n"), CMD);
            return false;
        }
+        if (!add && function) {
+            streams.err.append_format(_(L"%ls: --function option requires --add\n"), CMD);
+            return false;
+        }
+        if (!add && (expand_on_entry || expand_on_execute)) {
+            streams.err.append_format(_(L"%ls: --expand-on option requires --add\n"), CMD);
+            return false;
+        }
+
+        // If no expand-on is specified, expand on both entry and execute, to match historical
+        // behavior.
+        if (add && !expand_on_entry && !expand_on_execute) {
+            expand_on_entry = true;
+            expand_on_execute = true;
+        }
        return true;
    }
 };
@@ -79,20 +98,36 @@ struct abbr_options_t {
 // Print abbreviations in a fish-script friendly way.
 static int abbr_show(const abbr_options_t &, io_streams_t &streams) {
    const auto abbrs = abbrs_get_set();
+    wcstring_list_t comps{};
    for (const auto &abbr : abbrs->list()) {
-        wcstring name = escape_string(abbr.name);
-        wcstring value = escape_string(abbr.replacement);
-        const wchar_t *scope = (abbr.from_universal ? L"-U " : L"");
-        // Literal abbreviations share both name and key.
+        comps.clear();
+        comps.push_back(L"abbr -a");
+        if (abbr.from_universal) comps.push_back(L"-U");
+        comps.push_back(L"--");
+        // Literal abbreviations have the name and key as the same.
        // Regex abbreviations have a pattern separate from the name.
-        if (!abbr.is_regex()) {
-            streams.out.append_format(L"abbr -a %ls-- %ls %ls\n", scope, name.c_str(),
-                                      value.c_str());
-        } else {
-            wcstring pattern = escape_string(abbr.key);
-            streams.out.append_format(L"abbr -a %ls-- %ls --regex %ls %ls\n", scope, name.c_str(),
-                                      pattern.c_str(), value.c_str());
+        comps.push_back(escape_string(abbr.name));
+        if (abbr.is_regex()) {
+            comps.push_back(L"--regex");
+            comps.push_back(escape_string(abbr.key));
        }
+        // The default is to expand on both entry and execute.
+        // Add flags if we're not the default.
+        if (!(abbr.expand_on_entry && abbr.expand_on_execute)) {
+            if (abbr.expand_on_entry) {
+                comps.push_back(L"--expand-on entry");
+            }
+            if (abbr.expand_on_execute) {
+                comps.push_back(L"--expand-on execute");
+            }
+        }
+        if (abbr.replacement_is_function) {
+            comps.push_back(L"--function");
+        }
+        comps.push_back(escape_string(abbr.replacement));
+        wcstring result = join_strings(comps, L' ');
+        result.push_back(L'\n');
+        streams.out.append(result);
    }
    return STATUS_CMD_OK;
 }
@@ -209,11 +244,21 @@ static int abbr_add(const abbr_options_t &opts, io_streams_t &streams) {
        if (!replacement.empty()) replacement.push_back(L' ');
        replacement.append(*iter);
    }
+    // Abbreviation function names disallow spaces.
+    // This is to prevent accidental usage of e.g. `--function 'string replace'`
+    if (opts.function &&
+        (!valid_func_name(replacement) || replacement.find(L' ') != wcstring::npos)) {
+        streams.err.append_format(_(L"%ls: Invalid function name: %ls\n"), CMD,
+                                  replacement.c_str());
+        return STATUS_INVALID_ARGS;
+    }
+
    abbrs_position_t position = opts.position ? *opts.position : abbrs_position_t::command;

    // Note historically we have allowed overwriting existing abbreviations.
    abbreviation_t abbr{std::move(name), std::move(key), std::move(replacement), position};
    abbr.regex = std::move(regex);
+    abbr.replacement_is_function = opts.function;
    abbrs_get_set()->add(std::move(abbr));
    return STATUS_CMD_OK;
 }
@@ -242,24 +287,27 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
    const wchar_t *cmd = argv[0];
    abbr_options_t opts;
    // Note 1 is returned by wgetopt to indicate a non-option argument.
-    enum { NON_OPTION_ARGUMENT = 1, REGEX_SHORT };
+    enum { NON_OPTION_ARGUMENT = 1, REGEX_SHORT, EXPAND_ON_SHORT };

    // Note the leading '-' causes wgetopter to return arguments in order, instead of permuting
    // them. We need this behavior for compatibility with pre-builtin abbreviations where options
    // could be given literally, for example `abbr e emacs -nw`.
-    static const wchar_t *const short_options = L"-arseqgUh";
-    static const struct woption long_options[] = {{L"add", no_argument, 'a'},
-                                                  {L"position", required_argument, 'p'},
-                                                  {L"regex", required_argument, REGEX_SHORT},
-                                                  {L"rename", no_argument, 'r'},
-                                                  {L"erase", no_argument, 'e'},
-                                                  {L"query", no_argument, 'q'},
-                                                  {L"show", no_argument, 's'},
-                                                  {L"list", no_argument, 'l'},
-                                                  {L"global", no_argument, 'g'},
-                                                  {L"universal", no_argument, 'U'},
-                                                  {L"help", no_argument, 'h'},
-                                                  {}};
+    static const wchar_t *const short_options = L"-afrseqgUh";
+    static const struct woption long_options[] = {
+        {L"add", no_argument, 'a'},
+        {L"position", required_argument, 'p'},
+        {L"regex", required_argument, REGEX_SHORT},
+        {L"expand-on", required_argument, EXPAND_ON_SHORT},
+        {L"function", no_argument, 'f'},
+        {L"rename", no_argument, 'r'},
+        {L"erase", no_argument, 'e'},
+        {L"query", no_argument, 'q'},
+        {L"show", no_argument, 's'},
+        {L"list", no_argument, 'l'},
+        {L"global", no_argument, 'g'},
+        {L"universal", no_argument, 'U'},
+        {L"help", no_argument, 'h'},
+        {}};

    int argc = builtin_count_args(argv);
    int opt;
@@ -308,7 +356,22 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
                opts.regex_pattern = w.woptarg;
                break;
            }
-
+            case EXPAND_ON_SHORT: {
+                if (!wcscmp(w.woptarg, L"entry")) {
+                    opts.expand_on_entry = true;
+                } else if (!wcscmp(w.woptarg, L"execute")) {
+                    opts.expand_on_execute = true;
+                } else {
+                    streams.err.append_format(_(L"%ls: Invalid expand-on '%ls'\nexpand-on must be "
+                                                L"one of: entry, execute.\n"),
+                                              CMD, w.woptarg);
+                    return STATUS_INVALID_ARGS;
+                }
+                break;
+            }
+            case 'f':
+                opts.function = true;
+                break;
            case 'r':
                opts.rename = true;
                break;
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -2479,22 +2479,35 @@ static void test_abbreviations() {
        abbrs->add(literal_abbr(L"yin", L"yang", abbrs_position_t::anywhere));
    }

-    auto cmd = abbrs_position_t::command;
-    if (abbrs_expand(L"", cmd)) err(L"Unexpected success with empty abbreviation");
-    if (abbrs_expand(L"nothing", cmd)) err(L"Unexpected success with missing abbreviation");
+    // Helper to expand an abbreviation, enforcing we have no more than one result.
+    auto abbr_expand_1 = [](const wcstring &token, abbrs_position_t pos) -> maybe_t<wcstring> {
+        auto result = abbrs_match(token, pos);
+        if (result.size() > 1) {
+            err(L"abbreviation expansion for %ls returned more than 1 result", token.c_str());
+        }
+        if (result.empty()) {
+            return none();
+        }
+        return result.front().replacement;
+    };

-    auto mresult = abbrs_expand(L"gc", cmd);
+    auto cmd = abbrs_position_t::command;
+    if (abbr_expand_1(L"", cmd)) err(L"Unexpected success with empty abbreviation");
+    if (abbr_expand_1(L"nothing", cmd)) err(L"Unexpected success with missing abbreviation");
+
+    auto mresult = abbr_expand_1(L"gc", cmd);
    if (!mresult) err(L"Unexpected failure with gc abbreviation");
    if (*mresult != L"git checkout") err(L"Wrong abbreviation result for gc");

-    mresult = abbrs_expand(L"foo", cmd);
+    mresult = abbr_expand_1(L"foo", cmd);
    if (!mresult) err(L"Unexpected failure with foo abbreviation");
    if (*mresult != L"bar") err(L"Wrong abbreviation result for foo");

    maybe_t<wcstring> result;
    auto expand_abbreviation_in_command = [](const wcstring &cmdline,
                                             size_t cursor_pos) -> maybe_t<wcstring> {
-        if (auto edit = reader_expand_abbreviation_at_cursor(cmdline, cursor_pos)) {
+        if (auto edit = reader_expand_abbreviation_at_cursor(cmdline, cursor_pos,
+                                                             parser_t::principal_parser())) {
            wcstring cmdline_expanded = cmdline;
            std::vector<highlight_spec_t> colors{cmdline_expanded.size()};
            apply_edit(&cmdline_expanded, &colors, *edit);
--- a/src/highlight.cpp
+++ b/src/highlight.cpp
@@ -1336,7 +1336,7 @@ static bool command_is_valid(const wcstring &cmd, enum statement_decoration_t de

    // Abbreviations
    if (!is_valid && abbreviation_ok)
-        is_valid = abbrs_expand(cmd, abbrs_position_t::command).has_value();
+        is_valid = abbrs_get_set()->has_match(cmd, abbrs_position_t::command);

    // Regular commands
    if (!is_valid && command_ok) is_valid = path_get_path(cmd, vars).has_value();
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -1347,8 +1347,30 @@ void reader_data_t::pager_selection_changed() {
    }
 }

+/// Expand an abbreviation replacer, which means either returning its literal replacement or running
+/// its function. \return the replacement string, or none to skip it.
+maybe_t<wcstring> expand_replacer(const wcstring &token, const abbrs_replacer_t &repl,
+                                  parser_t &parser) {
+    if (!repl.is_function) {
+        // Literal replacement cannot fail.
+        return repl.replacement;
+    }
+
+    wcstring cmd = escape_string(repl.replacement);
+    cmd.push_back(L' ');
+    cmd.append(escape_string(token));
+
+    wcstring_list_t outputs{};
+    int ret = exec_subshell(cmd, parser, outputs, false /* not apply_exit_status */);
+    if (ret != STATUS_CMD_OK) {
+        return none();
+    }
+    return join_strings(outputs, L'\n');
+}
+
 /// Expand abbreviations at the given cursor position. Does NOT inspect 'data'.
-maybe_t<edit_t> reader_expand_abbreviation_at_cursor(const wcstring &cmdline, size_t cursor_pos) {
+maybe_t<edit_t> reader_expand_abbreviation_at_cursor(const wcstring &cmdline, size_t cursor_pos,
+                                                     parser_t &parser) {
    // Get the surrounding command substitution.
    const wchar_t *const buff = cmdline.c_str();
    const wchar_t *cmdsub_begin = nullptr, *cmdsub_end = nullptr;
@@ -1400,14 +1422,17 @@ maybe_t<edit_t> reader_expand_abbreviation_at_cursor(const wcstring &cmdline, si
        leaf_is_command ? abbrs_position_t::command : abbrs_position_t::anywhere;

    // Now we can expand the abbreviation.
-    maybe_t<edit_t> result{};
-    if (auto abbreviation = abbrs_expand(leaf->source(subcmd), expand_position)) {
-        // There was an abbreviation! Replace the token in the full command. Maintain the
-        // relative position of the cursor.
-        source_range_t r = leaf->source_range();
-        result = edit_t(subcmd_offset + r.start, r.length, abbreviation.acquire());
+    // Find the first which succeeds.
+    wcstring token = leaf->source(subcmd);
+    auto replacers = abbrs_match(token, expand_position);
+    for (const auto &replacer : replacers) {
+        if (auto replacement = expand_replacer(token, replacer, parser)) {
+            // Replace the token in the full command. Maintain the relative position of the cursor.
+            source_range_t r = leaf->source_range();
+            return edit_t(subcmd_offset + r.start, r.length, replacement.acquire());
+        }
    }
-    return result;
+    return none();
 }

 /// Expand abbreviations at the current cursor position, minus the given cursor backtrack. This may
@@ -1419,9 +1444,9 @@ bool reader_data_t::expand_abbreviation_as_necessary(size_t cursor_backtrack) {

    if (conf.expand_abbrev_ok && el == &command_line) {
        // Try expanding abbreviations.
+        this->update_commandline_state();
        size_t cursor_pos = el->position() - std::min(el->position(), cursor_backtrack);
-
-        if (auto edit = reader_expand_abbreviation_at_cursor(el->text(), cursor_pos)) {
+        if (auto edit = reader_expand_abbreviation_at_cursor(el->text(), cursor_pos, parser())) {
            push_edit(el, std::move(*edit));
            update_buff_pos(el);
            result = true;
--- a/src/reader.h
+++ b/src/reader.h
@@ -261,9 +261,11 @@ bool fish_is_unwinding_for_exit();
 wcstring combine_command_and_autosuggestion(const wcstring &cmdline,
                                            const wcstring &autosuggestion);

-/// Expand abbreviations at the given cursor position. Exposed for testing purposes only.
-/// \return none if no abbreviations were expanded, otherwise the new command line.
-maybe_t<edit_t> reader_expand_abbreviation_at_cursor(const wcstring &cmdline, size_t cursor_pos);
+/// Expand at most one abbreviation at the given cursor position. Use the parser to run any
+/// abbreviations which want function calls.
+/// \return none if no abbreviations were expanded, otherwise the resulting edit.
+maybe_t<edit_t> reader_expand_abbreviation_at_cursor(const wcstring &cmdline, size_t cursor_pos,
+                                                     parser_t &parser);

 /// Apply a completion string. Exposed for testing only.
 wcstring completion_apply_to_command_line(const wcstring &val_str, complete_flags_t flags,
--- a/tests/checks/abbr.fish
+++ b/tests/checks/abbr.fish
@@ -133,6 +133,22 @@ echo $status
 # CHECKERR: abbr: --position option requires --add
 # CHECK: 2

+abbr --query banana --function
+echo $status
+# CHECKERR: abbr: --function option requires --add
+# CHECK: 2
+
+abbr --add peach --function invalid/function/name
+echo $status
+# CHECKERR: abbr: Invalid function name: invalid/function/name
+# CHECK: 2
+
+# Function names cannot contain spaces, to prevent confusion with fish script.
+abbr --add peach --function 'no space allowed'
+echo $status
+# CHECKERR: abbr: Invalid function name: no space allowed
+# CHECK: 2
+
 # Erase all abbreviations
 abbr --erase (abbr --list)
 abbr --show
--- a/tests/pexpects/abbrs.py
+++ b/tests/pexpects/abbrs.py
@@ -53,6 +53,7 @@ expect_str(r"<echo )>")

 # Support position anywhere.
 sendline(r"abbr alpha --position anywhere beta2")
+expect_prompt()

 send(r"alpha ?")
 expect_str(r"<beta2 >")
@@ -62,6 +63,7 @@ expect_str(r"<echo beta2 >")

 # Support regex.
 sendline(r"abbr alpha --regex 'A[0-9]+Z' beta3")
+expect_prompt()
 send(r"A123Z ?")
 expect_str(r"<beta3 >")
 send(r"AZ ?")
@@ -70,3 +72,47 @@ send(r"QA123Z ?")
 expect_str(r"<QA123Z >")
 send(r"A0000000000000000000009Z ?")
 expect_str(r"<beta3 >")
+
+# Support functions. Here anything in between @@ is uppercased, except 'nope'.
+sendline(
+    r"""function uppercaser;
+            string match --quiet '*nope*' $argv[1] && return 1
+            string trim --chars @ $argv | string upper
+        end"""
+)
+expect_prompt()
+sendline(r"abbr uppercaser --regex '@.+@' --function uppercaser")
+expect_prompt()
+send(r"@abc@ ?")
+expect_str(r"<ABC >")
+send(r"@nope@ ?")
+expect_str(r"<@nope@ >")
+sendline(r"abbr --erase uppercaser")
+expect_prompt()
+
+# -f works as shorthand for --function.
+sendline(r"abbr uppercaser2 --regex '@.+@' -f uppercaser")
+expect_prompt()
+send(r"@abc@ ?")
+expect_str(r"<ABC >")
+send(r"@nope@ ?")
+expect_str(r"<@nope@ >")
+sendline(r"abbr --erase uppercaser2")
+expect_prompt()
+
+# Verify that 'commandline' is accurate.
+# Abbreviation functions cannot usefully change the command line, but they can read it.
+sendline(
+    r"""function check_cmdline
+            set -g last_cmdline (commandline)
+            set -g last_cursor (commandline --cursor)
+            false
+        end"""
+)
+expect_prompt()
+sendline(r"abbr check_cmdline --regex '@.+@' --function check_cmdline")
+expect_prompt()
+send(r"@abc@ ?")
+expect_str(r"<@abc@ >")
+sendline(r"echo $last_cursor - $last_cmdline")
+expect_prompt(r"6 - @abc@ ")