From 5a7e5dc743bcbf7c1a460d5920dde5d248af5a97 Mon Sep 17 00:00:00 2001 From: Isaac Oscar Gariano Date: Sun, 3 Aug 2025 10:07:08 +1000 Subject: [PATCH] Make argparse separate known and unknown options that occur in a group together. For example, argparse --ignore-unknown h -- -ho will now set set $argv to -o and $argv_opts to -h (i.e. -ho is split into -h and -o). Previously, it would set $argv to -ho, and $argv_opts to empty. With this change, the "Limitations" section of argparse's man page has been removed, and the examples merged into the description of the -i/--ignore-unknown option. (Note: there was another 'limitation' mentioned in the 'limitations' section: that everything occuring after an unknown option in a group was considered an argument to an option; the documentation has been reworded to make it clear that this is intended behaviour, as unknown options are always treated as taking optional arguments, and modifying that behaviour would be a breaking change and not a bug fix). --- CHANGELOG.rst | 1 + doc_src/cmds/argparse.rst | 21 ++++----------- src/builtins/argparse.rs | 54 +++++++++++++++++++++++++++----------- src/wgetopt.rs | 4 +++ tests/checks/argparse.fish | 19 +++++++++++--- 5 files changed, 65 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 92f3ba9b9..fb45bde82 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -44,6 +44,7 @@ Scripting improvements - The ``psub`` command now allows combining ``--suffix`` with ``--fifo`` (:issue:`11729`). - ``argparse`` now saves recognised options and values in ``$argv_opts``, allowing them to be forwarded to other commands (:issue:`6466`). - ``argparse`` options can now be marked to be deleted from ``$argv_opts`` (by adding a ``&`` at the end of the option spec, before a ``!`` if present). There is now also a corresponding ``-d`` / ``--delete`` option to ``fish_opt``. +- ``argparse --ignore-unknown`` now removes preceding known short options from groups containing unknown options (e.g. when parsing ``-abc``, if ``a`` is known but ``b`` is not, then ``$argv`` will contain ``-bc``). Interactive improvements ------------------------ diff --git a/doc_src/cmds/argparse.rst b/doc_src/cmds/argparse.rst index 9734da0c9..017b58140 100644 --- a/doc_src/cmds/argparse.rst +++ b/doc_src/cmds/argparse.rst @@ -41,7 +41,11 @@ The following ``argparse`` options are available. They must appear before all *O The maximum number of acceptable non-option arguments. The default is infinity. **-i** or **--ignore-unknown** - Ignores unknown options, keeping them and their arguments in $argv instead. + Ignore unknown options, keeping them and their arguments in ``$argv`` instead and not moving them to ``$argv_opts``. Unknown options are treated as if they take optional arguments (i.e. have option spec ``=?``). + + The above means that if a group of short options contains an unknown short option *followed* by a known short option, the known short option is + treated as an argument to the unknown one (e.g. ``--ignore-unknown h -- -oh`` will treat ``h`` as the argument to ``-o``, and so ``_flag_h`` will *not* be set). + In contrast, if the known option comes first (and does not take any arguments), the known option will be recognised and moved from ``$argv`` to ``$argv_opts`` (e.g. ``argparse --ignore-unknown h -- -ho`` will set ``$argv`` to ``-o`` and ``$argv_opts`` to ``-h``) **-s** or **--stop-nonopt** Causes scanning the arguments to stop as soon as the first non-option argument is seen. Among other things, this is useful to implement subcommands that have their own options. @@ -299,18 +303,3 @@ An example of using ``$argv_opts`` to forward known options to another command, The argparse call above saves all the options we do *not* want to process in ``$argv_opts``. (The ``--qwords`` and ``--bytes`` options are *not* saved there as their option spec's end in a ``~``). The code then processes the ``--qwords`` and ``--bytess`` options using the the ``$_flag_OPTION`` variables, and puts the transformed options in ``$argv_opts`` (which already contains all the original options, *other* than ``--qwords`` and ``--bytes``). Note that the ``argparse`` call does need to know all the options to the original ``head`` so that we can accurately work out the *non*-option arguments (i.e. ``$argv``, the filenames that ``head`` is to operate on). We'd similarly need to tell ``argparse`` the options if we wanted to modify the given filenames. - -Limitations ------------ - -One limitation with **--ignore-unknown** is that, if an unknown option is given in a group with known options, the entire group will be kept in $argv. ``argparse`` will not do any permutations here. - -For instance:: - - argparse --ignore-unknown h -- -ho - echo $_flag_h # is -h, because -h was given - echo $argv # is still -ho - -This limitation may be lifted in future. - -Additionally, it can only parse known options up to the first unknown option in the group - the unknown option could take options, so it isn't clear what any character after an unknown option means. diff --git a/src/builtins/argparse.rs b/src/builtins/argparse.rs index 6b74ba935..0ec1f4e5e 100644 --- a/src/builtins/argparse.rs +++ b/src/builtins/argparse.rs @@ -59,7 +59,7 @@ struct ArgParseCmdOpts<'args> { implicit_int_flag: char, name: WString, raw_exclusive_flags: Vec<&'args wstr>, - args: Vec<&'args wstr>, + args: Vec>, args_opts: Vec>, options: HashMap>, long_to_short_flag: HashMap, @@ -692,7 +692,7 @@ fn validate_and_store_implicit_int<'args>( let opt_spec = opts.options.get_mut(&opts.implicit_int_flag).unwrap(); if opt_spec.delete { // No need to call delete_flag, as w.argv_opts.last will be of form - - w.argv_opts.pop(); + w.argv_opts.pop().unwrap(); } validate_arg(parser, &opts.name, opt_spec, is_long_flag, val, streams)?; @@ -706,8 +706,9 @@ fn validate_and_store_implicit_int<'args>( } /// Delete the most recently matched flag, is_long_flag should be true if the flag was given with a -/// long flag name (even if it was given with a single - and not two). -fn delete_flag<'args>(w: &mut WGetopter<'_, 'args, '_>, is_long_flag: bool) { +/// long flag name (even if it was given with a single - and not two). The returned value is the +/// deleted flag and its value (unless the value was given as a seperate argument) +fn delete_flag<'args>(w: &mut WGetopter<'_, 'args, '_>, is_long_flag: bool) -> Cow<'args, wstr> { // Does the option have a value that was a seperate argument to the option name itself? (e.g. // w.argv_opts ends in -- or -... ) let separate_value = w @@ -731,7 +732,7 @@ fn delete_flag<'args>(w: &mut WGetopter<'_, 'args, '_>, is_long_flag: bool) { // may be abbreviated assert!(w.remaining_text.is_empty()); // There will be no short options, so we're done - return; + return opt_arg; } // Otherwise, it's a short option so opt_arg will be of form: @@ -754,16 +755,21 @@ fn delete_flag<'args>(w: &mut WGetopter<'_, 'args, '_>, is_long_flag: bool) { if previous_opts == 0 && more_opts.is_empty() { // opt_arg will be of form - or - // (i.e. there are no other options that we need to add back to w.argv_opts) - return; + return opt_arg; } + // Set opt_arg_with to be opt_arg minus the ... (i.e. -) + // (the +1 is to skip over the leading '-', and the +2 make it a one character slice) + let opt_arg_with = L!("-").to_owned() + &opt_arg[previous_opts + 1..previous_opts + 2] + value; + // Set opt_arg_without to to be opt_arg minus (i.e. // -...). (the +1 is to skip over the leading '-') let opt_arg_without = opt_arg[..previous_opts + 1].to_owned() + more_opts; assert!(opt_arg.len() > 1); // There should be at least one short opt - // Put the version without back + // Put the version without back, and return the version with it w.argv_opts.push(opt_arg_without.into()); + Cow::Owned(opt_arg_with) } fn handle_flag<'args>( @@ -867,18 +873,35 @@ fn argparse_parse_flags<'args>( )); Err(STATUS_INVALID_ARGS) } else { + // The option is unknown, so there's no long opt index it could have used + assert!(!is_long_flag); + // arg_contents already skipped over the first '-' + let is_long_flag = arg_contents.starts_with("-"); + // Any unrecognized option is put back if ignore_unknown is used. // This allows reusing the same argv in multiple argparse calls, // or just ignoring the error (e.g. in completions). - opts.args.push(args_read[w.wopt_index - 1]); - w.argv_opts.pop(); + + // First we consume any remaining text as if it was the option's argument + if !w.remaining_text.is_empty() { + assert!(w.woptarg.is_none()); // Both don't make sense + w.woptarg = Some(w.remaining_text); + // Explain to wgetopt that we want to skip to the next arg, + // because we can't handle this opt group. + w.remaining_text = L!(""); + } + + // Now by calling delete_flag we ensure that if the unknown flag is precceded by + // known flags, the known flags are kept in $argv_opts, and not added to $argv. + // (any argument to the option is also returned in unknown_flag, and removed + // from opts.argv_opts) + let unknown_flag = delete_flag(&mut w, is_long_flag); + opts.args.push(unknown_flag); + // Work around weirdness with wgetopt, which crashes if we `continue` here. if w.wopt_index == argc { break; } - // Explain to wgetopt that we want to skip to the next arg, - // because we can't handle this opt group. - w.remaining_text = L!(""); Ok(SUCCESS) } } @@ -888,7 +911,7 @@ fn argparse_parse_flags<'args>( // otherwise we'd get ignored options first and normal arguments later. // E.g. `argparse -i -- -t tango -w` needs to keep `-t tango -w` in $argv, not `-t -w // tango`. - opts.args.push(args_read[w.wopt_index - 1]); + opts.args.push(Cow::Borrowed(args_read[w.wopt_index - 1])); continue; } // It's a recognized flag. @@ -921,7 +944,8 @@ fn argparse_parse_args<'args>( check_for_mutually_exclusive_flags(opts, streams)?; - opts.args.extend_from_slice(&args[optind..]); + opts.args + .extend(args[optind..].iter().map(|&s| Cow::Borrowed(s))); Ok(SUCCESS) } @@ -980,7 +1004,7 @@ fn set_argparse_result_vars(vars: &EnvStack, opts: ArgParseCmdOpts) { } } - let args = opts.args.iter().map(|&s| s.to_owned()).collect(); + let args = opts.args.into_iter().map(|s| s.into_owned()).collect(); vars.set(L!("argv"), EnvMode::LOCAL, args); let args_opts = opts.args_opts.into_iter().map(|s| s.into_owned()).collect(); vars.set(L!("argv_opts"), EnvMode::LOCAL, args_opts); diff --git a/src/wgetopt.rs b/src/wgetopt.rs index 2abc8485f..7110d63ed 100644 --- a/src/wgetopt.rs +++ b/src/wgetopt.rs @@ -490,6 +490,10 @@ fn handle_long_opt(&mut self, longopt_index: &mut usize) -> Option { .as_char_slice() .contains(&self.remaining_text.char_at(0)) { + // Unknown option, assume remaining text is its argument + // (it needs to be saved somewhere for argparse, but we cant save it in + // self.remaining_text as that will break further parsing) + self.woptarg = Some(&self.remaining_text[1..]); self.remaining_text = empty_wstr(); self.wopt_index += 1; diff --git a/tests/checks/argparse.fish b/tests/checks/argparse.fish index d1c96bb84..462c44a6b 100644 --- a/tests/checks/argparse.fish +++ b/tests/checks/argparse.fish @@ -346,14 +346,26 @@ begin # CHECK: alpha aaaa end +begin + # Ignoring unknown long options that share a character with a known short options + argparse -i o -- --long=value --long + or echo unexpected argparse return status $status >&2 + set -l + # CHECK: argv '--long=value' '--long' + # CHECK: argv_opts +end + begin # Check for crash when last option is unknown + # Note that because of the quotation marks, there is a single argument + # starting with the known short option '-b', and continuing with an unknown short option + # that starts with a space argparse -i b/break -- "-b kubectl get pods -l name=foo" set -l # CHECK: _flag_b -b # CHECK: _flag_break -b - # CHECK: argv '-b kubectl get pods -l name=foo' - # CHECK: argv_opts + # CHECK: argv '- kubectl get pods -l name=foo' + # CHECK: argv_opts -b end begin @@ -519,8 +531,9 @@ end begin argparse --ignore-unknown h i -- -hoa -oia echo -- $argv - #CHECK: -hoa -oia + #CHECK: -oa -oia echo -- $argv_opts + #CHECK: -h echo $_flag_h #CHECK: -h set -q _flag_i