From e260c42a13f427eb8213c0732e31d9f314195035 Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Mon, 10 Jul 2017 20:58:19 -0700 Subject: [PATCH] implement `argparse --exclusive` Fixes #4190 --- fish.xcodeproj/project.pbxproj | 18 +- src/builtin_argparse.cpp | 325 ++++++++++++++++++++++----------- 2 files changed, 235 insertions(+), 108 deletions(-) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 43fef7032..bb42b7082 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -117,6 +117,9 @@ 9C7A557D1DCD71890049C25D /* fish_key_reader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */; }; 9C7A557E1DCD71CD0049C25D /* print_help.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855613B3ACEE0099B651 /* print_help.cpp */; }; 9C7A55811DCD739C0049C25D /* fish_key_reader in CopyFiles */ = {isa = PBXBuildFile; fileRef = 9C7A55721DCD71330049C25D /* fish_key_reader */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; + CB0F034C1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; }; + CB0F034D1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; }; + CB0F034E1F156FE3001827D3 /* builtin_argparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */; }; D001B5EE1F041CBD000838CC /* builtin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F592F1F041AE4003EE978 /* builtin.cpp */; }; D001B5F01F041CBD000838CC /* builtin_ulimit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F59311F041AE4003EE978 /* builtin_ulimit.cpp */; }; D001B5F21F041CBD000838CC /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D05F59331F041AE4003EE978 /* builtin_test.cpp */; }; @@ -647,8 +650,10 @@ 63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_find_bracket.c; sourceTree = ""; }; 9C7A55721DCD71330049C25D /* fish_key_reader */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_key_reader; sourceTree = BUILT_PRODUCTS_DIR; }; 9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_key_reader.cpp; sourceTree = ""; }; - CBB772591F11F93F00780A21 /* builtin_argparse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_argparse.cpp; sourceTree = ""; }; - CBB7725A1F11F93F00780A21 /* builtin_argparse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin_argparse.h; sourceTree = ""; }; + CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_argparse.cpp; sourceTree = ""; }; + CB0F034B1F156FE3001827D3 /* builtin_argparse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin_argparse.h; sourceTree = ""; }; + CBB7725B1F14964100780A21 /* fish-shell */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "fish-shell"; sourceTree = ""; }; + CBB7725C1F1496AF00780A21 /* fish-shell */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "fish-shell"; sourceTree = ""; }; D00769421990137800CA4627 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; }; D00F63F019137E9D00FCCDEC /* fish_version.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_version.cpp; sourceTree = ""; }; D01A2D23169B730A00767098 /* man1 */ = {isa = PBXFileReference; lastKnownFileType = text; name = man1; path = pages_for_manpath/man1; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -964,6 +969,8 @@ D0A084F013B3AC130099B651 = { isa = PBXGroup; children = ( + CBB7725C1F1496AF00780A21 /* fish-shell */, + CBB7725B1F14964100780A21 /* fish-shell */, D0D02A91159845EF008E62BD /* Sources */, D0D02AFC159871BF008E62BD /* Launcher */, D0D02A8E15983D5F008E62BD /* Libraries */, @@ -986,8 +993,8 @@ D0D02A91159845EF008E62BD /* Sources */ = { isa = PBXGroup; children = ( - CBB772591F11F93F00780A21 /* builtin_argparse.cpp */, - CBB7725A1F11F93F00780A21 /* builtin_argparse.h */, + CB0F034A1F156FE3001827D3 /* builtin_argparse.cpp */, + CB0F034B1F156FE3001827D3 /* builtin_argparse.h */, 9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */, 4E142D731B56B5D7008783C8 /* config.h */, D0C6FCCB14CFA4B7004CE8AD /* autoload.h */, @@ -1684,6 +1691,7 @@ D007692D1990137800CA4627 /* proc.cpp in Sources */, D007692E1990137800CA4627 /* reader.cpp in Sources */, D007692F1990137800CA4627 /* sanity.cpp in Sources */, + CB0F034E1F156FE3001827D3 /* builtin_argparse.cpp in Sources */, D05F597F1F041AE4003EE978 /* builtin_source.cpp in Sources */, D05F59B51F041AE4003EE978 /* builtin_contains.cpp in Sources */, D05F59911F041AE4003EE978 /* builtin_random.cpp in Sources */, @@ -1771,6 +1779,7 @@ D05F59AB1F041AE4003EE978 /* builtin_emit.cpp in Sources */, D05F59961F041AE4003EE978 /* builtin_printf.cpp in Sources */, D030FC001A4A38F300F7ADA0 /* function.cpp in Sources */, + CB0F034D1F156FE3001827D3 /* builtin_argparse.cpp in Sources */, D05F59C61F041AE4003EE978 /* builtin_block.cpp in Sources */, D05F59C91F041AE4003EE978 /* builtin_bind.cpp in Sources */, D05F599C1F041AE4003EE978 /* builtin_history.cpp in Sources */, @@ -1880,6 +1889,7 @@ D0D02A7015983842008E62BD /* proc.cpp in Sources */, D0D02A7115983848008E62BD /* reader.cpp in Sources */, D0D02A721598384C008E62BD /* sanity.cpp in Sources */, + CB0F034C1F156FE3001827D3 /* builtin_argparse.cpp in Sources */, D05F597D1F041AE4003EE978 /* builtin_source.cpp in Sources */, D05F59B31F041AE4003EE978 /* builtin_contains.cpp in Sources */, D05F598F1F041AE4003EE978 /* builtin_random.cpp in Sources */, diff --git a/src/builtin_argparse.cpp b/src/builtin_argparse.cpp index f6147700c..00c1ae0ad 100644 --- a/src/builtin_argparse.cpp +++ b/src/builtin_argparse.cpp @@ -3,19 +3,22 @@ // See issue #4190 for the rationale behind the original behavior of this builtin. #include "config.h" // IWYU pragma: keep -#if 0 -#include -#endif - +#include +#include #include #include +#include #include +#include +#include #include #include +#include #include #include #include +#include #include "builtin.h" #include "builtin_argparse.h" @@ -48,8 +51,11 @@ class argparse_cmd_opts_t { bool print_help = false; bool require_order = false; wcstring name = L"argparse"; + wcstring_list_t raw_exclusive_flags; wcstring_list_t argv; - struct std::map options; + std::map options; + std::map long_to_short_flag; + std::vector> exclusive_flag_sets; ~argparse_cmd_opts_t() { for (auto it : options) { @@ -58,12 +64,108 @@ class argparse_cmd_opts_t { } }; -static const wchar_t *short_options = L"+:hn:r"; +static const wchar_t *short_options = L"+:hn:rx:"; static const struct woption long_options[] = {{L"require-order", no_argument, NULL, 'r'}, {L"name", required_argument, NULL, 'n'}, + {L"exclusive", required_argument, NULL, 'x'}, {L"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0}}; +// Check if any pair of mutually exclusive options was seen. Note that since every option must have +// a short name we only need to check those. +static int check_for_mutually_exclusive_flags(argparse_cmd_opts_t &opts, io_streams_t &streams) { + for (auto it : opts.options) { + auto opt_spec = it.second; + if (opt_spec->num_seen == 0) continue; + + // We saw this option at least once. Check all the sets of mutually exclusive options to see + // if this option appears in any of them. + for (auto xarg_set : opts.exclusive_flag_sets) { + auto found = std::find(xarg_set.begin(), xarg_set.end(), opt_spec->short_flag); + if (found != xarg_set.end()) { + // Okay, this option is in a mutually exclusive set of options. Check if any of the + // other mutually exclusive options have been seen. + for (auto xflag : xarg_set) { + auto xopt_spec = opts.options[xflag]; + // Ignore this flag in the list of mutually exclusive flags. + if (xopt_spec->short_flag == opt_spec->short_flag) continue; + + // If it is a different flag check if it has been seen. + if (xopt_spec->num_seen) { + wcstring flag1; + if (opt_spec->short_flag_valid) flag1 = wcstring(1, opt_spec->short_flag); + if (!opt_spec->long_flag.empty()) { + if (opt_spec->short_flag_valid) flag1 += L"/"; + flag1 += opt_spec->long_flag; + } + wcstring flag2; + if (xopt_spec->short_flag_valid) flag2 = wcstring(1, xopt_spec->short_flag); + if (!xopt_spec->long_flag.empty()) { + if (xopt_spec->short_flag_valid) flag2 += L"/"; + flag2 += xopt_spec->long_flag; + } + streams.err.append_format( + _(L"%ls: Mutually exclusive flags '%ls' and `%ls` seen\n"), + opts.name.c_str(), flag1.c_str(), flag2.c_str()); + return STATUS_CMD_ERROR; + } + } + } + } + } + + return STATUS_CMD_OK; +} + +// This is used as a specialization to allow us to force operator>> to split the input on something +// other than spaces. +class WordDelimitedByComma : public wcstring {}; + +// Cppcheck incorrectly complains this is unused. It is indirectly used by std:istream_iterator. +std::wistream &operator>>(std::wistream &is, WordDelimitedByComma &output) { // cppcheck-suppress + std::getline(is, output, L','); + return is; +} + +// This should be called after all the option specs have been parsed. At that point we have enough +// information to parse the values associated with any `--exclusive` flags. +static int parse_exclusive_args(argparse_cmd_opts_t &opts, io_streams_t &streams) { + for (auto raw_xflags : opts.raw_exclusive_flags) { + // This is an advanced technique that leverages the C++ STL to split the string on commas. + std::wistringstream iss(raw_xflags); + wcstring_list_t xflags((std::istream_iterator(iss)), + std::istream_iterator()); + if (xflags.size() < 2) { + streams.err.append_format(_(L"%ls: exclusive flag string '%ls' is not valid\n"), + opts.name.c_str(), raw_xflags.c_str()); + return STATUS_CMD_ERROR; + } + + std::vector exclusive_set; + for (auto flag : xflags) { + if (flag.size() == 1 && opts.options.find(flag[0]) != opts.options.end()) { + // It's a short flag. + exclusive_set.push_back(flag[0]); + } else { + auto x = opts.long_to_short_flag.find(flag); + if (x != opts.long_to_short_flag.end()) { + // It's a long flag we store as its short flag equivalent. + exclusive_set.push_back(x->second); + } else { + streams.err.append_format(_(L"%ls: exclusive flag '%ls' is not valid\n"), + opts.name.c_str(), flag.c_str()); + return STATUS_CMD_ERROR; + } + } + } + + // Store the set of exclusive flags for use when parsing the supplied set of arguments. + opts.exclusive_flag_sets.push_back(exclusive_set); + } + + return STATUS_CMD_OK; +} + static bool parse_flag_modifiers(argparse_cmd_opts_t &opts, option_spec_t *opt_spec, const wcstring &option_spec, const wchar_t *s, io_streams_t &streams) { @@ -122,9 +224,30 @@ static bool parse_option_spec(argparse_cmd_opts_t &opts, wcstring option_spec, } opt_spec->long_flag = wcstring(s, e - s); + opts.long_to_short_flag.emplace(opt_spec->long_flag, opt_spec->short_flag); return parse_flag_modifiers(opts, opt_spec, option_spec, e, streams); } +static int collect_option_specs(argparse_cmd_opts_t &opts, int *optind, int argc, wchar_t **argv, + io_streams_t &streams) { + wchar_t *cmd = argv[0]; + while (true) { + if (wcscmp(L"--", argv[*optind]) == 0) { + ++*optind; + return STATUS_CMD_OK; + } + + if (!parse_option_spec(opts, argv[*optind], streams)) { + return STATUS_CMD_ERROR; + } + + if (++*optind == argc) { + streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0); + return STATUS_INVALID_ARGS; + } + } +} + static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) int argc, wchar_t **argv, parser_t &parser, io_streams_t &streams) { wchar_t *cmd = argv[0]; @@ -140,6 +263,12 @@ static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(hig opts.require_order = true; break; } + case 'x': { + // Just save the raw string here. Later, when we have all the short and long flag + // definitions we'll parse these strings into a more useful data structure. + opts.raw_exclusive_flags.push_back(w.woptarg); + break; + } case 'h': { opts.print_help = true; break; @@ -160,7 +289,7 @@ static int parse_cmd_opts(argparse_cmd_opts_t &opts, int *optind, //!OCLINT(hig } *optind = w.woptind; - return STATUS_CMD_OK; + return collect_option_specs(opts, optind, argc, argv, streams); } static void populate_option_strings( @@ -188,74 +317,9 @@ static void populate_option_strings( long_options.get()[i] = {NULL, 0, NULL, 0}; } -// This function mimics the `wgetopt_long()` usage found elsewhere in our other builtin commands. -// It's different in that the short and long option structures are constructed dynamically based on -// arguments provided to the `argparse` command. -static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t &args, - parser_t &parser, io_streams_t &streams) { - if (args.empty()) return STATUS_CMD_OK; - - wcstring short_options = opts.require_order ? L"+:" : L":"; - int nflags = opts.options.size(); - // This assumes every option has a long flag. Which is the worst case and isn't worth optimizing - // since the number of options is always quite small. Thus the size of the array will never be - // much larger than the minimum size required for all the long options. - auto long_options = std::unique_ptr>( - new woption[nflags + 1], [](woption *p) { delete[] p; }); - populate_option_strings(opts, short_options, long_options); - - const wchar_t *cmd = opts.name.c_str(); - int argc = args.size(); - - // This is awful but we need to convert our wcstring_list_t to a that can be passed - // to w.wgetopt_long(). Furthermore, because we're dynamically allocating the array of pointers - // we need to ensure the memory for the data structure is freed when we leave this scope. - null_terminated_array_t argv_container(args); - auto argv = (wchar_t **)argv_container.get(); - - int opt; - wgetopter_t w; - auto long_opts = long_options.get(); - while ((opt = w.wgetopt_long(argc, argv, short_options.c_str(), long_opts, NULL)) != -1) { - switch (opt) { - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - default: { - auto found = opts.options.find(opt); - assert(found != opts.options.end()); - - option_spec_t *opt_spec = found->second; - opt_spec->num_seen++; - if (opt_spec->num_allowed == 0) { - assert(!w.woptarg); - } else if (opt_spec->num_allowed == -1 || opt_spec->num_allowed == 1) { - // This is subtle. We're depending on `wgetopt_long()` to report that a - // mandatory value is missing if `opt_spec->num_allowed == 1` and thus return - // ':' so that we don't take this branch if the mandatory arg is missing. - // Otherwise we can treat the optional and mandatory arg cases the same. That - // is, store the arg as the only value for the flag. Even if we've seen earlier - // instances of the flag. - opt_spec->vals.clear(); - if (w.woptarg) { - opt_spec->vals.push_back(w.woptarg); - } - } else { - assert(w.woptarg); - opt_spec->vals.push_back(w.woptarg); - } - break; - } - } - } - - // Add a count for how many times we saw each boolean flag but only if we saw the flag at least - // once. +// Add a count for how many times we saw each boolean flag but only if we saw the flag at least +// once. +static void update_bool_flag_counts(argparse_cmd_opts_t &opts) { for (auto it : opts.options) { auto opt_spec = it.second; if (opt_spec->num_allowed != 0 || opt_spec->num_seen == 0) continue; @@ -263,8 +327,83 @@ static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t swprintf(count, sizeof count / sizeof count[0], L"%d", opt_spec->num_seen); opt_spec->vals.push_back(wcstring(count)); } +} - for (int i = w.woptind; argv[i]; i++) opts.argv.push_back(argv[i]); +static int argparse_parse_flags(argparse_cmd_opts_t &opts, const wchar_t *short_options, + const woption *long_options, const wchar_t *cmd, int argc, + wchar_t **argv, int *optind, parser_t &parser, + io_streams_t &streams) { + int opt; + wgetopter_t w; + while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, NULL)) != -1) { + if (opt == ':') { + builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; + } + if (opt == '?') { + builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; + } + + auto found = opts.options.find(opt); + assert(found != opts.options.end()); + + option_spec_t *opt_spec = found->second; + opt_spec->num_seen++; + if (opt_spec->num_allowed == 0) { + assert(!w.woptarg); + } else if (opt_spec->num_allowed == -1 || opt_spec->num_allowed == 1) { + // We're depending on `wgetopt_long()` to report that a mandatory value is missing if + // `opt_spec->num_allowed == 1` and thus return ':' so that we don't take this branch if + // the mandatory arg is missing. + opt_spec->vals.clear(); + if (w.woptarg) { + opt_spec->vals.push_back(w.woptarg); + } + } else { + assert(w.woptarg); + opt_spec->vals.push_back(w.woptarg); + } + } + + *optind = w.woptind; + return STATUS_CMD_OK; +} + +// This function mimics the `wgetopt_long()` usage found elsewhere in our other builtin commands. +// It's different in that the short and long option structures are constructed dynamically based on +// arguments provided to the `argparse` command. +static int argparse_parse_args(argparse_cmd_opts_t &opts, const wcstring_list_t &args, + parser_t &parser, io_streams_t &streams) { + if (args.empty()) return STATUS_CMD_OK; + + wcstring short_options = opts.require_order ? L"+:" : L":"; + size_t nflags = opts.options.size(); + // This assumes every option has a long flag. Which is the worst case and isn't worth optimizing + // since the number of options is always quite small. + auto long_options = std::unique_ptr>( + new woption[nflags + 1], [](woption *p) { delete[] p; }); + populate_option_strings(opts, short_options, long_options); + + const wchar_t *cmd = opts.name.c_str(); + int argc = static_cast(args.size()); + + // This is awful but we need to convert our wcstring_list_t to a that can be passed + // to w.wgetopt_long(). Furthermore, because we're dynamically allocating the array of pointers + // we need to ensure the memory for the data structure is freed when we leave this scope. + null_terminated_array_t argv_container(args); + auto argv = (wchar_t **)argv_container.get(); + + int optind; + int retval = argparse_parse_flags(opts, short_options.c_str(), long_options.get(), cmd, argc, + argv, &optind, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + retval = check_for_mutually_exclusive_flags(opts, streams); + if (retval != STATUS_CMD_OK) return retval; + update_bool_flag_counts(opts); + + for (int i = optind; argv[i]; i++) opts.argv.push_back(argv[i]); return STATUS_CMD_OK; } @@ -290,35 +429,18 @@ int builtin_argparse(parser_t &parser, io_streams_t &streams, wchar_t **argv) { } if (optind == argc) { - streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0); - return STATUS_INVALID_ARGS; + // Apparently we weren't handed any arguments to be parsed according to the option specs we + // just collected. So there isn't anything for us to do. + return STATUS_CMD_OK; } - while (true) { - if (wcscmp(L"--", argv[optind]) == 0) { - optind++; - break; - } - - if (!parse_option_spec(opts, argv[optind], streams)) { - return STATUS_CMD_ERROR; - } - - if (++optind == argc) { - streams.err.append_format(BUILTIN_ERR_ARG_COUNT1, cmd, 2, 0); - return STATUS_INVALID_ARGS; - } - } - -#if 0 - auto stats = mstats(); - fwprintf(stderr, L"WTF %d bytes_total %lu chunks_used %lu bytes_used %lu chunks_free %lu bytes_free %lu\n", __LINE__, stats.bytes_total, stats.chunks_used, stats.bytes_used, stats.chunks_free); -#endif - wcstring_list_t args; args.push_back(opts.name); while (optind < argc) args.push_back(argv[optind++]); + retval = parse_exclusive_args(opts, streams); + if (retval != STATUS_CMD_OK) return retval; + retval = argparse_parse_args(opts, args, parser, streams); if (retval != STATUS_CMD_OK) return retval; @@ -345,10 +467,5 @@ int builtin_argparse(parser_t &parser, io_streams_t &streams, wchar_t **argv) { auto val = list_to_array_val(opts.argv); env_set(L"argv", *val == ENV_NULL ? NULL : val->c_str(), ENV_LOCAL); -#if 0 - stats = mstats(); - fwprintf(stderr, L"WTF %d bytes_total %lu chunks_used %lu bytes_used %lu chunks_free %lu bytes_free %lu\n", __LINE__, stats.bytes_total, stats.chunks_used, stats.bytes_used, stats.chunks_free); -#endif - return retval; }