From e9fb9642a809c354acf0e5d853bf0b6a3b098575 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sun, 23 Jan 2022 14:27:24 -0500 Subject: [PATCH 01/40] Add no-state option, filter queries from links, fix headers --- src/client.rs | 3 ++- src/config/container.rs | 6 ++++-- src/extractor/container.rs | 16 +++++++++++++--- src/parser.rs | 7 +++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/client.rs b/src/client.rs index 1e61306..ee0ecff 100644 --- a/src/client.rs +++ b/src/client.rs @@ -27,7 +27,8 @@ pub fn initialize( .user_agent(user_agent) .danger_accept_invalid_certs(insecure) .default_headers(header_map) - .redirect(policy); + .redirect(policy) + .http1_title_case_headers(); if let Some(some_proxy) = proxy { if !some_proxy.is_empty() { diff --git a/src/config/container.rs b/src/config/container.rs index 4c25057..abddfbd 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -246,8 +246,6 @@ pub struct Configuration { pub resume_from: String, /// Whether or not a scan's current state should be saved when user presses Ctrl+C - /// - /// Not configurable from CLI; can only be set from a config file #[serde(default = "save_state")] pub save_state: bool, @@ -696,6 +694,10 @@ impl Configuration { config.requester_policy = RequesterPolicy::AutoBail; } + if args.is_present("no_state") { + config.save_state = false; + } + if args.is_present("dont_filter") { config.dont_filter = true; } diff --git a/src/extractor/container.rs b/src/extractor/container.rs index c7bbb08..e969456 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -240,8 +240,18 @@ impl<'a> Extractor<'a> { log::trace!("enter: get_sub_paths_from_path({})", path); let mut paths = vec![]; + // trim whitespace, remove slashes, and queries/anchors (i.e. ?C=D;O=A) + let mut path_str = path.to_owned(); + path_str = path_str.trim().to_string(); + path_str.retain(|c| !c.is_whitespace()); + if path_str.starts_with("//") { + path_str = path_str.trim_start_matches('/').to_string(); + }; + let re = Regex::new(r"([#?].*)?").unwrap(); + path_str = re.replace_all(&path_str, "").to_string().trim().to_string(); + // filter out any empty strings caused by .split - let mut parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + let mut parts: Vec<&str> = path_str.split('/').filter(|s| !s.is_empty()).collect(); let length = parts.len(); @@ -297,7 +307,7 @@ impl<'a> Extractor<'a> { let new_url = old_url .join(link) .with_context(|| format!("Could not join {} with {}", old_url, link))?; - + log::debug!("Added link \"{}\"", new_url); links.insert(new_url.to_string()); log::trace!("exit: add_link_to_set_of_links"); @@ -463,7 +473,7 @@ impl<'a> Extractor<'a> { .filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr)); for t in tags { if let Some(link) = t.value().attr(html_attr) { - log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str()); + log::debug!("Extracted link \"{}\" from {}", link, resp_url.as_str()); match Url::parse(link) { Ok(absolute) => { diff --git a/src/parser.rs b/src/parser.rs index 3bab47b..8bfcf21 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -527,6 +527,13 @@ pub fn initialize() -> App<'static> { .help_heading("Output settings") .help("Output file to write log entries (use w/ --json for JSON entries)") .takes_value(true), + ) + .arg( + Arg::new("no_state") + .long("no-state") + .takes_value(false) + .help_heading("Output settings") + .help("Disable state output file (*.state) for continuing scans") ); ///////////////////////////////////////////////////////////////////// From 4a678ef65ba2b7723a43e79f7c927d88db4639d3 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sun, 23 Jan 2022 14:30:41 -0500 Subject: [PATCH 02/40] Shell completions --- shell_completions/_feroxbuster | 1 + shell_completions/_feroxbuster.ps1 | 1 + shell_completions/feroxbuster.bash | 2 +- shell_completions/feroxbuster.elv | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index f7fb7b9..6f28ffd 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -94,6 +94,7 @@ _feroxbuster() { '-q[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--quiet[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--json[Emit JSON logs to --output and --debug-log instead of normal text]' \ +'--no-state[Disable state output file (*.state) for continuing scans]' \ && ret=0 } diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index a38c7d8..6253655 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -99,6 +99,7 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('-q', 'q', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--quiet', 'quiet', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--json', 'json', [CompletionResultType]::ParameterName, 'Emit JSON logs to --output and --debug-log instead of normal text') + [CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state) for continuing scans') break } }) diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index fdb9a6d..af73a30 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -19,7 +19,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --verbosity --silent --quiet --json --output --debug-log" + opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --verbosity --silent --quiet --json --output --debug-log --no-state" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index 7234c7b..0253de0 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -97,6 +97,7 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand -q 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --quiet 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --json 'Emit JSON logs to --output and --debug-log instead of normal text' + cand --no-state 'Disable state output file (*.state) for continuing scans' } ] $completions[$command] From 079b8b21767d7d86880dcf2f045a07a221409037 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Mon, 24 Jan 2022 22:17:09 -0500 Subject: [PATCH 03/40] Update src/parser.rs Co-authored-by: epi <43392618+epi052@users.noreply.github.com> --- src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 8bfcf21..e4e4765 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -533,7 +533,7 @@ pub fn initialize() -> App<'static> { .long("no-state") .takes_value(false) .help_heading("Output settings") - .help("Disable state output file (*.state) for continuing scans") + .help("Disable state output file (*.state)") ); ///////////////////////////////////////////////////////////////////// From 3c474920bbd2fe8224439aadb8b3dcace4490222 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Mon, 24 Jan 2022 23:10:21 -0500 Subject: [PATCH 04/40] Fix per comments --- shell_completions/_feroxbuster | 2 +- shell_completions/_feroxbuster.ps1 | 2 +- shell_completions/feroxbuster.elv | 2 +- src/extractor/container.rs | 11 ++++++++--- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index 6f28ffd..9ec33cd 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -94,7 +94,7 @@ _feroxbuster() { '-q[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--quiet[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--json[Emit JSON logs to --output and --debug-log instead of normal text]' \ -'--no-state[Disable state output file (*.state) for continuing scans]' \ +'--no-state[Disable state output file (*.state)]' \ && ret=0 } diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index 6253655..bd90fba 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -99,7 +99,7 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('-q', 'q', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--quiet', 'quiet', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--json', 'json', [CompletionResultType]::ParameterName, 'Emit JSON logs to --output and --debug-log instead of normal text') - [CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state) for continuing scans') + [CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state)') break } }) diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index 0253de0..a379930 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -97,7 +97,7 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand -q 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --quiet 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --json 'Emit JSON logs to --output and --debug-log instead of normal text' - cand --no-state 'Disable state output file (*.state) for continuing scans' + cand --no-state 'Disable state output file (*.state)' } ] $completions[$command] diff --git a/src/extractor/container.rs b/src/extractor/container.rs index e969456..1338c3a 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -240,15 +240,20 @@ impl<'a> Extractor<'a> { log::trace!("enter: get_sub_paths_from_path({})", path); let mut paths = vec![]; - // trim whitespace, remove slashes, and queries/anchors (i.e. ?C=D;O=A) + // trim whitespace, remove slashes, and queries/fragments (i.e. ?C=D;O=A) let mut path_str = path.to_owned(); path_str = path_str.trim().to_string(); path_str.retain(|c| !c.is_whitespace()); if path_str.starts_with("//") { path_str = path_str.trim_start_matches('/').to_string(); }; - let re = Regex::new(r"([#?].*)?").unwrap(); - path_str = re.replace_all(&path_str, "").to_string().trim().to_string(); + let (path_str, _discarded) = path_str + .split_once('?') + // if there isn't a '?', try to remove a fragment + .unwrap_or_else(|| { + // if there isn't a '#', return (original, empty) + path_str.split_once('#').unwrap_or((&path_str, "")) + }); // filter out any empty strings caused by .split let mut parts: Vec<&str> = path_str.split('/').filter(|s| !s.is_empty()).collect(); From c0b404074324c07f174dad709ffc42ae6c49d249 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sat, 29 Jan 2022 09:20:44 -0500 Subject: [PATCH 05/40] Fix relative pathing --- src/extractor/container.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 1338c3a..d285187 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -191,9 +191,10 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us + let absolute_path = format!("{}{}", resp_url.path().to_string(), link.to_string()); if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); + if self.add_all_sub_paths(&absolute_path, &mut links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", absolute_path, links); } } else { // unexpected error has occurred @@ -498,9 +499,10 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us + let absolute_path = format!("{}{}", resp_url.path().to_string(), link.to_string()); if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); + if self.add_all_sub_paths(&absolute_path, links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", absolute_path, links); } } else { // unexpected error has occurred From 1498122973bfe26ffdb91d1fa237530454125728 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sat, 29 Jan 2022 09:22:23 -0500 Subject: [PATCH 06/40] Fix warnings / formatting. --- src/extractor/container.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/extractor/container.rs b/src/extractor/container.rs index d285187..4053500 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -191,10 +191,14 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us - let absolute_path = format!("{}{}", resp_url.path().to_string(), link.to_string()); + let absolute_path = format!("{}{}", resp_url.path(), link); if e.to_string().contains("relative URL without a base") { if self.add_all_sub_paths(&absolute_path, &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute_path, links); + log::warn!( + "could not add sub-paths from {} to {:?}", + absolute_path, + links + ); } } else { // unexpected error has occurred @@ -499,10 +503,14 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us - let absolute_path = format!("{}{}", resp_url.path().to_string(), link.to_string()); + let absolute_path = format!("{}{}", resp_url.path(), link); if e.to_string().contains("relative URL without a base") { if self.add_all_sub_paths(&absolute_path, links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute_path, links); + log::warn!( + "could not add sub-paths from {} to {:?}", + absolute_path, + links + ); } } else { // unexpected error has occurred From 71c5b66eb61b4ee47aaf1d532b3a10cdfa3f7216 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sun, 30 Jan 2022 01:07:32 -0500 Subject: [PATCH 07/40] Revert, did in wrong place. --- src/extractor/container.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 4053500..1338c3a 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -191,14 +191,9 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us - let absolute_path = format!("{}{}", resp_url.path(), link); if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(&absolute_path, &mut links).is_err() { - log::warn!( - "could not add sub-paths from {} to {:?}", - absolute_path, - links - ); + if self.add_all_sub_paths(link, &mut links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", link, links); } } else { // unexpected error has occurred @@ -503,14 +498,9 @@ impl<'a> Extractor<'a> { // this is the expected error that happens when we try to parse a url fragment // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us - let absolute_path = format!("{}{}", resp_url.path(), link); if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(&absolute_path, links).is_err() { - log::warn!( - "could not add sub-paths from {} to {:?}", - absolute_path, - links - ); + if self.add_all_sub_paths(link, links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", link, links); } } else { // unexpected error has occurred From 007bc4a50d280933a56c08a71ea2bb891abd5af9 Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sun, 30 Jan 2022 01:24:53 -0500 Subject: [PATCH 08/40] Response URL was not being used in concatenation. --- src/extractor/container.rs | 55 +++++++++++++++++++++++++------------- src/extractor/tests.rs | 16 ++++++----- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 1338c3a..33bf1c1 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -183,7 +183,10 @@ impl<'a> Extractor<'a> { continue; } - if self.add_all_sub_paths(absolute.path(), &mut links).is_err() { + if self + .add_all_sub_paths(resp_url, absolute.path(), &mut links) + .is_err() + { log::warn!("could not add sub-paths from {} to {:?}", absolute, links); } } @@ -192,7 +195,7 @@ impl<'a> Extractor<'a> { // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, &mut links).is_err() { + if self.add_all_sub_paths(resp_url, link, &mut links).is_err() { log::warn!("could not add sub-paths from {} to {:?}", link, links); } } else { @@ -216,11 +219,16 @@ impl<'a> Extractor<'a> { /// - homepage/assets/img/ /// - homepage/assets/ /// - homepage/ - fn add_all_sub_paths(&self, url_path: &str, links: &mut HashSet) -> Result<()> { + fn add_all_sub_paths( + &self, + resp_url: &Url, + url_path: &str, + links: &mut HashSet, + ) -> Result<()> { log::trace!("enter: add_all_sub_paths({}, {:?})", url_path, links); for sub_path in self.get_sub_paths_from_path(url_path) { - self.add_link_to_set_of_links(&sub_path, links)?; + self.add_link_to_set_of_links(resp_url, &sub_path, links)?; } log::trace!("exit: add_all_sub_paths"); @@ -292,21 +300,26 @@ impl<'a> Extractor<'a> { /// simple helper to stay DRY, trys to join a url + fragment and add it to the `links` HashSet pub(super) fn add_link_to_set_of_links( &self, + resp_url: &Url, link: &str, links: &mut HashSet, ) -> Result<()> { - log::trace!("enter: add_link_to_set_of_links({}, {:?})", link, links); + log::trace!( + "enter: add_link_to_set_of_links({}, {}, {:?})", + resp_url, + link, + links + ); let old_url = match self.target { ExtractionTarget::ResponseBody => self.response.unwrap().url().clone(), - ExtractionTarget::ParseHtml | ExtractionTarget::RobotsTxt => { - match Url::parse(&self.url) { - Ok(u) => u, - Err(e) => { - bail!("Could not parse {}: {}", self.url, e); - } + ExtractionTarget::ParseHtml => resp_url.clone(), + ExtractionTarget::RobotsTxt => match Url::parse(&self.url) { + Ok(u) => u, + Err(e) => { + bail!("Could not parse {}: {}", self.url, e); } - } + }, }; let new_url = old_url @@ -391,10 +404,13 @@ impl<'a> Extractor<'a> { for capture in self.robots_regex.captures_iter(body) { if let Some(new_path) = capture.name("url_path") { - let mut new_url = Url::parse(&self.url)?; - new_url.set_path(new_path.as_str()); - if self.add_all_sub_paths(new_url.path(), &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", new_url, links); + let mut resp_url = Url::parse(&self.url)?; + resp_url.set_path(new_path.as_str()); + if self + .add_all_sub_paths(&resp_url, resp_url.path(), &mut links) + .is_err() + { + log::warn!("could not add sub-paths from {} to {:?}", resp_url, links); } } } @@ -490,7 +506,10 @@ impl<'a> Extractor<'a> { continue; } - if self.add_all_sub_paths(absolute.path(), links).is_err() { + if self + .add_all_sub_paths(resp_url, absolute.path(), links) + .is_err() + { log::warn!("could not add sub-paths from {} to {:?}", absolute, links); } } @@ -499,7 +518,7 @@ impl<'a> Extractor<'a> { // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, links).is_err() { + if self.add_all_sub_paths(resp_url, link, links).is_err() { log::warn!("could not add sub-paths from {} to {:?}", link, links); } } else { diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 8d9c972..ab66a75 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -150,6 +150,7 @@ fn extractor_with_non_base_url_bails() -> Result<()> { let mut links = HashSet::::new(); let link = "admin"; let handles = Arc::new(Handles::for_testing(None, None).0); + let resp_url = Url::parse("http://localhost").unwrap(); let extractor = ExtractorBuilder::default() .url("\\\\\\") @@ -157,7 +158,7 @@ fn extractor_with_non_base_url_bails() -> Result<()> { .target(ExtractionTarget::RobotsTxt) .build()?; - let result = extractor.add_link_to_set_of_links(link, &mut links); + let result = extractor.add_link_to_set_of_links(&resp_url, link, &mut links); assert!(result.is_err()); Ok(()) @@ -171,10 +172,11 @@ fn extractor_add_link_to_set_of_links_happy_path() { let r_link = "admin"; let mut b_links = HashSet::::new(); let b_link = "shmadmin"; + let resp_url = Url::parse("http://localhost").unwrap(); assert_eq!(r_links.len(), 0); ROBOTS_EXT - .add_link_to_set_of_links(r_link, &mut r_links) + .add_link_to_set_of_links(&resp_url, r_link, &mut r_links) .unwrap(); assert_eq!(r_links.len(), 1); @@ -183,7 +185,7 @@ fn extractor_add_link_to_set_of_links_happy_path() { assert_eq!(b_links.len(), 0); BODY_EXT - .add_link_to_set_of_links(b_link, &mut b_links) + .add_link_to_set_of_links(&resp_url, b_link, &mut b_links) .unwrap(); assert_eq!(b_links.len(), 1); @@ -195,12 +197,14 @@ fn extractor_add_link_to_set_of_links_happy_path() { fn extractor_add_link_to_set_of_links_with_non_base_url() { let mut links = HashSet::::new(); let link = "\\\\\\\\"; - + let resp_url = Url::parse("http://localhost").unwrap(); assert_eq!(links.len(), 0); assert!(ROBOTS_EXT - .add_link_to_set_of_links(link, &mut links) + .add_link_to_set_of_links(&resp_url, link, &mut links) + .is_err()); + assert!(BODY_EXT + .add_link_to_set_of_links(&resp_url, link, &mut links) .is_err()); - assert!(BODY_EXT.add_link_to_set_of_links(link, &mut links).is_err()); assert_eq!(links.len(), 0); assert!(links.is_empty()); From 8b9d640090c02ae74cc21885bcd108827945446e Mon Sep 17 00:00:00 2001 From: godylockz <81207744+godylockz@users.noreply.github.com> Date: Sat, 5 Feb 2022 23:34:16 -0500 Subject: [PATCH 09/40] Found a bug for redirect links not extracting links properly --- src/scanner/requester.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index f19767b..ebc527d 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -392,7 +392,7 @@ impl Requester { continue; } - if self.handles.config.extract_links && !ferox_response.status().is_redirection() { + if self.handles.config.extract_links { let extractor = ExtractorBuilder::default() .target(ExtractionTarget::ResponseBody) .response(&ferox_response) From 13cfbe152e6028a3d47c2bbdb486c1bb7d88b895 Mon Sep 17 00:00:00 2001 From: epi Date: Sat, 12 Feb 2022 06:57:42 -0600 Subject: [PATCH 10/40] bumped version to 2.6.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7bc9fb5..e335106 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "2.5.0" +version = "2.6.0" authors = ["Ben 'epi' Risher (@epi052)"] license = "MIT" edition = "2021" From 71885e7e56bea39a5018e1879b1d76e5a6631bba Mon Sep 17 00:00:00 2001 From: epi Date: Sat, 12 Feb 2022 07:01:25 -0600 Subject: [PATCH 11/40] implemented --collect-extensions; numerous bugfixes/code improvements --- Cargo.lock | 2 +- Cargo.toml | 2 +- ferox-config.toml.example | 2 + shell_completions/_feroxbuster | 8 +- shell_completions/_feroxbuster.ps1 | 8 +- shell_completions/feroxbuster.bash | 10 +- shell_completions/feroxbuster.elv | 8 +- src/banner/container.rs | 38 ++- src/config/container.rs | 33 ++- src/config/tests.rs | 18 ++ src/config/utils.rs | 10 +- src/event_handlers/command.rs | 3 + src/event_handlers/container.rs | 46 +++ src/event_handlers/scans.rs | 106 +++++++ src/event_handlers/statistics.rs | 2 +- src/extractor/builder.rs | 5 +- src/extractor/container.rs | 436 ++++++++++++++++++----------- src/filters/init.rs | 9 +- src/heuristics.rs | 9 +- src/lib.rs | 9 + src/main.rs | 31 +- src/parser.rs | 20 ++ src/response.rs | 121 ++++---- src/scan_manager/scan.rs | 6 +- src/scan_manager/scan_container.rs | 81 ++++-- src/scan_manager/state.rs | 10 + src/scan_manager/tests.rs | 20 +- src/scanner/ferox_scanner.rs | 28 +- src/scanner/init.rs | 4 +- src/scanner/requester.rs | 36 ++- src/statistics/container.rs | 25 +- src/statistics/field.rs | 3 + src/url.rs | 15 +- src/utils.rs | 17 +- tests/test_banner.rs | 57 ++++ 35 files changed, 939 insertions(+), 299 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09008b8..91530a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -671,7 +671,7 @@ dependencies = [ [[package]] name = "feroxbuster" -version = "2.5.0" +version = "2.6.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 7bc9fb5..e335106 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "2.5.0" +version = "2.6.0" authors = ["Ben 'epi' Risher (@epi052)"] license = "MIT" edition = "2021" diff --git a/ferox-config.toml.example b/ferox-config.toml.example index 275f3c8..b0cf807 100644 --- a/ferox-config.toml.example +++ b/ferox-config.toml.example @@ -30,7 +30,9 @@ # random_agent = false # redirects = true # insecure = true +# collect_extensions = true # extensions = ["php", "html"] +# dont_collect = ["png", "gif", "jpg", "jpeg"] # methods = ["GET", "POST"] # data = [11, 12, 13, 14, 15] # url_denylist = ["http://dont-scan.me", "https://also-not.me"] diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index f7fb7b9..412c5f7 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -24,8 +24,8 @@ _feroxbuster() { '--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \ '*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \ '*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \ -'-a+[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \ -'--user-agent=[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \ +'-a+[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \ +'--user-agent=[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \ '*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \ '*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \ '*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \ @@ -64,6 +64,8 @@ _feroxbuster() { '--time-limit=[Limit total run time of all scans (ex: --time-limit 10m)]:TIME_SPEC: ' \ '-w+[Path to the wordlist]:FILE:_files' \ '--wordlist=[Path to the wordlist]:FILE:_files' \ +'*-I+[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \ +'*--dont-collect=[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \ '-o+[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \ '--output=[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \ '--debug-log=[Output file to write log entries (use w/ --json for JSON entries)]:FILE:_files' \ @@ -88,6 +90,8 @@ _feroxbuster() { '--auto-bail[Automatically stop scanning when an excessive amount of errors are encountered]' \ '-D[Don'\''t auto-filter wildcard responses]' \ '--dont-filter[Don'\''t auto-filter wildcard responses]' \ +'-c[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ +'--collect-extensions[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ '(--silent)*-v[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \ '(--silent)*--verbosity[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \ '(-q --quiet)--silent[Only print URLs + turn off logging (good for piping a list of urls to other commands)]' \ diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index a38c7d8..7a62229 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -29,8 +29,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests') [CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') [CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') - [CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)') - [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)') + [CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)') + [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)') [CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)') [CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)') [CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)') @@ -69,6 +69,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--time-limit', 'time-limit', [CompletionResultType]::ParameterName, 'Limit total run time of all scans (ex: --time-limit 10m)') [CompletionResult]::new('-w', 'w', [CompletionResultType]::ParameterName, 'Path to the wordlist') [CompletionResult]::new('--wordlist', 'wordlist', [CompletionResultType]::ParameterName, 'Path to the wordlist') + [CompletionResult]::new('-I', 'I', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)') + [CompletionResult]::new('--dont-collect', 'dont-collect', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)') [CompletionResult]::new('-o', 'o', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)') [CompletionResult]::new('--output', 'output', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)') [CompletionResult]::new('--debug-log', 'debug-log', [CompletionResultType]::ParameterName, 'Output file to write log entries (use w/ --json for JSON entries)') @@ -93,6 +95,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--auto-bail', 'auto-bail', [CompletionResultType]::ParameterName, 'Automatically stop scanning when an excessive amount of errors are encountered') [CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') [CompletionResult]::new('--dont-filter', 'dont-filter', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') + [CompletionResult]::new('-c', 'c', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') + [CompletionResult]::new('--collect-extensions', 'collect-extensions', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') [CompletionResult]::new('-v', 'v', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--verbosity', 'verbosity', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--silent', 'silent', [CompletionResultType]::ParameterName, 'Only print URLs + turn off logging (good for piping a list of urls to other commands)') diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index fdb9a6d..12ee5cf 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -19,7 +19,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --verbosity --silent --quiet --json --output --debug-log" + opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -c -I -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --dont-collect --verbosity --silent --quiet --json --output --debug-log" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 @@ -221,6 +221,14 @@ _feroxbuster() { COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + --dont-collect) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; + -I) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --output) COMPREPLY=($(compgen -f "${cur}")) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index 7234c7b..e86a29e 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests' cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' - cand -a 'Sets the User-Agent (default: feroxbuster/2.5.0)' - cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.5.0)' + cand -a 'Sets the User-Agent (default: feroxbuster/2.6.0)' + cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.6.0)' cand -x 'File extension(s) to search for (ex: -x php -x pdf js)' cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)' cand -m 'Which HTTP request method(s) should be sent (default: GET)' @@ -67,6 +67,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --time-limit 'Limit total run time of all scans (ex: --time-limit 10m)' cand -w 'Path to the wordlist' cand --wordlist 'Path to the wordlist' + cand -I 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)' + cand --dont-collect 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)' cand -o 'Output file to write results to (use w/ --json for JSON entries)' cand --output 'Output file to write results to (use w/ --json for JSON entries)' cand --debug-log 'Output file to write log entries (use w/ --json for JSON entries)' @@ -91,6 +93,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --auto-bail 'Automatically stop scanning when an excessive amount of errors are encountered' cand -D 'Don''t auto-filter wildcard responses' cand --dont-filter 'Don''t auto-filter wildcard responses' + cand -c 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' + cand --collect-extensions 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' cand -v 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --verbosity 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --silent 'Only print URLs + turn off logging (good for piping a list of urls to other commands)' diff --git a/src/banner/container.rs b/src/banner/container.rs index 04e54e7..262bf20 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -3,7 +3,7 @@ use crate::{ config::Configuration, event_handlers::Handles, utils::{logged_request, status_colorizer}, - DEFAULT_METHOD, VERSION, + DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, VERSION, }; use anyhow::{bail, Result}; use console::{style, Emoji}; @@ -151,6 +151,12 @@ pub struct Banner { /// whether or not there is a known new version pub(super) update_status: UpdateStatus, + + /// represents Configuration.collect_extensions + collect_extensions: BannerEntry, + + /// represents Configuration.dont_collect + dont_collect: BannerEntry, } /// implementation of Banner @@ -314,6 +320,21 @@ impl Banner { &format!("[{}]", config.methods.join(", ")), ); + let dont_collect = if config.dont_collect == DEFAULT_IGNORED_EXTENSIONS { + // default has 30+ extensions, just trim it up + BannerEntry::new( + "💸", + "Ignored Extensions", + "[Images, Movies, Audio, etc...]", + ) + } else { + BannerEntry::new( + "💸", + "Ignored Extensions", + &format!("[{}]", config.dont_collect.join(", ")), + ) + }; + let offset = std::cmp::min(config.data.len(), 30); let data = String::from_utf8(config.data[..offset].to_vec()) .unwrap_or_else(|_err| { @@ -334,6 +355,11 @@ impl Banner { let parallel = BannerEntry::new("🛤", "Parallel Scans", &config.parallel.to_string()); let rate_limit = BannerEntry::new("🚧", "Requests per Second", &config.rate_limit.to_string()); + let collect_extensions = BannerEntry::new( + "💰", + "Collect Extensions", + &config.collect_extensions.to_string(), + ); Self { targets, @@ -374,6 +400,8 @@ impl Banner { scan_limit, time_limit, url_denylist, + collect_extensions, + dont_collect, config: cfg, version: VERSION.to_string(), update_status: UpdateStatus::Unknown, @@ -420,7 +448,7 @@ by Ben "epi" Risher {} ver: {}"#, let api_url = Url::parse(url)?; - let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?; + let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone(), None).await?; let body = result.text().await?; let json_response: Value = serde_json::from_str(&body)?; @@ -550,6 +578,12 @@ by Ben "epi" Risher {} ver: {}"#, writeln!(&mut writer, "{}", self.extensions)?; } + if config.collect_extensions { + // dont-collect is active only when collect-extensions is used + writeln!(&mut writer, "{}", self.collect_extensions)?; + writeln!(&mut writer, "{}", self.dont_collect)?; + } + if !config.methods.is_empty() { writeln!(&mut writer, "{}", self.methods)?; } diff --git a/src/config/container.rs b/src/config/container.rs index 4c25057..d01afa8 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -1,6 +1,6 @@ use super::utils::{ - depth, methods, report_and_exit, save_state, serialized_type, status_codes, threads, timeout, - user_agent, wordlist, OutputLevel, RequesterPolicy, + depth, ignored_extensions, methods, report_and_exit, save_state, serialized_type, status_codes, + threads, timeout, user_agent, wordlist, OutputLevel, RequesterPolicy, }; use crate::config::determine_output_level; use crate::config::utils::determine_requester_policy; @@ -264,8 +264,17 @@ pub struct Configuration { #[serde(default)] pub url_denylist: Vec, + /// URLs that should never be scanned/recursed into based on a regular expression #[serde(with = "serde_regex", default)] pub regex_denylist: Vec, + + /// Automatically discover extensions and add them to --extensions (unless they're in --dont-collect) + #[serde(default)] + pub collect_extensions: bool, + + /// don't collect any of these extensions when --collect-extensions is used + #[serde(default = "ignored_extensions")] + pub dont_collect: Vec, } impl Default for Configuration { @@ -310,6 +319,7 @@ impl Default for Configuration { no_recursion: false, extract_links: false, random_agent: false, + collect_extensions: false, save_state: true, proxy: String::new(), config: String::new(), @@ -335,6 +345,7 @@ impl Default for Configuration { depth: depth(), threads: threads(), wordlist: wordlist(), + dont_collect: ignored_extensions(), } } } @@ -365,7 +376,9 @@ impl Configuration { /// - **random_agent**: `false` /// - **insecure**: `false` (don't be insecure, i.e. don't allow invalid certs) /// - **extensions**: `None` - /// - **methods**: [`DEFAULT_METHOD`] + /// - **collect_extensions**: `false` + /// - **dont_collect**: [`DEFAULT_IGNORED_EXTENSIONS`](constant.DEFAULT_RESPONSE_CODES.html) + /// - **methods**: [`DEFAULT_METHOD`](constant.DEFAULT_METHOD.html) /// - **data**: `None` /// - **url_denylist**: `None` /// - **regex_denylist**: `None` @@ -566,6 +579,10 @@ impl Configuration { config.extensions = arg.map(|val| val.to_string()).collect(); } + if let Some(arg) = args.values_of("dont_collect") { + config.dont_collect = arg.map(|val| val.to_string()).collect(); + } + if let Some(arg) = args.values_of("methods") { config.methods = arg .map(|val| { @@ -700,6 +717,10 @@ impl Configuration { config.dont_filter = true; } + if args.is_present("collect_extensions") { + config.collect_extensions = true; + } + if args.occurrences_of("verbosity") > 0 { // occurrences_of returns 0 if none are found; this is protected in // an if block for the same reason as the quiet option @@ -872,6 +893,7 @@ impl Configuration { update_if_not_default!(&mut conf.quiet, new.quiet, false); update_if_not_default!(&mut conf.auto_bail, new.auto_bail, false); update_if_not_default!(&mut conf.auto_tune, new.auto_tune, false); + update_if_not_default!(&mut conf.collect_extensions, new.collect_extensions, false); // use updated quiet/silent values to determine output level; same for requester policy conf.output_level = determine_output_level(conf.quiet, conf.silent); conf.requester_policy = determine_requester_policy(conf.auto_tune, conf.auto_bail); @@ -941,6 +963,11 @@ impl Configuration { // status_codes() is the default for replay_codes, if they're not provided update_if_not_default!(&mut conf.replay_codes, new.replay_codes, status_codes()); update_if_not_default!(&mut conf.save_state, new.save_state, save_state()); + update_if_not_default!( + &mut conf.dont_collect, + new.dont_collect, + ignored_extensions() + ); } /// If present, read in `DEFAULT_CONFIG_NAME` and deserialize the specified values diff --git a/src/config/tests.rs b/src/config/tests.rs index 634c200..db975cf 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -30,7 +30,9 @@ fn setup_config_test() -> Configuration { resume_from = "/some/state/file" redirects = true insecure = true + collect_extensions = true extensions = ["html", "php", "js"] + dont_collect = ["png", "gif", "jpg", "jpeg"] methods = ["GET", "PUT", "DELETE"] data = [31, 32, 33, 34] url_denylist = ["http://dont-scan.me", "https://also-not.me"] @@ -94,6 +96,7 @@ fn default_configuration() { assert!(!config.redirects); assert!(!config.extract_links); assert!(!config.insecure); + assert!(!config.collect_extensions); assert!(config.regex_denylist.is_empty()); assert_eq!(config.queries, Vec::new()); assert_eq!(config.filter_size, Vec::::new()); @@ -101,6 +104,7 @@ fn default_configuration() { assert_eq!(config.methods, vec!["GET"]); assert_eq!(config.data, Vec::::new()); assert_eq!(config.url_denylist, Vec::::new()); + assert_eq!(config.dont_collect, Vec::::new()); assert_eq!(config.filter_regex, Vec::::new()); assert_eq!(config.filter_similar, Vec::::new()); assert_eq!(config.filter_word_count, Vec::::new()); @@ -291,6 +295,13 @@ fn config_reads_extract_links() { assert!(config.extract_links); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_collect_extensions() { + let config = setup_config_test(); + assert!(config.collect_extensions); +} + #[test] /// parse the test config and see that the value parsed is correct fn config_reads_extensions() { @@ -298,6 +309,13 @@ fn config_reads_extensions() { assert_eq!(config.extensions, vec!["html", "php", "js"]); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_dont_collect() { + let config = setup_config_test(); + assert_eq!(config.dont_collect, vec!["png", "gif", "jpg", "jpeg"]); +} + #[test] /// parse the test config and see that the value parsed is correct fn config_reads_methods() { diff --git a/src/config/utils.rs b/src/config/utils.rs index fb0926a..b31d646 100644 --- a/src/config/utils.rs +++ b/src/config/utils.rs @@ -1,6 +1,6 @@ use crate::{ utils::{module_colorizer, status_colorizer}, - DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION, + DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION, }; #[cfg(not(test))] use std::process::exit; @@ -57,6 +57,14 @@ pub(super) fn methods() -> Vec { vec![DEFAULT_METHOD.to_owned()] } +/// default extensions to ignore while auto-collecting +pub(super) fn ignored_extensions() -> Vec { + DEFAULT_IGNORED_EXTENSIONS + .iter() + .map(|s| s.to_string()) + .collect() +} + /// default wordlist pub(super) fn wordlist() -> String { String::from(DEFAULT_WORDLIST) diff --git a/src/event_handlers/command.rs b/src/event_handlers/command.rs index 58b7df9..00d694f 100644 --- a/src/event_handlers/command.rs +++ b/src/event_handlers/command.rs @@ -66,6 +66,9 @@ pub enum Command { /// Just receive a sender and reply, used for slowing down the main thread Sync(Sender), + /// Notify event handler that a new extension has been seen + AddDiscoveredExtension(String), + /// Break out of the (infinite) mpsc receive loop Exit, } diff --git a/src/event_handlers/container.rs b/src/event_handlers/container.rs index d2ed9de..4e24714 100644 --- a/src/event_handlers/container.rs +++ b/src/event_handlers/container.rs @@ -6,6 +6,7 @@ use crate::Joiner; #[cfg(test)] use crate::{filters::FeroxFilters, statistics::Stats, Command}; use anyhow::{bail, Result}; +use std::collections::HashSet; use std::sync::{Arc, RwLock}; #[cfg(test)] use tokio::sync::mpsc::{self, UnboundedReceiver}; @@ -56,6 +57,9 @@ pub struct Handles { /// Handle for recursion pub scans: RwLock>, + + /// Pointer to the list of words generated from reading in the wordlist + pub wordlist: Arc>, } /// implementation of Handles @@ -66,6 +70,7 @@ impl Handles { filters: FiltersHandle, output: TermOutHandle, config: Arc, + wordlist: Arc>, ) -> Self { Self { stats, @@ -73,6 +78,7 @@ impl Handles { output, config, scans: RwLock::new(None), + wordlist, } } @@ -116,6 +122,46 @@ impl Handles { bail!("Could not get underlying CommandSender object") } + /// wrapper to reach into `FeroxScans` and yank out the length of `collected_extensions` + pub fn num_collected_extensions(&self) -> usize { + if !self.config.collect_extensions { + // if --collect-extensions wasn't used, simply return 0 and forego unlocking + return 0; + } + + self.collected_extensions().len() + } + + /// wrapper to reach into `FeroxScans` and yank out the length of `collected_extensions` + pub fn collected_extensions(&self) -> HashSet { + if let Ok(scans) = self.ferox_scans() { + if let Ok(extensions) = scans.collected_extensions.read() { + return extensions.clone(); + } + } + + HashSet::new() + } + + /// number of words in the wordlist, multiplied by `expected_num_requests_multiplier` + pub fn expected_num_requests_per_dir(&self) -> usize { + let num_words = self.wordlist.len(); + let multiplier = self.expected_num_requests_multiplier(); + multiplier * num_words + } + + /// number of extensions plus the number of request method types plus any dynamically collected + /// extensions + pub fn expected_num_requests_multiplier(&self) -> usize { + let multiplier = self.config.extensions.len() + + self.config.methods.len() + + self.num_collected_extensions(); + + // methods should always have at least 1 member, likely making this .max call unneeded + // but leaving it for 'just in case' reasons + multiplier.max(1) + } + /// Helper to easily get the (locked) underlying FeroxScans object pub fn ferox_scans(&self) -> Result> { if let Ok(guard) = self.scans.read().as_ref() { diff --git a/src/event_handlers/scans.rs b/src/event_handlers/scans.rs index 3eb9bf3..b8030b3 100644 --- a/src/event_handlers/scans.rs +++ b/src/event_handlers/scans.rs @@ -15,6 +15,8 @@ use crate::{ use super::command::Command::AddToUsizeField; use super::*; +use crate::scan_manager::{ScanStatus, ScanType}; +use crate::statistics::StatField; use reqwest::Url; use tokio::time::Duration; @@ -176,6 +178,23 @@ impl ScanHandler { Command::Sync(sender) => { sender.send(true).unwrap_or_default(); } + Command::AddDiscoveredExtension(new_extension) => { + // if --collect-extensions was used, AND the new extension isn't in + // the --dont-collect list AND it's also not in the --extensions list, AND + // we actually added a new extension (i.e. wasn't previously known), add + // it to FeroxScans.collected_extensions + if self.handles.config.collect_extensions + && !self.handles.config.dont_collect.contains(&new_extension) + && !self.handles.config.extensions.contains(&new_extension) + && self.data.add_discovered_extension(new_extension) + { + self.update_all_bar_lengths()?; + self.handles + .stats + .send(Command::AddToUsizeField(StatField::ExtensionsCollected, 1)) + .unwrap_or_default(); + } + } _ => {} // no other commands needed for RecursionHandler } } @@ -184,6 +203,93 @@ impl ScanHandler { Ok(()) } + /// update all current and future bar lengths + /// + /// updating all bar lengths correctly requires a few different actions on our part. + /// - get the current number of requests expected per scan (dynamic when --collect-extensions + /// is used) + /// - update the overall progress bar via the statistics handler (total expected) + /// - update the expected per scan value tracked in the statistics handler + /// - update progress bars on each FeroxScan (type::directory) that are running/not-started + /// - update progress bar length on FeroxScans (this is used when creating new a FeroxScan and + /// determines the new scan's progress bar length) + fn update_all_bar_lengths(&self) -> Result<()> { + log::trace!("enter: update_all_bar_lengths"); + + // current number of requests expected per scan + // ExpectedPerScan and TotalExpected are a += action, so we need the wordlist length to + // update them while the other updates use expected_num_requests_per_dir + let num_words = self.get_wordlist()?.len(); + let current_expectation = self.handles.expected_num_requests_per_dir() as u64; + + // used in the calculation of bar width down below, see explanation there + let divisor = self.handles.expected_num_requests_multiplier() as u64 - 1; + + // add another `wordlist.len` to the expected per scan tracker in the statistics handler + self.handles + .stats + .send(AddToUsizeField(StatField::ExpectedPerScan, num_words))?; + + // since we're adding extensions in the middle of scans (potentially), we need to take + // current number of requests into account, new_total will be used as an accumulator + // used to increment the overall progress bar + let mut new_total = 0; + + if let Ok(ferox_scans) = self.handles.ferox_scans() { + // update progress bar length on FeroxScans, which used when creating a new FeroxScan's + // progress bar and should mirror the expected_per_scan field on Statistics + ferox_scans.set_bar_length(current_expectation); + + if let Ok(scans_guard) = ferox_scans.scans.read() { + // update progress bars on each FeroxScan where its scan type is directory and + // scan status is either running or not-started + for scan in scans_guard.iter() { + if scan.is_active() { + // current number of words left in the 'to-scan' bin, for example: + // + // say we have a 2000 word wordlist, have `-x js` on the command line, and + // just found `php` as a new extension + // + // that puts our state at: + // - wordlist length: 2000 + // - total expected: 4000 (original length * 2 for -x js) + // + // let's assume the current scan has sent 3000 requests so far + // that means to get the number of `words` left to send, we need to take + // the difference of 4000 and 3000 and then divide that by the current + // multiplier (2 in the example) + // + // (4000 - 3000) / 2 => 500 words left to send + // + // the remaining 500 words will be sent as 3 variations (word, word.js, + // word.php). So, we would then need to increment the bar by 500 to + // reflect the dynamism of adding extensions mid-scan. + let bar = scan.progress_bar(); + + // (4000 - 3000) / 2 => 500 words left to send + let length = bar.length(); + let num_words_left = (length - bar.position()) / divisor; + + // accumulate each bar's increment value for incrementing the total bar + new_total += num_words_left; + + bar.inc_length(num_words_left); + } + } + } + + // add the total number of newly expected requests to the overall progress bar + // via the statistics handler + self.handles.stats.send(AddToUsizeField( + StatField::TotalExpected, + new_total as usize, + ))?; + } + + log::trace!("exit: update_all_bar_lengths"); + Ok(()) + } + /// Helper to easily get the (locked) underlying wordlist pub fn get_wordlist(&self) -> Result>> { if let Ok(guard) = self.wordlist.lock().as_ref() { diff --git a/src/event_handlers/statistics.rs b/src/event_handlers/statistics.rs index 5e6483b..2ca4426 100644 --- a/src/event_handlers/statistics.rs +++ b/src/event_handlers/statistics.rs @@ -103,7 +103,7 @@ impl StatsHandler { Command::AddToUsizeField(field, value) => { self.stats.update_usize_field(field, value); - if matches!(field, StatField::TotalScans) { + if matches!(field, StatField::TotalScans | StatField::TotalExpected) { self.bar.set_length(self.stats.total_expected() as u64); } } diff --git a/src/extractor/builder.rs b/src/extractor/builder.rs index d9c0d7d..8cc4c12 100644 --- a/src/extractor/builder.rs +++ b/src/extractor/builder.rs @@ -79,9 +79,9 @@ impl<'a> ExtractorBuilder<'a> { self } - /// finalize configuration of ExtratorBuilder and return an Extractor + /// finalize configuration of `ExtractorBuilder` and return an `Extractor` /// - /// requires either with_url or with_response to have been used in the build process + /// requires either `with_url` or `with_response` to have been used in the build process pub fn build(&self) -> Result> { if (self.url.is_empty() && self.response.is_none()) || self.handles.is_none() { bail!("Extractor requires a URL or a FeroxResponse be specified as well as a Handles object") @@ -98,6 +98,7 @@ impl<'a> ExtractorBuilder<'a> { url: self.url.to_owned(), handles: self.handles.as_ref().unwrap().clone(), target: self.target, + num_collected: 0, }) } } diff --git a/src/extractor/container.rs b/src/extractor/container.rs index c7bbb08..25168c3 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -1,5 +1,5 @@ use super::*; -use crate::utils::should_deny_url; +use crate::utils::{should_deny_url, should_read_body}; use crate::{ client, event_handlers::{ @@ -17,8 +17,9 @@ use crate::{ DEFAULT_METHOD, }; use anyhow::{bail, Context, Result}; -use reqwest::{StatusCode, Url}; +use reqwest::{Client, StatusCode, Url}; use scraper::{Html, Selector}; +use std::borrow::BorrowMut; use std::collections::HashSet; use tokio::sync::oneshot; @@ -32,6 +33,33 @@ enum RecursionStatus { NotRecursive, } +/// enum representing the different servers that `parse_html` can detect when directory listing is +/// enabled +#[derive(Copy, Debug, Clone)] +pub enum DirListingType { + /// apache server, detected by `Index of /` + Apache, + + /// tomcat/python server, detected by `Directory Listing for /` + TomCat_Python, + + /// ASP.NET server, detected by `Directory Listing -- /` + AspDotNet, + // /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used) + // IIS_AZURE, +} + +/// Wrapper around the results of performing any kind of extraction against a target web page +#[derive(Debug, Default, Clone)] +pub struct ExtractionResult { + /// links extracted from the visited page + pub found_links: HashSet, + + /// type of server where directory listing was detected + /// i.e. https://portswigger.net/kb/issues/00600100_directory-listing + pub dir_list_type: Option, +} + /// Handles all logic related to extracting links from requested source code #[derive(Debug)] pub struct Extractor<'a> { @@ -52,24 +80,86 @@ pub struct Extractor<'a> { /// type of extraction to be performed pub(super) target: ExtractionTarget, + + /// current number of collected extensions, used for updating the base scan's progress bar + pub(super) num_collected: usize, } /// Extractor implementation impl<'a> Extractor<'a> { /// perform extraction from the given target and return any links found - pub async fn extract(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: extract (this fn has associated trace exit msg)"); + pub async fn extract(&self) -> Result { + log::trace!( + "enter: extract({:?}) (this fn has no associated trace exit msg)", + self.target + ); match self.target { - ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?), + ExtractionTarget::ResponseBody | ExtractionTarget::ParseHtml => { + Ok(self.extract_from_body().await?) + } ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?), - ExtractionTarget::ParseHtml => Ok(self.parse_html().await?), } } + /// wrapper around logic that performs the following: + /// - parses `url_to_parse` + /// - bails if the parsed url doesn't belong to the original host/domain + /// - otherwise, calls `add_all_sub_paths` with the parsed result + fn parse_url_and_add_subpaths( + &self, + url_to_parse: &str, + original_url: &Url, + links: &mut HashSet, + ) -> Result<()> { + log::trace!("enter: parse_url_and_add_subpaths({:?})", links); + + match Url::parse(url_to_parse) { + Ok(absolute) => { + if absolute.domain() != original_url.domain() + || absolute.host() != original_url.host() + { + // domains/ips are not the same, don't scan things that aren't part of the original + // target url + bail!("parsed url does not belong to original domain/host"); + } + + if self.add_all_sub_paths(absolute.path(), links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", absolute, links); + } + } + Err(e) => { + // this is the expected error that happens when we try to parse a url fragment + // ex: Url::parse("/login") -> Err("relative URL without a base") + // while this is technically an error, these are good results for us + if e.to_string().contains("relative URL without a base") { + if self.add_all_sub_paths(url_to_parse, links).is_err() { + log::warn!( + "could not add sub-paths from {} to {:?}", + url_to_parse, + links + ); + } + } else { + // unexpected error has occurred + log::warn!("Could not parse given url: {}", e); + self.handles.stats.send(AddError(Other)).unwrap_or_default(); + } + } + } + + log::trace!("exit: parse_url_and_add_subpaths"); + Ok(()) + } + /// given a set of links from a normal http body response, task the request handler to make /// the requests - pub async fn request_links(&self, links: HashSet) -> Result<()> { + pub async fn request_links(&mut self, links: HashSet) -> Result<()> { log::trace!("enter: request_links({:?})", links); + + if links.is_empty() { + return Ok(()); + } + let recursive = if self.handles.config.no_recursion { RecursionStatus::NotRecursive } else { @@ -77,6 +167,10 @@ impl<'a> Extractor<'a> { }; let scanned_urls = self.handles.ferox_scans()?; + log::warn!("links: {:?}", links); + self.update_stats(links.len())?; + // todo: get it to where parsehtml->request_links doesn't run in a single thread/block asyncness + // todo: move the parse_html into heuristics (maybe) and then move the check/dirlist stuff lower in scan_url (may not work, but look) for link in links { let mut resp = match self.request_link(&link).await { @@ -100,6 +194,10 @@ impl<'a> Extractor<'a> { scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest); + if self.handles.config.collect_extensions { + resp.parse_extension(self.handles.clone())?; + } + if let Err(e) = resp.send_report(self.handles.output.tx.clone()) { log::warn!("Could not send FeroxResponse to output handler: {}", e); } @@ -135,8 +233,26 @@ impl<'a> Extractor<'a> { Ok(()) } - /// Given a `reqwest::Response`, perform the following actions - /// - parse the response's text for links using the linkfinder regex + /// wrapper around link extraction via html attributes + fn extract_all_links_from_html_tags( + &self, + resp_url: &Url, + links: &mut HashSet, + html: &Html, + ) { + self.extract_links_by_attr(resp_url, links, &html, "a", "href"); + self.extract_links_by_attr(resp_url, links, &html, "img", "src"); + self.extract_links_by_attr(resp_url, links, &html, "form", "action"); + self.extract_links_by_attr(resp_url, links, &html, "script", "src"); + self.extract_links_by_attr(resp_url, links, &html, "iframe", "src"); + self.extract_links_by_attr(resp_url, links, &html, "div", "src"); + self.extract_links_by_attr(resp_url, links, &html, "frame", "src"); + self.extract_links_by_attr(resp_url, links, &html, "embed", "src"); + self.extract_links_by_attr(resp_url, links, &html, "script", "src"); + } + + /// Given the body of a `reqwest::Response`, perform the following actions + /// - parse the body for links using the linkfinder regex /// - for every link found take its url path and parse each sub-path /// - example: Response contains a link fragment `homepage/assets/img/icons/handshake.svg` /// with a base url of http://localhost, the following urls would be returned: @@ -145,69 +261,32 @@ impl<'a> Extractor<'a> { /// - homepage/assets/img/ /// - homepage/assets/ /// - homepage/ - pub(super) async fn extract_from_body(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: extract_from_body"); + fn extract_all_links_from_javascript( + &self, + response_body: &str, + response_url: &Url, + links: &mut HashSet, + ) { + log::trace!( + "enter: extract_all_links_from_javascript(html body..., {}, {:?})", + response_url.as_str(), + links + ); - let mut links = HashSet::::new(); - let dirlist_flag = false; - - // Response - let response = self.response.unwrap(); - let resp_url = response.url(); - let body = response.text(); - let html = Html::parse_document(body); - - // Extract Links - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - - for capture in self.links_regex.captures_iter(body) { + for capture in self.links_regex.captures_iter(response_body) { // remove single & double quotes from both ends of the capture // capture[0] is the entire match, additional capture groups start at [1] let link = capture[0].trim_matches(|c| c == '\'' || c == '"'); - match Url::parse(link) { - Ok(absolute) => { - if absolute.domain() != self.response.unwrap().url().domain() - || absolute.host() != self.response.unwrap().url().host() - { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - continue; - } - - if self.add_all_sub_paths(absolute.path(), &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute, links); - } - } - Err(e) => { - // this is the expected error that happens when we try to parse a url fragment - // ex: Url::parse("/login") -> Err("relative URL without a base") - // while this is technically an error, these are good results for us - if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); - } - } else { - // unexpected error has occurred - log::warn!("Could not parse given url: {}", e); - self.handles.stats.send(AddError(Other)).unwrap_or_default(); - } - } + if self + .parse_url_and_add_subpaths(link, response_url, links) + .is_err() + { + // purposely not logging the error here, due to the frequency with which it gets hit } } - self.update_stats(links.len())?; - - log::trace!("exit: extract_from_body -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + log::trace!("exit: extract_all_links_from_javascript"); } /// take a url fragment like homepage/assets/img/icons/handshake.svg and @@ -340,13 +419,13 @@ impl<'a> Extractor<'a> { // make the request and store the response let new_response = - logged_request(&new_url, DEFAULT_METHOD, None, self.handles.clone()).await?; + logged_request(&new_url, DEFAULT_METHOD, None, self.handles.clone(), None).await?; let new_ferox_response = FeroxResponse::from( new_response, url, DEFAULT_METHOD, - true, + should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; @@ -364,11 +443,10 @@ impl<'a> Extractor<'a> { /// http://localhost/stuff/things /// this function requests: /// http://localhost/robots.txt - pub(super) async fn extract_from_robots(&self) -> Result<(HashSet, bool)> { + pub(super) async fn extract_from_robots(&self) -> Result { log::trace!("enter: extract_robots_txt"); let mut links: HashSet = HashSet::new(); - let dirlist_flag = false; // request let response = self.make_extract_request("/robots.txt").await?; @@ -384,66 +462,108 @@ impl<'a> Extractor<'a> { } } - self.update_stats(links.len())?; + let result = ExtractionResult { + found_links: links, + dir_list_type: None, + }; - log::trace!("exit: extract_robots_txt -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + log::trace!("exit: extract_robots_txt -> {:?}", result); + Ok(result) } - /// Entry point to parse html for links (i.e. webscraping, directory listings) - /// this function requests: - /// http://localhost/ - pub(super) async fn parse_html(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: parse_html"); + /// outer-most wrapper for parsing html response bodies in search of additional content. + /// performs the following high-level steps: + /// - requests the page, if necessary + /// - checks the page to see if directory listing is enabled and sucks up all the links, if so + /// - uses the linkfinder regex to grab links from embedded javascript/javascript files + /// - extracts many different types of link sources from the html itself + pub(super) async fn extract_from_body(&self) -> Result { + log::trace!("enter: extract_from_body"); - let mut links: HashSet = HashSet::new(); - let mut dirlist_flag = false; + let mut result = ExtractionResult::default(); + + // need late binding here to avoid 'creates a temporary which is freed...' in the + // `let ... if` below because of self's FeroxResponse lifetime + let mut requested = FeroxResponse::default(); + + if self.response.is_none() { + // called as a ParseHtml target + let url = Url::parse(&self.url)?; + requested = self.make_extract_request(url.path()).await?; + } + + let response = if self.response.is_some() { + // called as a ResponseBody extraction + self.response.unwrap() + } else { + &requested + }; - // Response - let url = Url::parse(&self.url)?; - let response = self.make_extract_request(url.path()).await?; let resp_url = response.url(); let body = response.text(); let html = Html::parse_document(body); - // Directory listing heuristic detection to not continue scanning - // Index of /: apache - // Directory Listing for /: tomcat, - // Directory Listing -- /: ASP.NET - // - /: iis, azure, skipping due to loose heuristic - let title_selector = Selector::parse("title").unwrap(); - for t in html.select(&title_selector) { - let title = t.inner_html().to_lowercase(); - if title.contains("directory listing for /") - || title.contains("index of /") - || title.contains("directory listing -- /") - { - log::debug!("Directory listing heuristic detection from \"{}\"", title); - dirlist_flag = true; + if matches!(self.target, ExtractionTarget::ParseHtml) { + // only check for directory listing when ParseHtml is the target, based on where + // in the codebase Extractor::extract() is called + let dirlist_type = self.detect_directory_listing(&html); - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.update_stats(links.len())?; + if dirlist_type.is_some() { + log::debug!( + "Directory listing heuristic detected: {:?}", + dirlist_type.unwrap() + ); - log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag); - return Ok((links, dirlist_flag)); + self.extract_links_by_attr(resp_url, &mut result.found_links, &html, "a", "href"); + + result.dir_list_type = dirlist_type; + + log::trace!("exit: extract_from_body -> {:?}", result); + return Ok(result); } } - // Extract Links - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); + // extract links from html tags/attributes and embedded javascript + self.extract_all_links_from_html_tags(resp_url, &mut result.found_links, &html); + self.extract_all_links_from_javascript(body, resp_url, &mut result.found_links); - self.update_stats(links.len())?; + log::trace!("exit: extract_from_body -> {:?}", result); + Ok(result) + } - log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + /// Directory listing heuristic detection, uses tag to make its determination. When + /// the inner html of <title> matches one of the following, a `DirListingType` is returned. + /// - apache: `Index of /` + /// - tomcat/python: `Directory Listing for /` + /// - ASP.NET: `Directory Listing -- /` + /// - <host> - /: iis, azure, skipping due to loose heuristic + pub(super) fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> { + log::trace!("enter: detect_directory_listing(html body...)"); + + let title_selector = Selector::parse("title").expect("couldn't parse title selector"); + + for t in html.select(&title_selector) { + let title = t.inner_html().to_lowercase(); + + let dirlist_type = if title.contains("directory listing for /") { + Some(DirListingType::TomCat_Python) + } else if title.contains("index of /") { + Some(DirListingType::Apache) + } else if title.contains("directory listing -- /") { + Some(DirListingType::AspDotNet) + } else { + // IIS_AZURE purposely skipped for now + None + }; + + if dirlist_type.is_some() { + log::trace!("exit: detect_directory_listing -> {:?}", dirlist_type); + return dirlist_type; + } + } + + log::trace!("exit: detect_directory_listing -> None"); + None } /// simple helper to get html links by tag/attribute and add it to the `links` HashSet @@ -458,41 +578,20 @@ impl<'a> Extractor<'a> { log::trace!("enter: extract_links_by_attr"); let selector = Selector::parse(html_tag).unwrap(); + let tags = html .select(&selector) .filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr)); - for t in tags { - if let Some(link) = t.value().attr(html_attr) { + + for tag in tags { + if let Some(link) = tag.value().attr(html_attr) { log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str()); - match Url::parse(link) { - Ok(absolute) => { - if absolute.domain() != resp_url.domain() - || absolute.host() != resp_url.host() - { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - continue; - } - - if self.add_all_sub_paths(absolute.path(), links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute, links); - } - } - Err(e) => { - // this is the expected error that happens when we try to parse a url fragment - // ex: Url::parse("/login") -> Err("relative URL without a base") - // while this is technically an error, these are good results for us - if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); - } - } else { - // unexpected error has occurred - log::warn!("Could not parse given url: {}", e); - self.handles.stats.send(AddError(Other)).unwrap_or_default(); - } - } + if self + .parse_url_and_add_subpaths(link, resp_url, links) + .is_err() + { + log::debug!("link didn't belong to the target domain/host: {}", link); } } } @@ -507,33 +606,45 @@ impl<'a> Extractor<'a> { pub(super) async fn make_extract_request(&self, location: &str) -> Result<FeroxResponse> { log::trace!("enter: make_extract_request"); - // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something - // similar; to account for that, create a client that will follow redirects, regardless of - // what the user specified for the scanning client. Other than redirects, it will respect - // all other user specified settings - let follow_redirects = true; + // need late binding here to avoid 'creates a temporary which is freed...' in the + // `let ... if` below to avoid cloning the client out of config + let mut client = Client::new(); - let proxy = if self.handles.config.proxy.is_empty() { - None + if location == "/robots.txt" { + // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something + // similar; to account for that, create a client that will follow redirects, regardless of + // what the user specified for the scanning client. Other than redirects, it will respect + // all other user specified settings + let follow_redirects = true; + + let proxy = if self.handles.config.proxy.is_empty() { + None + } else { + Some(self.handles.config.proxy.as_str()) + }; + + client = client::initialize( + self.handles.config.timeout, + &self.handles.config.user_agent, + follow_redirects, + self.handles.config.insecure, + &self.handles.config.headers, + proxy, + )?; + } + + let client = if location != "/robots.txt" { + &self.handles.config.client } else { - Some(self.handles.config.proxy.as_str()) + &client }; - let client = client::initialize( - self.handles.config.timeout, - &self.handles.config.user_agent, - follow_redirects, - self.handles.config.insecure, - &self.handles.config.headers, - proxy, - )?; - let mut url = Url::parse(&self.url)?; url.set_path(location); // overwrite existing path // purposefully not using logged_request here due to using the special client let response = make_request( - &client, + client, &url, DEFAULT_METHOD, None, @@ -547,10 +658,11 @@ impl<'a> Extractor<'a> { response, &self.url, DEFAULT_METHOD, - true, + should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; + // note: don't call parse_extension here. If we call it here, it gets called on robots.txt log::trace!("exit: make_extract_request -> {}", ferox_response); Ok(ferox_response) @@ -558,7 +670,7 @@ impl<'a> Extractor<'a> { /// update total number of links extracted and expected responses fn update_stats(&self, num_links: usize) -> Result<()> { - let multiplier = self.handles.config.extensions.len().max(1); + let multiplier = self.handles.expected_num_requests_multiplier(); self.handles .stats diff --git a/src/filters/init.rs b/src/filters/init.rs index 1d98615..41ccd48 100644 --- a/src/filters/init.rs +++ b/src/filters/init.rs @@ -72,10 +72,11 @@ pub async fn initialize(handles: Arc<Handles>) -> Result<()> { let url = skip_fail!(Url::parse(similarity_filter)); // attempt to request the given url - let resp = skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await); + let resp = + skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone(), None).await); // if successful, create a filter based on the response's body - let fr = FeroxResponse::from( + let mut fr = FeroxResponse::from( resp, similarity_filter, DEFAULT_METHOD, @@ -84,6 +85,10 @@ pub async fn initialize(handles: Arc<Handles>) -> Result<()> { ) .await; + if handles.config.collect_extensions { + fr.parse_extension(handles.clone())?; + } + // hash the response body and store the resulting hash in the filter object let hash = FuzzyHash::new(&fr.text()).to_string(); diff --git a/src/heuristics.rs b/src/heuristics.rs index 31e832f..9837f30 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -12,7 +12,7 @@ use crate::{ response::FeroxResponse, skip_fail, url::FeroxUrl, - utils::{ferox_print, fmt_err, logged_request, status_colorizer}, + utils::{ferox_print, fmt_err, logged_request, should_read_body, status_colorizer}, DEFAULT_METHOD, }; @@ -188,6 +188,7 @@ impl HeuristicTests { method, data, self.handles.clone(), + None, ) .await?; @@ -198,11 +199,12 @@ impl HeuristicTests { .contains(&response.status().as_u16()) { // found a wildcard response + let mut ferox_response = FeroxResponse::from( response, &target.target, method, - true, + should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; @@ -247,7 +249,8 @@ impl HeuristicTests { let url = FeroxUrl::from_string(target_url, self.handles.clone()); let request = skip_fail!(url.format("", None)); - let result = logged_request(&request, DEFAULT_METHOD, None, self.handles.clone()).await; + let result = + logged_request(&request, DEFAULT_METHOD, None, self.handles.clone(), None).await; match result { Ok(_) => { diff --git a/src/lib.rs b/src/lib.rs index 0aa5613..87b0626 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![deny(clippy::all)] +// #![warn(clippy::pedantic, clippy::restriction, clippy::nursery, clippy::cargo)] use anyhow::Result; use reqwest::StatusCode; use tokio::{ @@ -48,6 +50,13 @@ pub const DEFAULT_OPEN_FILE_LIMIT: u64 = 8192; /// Default value used to determine near-duplicate web pages (equivalent to 95%) pub const SIMILARITY_THRESHOLD: u32 = 95; +/// Default set of extensions to Ignore when auto-collecting extensions during scans +pub(crate) const DEFAULT_IGNORED_EXTENSIONS: [&str; 38] = [ + "tif", "tiff", "ico", "cur", "bmp", "webp", "svg", "png", "jpg", "jpeg", "jfif", "gif", "avif", + "apng", "pjpeg", "pjp", "mov", "wav", "mpg", "mpeg", "mp3", "mp4", "m4a", "m4p", "m4v", "ogg", + "webm", "ogv", "oga", "flac", "aac", "3gp", "css", "zip", "xls", "xml", "gz", "tgz", +]; + /// Default wordlist to use when `-w|--wordlist` isn't specified and not `wordlist` isn't set /// in a [ferox-config.toml](constant.DEFAULT_CONFIG_NAME.html) config file. /// diff --git a/src/main.rs b/src/main.rs index d2feb29..95cf66e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ use tokio::{ }; use tokio_util::codec::{FramedRead, LinesCodec}; +use feroxbuster::scan_manager::ScanType; use feroxbuster::{ banner::{Banner, UPDATE_URL}, config::{Configuration, OutputLevel}, @@ -70,21 +71,12 @@ fn get_unique_words_from_wordlist(path: &str) -> Result<Arc<Vec<String>>> { /// Determine whether it's a single url scan or urls are coming from stdin, then scan as needed async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> { log::trace!("enter: scan({:?}, {:?})", targets, handles); - // cloning an Arc is cheap (it's basically a pointer into the heap) - // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans - // as well as additional directories found as part of recursion - - let words = get_unique_words_from_wordlist(&handles.config.wordlist)?; - - if words.len() == 0 { - bail!("Did not find any words in {}", handles.config.wordlist); - } let scanned_urls = handles.ferox_scans()?; - handles.send_scan_command(UpdateWordlist(words.clone()))?; + handles.send_scan_command(UpdateWordlist(handles.wordlist.clone()))?; - scanner::initialize(words.len(), handles.clone()).await?; + scanner::initialize(handles.wordlist.len(), handles.clone()).await?; // at this point, the stat thread's progress bar can be created; things that needed to happen // first: @@ -103,7 +95,7 @@ async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> { if handles.config.resumed { // display what has already been completed scanned_urls.print_known_responses(); - scanned_urls.print_completed_bars(words.len())?; + scanned_urls.print_completed_bars(handles.wordlist.len())?; } log::debug!("sending {:?} to be scanned as initial targets", targets); @@ -138,8 +130,8 @@ async fn get_targets(handles: Arc<Handles>) -> Result<Vec<String>> { for scan in scans.iter() { // ferox_scans gets deserialized scans added to it at program start if --resume-from // is used, so scans that aren't marked complete still need to be scanned - if scan.is_complete() { - // this one's already done, ignore it + if scan.is_complete() || matches!(scan.scan_type, ScanType::File) { + // this one's already done, or it's not a directory, ignore it continue; } @@ -199,12 +191,23 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> { let (out_task, out_handle) = TermOutHandler::initialize(config.clone(), stats_handle.tx.clone()); + // cloning an Arc is cheap (it's basically a pointer into the heap) + // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans + // as well as additional directories found as part of recursion + + let words = get_unique_words_from_wordlist(&config.wordlist)?; + + if words.len() == 0 { + bail!("Did not find any words in {}", config.wordlist); + } + // bundle up all the disparate handles and JoinHandles (tasks) let handles = Arc::new(Handles::new( stats_handle, filters_handle, out_handle, config.clone(), + words, )); let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone()); diff --git a/src/parser.rs b/src/parser.rs index 3bab47b..44eb9e2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -470,6 +470,26 @@ pub fn initialize() -> App<'static> { .takes_value(false) .help_heading("Scan settings") .help("Don't auto-filter wildcard responses") + ).arg( + Arg::new("collect_extensions") + .short('c') + .long("collect-extensions") + .takes_value(false) + .help_heading("Scan settings") + .help("Automatically discover extensions and add them to --extensions (unless they're in --dont-collect)") + ).arg( + Arg::new("dont_collect") + .short('I') + .long("dont-collect") + .value_name("FILE_EXTENSION") + .takes_value(true) + .multiple_values(true) + .multiple_occurrences(true) + .use_delimiter(true) + .help_heading("Scan settings") + .help( + "File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)", + ), ); ///////////////////////////////////////////////////////////////////// diff --git a/src/response.rs b/src/response.rs index 2fb9c48..27938ac 100644 --- a/src/response.rs +++ b/src/response.rs @@ -60,6 +60,9 @@ pub struct FeroxResponse { /// whether the user passed --quiet|--silent on the command line pub(crate) output_level: OutputLevel, + + /// Url's file extension, if one exists + pub(crate) extension: Option<String>, } /// implement Default trait for FeroxResponse @@ -78,6 +81,7 @@ impl Default for FeroxResponse { headers: Default::default(), wildcard: false, output_level: Default::default(), + extension: None, } } } @@ -244,9 +248,60 @@ impl FeroxResponse { word_count, output_level, wildcard: false, + extension: None, } } + /// if --collect-extensions is used, examine the response's url and grab the file's extension + /// if one is available to be grabbed. If an extension is found, send it to the ScanHandler + /// for further processing + pub(crate) fn parse_extension(&mut self, handles: Arc<Handles>) -> Result<()> { + log::trace!("enter: parse_extension"); + + if !handles.config.collect_extensions { + // early return, --collect-extensions not used + return Ok(()); + } + + // path_segments: + // Return None for cannot-be-a-base URLs. + // When Some is returned, the iterator always contains at least one string + // (which may be empty). + // + // meaning: the two unwraps here are fine, the worst outcome is an empty string + let filename = self.url.path_segments().unwrap().last().unwrap(); + + if !filename.is_empty() { + // non-empty string, try to get extension + let parts: Vec<_> = filename + .split('.') + // keep things like /.bash_history from becoming an extension + .filter(|part| !part.is_empty()) + .collect(); + + if parts.len() > 1 { + // filename + at least one extension, i.e. whatever.js becomes ["whatever", "js"] + self.extension = Some(parts.last().unwrap().to_string()) + } + } + + if let Some(extension) = &self.extension { + if handles + .config + .status_codes + .contains(&self.status().as_u16()) + { + // only add extensions to those responses that pass our checks; filtered out + // status codes are handled by should_filter, but we need to still check against + // the allow list for what we want to keep + handles.send_scan_command(Command::AddDiscoveredExtension(extension.to_owned()))?; + } + } + + log::trace!("exit: parse_extension"); + Ok(()) + } + /// Helper function that determines if the configured maximum recursion depth has been reached /// /// Essentially looks at the Url path and determines how many directories are present in the @@ -484,6 +539,10 @@ impl Serialize for FeroxResponse { state.serialize_field("line_count", &self.line_count)?; state.serialize_field("word_count", &self.word_count)?; state.serialize_field("headers", &headers)?; + state.serialize_field( + "extension", + self.extension.as_ref().unwrap_or(&String::new()), + )?; state.end() } @@ -508,6 +567,7 @@ impl<'de> Deserialize<'de> for FeroxResponse { output_level: Default::default(), line_count: 0, word_count: 0, + extension: None, }; let map: HashMap<String, Value> = HashMap::deserialize(deserializer)?; @@ -576,6 +636,11 @@ impl<'de> Deserialize<'de> for FeroxResponse { response.wildcard = result; } } + "extension" => { + if let Some(result) = value.as_str() { + response.extension = Some(result.to_string()); + } + } _ => {} } } @@ -587,6 +652,7 @@ impl<'de> Deserialize<'de> for FeroxResponse { #[cfg(test)] mod tests { use super::*; + use std::default::Default; #[test] /// call reached_max_depth with max depth of zero, which is infinite recursion, expect false @@ -595,16 +661,7 @@ mod tests { let url = Url::parse("http://localhost").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 0, handles); assert!(!result); @@ -618,16 +675,7 @@ mod tests { let url = Url::parse("http://localhost/one/two").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); @@ -641,16 +689,7 @@ mod tests { let url = Url::parse("http://localhost").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); @@ -664,16 +703,7 @@ mod tests { let url = Url::parse("http://localhost/one/two").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(2, 2, handles); @@ -687,16 +717,7 @@ mod tests { let url = Url::parse("http://localhost/one/two/three").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); diff --git a/src/scan_manager/scan.rs b/src/scan_manager/scan.rs index 42c06bc..289a941 100644 --- a/src/scan_manager/scan.rs +++ b/src/scan_manager/scan.rs @@ -33,7 +33,7 @@ pub struct FeroxScan { pub(super) url: String, /// The type of scan - pub(super) scan_type: ScanType, + pub scan_type: ScanType, /// The order in which the scan was received pub(crate) scan_order: ScanOrder, @@ -41,8 +41,8 @@ pub struct FeroxScan { /// Number of requests to populate the progress bar with pub(super) num_requests: u64, - /// Status of this scan - pub(super) status: Mutex<ScanStatus>, + /// Status of this scan + pub status: Mutex<ScanStatus>, /// The spawned tokio task performing this scan (uses tokio::sync::Mutex) pub(super) task: sync::Mutex<Option<JoinHandle<()>>>, diff --git a/src/scan_manager/scan_container.rs b/src/scan_manager/scan_container.rs index ab1124e..8c6170c 100644 --- a/src/scan_manager/scan_container.rs +++ b/src/scan_manager/scan_container.rs @@ -7,12 +7,13 @@ use crate::{ scan_manager::{MenuCmd, MenuCmdResult}, scanner::RESPONSES, traits::FeroxSerialize, - SLEEP_DURATION, + DEFAULT_IGNORED_EXTENSIONS, SLEEP_DURATION, }; use anyhow::Result; use reqwest::StatusCode; use serde::{ser::SerializeSeq, Serialize, Serializer}; use std::{ + collections::HashSet, convert::TryInto, fs::File, io::BufReader, @@ -47,6 +48,9 @@ pub struct FeroxScans { /// whether or not the user passed --silent|--quiet on the command line output_level: OutputLevel, + + /// vector of extensions discovered and collected during scans + pub(crate) collected_extensions: RwLock<HashSet<String>>, } /// Serialize implementation for FeroxScans @@ -58,17 +62,20 @@ impl Serialize for FeroxScans { where S: Serializer, { - if let Ok(scans) = self.scans.read() { - let mut seq = serializer.serialize_seq(Some(scans.len()))?; - for scan in scans.iter() { - seq.serialize_element(&*scan).unwrap_or_default(); - } + match self.scans.read() { + Ok(scans) => { + let mut seq = serializer.serialize_seq(Some(scans.len() + 1))?; - seq.end() - } else { - // if for some reason we can't unlock the RwLock, just write an empty list - let seq = serializer.serialize_seq(Some(0))?; - seq.end() + for scan in scans.iter() { + seq.serialize_element(&*scan).unwrap_or_default(); + } + seq.end() + } + Err(_) => { + // if for some reason we can't unlock the RwLock, just write an empty list + let seq = serializer.serialize_seq(Some(0))?; + seq.end() + } } } } @@ -109,7 +116,7 @@ impl FeroxScans { sentry } - /// load serialized FeroxScan(s) into this FeroxScans + /// load serialized FeroxScan(s) and any previously collected extensions into this FeroxScans pub fn add_serialized_scans(&self, filename: &str) -> Result<()> { log::trace!("enter: add_serialized_scans({})", filename); let file = File::open(filename)?; @@ -122,18 +129,31 @@ impl FeroxScans { for scan in arr_scans { let mut deser_scan: FeroxScan = serde_json::from_value(scan.clone()).unwrap_or_default(); + // FeroxScans gets -q value from config as usual; the FeroxScans themselves // rely on that value being passed in. If the user starts a scan without -q // and resumes the scan but adds -q, FeroxScan will not have the proper value // without the line below deser_scan.output_level = self.output_level; - log::debug!("added: {}", deser_scan); self.insert(Arc::new(deser_scan)); } } } + if let Some(extensions) = state.get("collected_extensions") { + if let Some(arr_exts) = extensions.as_array() { + if let Ok(mut guard) = self.collected_extensions.write() { + for ext in arr_exts { + let deser_ext: String = + serde_json::from_value(ext.clone()).unwrap_or_default(); + + guard.insert(deser_ext); + } + } + } + } + log::trace!("exit: add_serialized_scans"); Ok(()) } @@ -163,8 +183,8 @@ impl FeroxScans { None } - pub(super) fn get_base_scan_by_url(&self, url: &str) -> Option<Arc<FeroxScan>> { - log::trace!("enter: get_sub_paths_from_path({})", url); + pub fn get_base_scan_by_url(&self, url: &str) -> Option<Arc<FeroxScan>> { + log::trace!("enter: get_base_scan_by_url({})", url); // rmatch_indices returns tuples in index, match form, i.e. (10, "/") // with the furthest-right match in the first position in the vector @@ -188,14 +208,14 @@ impl FeroxScans { for scan in guard.iter() { let slice = url.index(0..*idx); if slice == scan.url || format!("{}/", slice).as_str() == scan.url { - log::trace!("enter: get_sub_paths_from_path -> {}", scan); + log::trace!("enter: get_base_scan_by_url -> {}", scan); return Some(scan.clone()); } } } } - log::trace!("enter: get_sub_paths_from_path -> None"); + log::trace!("enter: get_base_scan_by_url -> None"); None } /// add one to either 403 or 429 tracker in the scan related to the given url @@ -511,4 +531,31 @@ impl FeroxScans { } scans } + + /// given an extension, add it to `collected_extensions` if all constraints are met + /// returns `true` if an extension was added, `false` otherwise + pub fn add_discovered_extension(&self, extension: String) -> bool { + log::trace!("enter: add_discovered_extension({})", extension); + let mut extension_added = false; + + // note: the filter by --dont-collect happens in the event handler, since it has access + // to a Handles object form which it can check the config value. additionally, the check + // against --extensions is performed there for the same reason + + if let Ok(extensions) = self.collected_extensions.read() { + // quicker to allow most to read and return and then reopen for write if necessary + if extensions.contains(&extension) { + return extension_added; + } + } + + if let Ok(mut extensions) = self.collected_extensions.write() { + log::info!("discovered new extension: {}", extension); + extensions.insert(extension); + extension_added = true; + } + + log::trace!("exit: add_discovered_extension -> {}", extension_added); + extension_added + } } diff --git a/src/scan_manager/state.rs b/src/scan_manager/state.rs index ed883bd..28e7ca7 100644 --- a/src/scan_manager/state.rs +++ b/src/scan_manager/state.rs @@ -2,6 +2,7 @@ use super::*; use crate::{config::Configuration, statistics::Stats, traits::FeroxSerialize, utils::fmt_err}; use anyhow::{Context, Result}; use serde::Serialize; +use std::collections::HashSet; use std::sync::Arc; /// Data container for (de)?serialization of multiple items @@ -18,6 +19,9 @@ pub struct FeroxState { /// Gathered statistics statistics: Arc<Stats>, + + /// collected extensions + collected_extensions: HashSet<String>, } /// implementation of FeroxState @@ -29,11 +33,17 @@ impl FeroxState { responses: &'static FeroxResponses, statistics: Arc<Stats>, ) -> Self { + let collected_extensions = match scans.collected_extensions.read() { + Ok(extensions) => extensions.clone(), + Err(_) => HashSet::new(), + }; + Self { scans, config, responses, statistics, + collected_extensions, } } } diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index c14b3cd..1dc3de7 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -352,6 +352,16 @@ fn feroxstates_feroxserialize_implementation() { let ferox_scans = FeroxScans::default(); let saved_id = ferox_scan.id.clone(); ferox_scans.insert(ferox_scan); + ferox_scans + .collected_extensions + .write() + .unwrap() + .insert(String::from("cgi")); + ferox_scans + .collected_extensions + .write() + .unwrap() + .insert(String::from("php")); let config = Configuration::new().unwrap(); let stats = Arc::new(Stats::new(config.json)); @@ -372,12 +382,17 @@ fn feroxstates_feroxserialize_implementation() { .and(predicate::str::contains("responses: FeroxResponses")) .and(predicate::str::contains("nerdcore.com")) .and(predicate::str::contains("/css")) - .and(predicate::str::contains("https://spiritanimal.com")), + .and(predicate::str::contains("https://spiritanimal.com")) + .and(predicate::str::contains("cgi")) + .and(predicate::str::contains("php")), ); assert!(expected_strs.eval(&ferox_state.as_str())); let json_state = ferox_state.as_json().unwrap(); + + println!("{}", json_state); // for debugging, if the test fails, can see what's going on + for expected in [ r#""scans""#, &format!(r#""id":"{}""#, saved_id), @@ -445,6 +460,9 @@ fn feroxstates_feroxserialize_implementation() { r#""word_count":16"#, r#""headers""#, r#""server":"nginx/1.16.1"#, + r#""collect_extensions":false"#, + r#""dont_collect":["tif","tiff","ico","cur","bmp","webp","svg","png","jpg","jpeg","jfif","gif","avif","apng","pjpeg","pjp","mov","wav","mpg","mpeg","mp3","mp4","m4a","m4p","m4v","ogg","webm","ogv","oga","flac","aac","3gp","css","zip","xls","xml","gz","tgz"]"#, + r#""collected_extensions":["cgi","php"]"#, ] .iter() { diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index d7449ed..55225c9 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -75,29 +75,34 @@ impl FeroxScanner { log::info!("Starting scan against: {}", self.target_url); let mut scan_timer = Instant::now(); - let mut dirlist_flag = false; + let mut dirlist_type = None; if self.handles.config.extract_links { // parse html for links (i.e. web scraping) - let extractor = ExtractorBuilder::default() + let mut extractor = ExtractorBuilder::default() .target(ExtractionTarget::ParseHtml) .url(&self.target_url) .handles(self.handles.clone()) .build()?; - let extract_out = extractor.extract().await?; - let links = extract_out.0; - dirlist_flag = extract_out.1; - extractor.request_links(links).await?; + + let result = extractor.extract().await?; + + if result.dir_list_type.is_some() { + dirlist_type = result.dir_list_type; + } + + extractor.request_links(result.found_links).await?; if matches!(self.order, ScanOrder::Initial) { // check for robots.txt (cannot be in subdirs) - let extractor = ExtractorBuilder::default() + let mut extractor = ExtractorBuilder::default() .target(ExtractionTarget::RobotsTxt) .url(&self.target_url) .handles(self.handles.clone()) .build()?; - let links = (extractor.extract().await?).0; - extractor.request_links(links).await?; + + let result = extractor.extract().await?; + extractor.request_links(result.found_links).await?; } } @@ -119,7 +124,7 @@ impl FeroxScanner { let progress_bar = ferox_scan.progress_bar(); // Directory listing heuristic detection to not continue scanning - if dirlist_flag { + if dirlist_type.is_some() { log::trace!("exit: scan_url -> Directory listing heuristic"); self.handles.stats.send(AddToF64Field( @@ -164,8 +169,6 @@ impl FeroxScanner { } let requester = Arc::new(Requester::from(self, ferox_scan.clone())?); - let increment_len = - ((self.handles.config.extensions.len() + 1) * self.handles.config.methods.len()) as u64; // producer tasks (mp of mpsc); responsible for making requests let producers = stream::iter(looping_words.deref().to_owned()) @@ -217,6 +220,7 @@ impl FeroxScanner { .for_each_concurrent(self.handles.config.threads, |(resp, bar)| async move { match resp.await { Ok(_) => { + let increment_len = self.handles.expected_num_requests_multiplier() as u64; bar.inc(increment_len); } Err(e) => { diff --git a/src/scanner/init.rs b/src/scanner/init.rs index b50a943..bee4c06 100644 --- a/src/scanner/init.rs +++ b/src/scanner/init.rs @@ -11,9 +11,7 @@ pub async fn initialize(num_words: usize, handles: Arc<Handles>) -> Result<()> { log::trace!("enter: initialize({}, {:?})", num_words, handles); // number of requests only needs to be calculated once, and then can be reused - let num_reqs_expected: u64 = - (num_words * (handles.config.extensions.len() + 1) * (handles.config.methods.len())) - .try_into()?; + let num_reqs_expected: u64 = handles.expected_num_requests_per_dir().try_into()?; { // no real reason to keep the arc around beyond this call diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index f19767b..53411ec 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -14,12 +14,13 @@ use crate::{ atomic_load, atomic_store, config::RequesterPolicy, event_handlers::{ - Command::{self, AddError, SubtractFromUsizeField}, + Command::{self, AddDiscoveredExtension, AddError, SubtractFromUsizeField}, Handles, }, extractor::{ExtractionTarget, ExtractorBuilder}, response::FeroxResponse, scan_manager::{FeroxScan, ScanStatus}, + scanner::RESPONSES, statistics::{StatError::Other, StatField::TotalExpected}, url::FeroxUrl, utils::logged_request, @@ -27,7 +28,7 @@ use crate::{ }; use super::{policy_data::PolicyData, FeroxScanner, PolicyTrigger}; -use crate::utils::should_deny_url; +use crate::utils::{should_deny_url, should_read_body}; use std::collections::HashSet; /// Makes multiple requests based on the presence of extensions @@ -303,8 +304,10 @@ impl Requester { pub async fn request(&self, word: &str) -> Result<()> { log::trace!("enter: request({})", word); - let urls = - FeroxUrl::from_string(&self.target_url, self.handles.clone()).formatted_urls(word)?; + let collected = self.handles.collected_extensions(); + + let urls = FeroxUrl::from_string(&self.target_url, self.handles.clone()) + .formatted_urls(word, collected)?; let should_test_deny = !self.handles.config.url_denylist.is_empty() || !self.handles.config.regex_denylist.is_empty(); @@ -336,6 +339,7 @@ impl Requester { method.as_str(), Some(self.handles.config.data.as_slice()), self.handles.clone(), + None, ) .await?; @@ -361,11 +365,11 @@ impl Requester { } // response came back without error, convert it to FeroxResponse - let ferox_response = FeroxResponse::from( + let mut ferox_response = FeroxResponse::from( response, &self.target_url, method, - true, + true, // lines/words never gets populated without true self.handles.config.output_level, ) .await; @@ -392,20 +396,30 @@ impl Requester { continue; } + if self.handles.config.collect_extensions { + ferox_response.parse_extension(self.handles.clone())?; + } + if self.handles.config.extract_links && !ferox_response.status().is_redirection() { - let extractor = ExtractorBuilder::default() + let mut extractor = ExtractorBuilder::default() .target(ExtractionTarget::ResponseBody) .response(&ferox_response) .handles(self.handles.clone()) .build()?; + let new_links: HashSet<_>; - let extracted = (extractor.extract().await?).0; + + let result = extractor.extract().await?; { // gain and quickly drop the read lock on seen_links, using it while unlocked // to determine if there are any new links to process let read_links = self.seen_links.read().await; - new_links = extracted.difference(&read_links).cloned().collect(); + new_links = result + .found_links + .difference(&read_links) + .cloned() + .collect(); } if !new_links.is_empty() { @@ -417,7 +431,9 @@ impl Requester { } } - extractor.request_links(new_links).await?; + if !new_links.is_empty() { + extractor.request_links(new_links).await?; + } } // everything else should be reported diff --git a/src/statistics/container.rs b/src/statistics/container.rs index 5ae3de0..780e517 100644 --- a/src/statistics/container.rs +++ b/src/statistics/container.rs @@ -69,6 +69,10 @@ pub struct Stats { /// response bodies and robots.txt as of v1.11.0 links_extracted: AtomicUsize, + /// tracker for number of extensions discovered when `--collect-extensions` is used; sources + /// are response bodies + extensions_collected: AtomicUsize, + /// tracker for overall number of 200s seen by the client status_200s: AtomicUsize, @@ -166,6 +170,10 @@ impl Serialize for Stats { state.serialize_field("total_scans", &atomic_load!(self.total_scans))?; state.serialize_field("initial_targets", &atomic_load!(self.initial_targets))?; state.serialize_field("links_extracted", &atomic_load!(self.links_extracted))?; + state.serialize_field( + "extensions_collected", + &atomic_load!(self.extensions_collected), + )?; state.serialize_field("status_200s", &atomic_load!(self.status_200s))?; state.serialize_field("status_301s", &atomic_load!(self.status_301s))?; state.serialize_field("status_302s", &atomic_load!(self.status_302s))?; @@ -290,6 +298,13 @@ impl<'a> Deserialize<'a> for Stats { } } } + "extensions_collected" => { + if let Some(num) = value.as_u64() { + if let Ok(parsed) = usize::try_from(num) { + atomic_increment!(stats.extensions_collected, parsed); + } + } + } "status_200s" => { if let Some(num) = value.as_u64() { if let Ok(parsed) = usize::try_from(num) { @@ -628,6 +643,9 @@ impl Stats { StatField::LinksExtracted => { atomic_increment!(self.links_extracted, value); } + StatField::ExtensionsCollected => { + atomic_increment!(self.extensions_collected, value); + } StatField::WildcardsFiltered => { atomic_increment!(self.wildcards_filtered, value); atomic_increment!(self.responses_filtered, value); @@ -664,6 +682,10 @@ impl Stats { atomic_increment!(self.client_errors, atomic_load!(d_stats.client_errors)); atomic_increment!(self.server_errors, atomic_load!(d_stats.server_errors)); atomic_increment!(self.links_extracted, atomic_load!(d_stats.links_extracted)); + atomic_increment!( + self.extensions_collected, + atomic_load!(d_stats.extensions_collected) + ); atomic_increment!(self.status_200s, atomic_load!(d_stats.status_200s)); atomic_increment!(self.status_301s, atomic_load!(d_stats.status_301s)); atomic_increment!(self.status_302s, atomic_load!(d_stats.status_302s)); @@ -834,7 +856,7 @@ mod tests { #[test] /// Stats::merge_from should properly increment expected fields and ignore others fn stats_merge_from_alters_correct_fields() { - let contents = r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#; + let contents = r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"extensions_collected":4,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#; let config = Configuration::new().unwrap(); let stats = Stats::new(config.json); @@ -857,6 +879,7 @@ mod tests { assert_eq!(atomic_load!(stats.total_scans), 0); // not updated in merge_from assert_eq!(atomic_load!(stats.initial_targets), 0); // not updated in merge_from assert_eq!(atomic_load!(stats.links_extracted), 51); + assert_eq!(atomic_load!(stats.extensions_collected), 4); assert_eq!(atomic_load!(stats.status_200s), 720); assert_eq!(atomic_load!(stats.status_301s), 12); assert_eq!(atomic_load!(stats.status_302s), 1); diff --git a/src/statistics/field.rs b/src/statistics/field.rs index dab6954..80ff3b3 100644 --- a/src/statistics/field.rs +++ b/src/statistics/field.rs @@ -13,6 +13,9 @@ pub enum StatField { /// Translates to `links_extracted` LinksExtracted, + /// Translates to `extensions_collected` + ExtensionsCollected, + /// Translates to `total_expected` TotalExpected, diff --git a/src/url.rs b/src/url.rs index 9e522ab..337bb5d 100644 --- a/src/url.rs +++ b/src/url.rs @@ -1,6 +1,7 @@ use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError}; use anyhow::{anyhow, bail, Result}; use reqwest::Url; +use std::collections::HashSet; use std::{convert::TryInto, fmt, sync::Arc}; /// abstraction around target urls; collects all Url related shenanigans in one place @@ -37,7 +38,11 @@ impl FeroxUrl { /// /// If any extensions were passed to the program, each extension will add a /// (base_url + word + ext) Url to the vector - pub fn formatted_urls(&self, word: &str) -> Result<Vec<Url>> { + pub fn formatted_urls( + &self, + word: &str, + collected_extensions: HashSet<String>, + ) -> Result<Vec<Url>> { log::trace!("enter: formatted_urls({})", word); let mut urls = vec![]; @@ -54,7 +59,13 @@ impl FeroxUrl { Err(_) => self.handles.stats.send(AddError(UrlFormat))?, } - for ext in self.handles.config.extensions.iter() { + for ext in self + .handles + .config + .extensions + .iter() + .chain(collected_extensions.iter()) + { match self.format(word, Some(ext)) { // any extensions passed in Ok(url) => urls.push(url), diff --git a/src/utils.rs b/src/utils.rs index aa97907..15264f3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -100,8 +100,9 @@ pub async fn logged_request( method: &str, data: Option<&[u8]>, handles: Arc<Handles>, + client: Option<&Client>, ) -> Result<Response> { - let client = &handles.config.client; + let client = client.unwrap_or(&handles.config.client); let level = handles.config.output_level; let tx_stats = handles.stats.tx.clone(); @@ -490,6 +491,20 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String { filename } +/// check for runtime options that necessitate reading the response body +pub fn should_read_body(config: &Configuration) -> bool { + log::trace!("enter: should_read_body(running config...)"); + + let result = config.extract_links + || !config.filter_line_count.is_empty() + || !config.filter_word_count.is_empty() + || !config.filter_regex.is_empty() + || !config.filter_similar.is_empty(); + + log::trace!("exit: should_read_body -> {}", result); + result +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/test_banner.rs b/tests/test_banner.rs index 980f346..a1d3879 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -1091,3 +1091,60 @@ fn banner_prints_data() { .and(predicate::str::contains("─┴─")), ); } + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + data body +fn banner_prints_collect_extensions_and_dont_collect_default() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("-c") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Ignored Extensions")) + .and(predicate::str::contains("Images, Movies, Audio, etc...")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + data body +fn banner_prints_collect_extensions_and_dont_collect_with_input() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("-c") + .arg("--dont-collect") + .arg("pdf") + .arg("xps") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Ignored Extensions")) + .and(predicate::str::contains("[pdf, xps]")) + .and(predicate::str::contains("─┴─")), + ); +} From 7194326cd1143d21f45a4c5e395077a8c6de3345 Mon Sep 17 00:00:00 2001 From: epi <epibar052@gmail.com> Date: Sun, 13 Feb 2022 13:20:13 -0600 Subject: [PATCH 12/40] moved dirlist detection to heuristics/fixed initial extract async issue --- src/event_handlers/container.rs | 9 +- src/event_handlers/scans.rs | 1 - src/extractor/builder.rs | 7 +- src/extractor/container.rs | 177 +++++++---------------------- src/extractor/tests.rs | 8 +- src/heuristics.rs | 117 +++++++++++++++++++ src/lib.rs | 5 + src/main.rs | 6 +- src/scan_manager/scan.rs | 2 +- src/scan_manager/scan_container.rs | 2 +- src/scanner/ferox_scanner.rs | 104 +++++++++-------- src/scanner/requester.rs | 18 ++- src/url.rs | 8 +- 13 files changed, 251 insertions(+), 213 deletions(-) diff --git a/src/event_handlers/container.rs b/src/event_handlers/container.rs index 4e24714..37cbd8e 100644 --- a/src/event_handlers/container.rs +++ b/src/event_handlers/container.rs @@ -93,7 +93,14 @@ impl Handles { let terminal_handle = TermOutHandle::new(tx.clone(), tx.clone()); let stats_handle = StatsHandle::new(Arc::new(Stats::new(configuration.json)), tx.clone()); let filters_handle = FiltersHandle::new(Arc::new(FeroxFilters::default()), tx.clone()); - let handles = Self::new(stats_handle, filters_handle, terminal_handle, configuration); + let wordlist = Arc::new(vec![String::from("this_is_a_test")]); + let handles = Self::new( + stats_handle, + filters_handle, + terminal_handle, + configuration, + wordlist, + ); if let Some(sh) = scanned_urls { let scan_handle = ScanHandle::new(sh, tx); handles.set_scan_handle(scan_handle); diff --git a/src/event_handlers/scans.rs b/src/event_handlers/scans.rs index b8030b3..a1705c3 100644 --- a/src/event_handlers/scans.rs +++ b/src/event_handlers/scans.rs @@ -15,7 +15,6 @@ use crate::{ use super::command::Command::AddToUsizeField; use super::*; -use crate::scan_manager::{ScanStatus, ScanType}; use crate::statistics::StatField; use reqwest::Url; use tokio::time::Duration; diff --git a/src/extractor/builder.rs b/src/extractor/builder.rs index 8cc4c12..6349607 100644 --- a/src/extractor/builder.rs +++ b/src/extractor/builder.rs @@ -16,14 +16,14 @@ pub(super) const ROBOTS_TXT_REGEX: &str = /// Which type of extraction should be performed #[derive(Debug, Copy, Clone)] pub enum ExtractionTarget { - /// Examine a response body and extract links + /// Examine a response body and extract javascript and html links (multiple tags) ResponseBody, /// Examine robots.txt (specifically) and extract links RobotsTxt, - // Parse HTML and extract links - ParseHtml, + /// Extract all <a> tags from a page + DirectoryListing, } /// responsible for building an `Extractor` @@ -98,7 +98,6 @@ impl<'a> ExtractorBuilder<'a> { url: self.url.to_owned(), handles: self.handles.as_ref().unwrap().clone(), target: self.target, - num_collected: 0, }) } } diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 25168c3..02b767d 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -1,5 +1,4 @@ use super::*; -use crate::utils::{should_deny_url, should_read_body}; use crate::{ client, event_handlers::{ @@ -13,13 +12,12 @@ use crate::{ StatField::{LinksExtracted, TotalExpected}, }, url::FeroxUrl, - utils::{logged_request, make_request}, - DEFAULT_METHOD, + utils::{logged_request, make_request, should_deny_url, should_read_body}, + ExtractionResult, DEFAULT_METHOD, }; use anyhow::{bail, Context, Result}; use reqwest::{Client, StatusCode, Url}; use scraper::{Html, Selector}; -use std::borrow::BorrowMut; use std::collections::HashSet; use tokio::sync::oneshot; @@ -33,33 +31,6 @@ enum RecursionStatus { NotRecursive, } -/// enum representing the different servers that `parse_html` can detect when directory listing is -/// enabled -#[derive(Copy, Debug, Clone)] -pub enum DirListingType { - /// apache server, detected by `Index of /` - Apache, - - /// tomcat/python server, detected by `Directory Listing for /` - TomCat_Python, - - /// ASP.NET server, detected by `Directory Listing -- /` - AspDotNet, - // /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used) - // IIS_AZURE, -} - -/// Wrapper around the results of performing any kind of extraction against a target web page -#[derive(Debug, Default, Clone)] -pub struct ExtractionResult { - /// links extracted from the visited page - pub found_links: HashSet<String>, - - /// type of server where directory listing was detected - /// i.e. https://portswigger.net/kb/issues/00600100_directory-listing - pub dir_list_type: Option<DirListingType>, -} - /// Handles all logic related to extracting links from requested source code #[derive(Debug)] pub struct Extractor<'a> { @@ -80,9 +51,6 @@ pub struct Extractor<'a> { /// type of extraction to be performed pub(super) target: ExtractionTarget, - - /// current number of collected extensions, used for updating the base scan's progress bar - pub(super) num_collected: usize, } /// Extractor implementation @@ -94,10 +62,9 @@ impl<'a> Extractor<'a> { self.target ); match self.target { - ExtractionTarget::ResponseBody | ExtractionTarget::ParseHtml => { - Ok(self.extract_from_body().await?) - } + ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?), ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?), + ExtractionTarget::DirectoryListing => Ok(self.extract_from_dir_listing().await?), } } @@ -167,10 +134,7 @@ impl<'a> Extractor<'a> { }; let scanned_urls = self.handles.ferox_scans()?; - log::warn!("links: {:?}", links); self.update_stats(links.len())?; - // todo: get it to where parsehtml->request_links doesn't run in a single thread/block asyncness - // todo: move the parse_html into heuristics (maybe) and then move the check/dirlist stuff lower in scan_url (may not work, but look) for link in links { let mut resp = match self.request_link(&link).await { @@ -240,15 +204,15 @@ impl<'a> Extractor<'a> { links: &mut HashSet<String>, html: &Html, ) { - self.extract_links_by_attr(resp_url, links, &html, "a", "href"); - self.extract_links_by_attr(resp_url, links, &html, "img", "src"); - self.extract_links_by_attr(resp_url, links, &html, "form", "action"); - self.extract_links_by_attr(resp_url, links, &html, "script", "src"); - self.extract_links_by_attr(resp_url, links, &html, "iframe", "src"); - self.extract_links_by_attr(resp_url, links, &html, "div", "src"); - self.extract_links_by_attr(resp_url, links, &html, "frame", "src"); - self.extract_links_by_attr(resp_url, links, &html, "embed", "src"); - self.extract_links_by_attr(resp_url, links, &html, "script", "src"); + self.extract_links_by_attr(resp_url, links, html, "a", "href"); + self.extract_links_by_attr(resp_url, links, html, "img", "src"); + self.extract_links_by_attr(resp_url, links, html, "form", "action"); + self.extract_links_by_attr(resp_url, links, html, "script", "src"); + self.extract_links_by_attr(resp_url, links, html, "iframe", "src"); + self.extract_links_by_attr(resp_url, links, html, "div", "src"); + self.extract_links_by_attr(resp_url, links, html, "frame", "src"); + self.extract_links_by_attr(resp_url, links, html, "embed", "src"); + self.extract_links_by_attr(resp_url, links, html, "script", "src"); } /// Given the body of a `reqwest::Response`, perform the following actions @@ -353,7 +317,7 @@ impl<'a> Extractor<'a> { paths } - /// simple helper to stay DRY, trys to join a url + fragment and add it to the `links` HashSet + /// simple helper to stay DRY, tries to join a url + fragment and add it to the `links` HashSet pub(super) fn add_link_to_set_of_links( &self, link: &str, @@ -362,15 +326,15 @@ impl<'a> Extractor<'a> { log::trace!("enter: add_link_to_set_of_links({}, {:?})", link, links); let old_url = match self.target { - ExtractionTarget::ResponseBody => self.response.unwrap().url().clone(), - ExtractionTarget::ParseHtml | ExtractionTarget::RobotsTxt => { - match Url::parse(&self.url) { - Ok(u) => u, - Err(e) => { - bail!("Could not parse {}: {}", self.url, e); - } - } + ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => { + self.response.unwrap().url().clone() } + ExtractionTarget::RobotsTxt => match Url::parse(&self.url) { + Ok(u) => u, + Err(e) => { + bail!("Could not parse {}: {}", self.url, e); + } + }, }; let new_url = old_url @@ -446,7 +410,7 @@ impl<'a> Extractor<'a> { pub(super) async fn extract_from_robots(&self) -> Result<ExtractionResult> { log::trace!("enter: extract_robots_txt"); - let mut links: HashSet<String> = HashSet::new(); + let mut result: HashSet<_> = ExtractionResult::new(); // request let response = self.make_extract_request("/robots.txt").await?; @@ -456,17 +420,12 @@ impl<'a> Extractor<'a> { if let Some(new_path) = capture.name("url_path") { let mut new_url = Url::parse(&self.url)?; new_url.set_path(new_path.as_str()); - if self.add_all_sub_paths(new_url.path(), &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", new_url, links); + if self.add_all_sub_paths(new_url.path(), &mut result).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", new_url, result); } } } - let result = ExtractionResult { - found_links: links, - dir_list_type: None, - }; - log::trace!("exit: extract_robots_txt -> {:?}", result); Ok(result) } @@ -480,90 +439,38 @@ impl<'a> Extractor<'a> { pub(super) async fn extract_from_body(&self) -> Result<ExtractionResult> { log::trace!("enter: extract_from_body"); - let mut result = ExtractionResult::default(); - - // need late binding here to avoid 'creates a temporary which is freed...' in the - // `let ... if` below because of self's FeroxResponse lifetime - let mut requested = FeroxResponse::default(); - - if self.response.is_none() { - // called as a ParseHtml target - let url = Url::parse(&self.url)?; - requested = self.make_extract_request(url.path()).await?; - } - - let response = if self.response.is_some() { - // called as a ResponseBody extraction - self.response.unwrap() - } else { - &requested - }; + let mut result = ExtractionResult::new(); + let response = self.response.unwrap(); let resp_url = response.url(); let body = response.text(); let html = Html::parse_document(body); - if matches!(self.target, ExtractionTarget::ParseHtml) { - // only check for directory listing when ParseHtml is the target, based on where - // in the codebase Extractor::extract() is called - let dirlist_type = self.detect_directory_listing(&html); - - if dirlist_type.is_some() { - log::debug!( - "Directory listing heuristic detected: {:?}", - dirlist_type.unwrap() - ); - - self.extract_links_by_attr(resp_url, &mut result.found_links, &html, "a", "href"); - - result.dir_list_type = dirlist_type; - - log::trace!("exit: extract_from_body -> {:?}", result); - return Ok(result); - } - } - // extract links from html tags/attributes and embedded javascript - self.extract_all_links_from_html_tags(resp_url, &mut result.found_links, &html); - self.extract_all_links_from_javascript(body, resp_url, &mut result.found_links); + self.extract_all_links_from_html_tags(resp_url, &mut result, &html); + self.extract_all_links_from_javascript(body, resp_url, &mut result); log::trace!("exit: extract_from_body -> {:?}", result); Ok(result) } - /// Directory listing heuristic detection, uses <title> tag to make its determination. When - /// the inner html of <title> matches one of the following, a `DirListingType` is returned. - /// - apache: `Index of /` - /// - tomcat/python: `Directory Listing for /` - /// - ASP.NET: `Directory Listing -- /` - /// - <host> - /: iis, azure, skipping due to loose heuristic - pub(super) fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> { - log::trace!("enter: detect_directory_listing(html body...)"); + /// parses html response bodies in search of <a> tags. + /// + /// the assumption is that directory listing is turned on and this extraction target simply + /// scoops up all the links for the given directory. The test to detect a directory listing + /// is located in `HeuristicTests` + pub async fn extract_from_dir_listing(&self) -> Result<ExtractionResult> { + log::trace!("enter: extract_from_dir_listing"); - let title_selector = Selector::parse("title").expect("couldn't parse title selector"); + let mut result = ExtractionResult::new(); - for t in html.select(&title_selector) { - let title = t.inner_html().to_lowercase(); + let response = self.response.unwrap(); + let html = Html::parse_document(response.text()); - let dirlist_type = if title.contains("directory listing for /") { - Some(DirListingType::TomCat_Python) - } else if title.contains("index of /") { - Some(DirListingType::Apache) - } else if title.contains("directory listing -- /") { - Some(DirListingType::AspDotNet) - } else { - // IIS_AZURE purposely skipped for now - None - }; + self.extract_links_by_attr(response.url(), &mut result, &html, "a", "href"); - if dirlist_type.is_some() { - log::trace!("exit: detect_directory_listing -> {:?}", dirlist_type); - return dirlist_type; - } - } - - log::trace!("exit: detect_directory_listing -> None"); - None + log::trace!("exit: extract_from_dir_listing -> {:?}", result); + Ok(result) } /// simple helper to get html links by tag/attribute and add it to the `links` HashSet diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 8d9c972..2706916 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -21,7 +21,7 @@ lazy_static! { static ref BODY_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ResponseBody, Arc::new(FeroxScans::default())); /// Extractor for testing paring html - static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ParseHtml, Arc::new(FeroxScans::default())); + static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::DirectoryListing, Arc::new(FeroxScans::default())); /// FeroxResponse for Extractor static ref RESPONSE: FeroxResponse = get_test_response(); @@ -45,9 +45,9 @@ fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc<FeroxScans>) -> E ExtractionTarget::RobotsTxt => builder .url("http://localhost") .target(ExtractionTarget::RobotsTxt), - ExtractionTarget::ParseHtml => builder + ExtractionTarget::DirectoryListing => builder .url("http://localhost") - .target(ExtractionTarget::ParseHtml), + .target(ExtractionTarget::DirectoryListing), }; let config = Arc::new(Configuration::new().unwrap()); @@ -258,7 +258,7 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain() handles: handles.clone(), }; - let links = (extractor.extract_from_body().await?).0; + let links = extractor.extract_from_body().await?; assert!(links.is_empty()); assert_eq!(mock.hits(), 1); diff --git a/src/heuristics.rs b/src/heuristics.rs index 9837f30..e999133 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use anyhow::{bail, Result}; use console::style; +use scraper::{Html, Selector}; use uuid::Uuid; use crate::{ @@ -36,6 +37,36 @@ macro_rules! format_template { }; } +/// enum representing the different servers that `parse_html` can detect when directory listing is +/// enabled +#[derive(Copy, Debug, Clone)] +pub enum DirListingType { + /// apache server, detected by `Index of /` + Apache, + + /// tomcat/python server, detected by `Directory Listing for /` + TomCatOrPython, + + /// ASP.NET server, detected by `Directory Listing -- /` + AspDotNet, + + // /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used) + // IIS_AZURE, + /// variant that represents the absence of directory listing + None, +} + +/// Wrapper around the results of running a directory listing detection against a target web page +#[derive(Debug, Clone)] +pub struct DirListingResult { + /// type of server where directory listing was detected + /// i.e. https://portswigger.net/kb/issues/00600100_directory-listing + pub dir_list_type: Option<DirListingType>, + + /// the `FeroxResponse` generated during detection + pub response: FeroxResponse, +} + /// container for heuristics related info pub struct HeuristicTests { /// Handles object for event handler interaction @@ -285,6 +316,92 @@ impl HeuristicTests { log::trace!("exit: connectivity_test -> {:?}", good_urls); Ok(good_urls) } + + /// heuristic designed to detect when a server has directory listing enabled + pub async fn directory_listing(&self, target_url: &str) -> Result<Option<DirListingResult>> { + log::trace!("enter: directory_listing({})", target_url); + + let tgt = if !target_url.ends_with('/') { + // if left unchanged, this function would be called against redirects that point to + // valid directories for most, if not all, directories beyond the initial urls. + // so, instead of `directory_listing("http://localhost") -> None` we get + // `directory_listing("http://localhost/") -> Some(DirListingResult)` if there is + // directory listing beyond the redirect + format!("{}/", target_url) + } else { + target_url.to_string() + }; + + let url = FeroxUrl::from_string(&tgt, self.handles.clone()); + let request = url.format("", None)?; + + let result = + logged_request(&request, DEFAULT_METHOD, None, self.handles.clone(), None).await?; + + let ferox_response = FeroxResponse::from( + result, + &url.target, + DEFAULT_METHOD, + true, + self.handles.config.output_level, + ) + .await; + + let body = ferox_response.text(); + let html = Html::parse_document(body); + + let dirlist_type = self.detect_directory_listing(&html); + + if dirlist_type.is_some() { + log::debug!("directory listing heuristic detected: {:?}", dirlist_type); + + let result = DirListingResult { + dir_list_type: dirlist_type, + response: ferox_response, + }; + + log::trace!("exit: directory_listing -> {:?}", result); + return Ok(Some(result)); + } + + log::trace!("exit: directory_listing -> None"); + Ok(None) + } + + /// Directory listing heuristic detection, uses <title> tag to make its determination. When + /// the inner html of <title> matches one of the following, a `DirListingType` is returned. + /// - apache: `Index of /` + /// - tomcat/python: `Directory Listing for /` + /// - ASP.NET: `Directory Listing -- /` + /// - <host> - /: iis, azure, skipping due to loose heuristic + fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> { + log::trace!("enter: detect_directory_listing(html body...)"); + + let title_selector = Selector::parse("title").expect("couldn't parse title selector"); + + for t in html.select(&title_selector) { + let title = t.inner_html().to_lowercase(); + + let dirlist_type = if title.contains("directory listing for /") { + Some(DirListingType::TomCatOrPython) + } else if title.contains("index of /") { + Some(DirListingType::Apache) + } else if title.contains("directory listing -- /") { + Some(DirListingType::AspDotNet) + } else { + // IIS_AZURE purposely skipped for now + None + }; + + if dirlist_type.is_some() { + log::trace!("exit: detect_directory_listing -> {:?}", dirlist_type); + return dirlist_type; + } + } + + log::trace!("exit: detect_directory_listing -> None"); + None + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 87b0626..7a0061c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,9 @@ #![deny(clippy::all)] +#![allow(clippy::mutex_atomic)] // #![warn(clippy::pedantic, clippy::restriction, clippy::nursery, clippy::cargo)] use anyhow::Result; use reqwest::StatusCode; +use std::collections::HashSet; use tokio::{ sync::mpsc::{UnboundedReceiver, UnboundedSender}, task::JoinHandle, @@ -41,6 +43,9 @@ pub(crate) type Joiner = JoinHandle<Result<()>>; /// Generic mpsc::unbounded_channel type to tidy up some code pub(crate) type FeroxChannel<T> = (UnboundedSender<T>, UnboundedReceiver<T>); +/// Wrapper around the results of performing any kind of extraction against a target web page +pub(crate) type ExtractionResult = HashSet<String>; + /// Version pulled from Cargo.toml at compile time pub(crate) const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/src/main.rs b/src/main.rs index 95cf66e..20ddba8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -49,7 +49,11 @@ fn get_unique_words_from_wordlist(path: &str) -> Result<Arc<Vec<String>>> { let reader = BufReader::new(file); - let mut words = Vec::new(); + // this empty string ensures that we call Requester::request with the base url, i.e. + // `http://localhost/` instead of going straight into `http://localhost/WORD.EXT`. + // for vanilla scans, it doesn't matter all that much, but it can be a significant difference + // when `-e` is used, depending on the content at the base url. + let mut words = vec![String::from("")]; for line in reader.lines() { line.map(|result| { diff --git a/src/scan_manager/scan.rs b/src/scan_manager/scan.rs index 289a941..e2ac1b9 100644 --- a/src/scan_manager/scan.rs +++ b/src/scan_manager/scan.rs @@ -41,7 +41,7 @@ pub struct FeroxScan { /// Number of requests to populate the progress bar with pub(super) num_requests: u64, - /// Status of this scan + /// Status of this scan pub status: Mutex<ScanStatus>, /// The spawned tokio task performing this scan (uses tokio::sync::Mutex) diff --git a/src/scan_manager/scan_container.rs b/src/scan_manager/scan_container.rs index 8c6170c..fea984d 100644 --- a/src/scan_manager/scan_container.rs +++ b/src/scan_manager/scan_container.rs @@ -7,7 +7,7 @@ use crate::{ scan_manager::{MenuCmd, MenuCmdResult}, scanner::RESPONSES, traits::FeroxSerialize, - DEFAULT_IGNORED_EXTENSIONS, SLEEP_DURATION, + SLEEP_DURATION, }; use anyhow::Result; use reqwest::StatusCode; diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index 55225c9..8320198 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -75,35 +75,18 @@ impl FeroxScanner { log::info!("Starting scan against: {}", self.target_url); let mut scan_timer = Instant::now(); - let mut dirlist_type = None; + // let mut dirlist_type = None; - if self.handles.config.extract_links { - // parse html for links (i.e. web scraping) + if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) { + // check for robots.txt (cannot be in sub-directories, so limited to Initial) let mut extractor = ExtractorBuilder::default() - .target(ExtractionTarget::ParseHtml) + .target(ExtractionTarget::RobotsTxt) .url(&self.target_url) .handles(self.handles.clone()) .build()?; let result = extractor.extract().await?; - - if result.dir_list_type.is_some() { - dirlist_type = result.dir_list_type; - } - - extractor.request_links(result.found_links).await?; - - if matches!(self.order, ScanOrder::Initial) { - // check for robots.txt (cannot be in subdirs) - let mut extractor = ExtractorBuilder::default() - .target(ExtractionTarget::RobotsTxt) - .url(&self.target_url) - .handles(self.handles.clone()) - .build()?; - - let result = extractor.extract().await?; - extractor.request_links(result.found_links).await?; - } + extractor.request_links(result).await?; } let scanned_urls = self.handles.ferox_scans()?; @@ -123,51 +106,72 @@ impl FeroxScanner { let progress_bar = ferox_scan.progress_bar(); - // Directory listing heuristic detection to not continue scanning - if dirlist_type.is_some() { - log::trace!("exit: scan_url -> Directory listing heuristic"); - - self.handles.stats.send(AddToF64Field( - DirScanTimes, - scan_timer.elapsed().as_secs_f64(), - ))?; - - self.handles.stats.send(SubtractFromUsizeField( - TotalExpected, - progress_bar.length() as usize, - ))?; - - progress_bar.reset_eta(); - progress_bar.finish_with_message(&format!( - "=> {}", - style("Directory listing").blue().bright() - )); - - ferox_scan.finish()?; - - return Ok(()); - } - // When acquire is called and the semaphore has remaining permits, the function immediately // returns a permit. However, if no remaining permits are available, acquire (asynchronously) // waits until an outstanding permit is dropped, at which point, the freed permit is assigned // to the caller. let _permit = self.scan_limiter.acquire().await; + if self.handles.config.scan_limit > 0 { scan_timer = Instant::now(); progress_bar.reset(); } - // Arc clones to be passed around to the various scans - let looping_words = self.wordlist.clone(); - { + // heuristics test block let test = heuristics::HeuristicTests::new(self.handles.clone()); + if let Ok(num_reqs) = test.wildcard(&self.target_url).await { progress_bar.inc(num_reqs); } + + if let Ok(dirlist_result) = test.directory_listing(&self.target_url).await { + if dirlist_result.is_some() { + let dirlist_result = dirlist_result.unwrap(); + // at this point, we have a DirListingType, and it's not the None variant + // which means we found directory listing based on the heuristic; now we need + // to process the links that are available + // Directory listing heuristic detection to not continue scanning + + let mut extractor = ExtractorBuilder::default() + .response(&dirlist_result.response) + .target(ExtractionTarget::DirectoryListing) + .url(&self.target_url) + .handles(self.handles.clone()) + .build()?; + + let result = extractor.extract_from_dir_listing().await?; + + extractor.request_links(result).await?; + + log::trace!("exit: scan_url -> Directory listing heuristic"); + + self.handles.stats.send(AddToF64Field( + DirScanTimes, + scan_timer.elapsed().as_secs_f64(), + ))?; + + self.handles.stats.send(SubtractFromUsizeField( + TotalExpected, + progress_bar.length() as usize, + ))?; + + progress_bar.reset_eta(); + progress_bar.finish_with_message(&format!( + "=> {}", + style("Directory listing").blue().bright() + )); + + ferox_scan.finish()?; + + return Ok(()); + } + } } + // Arc clones to be passed around to the various scans + let looping_words = self.wordlist.clone(); + let requester = Arc::new(Requester::from(self, ferox_scan.clone())?); // producer tasks (mp of mpsc); responsible for making requests diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index 53411ec..a8cd629 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -1,5 +1,6 @@ use std::{ cmp::max, + collections::HashSet, sync::{atomic::Ordering, Arc, Mutex}, }; @@ -14,22 +15,19 @@ use crate::{ atomic_load, atomic_store, config::RequesterPolicy, event_handlers::{ - Command::{self, AddDiscoveredExtension, AddError, SubtractFromUsizeField}, + Command::{self, AddError, SubtractFromUsizeField}, Handles, }, extractor::{ExtractionTarget, ExtractorBuilder}, response::FeroxResponse, scan_manager::{FeroxScan, ScanStatus}, - scanner::RESPONSES, statistics::{StatError::Other, StatField::TotalExpected}, url::FeroxUrl, - utils::logged_request, + utils::{logged_request, should_deny_url}, HIGH_ERROR_RATIO, }; use super::{policy_data::PolicyData, FeroxScanner, PolicyTrigger}; -use crate::utils::{should_deny_url, should_read_body}; -use std::collections::HashSet; /// Makes multiple requests based on the presence of extensions pub(super) struct Requester { @@ -415,11 +413,7 @@ impl Requester { // gain and quickly drop the read lock on seen_links, using it while unlocked // to determine if there are any new links to process let read_links = self.seen_links.read().await; - new_links = result - .found_links - .difference(&read_links) - .cloned() - .collect(); + new_links = result.difference(&read_links).cloned().collect(); } if !new_links.is_empty() { @@ -474,12 +468,14 @@ mod tests { let (filters_task, filters_handle) = FiltersHandler::initialize(); let (out_task, out_handle) = TermOutHandler::initialize(configuration.clone(), stats_handle.tx.clone()); + let wordlist = Arc::new(vec![String::from("this_is_a_test")]); let handles = Arc::new(Handles::new( stats_handle, filters_handle, out_handle, configuration.clone(), + wordlist, )); let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone()); @@ -603,10 +599,10 @@ mod tests { let requester = Requester { handles, + target_url: "http://localhost".to_string(), seen_links: RwLock::new(HashSet::<String>::new()), tuning_lock: Mutex::new(0), ferox_scan: Arc::new(FeroxScan::default()), - target_url: "http://localhost".to_string(), rate_limiter: RwLock::new(None), policy_data: Default::default(), }; diff --git a/src/url.rs b/src/url.rs index 337bb5d..7621ec5 100644 --- a/src/url.rs +++ b/src/url.rs @@ -265,7 +265,7 @@ mod tests { fn formatted_urls_no_extension_returns_base_url_with_word() { let handles = Arc::new(Handles::for_testing(None, None).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!(urls, [Url::parse("http://localhost/turbo").unwrap()]) } @@ -279,7 +279,7 @@ mod tests { let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!( urls, @@ -326,7 +326,7 @@ mod tests { let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!(urls, expected[i]); } } @@ -513,7 +513,7 @@ mod tests { }; let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - match url.formatted_urls("ferox") { + match url.formatted_urls("ferox", HashSet::new()) { Ok(urls) => { // 3 = One for the main word + slash and for the two extensions assert_eq!(urls.len(), 3); From d0d99ebed6e832f13c00a829ff0538e3476da844 Mon Sep 17 00:00:00 2001 From: epi <epibar052@gmail.com> Date: Mon, 14 Feb 2022 06:29:25 -0600 Subject: [PATCH 13/40] tests passing --- Cargo.lock | 246 ++++++++++++++++------------- Cargo.toml | 10 +- shell_completions/_feroxbuster.ps1 | 3 +- src/config/tests.rs | 2 +- src/main.rs | 25 +-- src/scan_manager/tests.rs | 38 ++--- tests/test_deny_list.rs | 10 +- tests/test_extractor.rs | 6 +- tests/test_main.rs | 13 +- tests/test_scanner.rs | 14 +- 10 files changed, 191 insertions(+), 176 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91530a1..69451f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.52" +version = "1.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3" +checksum = "94a45b455c14666b85fc40a019e8ab9eb75e3a124e05494f5397122bc9eb06e0" [[package]] name = "ascii-canvas" @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "async-task" -version = "4.0.3" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91831deabf0d6d7ec49552e489aed63b7456a7a3c46cff62adad428110b0af0" +checksum = "677d306121baf53310a3fd342d88dc0824f6bbeace68347593658525565abee8" [[package]] name = "async-trait" @@ -218,9 +218,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" @@ -329,9 +329,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.0.7" +version = "3.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e8611f9ae4e068fa3e56931fded356ff745e70987ff76924a6e0ab1c8ef2e3" +checksum = "b63edc3f163b3c71ec8aa23f9bd6070f77edbf3d1d198b164afa90ff00e4ec62" dependencies = [ "atty", "bitflags", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d044e9db8cd0f68191becdeb5246b7462e4cf0c069b19ae00d1bf3fa9889498d" +checksum = "678db4c39c013cc68b54d372bce2efc58e30a0337c497c9032fd196802df3bc3" dependencies = [ "clap", ] @@ -385,9 +385,9 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" [[package]] name = "core-foundation" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6888e10551bb93e424d8df1d07f1a8b4fceb0001a3a4b048bfc47554946f47b3" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" dependencies = [ "core-foundation-sys", "libc", @@ -401,9 +401,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "crossbeam-utils" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120" +checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" dependencies = [ "cfg-if", "lazy_static", @@ -656,15 +656,15 @@ dependencies = [ [[package]] name = "event-listener" -version = "2.5.1" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7531096570974c3a9dcf9e4b8e1cede1ec26cf5046219fb3b9d897503b9be59" +checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71" [[package]] name = "fastrand" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" dependencies = [ "instant", ] @@ -700,7 +700,7 @@ dependencies = [ "serde_regex", "tempfile", "tokio", - "tokio-util", + "tokio-util 0.7.0", "toml", "url", "uuid", @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "futf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" dependencies = [ "mac", "new_debug_unreachable", @@ -764,9 +764,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" dependencies = [ "futures-channel", "futures-core", @@ -779,9 +779,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" dependencies = [ "futures-core", "futures-sink", @@ -789,15 +789,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" [[package]] name = "futures-executor" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" dependencies = [ "futures-core", "futures-task", @@ -806,9 +806,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" [[package]] name = "futures-lite" @@ -827,9 +827,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" dependencies = [ "proc-macro2", "quote", @@ -838,21 +838,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" [[package]] name = "futures-task" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" [[package]] name = "futures-util" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ "futures-channel", "futures-core", @@ -914,22 +914,21 @@ dependencies = [ [[package]] name = "gloo-timers" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f16c88aa13d2656ef20d1c042086b8767bbe2bdb62526894275a1b062161b2e" +checksum = "4d12a7f4e95cfe710f1d624fb1210b7d961a5fb05c4fd942f4feab06e61f590e" dependencies = [ "futures-channel", "futures-core", "js-sys", "wasm-bindgen", - "web-sys", ] [[package]] name = "h2" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c9de88456263e249e241fcd211d3954e2c9b0ef7ccfc235a444eb367cae3689" +checksum = "d9f1f717ddc7b2ba36df7e871fd88db79326551d3d6f1fc406fbfd28b582ff8e" dependencies = [ "bytes", "fnv", @@ -940,7 +939,7 @@ dependencies = [ "indexmap", "slab", "tokio", - "tokio-util", + "tokio-util 0.6.9", "tracing", ] @@ -997,9 +996,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" +checksum = "9100414882e15fb7feccb4897e5f0ff0ff1ca7d1a86a23208ada4d7a18e6c6c4" [[package]] name = "httpdate" @@ -1043,9 +1042,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.16" +version = "0.14.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7ec3e62bdc98a2f0393a5048e4c30ef659440ea6e0e572965103e72bd836f55" +checksum = "043f0e083e9901b6cc658a77d1eb86f4fc650bbb977a4337dd63192826aa85dd" dependencies = [ "bytes", "futures-channel", @@ -1056,7 +1055,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 0.4.8", + "itoa 1.0.1", "pin-project-lite", "socket2", "tokio", @@ -1176,9 +1175,9 @@ checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" [[package]] name = "js-sys" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" +checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04" dependencies = [ "wasm-bindgen", ] @@ -1194,9 +1193,9 @@ dependencies = [ [[package]] name = "lalrpop" -version = "0.19.6" +version = "0.19.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15174f1c529af5bf1283c3bc0058266b483a67156f79589fab2a25e23cf8988" +checksum = "852b75a095da6b69da8c5557731c3afd06525d4f655a4fc1c799e2ec8bc4dce4" dependencies = [ "ascii-canvas", "atty", @@ -1217,9 +1216,9 @@ dependencies = [ [[package]] name = "lalrpop-util" -version = "0.19.6" +version = "0.19.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3e58cce361efcc90ba8a0a5f982c741ff86b603495bb15a998412e957dcd278" +checksum = "d6d265705249fe209280676d8f68887859fa42e1d34f342fc05bd47726a5e188" dependencies = [ "regex", ] @@ -1252,9 +1251,9 @@ checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" [[package]] name = "libc" -version = "0.2.112" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" +checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" [[package]] name = "libnghttp2-sys" @@ -1280,9 +1279,9 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" dependencies = [ "scopeguard", ] @@ -1417,9 +1416,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "ntapi" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" dependencies = [ "winapi", ] @@ -1561,7 +1560,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" dependencies = [ "phf_macros", - "phf_shared", + "phf_shared 0.8.0", "proc-macro-hack", ] @@ -1572,7 +1571,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", ] [[package]] @@ -1581,7 +1580,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" dependencies = [ - "phf_shared", + "phf_shared 0.8.0", "rand", ] @@ -1592,7 +1591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", "proc-macro-hack", "proc-macro2", "quote", @@ -1608,6 +1607,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + [[package]] name = "pico-args" version = "0.4.2" @@ -1724,9 +1732,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47aa80447ce4daf1717500037052af176af5d38cc3e571d9ec1c7353fc10c87d" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" dependencies = [ "proc-macro2", ] @@ -1934,9 +1942,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09d3c15d814eda1d6a836f2f2b56a6abc1446c8a34351cb3180d3db92ffe4ce" +checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc" dependencies = [ "bitflags", "core-foundation", @@ -1947,9 +1955,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90dd10c41c6bfc633da6e0c659bd25d31e0791e5974ac42970267d59eba87f7" +checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" dependencies = [ "core-foundation-sys", "libc", @@ -1977,24 +1985,24 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" +checksum = "0486718e92ec9a68fbed73bb5ef687d71103b142595b406835649bebd33f72c7" [[package]] name = "serde" -version = "1.0.133" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.133" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" dependencies = [ "proc-macro2", "quote", @@ -2003,9 +2011,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" dependencies = [ "itoa 1.0.1", "ryu", @@ -2082,9 +2090,9 @@ checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" [[package]] name = "siphasher" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba1eead9e94aa5a2e02de9e7839f96a007f686ae7a1d57c7797774810d24908a" +checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e" [[package]] name = "slab" @@ -2111,9 +2119,9 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "socket2" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" +checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" dependencies = [ "libc", "winapi", @@ -2127,14 +2135,14 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "string_cache" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6" +checksum = "33994d0838dc2d152d17a62adf608a869b5e846b65b389af7f3dbc1de45c5b26" dependencies = [ "lazy_static", "new_debug_unreachable", "parking_lot", - "phf_shared", + "phf_shared 0.10.0", "precomputed-hash", "serde", ] @@ -2146,7 +2154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", "proc-macro2", "quote", ] @@ -2159,9 +2167,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a684ac3dcd8913827e18cd09a68384ee66c1de24157e3c556c9ab16d85695fb7" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" dependencies = [ "proc-macro2", "quote", @@ -2290,9 +2298,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.15.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbbf1c778ec206785635ce8ad57fe52b3009ae9e0c9f574a728f3049d3e55838" +checksum = "0c27a64b625de6d309e8c57716ba93021dccf1b3b5c97edd6d3dd2d2135afc0a" dependencies = [ "bytes", "libc", @@ -2365,6 +2373,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64910e1b9c1901aaf5375561e35b9c057d95ff41a44ede043a03e09279eabaf1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "log", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.5.8" @@ -2382,9 +2404,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" +checksum = "2d8d93354fe2a8e50d5953f5ae2e47a3fc2ef03292e7ea46e3cc38f549525fb9" dependencies = [ "cfg-if", "log", @@ -2395,9 +2417,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e" +checksum = "8276d9a4a3a558d7b7ad5303ad50b53d58264641b82914b7ada36bd762e7a716" dependencies = [ "proc-macro2", "quote", @@ -2406,9 +2428,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" +checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23" dependencies = [ "lazy_static", ] @@ -2545,9 +2567,9 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasm-bindgen" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2555,9 +2577,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca" dependencies = [ "bumpalo", "lazy_static", @@ -2570,9 +2592,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" +checksum = "2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395" dependencies = [ "cfg-if", "js-sys", @@ -2582,9 +2604,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2592,9 +2614,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" dependencies = [ "proc-macro2", "quote", @@ -2605,15 +2627,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" +checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2" [[package]] name = "web-sys" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" +checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index e335106..abf7551 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ build = "build.rs" maintenance = { status = "actively-developed" } [build-dependencies] -clap = {version = "3.0", features = ["cargo"]} +clap = {version = "3.0", features = ["wrap_help", "cargo"]} clap_complete = "3.0" regex = "1" lazy_static = "1.4" @@ -25,8 +25,8 @@ dirs = "4.0" [dependencies] scraper = "0.12" futures = { version = "0.3"} -tokio = { version = "1.15", features = ["full"] } -tokio-util = {version = "0.6", features = ["codec"]} +tokio = { version = "1.16", features = ["full"] } +tokio-util = {version = "0.7", features = ["codec"]} log = "0.4" env_logger = "0.9" reqwest = { version = "0.11", features = ["socks"] } @@ -44,11 +44,11 @@ openssl = { version = "0.10", features = ["vendored"] } dirs = "4.0" regex = "1" crossterm = "0.20" -rlimit = "0.6" +rlimit = "0.6" # todo: watch for 1.0, adds windows rlimit ctrlc = "3.2" fuzzyhash = "0.2.1" anyhow = "1.0" -leaky-bucket = "0.10.0" +leaky-bucket = "0.10.0" # todo: upgrade, will take a little work/thought since api changed [dev-dependencies] tempfile = "3.3" diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index 7a62229..2f10ad3 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -12,7 +12,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { $element = $commandElements[$i] if ($element -isnot [StringConstantExpressionAst] -or $element.StringConstantType -ne [StringConstantType]::BareWord -or - $element.Value.StartsWith('-')) { + $element.Value.StartsWith('-') -or + $element.Value -eq $wordToComplete) { break } $element.Value diff --git a/src/config/tests.rs b/src/config/tests.rs index db975cf..5c8508c 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -104,7 +104,7 @@ fn default_configuration() { assert_eq!(config.methods, vec!["GET"]); assert_eq!(config.data, Vec::<u8>::new()); assert_eq!(config.url_denylist, Vec::<Url>::new()); - assert_eq!(config.dont_collect, Vec::<String>::new()); + assert_eq!(config.dont_collect, ignored_extensions()); assert_eq!(config.filter_regex, Vec::<String>::new()); assert_eq!(config.filter_similar, Vec::<String>::new()); assert_eq!(config.filter_word_count, Vec::<usize>::new()); diff --git a/src/main.rs b/src/main.rs index 20ddba8..aa72e59 100644 --- a/src/main.rs +++ b/src/main.rs @@ -189,22 +189,24 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> { PROGRESS_BAR.join().unwrap(); }); + // cloning an Arc is cheap (it's basically a pointer into the heap) + // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans + // as well as additional directories found as part of recursion + let words = get_unique_words_from_wordlist(&config.wordlist)?; + + if words.len() <= 1 { + // the check is now <= 1 due to the initial empty string added in 2.6.0 + // 1 -> empty wordlist + // 0 -> error + bail!("Did not find any words in {}", config.wordlist); + } + // spawn all event handlers, expect back a JoinHandle and a *Handle to the specific event let (stats_task, stats_handle) = StatsHandler::initialize(config.clone()); let (filters_task, filters_handle) = FiltersHandler::initialize(); let (out_task, out_handle) = TermOutHandler::initialize(config.clone(), stats_handle.tx.clone()); - // cloning an Arc is cheap (it's basically a pointer into the heap) - // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans - // as well as additional directories found as part of recursion - - let words = get_unique_words_from_wordlist(&config.wordlist)?; - - if words.len() == 0 { - bail!("Did not find any words in {}", config.wordlist); - } - // bundle up all the disparate handles and JoinHandles (tasks) let handles = Arc::new(Handles::new( stats_handle, @@ -504,6 +506,9 @@ fn main() -> Result<()> { let future = wrapped_main(config); if let Err(e) = runtime.block_on(future) { eprintln!("{}", e); + // if we've errored out before clean_up can be called (i.e. a wordlist error) need to + // at least spin-down the progress bar + PROGRESS_PRINTER.finish(); }; } diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index 1dc3de7..d899a18 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -303,7 +303,7 @@ fn ferox_scans_serialize() { #[test] /// given a FeroxResponses, test that it serializes into the proper JSON entry fn ferox_responses_serialize() { - let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); let responses = FeroxResponses::default(); @@ -321,7 +321,7 @@ fn ferox_responses_serialize() { /// given a FeroxResponse, test that it serializes into the proper JSON entry fn ferox_response_serialize_and_deserialize() { // deserialize - let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); assert_eq!(response.url().as_str(), "https://nerdcore.com/css"); @@ -351,31 +351,26 @@ fn feroxstates_feroxserialize_implementation() { ); let ferox_scans = FeroxScans::default(); let saved_id = ferox_scan.id.clone(); + ferox_scans.insert(ferox_scan); - ferox_scans - .collected_extensions - .write() - .unwrap() - .insert(String::from("cgi")); + ferox_scans .collected_extensions .write() .unwrap() .insert(String::from("php")); - let config = Configuration::new().unwrap(); + let mut config = Configuration::new().unwrap(); + + config.collect_extensions = true; + let stats = Arc::new(Stats::new(config.json)); - let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); RESPONSES.insert(response); - let ferox_state = FeroxState::new( - Arc::new(ferox_scans), - Arc::new(Configuration::new().unwrap()), - &RESPONSES, - stats, - ); + let ferox_state = FeroxState::new(Arc::new(ferox_scans), Arc::new(config), &RESPONSES, stats); let expected_strs = predicates::str::contains("scans: FeroxScans").and( predicate::str::contains("config: Configuration") @@ -383,7 +378,6 @@ fn feroxstates_feroxserialize_implementation() { .and(predicate::str::contains("nerdcore.com")) .and(predicate::str::contains("/css")) .and(predicate::str::contains("https://spiritanimal.com")) - .and(predicate::str::contains("cgi")) .and(predicate::str::contains("php")), ); @@ -391,7 +385,7 @@ fn feroxstates_feroxserialize_implementation() { let json_state = ferox_state.as_json().unwrap(); - println!("{}", json_state); // for debugging, if the test fails, can see what's going on + println!("echo '{}'|jq", json_state); // for debugging, if the test fails, can see what's going on for expected in [ r#""scans""#, @@ -460,17 +454,15 @@ fn feroxstates_feroxserialize_implementation() { r#""word_count":16"#, r#""headers""#, r#""server":"nginx/1.16.1"#, - r#""collect_extensions":false"#, + r#""collect_extensions":true"#, + r#""collected_extensions":["php"]"#, r#""dont_collect":["tif","tiff","ico","cur","bmp","webp","svg","png","jpg","jpeg","jfif","gif","avif","apng","pjpeg","pjp","mov","wav","mpg","mpeg","mp3","mp4","m4a","m4p","m4v","ogg","webm","ogv","oga","flac","aac","3gp","css","zip","xls","xml","gz","tgz"]"#, - r#""collected_extensions":["cgi","php"]"#, ] .iter() { assert!( - predicates::str::contains(*expected).eval(&json_state), - "{}", - expected - ) + predicates::str::contains(*expected).eval(&json_state) + ); } } diff --git a/tests/test_deny_list.rs b/tests/test_deny_list.rs index 7c23116..b634ea6 100644 --- a/tests/test_deny_list.rs +++ b/tests/test_deny_list.rs @@ -131,8 +131,8 @@ fn deny_list_works_during_recursion() { .not(), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); assert_eq!(js_dev_mock.hits(), 0); assert_eq!(js_dev_file_mock.hits(), 0); @@ -202,9 +202,9 @@ fn deny_list_works_during_recursion_with_inverted_parents() { .not(), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); + assert_eq!(js_dev_mock.hits(), 2); assert_eq!(js_dev_file_mock.hits(), 1); assert_eq!(api_mock.hits(), 0); diff --git a/tests/test_extractor.rs b/tests/test_extractor.rs index bea4685..a8029a1 100644 --- a/tests/test_extractor.rs +++ b/tests/test_extractor.rs @@ -284,11 +284,11 @@ fn extractor_finds_robots_txt_links_and_displays_files_or_scans_directories() { .and(predicate::str::contains("22c")) .and(predicate::str::contains("/misc/LICENSE")) .and(predicate::str::contains("29c")) - .and(predicate::str::contains("200").count(3)), + .and(predicate::str::contains("200").count(4)), ); assert_eq!(mock.hits(), 1); - assert_eq!(mock_dir.hits(), 2); + assert_eq!(mock_dir.hits(), 3); assert_eq!(mock_two.hits(), 1); assert_eq!(mock_file.hits(), 1); assert_eq!(mock_disallowed.hits(), 1); @@ -636,7 +636,7 @@ fn extractor_recurses_into_403_directories() -> Result<(), Box<dyn std::error::E assert_eq!(mock.hits(), 1); assert_eq!(mock_two.hits(), 1); - assert_eq!(forbidden_dir.hits(), 2); + assert_eq!(forbidden_dir.hits(), 3); teardown_tmp_directory(tmp_dir); Ok(()) } diff --git a/tests/test_main.rs b/tests/test_main.rs index ec055b0..d50709c 100644 --- a/tests/test_main.rs +++ b/tests/test_main.rs @@ -25,12 +25,9 @@ fn main_use_root_owned_file_as_wordlist() { .arg("-vvvv") .assert() .success() - .stderr(predicate::str::contains( - "Failed while scanning: Could not open /etc/shadow", - )); + .stderr(predicate::str::contains("Could not open /etc/shadow")); - // connectivity test hits it once - assert_eq!(mock.hits(), 1); + assert_eq!(mock.hits(), 0); } #[test] @@ -53,11 +50,9 @@ fn main_use_empty_wordlist() -> Result<(), Box<dyn std::error::Error>> { .arg("-vvvv") .assert() .success() - .stderr(predicate::str::contains( - "Failed while scanning: Did not find any words in", - )); + .stderr(predicate::str::contains("Did not find any words in")); - assert_eq!(mock.hits(), 1); + assert_eq!(mock.hits(), 0); teardown_tmp_directory(tmp_dir); Ok(()) diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index b6fab74..76a1d3b 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -89,9 +89,9 @@ fn scanner_recursive_request_scan() -> Result<(), Box<dyn std::error::Error>> { .and(predicate::str::is_match("200.*js/dev/file.js").unwrap()), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); + assert_eq!(js_dev_mock.hits(), 2); assert_eq!(js_dev_file_mock.hits(), 1); teardown_tmp_directory(tmp_dir); @@ -153,9 +153,9 @@ fn scanner_recursive_request_scan_using_only_success_responses( .and(predicate::str::is_match("200.*js/dev/file.js").unwrap()), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 3); + assert_eq!(js_prod_mock.hits(), 3); + assert_eq!(js_dev_mock.hits(), 3); assert_eq!(js_dev_file_mock.hits(), 1); teardown_tmp_directory(tmp_dir); @@ -596,7 +596,7 @@ fn scanner_recursion_works_with_403_directories() { assert_eq!(mock.hits(), 1); assert_eq!(found_anyway.hits(), 1); - assert_eq!(forbidden_dir.hits(), 1); + assert_eq!(forbidden_dir.hits(), 3); teardown_tmp_directory(tmp_dir); } From 9a84c5234f4a00983fe9df76c1dd860a4eea6f21 Mon Sep 17 00:00:00 2001 From: epi <epibar052@gmail.com> Date: Mon, 14 Feb 2022 17:32:40 -0600 Subject: [PATCH 14/40] fixed banner tests --- src/main.rs | 34 ++++++++++++-- tests/test_banner.rs | 103 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 121 insertions(+), 16 deletions(-) diff --git a/src/main.rs b/src/main.rs index aa72e59..7e28bb5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use std::io::stdin; use std::{ env::args, fs::{create_dir, remove_file, File}, @@ -503,11 +504,38 @@ fn main() -> Result<()> { .enable_all() .build() { - let future = wrapped_main(config); + let future = wrapped_main(config.clone()); if let Err(e) = runtime.block_on(future) { eprintln!("{}", e); - // if we've errored out before clean_up can be called (i.e. a wordlist error) need to - // at least spin-down the progress bar + + // the code below is to facilitate testing tests/test_banner entries. Since it's an + // integration test, normal test detection (cfg!(test), etc...) won't work. So, in + // the tests themselves, we pass + // `--wordlist /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676` + // and look for that here to print the banner. + // + // this change became a necessity once we moved wordlist parsing out of `scan` and into + // `wrapped_main`. + if e.to_string() + .contains("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + { + // support the handful of tests that use `--stdin` + let targets: Vec<_> = if config.stdin { + stdin().lock().lines().map(|tgt| tgt.unwrap()).collect() + } else { + vec!["http://localhost".to_string()] + }; + + // print the banner to stderr + let std_stderr = stderr(); // std::io::stderr + let banner = Banner::new(&targets, &config); + if !config.quiet && !config.silent { + banner.print_to(std_stderr, config).unwrap(); + } + } + + // if we've encountered an error before clean_up can be called (i.e. a wordlist error) + // we need to at least spin-down the progress bar PROGRESS_PRINTER.finish(); }; } diff --git a/tests/test_banner.rs b/tests/test_banner.rs index a1d3879..3e90757 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -16,10 +16,10 @@ fn banner_prints_proxy() -> Result<(), Box<dyn std::error::Error>> { Command::cargo_bin("feroxbuster") .unwrap() .arg("--stdin") - .arg("--wordlist") - .arg(file.as_os_str()) .arg("--proxy") .arg("http://127.0.0.1:8080") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .pipe_stdin(file) .unwrap() .assert() @@ -57,7 +57,7 @@ fn banner_prints_replay_proxy() -> Result<(), Box<dyn std::error::Error>> { .unwrap() .arg("--stdin") .arg("--wordlist") - .arg(file.as_os_str()) + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .arg("--replay-proxy") .arg("http://127.0.0.1:8081") .pipe_stdin(file) @@ -95,6 +95,8 @@ fn banner_prints_headers() { .arg("stuff:things") .arg("-H") .arg("mostuff:mothings") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -126,6 +128,8 @@ fn banner_prints_denied_urls() { .arg("https://also-not.me") .arg("https:") .arg("/deny.*") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -156,6 +160,8 @@ fn banner_prints_random_agent() { .arg("--url") .arg("http://localhost") .arg("--random-agent") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -192,6 +198,8 @@ fn banner_prints_filter_sizes() { .arg("93") .arg("--filter-words") .arg("94") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -228,6 +236,8 @@ fn banner_prints_queries() { .arg("token=supersecret") .arg("--query") .arg("stuff=things") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -256,6 +266,8 @@ fn banner_prints_status_codes() { .arg("http://localhost") .arg("-s") .arg("201,301,401") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -284,6 +296,8 @@ fn banner_prints_replay_codes() { .arg("200,302") .arg("--replay-proxy") .arg("http://localhost:8081") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -312,6 +326,8 @@ fn banner_prints_output_file() { .arg("http://localhost") .arg("--output") .arg("/super/cool/path") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -326,7 +342,7 @@ fn banner_prints_output_file() { .and(predicate::str::contains("Output File")) .and(predicate::str::contains("/super/cool/path")) .and(predicate::str::contains( - "ERROR: Couldn't start /super/cool/path file handler", + "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676", )) .and(predicate::str::contains("─┴─")), ); @@ -341,6 +357,8 @@ fn banner_prints_insecure() { .arg("--url") .arg("http://localhost") .arg("-k") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -367,6 +385,8 @@ fn banner_prints_redirects() { .arg("--url") .arg("http://localhost") .arg("-r") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -396,6 +416,8 @@ fn banner_prints_extensions() { .arg("js") .arg("--extensions") .arg("pdf") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -422,6 +444,8 @@ fn banner_prints_dont_filter() { .arg("--url") .arg("http://localhost") .arg("--dont-filter") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -448,6 +472,8 @@ fn banner_prints_verbosity_one() { .arg("--url") .arg("http://localhost") .arg("-v") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -474,6 +500,8 @@ fn banner_prints_verbosity_two() { .arg("--url") .arg("http://localhost") .arg("-vv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -500,6 +528,8 @@ fn banner_prints_verbosity_three() { .arg("--url") .arg("http://localhost") .arg("-vvv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -526,6 +556,8 @@ fn banner_prints_verbosity_four() { .arg("--url") .arg("http://localhost") .arg("-vvvv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -552,6 +584,8 @@ fn banner_prints_add_slash() { .arg("--url") .arg("http://localhost") .arg("-f") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -579,6 +613,8 @@ fn banner_prints_infinite_depth() { .arg("http://localhost") .arg("--depth") .arg("0") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -606,6 +642,8 @@ fn banner_prints_recursion_depth() { .arg("http://localhost") .arg("--depth") .arg("343214") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -632,6 +670,8 @@ fn banner_prints_no_recursion() { .arg("--url") .arg("http://localhost") .arg("-n") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -658,10 +698,12 @@ fn banner_doesnt_print() { .arg("--url") .arg("http://localhost") .arg("-q") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr(predicate::str::contains( - "Could not connect to any target provided", + "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676", )); } @@ -674,6 +716,8 @@ fn banner_prints_extract_links() { .arg("--url") .arg("http://localhost") .arg("-e") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -701,6 +745,8 @@ fn banner_prints_scan_limit() { .arg("http://localhost") .arg("-L") .arg("4") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -728,6 +774,8 @@ fn banner_prints_filter_status() { .arg("http://localhost") .arg("-C") .arg("200") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -756,6 +804,8 @@ fn banner_prints_json() { .arg("--json") .arg("--output") .arg("/dev/null") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -783,6 +833,8 @@ fn banner_prints_debug_log() { .arg("http://localhost") .arg("--debug-log") .arg("/dev/null") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -810,6 +862,8 @@ fn banner_prints_filter_regex() { .arg("http://localhost") .arg("--filter-regex") .arg("^ignore me$") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -837,6 +891,8 @@ fn banner_prints_time_limit() { .arg("http://localhost") .arg("--time-limit") .arg("10m") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -864,6 +920,8 @@ fn banner_prints_similarity_filter() { .arg("http://localhost") .arg("--filter-similar-to") .arg("https://somesite.com") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -891,6 +949,8 @@ fn banner_prints_rate_limit() { .arg("http://localhost") .arg("--rate-limit") .arg("6735") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -917,6 +977,8 @@ fn banner_prints_auto_tune() { .arg("--url") .arg("http://localhost") .arg("--auto-tune") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -943,6 +1005,8 @@ fn banner_prints_auto_bail() { .arg("--url") .arg("http://localhost") .arg("--auto-bail") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -969,6 +1033,8 @@ fn banner_doesnt_print_when_silent() { .arg("--url") .arg("http://localhost") .arg("--silent") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -993,6 +1059,8 @@ fn banner_doesnt_print_when_quiet() { .arg("--url") .arg("http://localhost") .arg("--quiet") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1017,18 +1085,19 @@ fn banner_prints_parallel() { .arg("--stdin") .arg("--parallel") .arg("4316") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( predicate::str::contains("─┬─") - .not() - .and(predicate::str::contains("Target Url").not()) - .and(predicate::str::contains("Parallel Scans").not()) - .and(predicate::str::contains("Threads").not()) - .and(predicate::str::contains("Wordlist").not()) - .and(predicate::str::contains("Status Codes").not()) - .and(predicate::str::contains("Timeout (secs)").not()) - .and(predicate::str::contains("User-Agent").not()), + .and(predicate::str::contains("Parallel Scans")) + .and(predicate::str::contains("4316")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")), ); } @@ -1044,6 +1113,8 @@ fn banner_prints_methods() { .arg("PUT") .arg("--methods") .arg("OPTIONS") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1075,6 +1146,8 @@ fn banner_prints_data() { .arg("POST") .arg("--data") .arg("some_data") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1101,6 +1174,8 @@ fn banner_prints_collect_extensions_and_dont_collect_default() { .arg("--url") .arg("http://localhost") .arg("-c") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1131,6 +1206,8 @@ fn banner_prints_collect_extensions_and_dont_collect_with_input() { .arg("--dont-collect") .arg("pdf") .arg("xps") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( From 88a595fd82ff2078332429565389cdcc0d8e31ff Mon Sep 17 00:00:00 2001 From: epi <epibar052@gmail.com> Date: Tue, 15 Feb 2022 07:14:23 -0600 Subject: [PATCH 15/40] added more tests --- Cargo.lock | 135 ++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 1 + src/heuristics.rs | 48 +++++++++++++++ src/response.rs | 67 +++++++++++++++++++++ tests/test_scanner.rs | 44 ++++++++++++++ 5 files changed, 290 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69451f0..3e25afc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anyhow" version = "1.0.53" @@ -26,6 +35,29 @@ dependencies = [ "term", ] +[[package]] +name = "assay" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "238d82aacd5cfde8ccae5c981912be68ec3cfa2d92ff4ce34090be40584c96a6" +dependencies = [ + "assay-proc-macro", + "pretty_assertions", + "rusty-fork", + "tempdir", + "tokio", +] + +[[package]] +name = "assay-proc-macro" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5fd637c6a75fe224b372556511913f12d6ad481fbfef2fb7ecea2f7cb4965d" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "assert-json-diff" version = "2.0.1" @@ -674,6 +706,7 @@ name = "feroxbuster" version = "2.6.0" dependencies = [ "anyhow", + "assay", "assert_cmd", "clap", "clap_complete", @@ -752,6 +785,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "futf" version = "0.1.5" @@ -1506,6 +1545,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + [[package]] name = "parking" version = "2.0.0" @@ -1581,7 +1629,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" dependencies = [ "phf_shared 0.8.0", - "rand", + "rand 0.7.3", ] [[package]] @@ -1715,6 +1763,18 @@ dependencies = [ "termtree", ] +[[package]] +name = "pretty_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d5b548b725018ab5496482b45cb8bef21e9fed1858a6d674e3a8a0f0bb5d50" +dependencies = [ + "ansi_term", + "ctor", + "diff", + "output_vt100", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -1730,6 +1790,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.15" @@ -1739,6 +1805,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.7.3" @@ -1748,7 +1827,7 @@ dependencies = [ "getrandom 0.1.16", "libc", "rand_chacha", - "rand_core", + "rand_core 0.5.1", "rand_hc", "rand_pcg", ] @@ -1760,9 +1839,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.5.1", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.5.1" @@ -1778,7 +1872,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core", + "rand_core 0.5.1", ] [[package]] @@ -1787,7 +1881,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" dependencies = [ - "rand_core", + "rand_core 0.5.1", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", ] [[package]] @@ -1902,6 +2005,18 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.9" @@ -2176,6 +2291,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.3.0" diff --git a/Cargo.toml b/Cargo.toml index abf7551..3e44961 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ tempfile = "3.3" httpmock = "0.6" assert_cmd = "2.0" predicates = "2.1" +assay = "0.1.0" [profile.release] lto = true diff --git a/src/heuristics.rs b/src/heuristics.rs index e999133..1a5fb25 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -407,6 +407,7 @@ impl HeuristicTests { #[cfg(test)] mod tests { use super::*; + use assay::assay; #[test] /// request a unique string of 32bytes * a value returns correct result @@ -417,4 +418,51 @@ mod tests { assert_eq!(tester.unique_string(i).len(), i * 32); } } + + #[assay] + /// `detect_directory_listing` correctly identifies tomcat/python instances + fn detect_directory_listing_finds_tomcat_python() { + let html = "<title>directory listing for /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(matches!( + dirlist_type.unwrap(), + DirListingType::TomCatOrPython + )); + } + + #[assay] + /// `detect_directory_listing` correctly identifies apache instances + fn detect_directory_listing_finds_apache() { + let html = "index of /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(matches!(dirlist_type.unwrap(), DirListingType::Apache)); + } + + #[assay] + /// `detect_directory_listing` correctly identifies ASP.NET instances + fn detect_directory_listing_finds_asp_dot_net() { + let html = "directory listing -- /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(matches!(dirlist_type.unwrap(), DirListingType::AspDotNet)); + } + + #[assay] + /// `detect_directory_listing` returns None when heuristic doesn't match + fn detect_directory_listing_returns_none_as_default() { + let html = "derp listing -- /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(dirlist_type.is_none()); + } } diff --git a/src/response.rs b/src/response.rs index 27938ac..e4084ef 100644 --- a/src/response.rs +++ b/src/response.rs @@ -294,6 +294,11 @@ impl FeroxResponse { // only add extensions to those responses that pass our checks; filtered out // status codes are handled by should_filter, but we need to still check against // the allow list for what we want to keep + #[cfg(test)] + handles + .send_scan_command(Command::AddDiscoveredExtension(extension.to_owned())) + .unwrap_or_default(); + #[cfg(not(test))] handles.send_scan_command(Command::AddDiscoveredExtension(extension.to_owned()))?; } } @@ -652,6 +657,7 @@ impl<'de> Deserialize<'de> for FeroxResponse { #[cfg(test)] mod tests { use super::*; + use crate::config::Configuration; use std::default::Default; #[test] @@ -723,4 +729,65 @@ mod tests { let result = response.reached_max_depth(0, 2, handles); assert!(result); } + + #[test] + /// simple case of a single extension gets parsed correctly and stored on the `FeroxResponse` + fn parse_extension_finds_simple_extension() { + let config = Configuration { + collect_extensions: true, + ..Default::default() + }; + + let (handles, _) = Handles::for_testing(None, Some(Arc::new(config))); + + let url = Url::parse("http://localhost/derp.js").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, Some(String::from("js"))); + } + + #[test] + /// hidden files shouldn't be parsed as extensions, i.e. `/.bash_history` + fn parse_extension_ignores_hidden_files() { + let config = Configuration { + collect_extensions: true, + ..Default::default() + }; + + let (handles, _) = Handles::for_testing(None, Some(Arc::new(config))); + + let url = Url::parse("http://localhost/.bash_history").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, None); + } + + #[test] + /// `parse_extension` should return immediately if `--collect-extensions` isn't used + fn parse_extension_early_returns_based_on_config() { + let (handles, _) = Handles::for_testing(None, None); + + let url = Url::parse("http://localhost/derp.js").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, None); + } } diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 76a1d3b..12a1ce7 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -1,8 +1,12 @@ mod utils; +use assay::assay; use assert_cmd::prelude::*; use httpmock::Method::GET; use httpmock::MockServer; use predicates::prelude::*; +use std::env::temp_dir; +use std::thread::sleep; +use std::time::Duration; use std::{process::Command, time}; use utils::{setup_tmp_directory, teardown_tmp_directory}; @@ -638,3 +642,43 @@ fn rate_limit_enforced_when_specified() { teardown_tmp_directory(tmp_dir); } + +#[assay] +/// ensure that auto-discovered extensions are tracked in statistics and bar lengths are updated +fn add_discovered_extension_updates_bars_and_stats() { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory( + &["LICENSE".to_string(), "stuff.php".to_string()], + "wordlist", + ) + .unwrap(); + + let mock = srv.mock(|when, then| { + when.method(GET).path("/stuff.php"); + then.status(200).body("cool... coolcoolcool"); + }); + + let file_path = tmp_dir.path().join("debug-file.txt"); + + assert!(!file_path.exists()); + + Command::cargo_bin("feroxbuster")? + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .arg("--collect-extensions") + .arg("-vvvv") + .arg("--debug-log") + .arg(file_path.as_os_str()) + .unwrap() + .assert() + .success(); + + let contents = std::fs::read_to_string(file_path).unwrap(); + println!("{}", contents); + assert!(contents.contains("discovered new extension: php")); + assert!(contents.contains("extensions_collected: 1")); + assert!(contents.contains("expected_per_scan: 6")); +} From 3030296d1c0a46ac9470702d166acbdc6283168e Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 16:34:12 -0600 Subject: [PATCH 16/40] added more tests again --- src/heuristics.rs | 9 ++++---- src/scan_manager/scan_container.rs | 36 ++++++++++++++++++++++++++++++ tests/test_scanner.rs | 6 +---- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/heuristics.rs b/src/heuristics.rs index 1a5fb25..37e899e 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -407,7 +407,6 @@ impl HeuristicTests { #[cfg(test)] mod tests { use super::*; - use assay::assay; #[test] /// request a unique string of 32bytes * a value returns correct result @@ -419,7 +418,7 @@ mod tests { } } - #[assay] + #[test] /// `detect_directory_listing` correctly identifies tomcat/python instances fn detect_directory_listing_finds_tomcat_python() { let html = "directory listing for /"; @@ -433,7 +432,7 @@ mod tests { )); } - #[assay] + #[test] /// `detect_directory_listing` correctly identifies apache instances fn detect_directory_listing_finds_apache() { let html = "index of /"; @@ -444,7 +443,7 @@ mod tests { assert!(matches!(dirlist_type.unwrap(), DirListingType::Apache)); } - #[assay] + #[test] /// `detect_directory_listing` correctly identifies ASP.NET instances fn detect_directory_listing_finds_asp_dot_net() { let html = "directory listing -- /"; @@ -455,7 +454,7 @@ mod tests { assert!(matches!(dirlist_type.unwrap(), DirListingType::AspDotNet)); } - #[assay] + #[test] /// `detect_directory_listing` returns None when heuristic doesn't match fn detect_directory_listing_returns_none_as_default() { let html = "derp listing -- /"; diff --git a/src/scan_manager/scan_container.rs b/src/scan_manager/scan_container.rs index fea984d..317b940 100644 --- a/src/scan_manager/scan_container.rs +++ b/src/scan_manager/scan_container.rs @@ -559,3 +559,39 @@ impl FeroxScans { extension_added } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// unknown extension should be added to collected_extensions + fn unknown_extension_is_added_to_collected_extensions() { + let scans = FeroxScans::new(OutputLevel::Default); + + assert_eq!(0, scans.collected_extensions.read().unwrap().len()); + + let added = scans.add_discovered_extension(String::from("js")); + + assert!(added); + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + } + + #[test] + /// known extension should not be added to collected_extensions + fn known_extension_is_added_to_collected_extensions() { + let scans = FeroxScans::new(OutputLevel::Default); + scans + .collected_extensions + .write() + .unwrap() + .insert(String::from("js")); + + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + + let added = scans.add_discovered_extension(String::from("js")); + + assert!(!added); + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + } +} diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 12a1ce7..54db1ae 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -1,12 +1,8 @@ mod utils; -use assay::assay; use assert_cmd::prelude::*; use httpmock::Method::GET; use httpmock::MockServer; use predicates::prelude::*; -use std::env::temp_dir; -use std::thread::sleep; -use std::time::Duration; use std::{process::Command, time}; use utils::{setup_tmp_directory, teardown_tmp_directory}; @@ -643,7 +639,7 @@ fn rate_limit_enforced_when_specified() { teardown_tmp_directory(tmp_dir); } -#[assay] +#[test] /// ensure that auto-discovered extensions are tracked in statistics and bar lengths are updated fn add_discovered_extension_updates_bars_and_stats() { let srv = MockServer::start(); From 801413105dadeef7131df5d4065e44dffdc0f178 Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 16:35:16 -0600 Subject: [PATCH 17/40] clippy --- tests/test_scanner.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 54db1ae..9c75fff 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -649,7 +649,7 @@ fn add_discovered_extension_updates_bars_and_stats() { ) .unwrap(); - let mock = srv.mock(|when, then| { + srv.mock(|when, then| { when.method(GET).path("/stuff.php"); then.status(200).body("cool... coolcoolcool"); }); @@ -658,7 +658,8 @@ fn add_discovered_extension_updates_bars_and_stats() { assert!(!file_path.exists()); - Command::cargo_bin("feroxbuster")? + Command::cargo_bin("feroxbuster") + .unwrap() .arg("--url") .arg(srv.url("/")) .arg("--wordlist") From b21ea9ce32117c3572998ea68ac67dbd46d98a0c Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 20:37:20 -0600 Subject: [PATCH 18/40] removed assay --- Cargo.lock | 139 +++-------------------------------------------------- Cargo.toml | 1 - 2 files changed, 7 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e25afc..03c2154 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,15 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anyhow" version = "1.0.53" @@ -35,29 +26,6 @@ dependencies = [ "term", ] -[[package]] -name = "assay" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "238d82aacd5cfde8ccae5c981912be68ec3cfa2d92ff4ce34090be40584c96a6" -dependencies = [ - "assay-proc-macro", - "pretty_assertions", - "rusty-fork", - "tempdir", - "tokio", -] - -[[package]] -name = "assay-proc-macro" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5fd637c6a75fe224b372556511913f12d6ad481fbfef2fb7ecea2f7cb4965d" -dependencies = [ - "quote", - "syn", -] - [[package]] name = "assert-json-diff" version = "2.0.1" @@ -706,7 +674,6 @@ name = "feroxbuster" version = "2.6.0" dependencies = [ "anyhow", - "assay", "assert_cmd", "clap", "clap_complete", @@ -785,12 +752,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - [[package]] name = "futf" version = "0.1.5" @@ -1290,9 +1251,9 @@ checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" [[package]] name = "libc" -version = "0.2.117" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" +checksum = "06e509672465a0504304aa87f9f176f2b2b716ed8fb105ebe5c02dc6dce96a94" [[package]] name = "libnghttp2-sys" @@ -1545,15 +1506,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "output_vt100" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" -dependencies = [ - "winapi", -] - [[package]] name = "parking" version = "2.0.0" @@ -1629,7 +1581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" dependencies = [ "phf_shared 0.8.0", - "rand 0.7.3", + "rand", ] [[package]] @@ -1763,18 +1715,6 @@ dependencies = [ "termtree", ] -[[package]] -name = "pretty_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d5b548b725018ab5496482b45cb8bef21e9fed1858a6d674e3a8a0f0bb5d50" -dependencies = [ - "ansi_term", - "ctor", - "diff", - "output_vt100", -] - [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -1790,12 +1730,6 @@ dependencies = [ "unicode-xid", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.15" @@ -1805,19 +1739,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - [[package]] name = "rand" version = "0.7.3" @@ -1827,7 +1748,7 @@ dependencies = [ "getrandom 0.1.16", "libc", "rand_chacha", - "rand_core 0.5.1", + "rand_core", "rand_hc", "rand_pcg", ] @@ -1839,24 +1760,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" dependencies = [ "ppv-lite86", - "rand_core 0.5.1", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", -] - -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - [[package]] name = "rand_core" version = "0.5.1" @@ -1872,7 +1778,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.1", + "rand_core", ] [[package]] @@ -1881,16 +1787,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", + "rand_core", ] [[package]] @@ -2005,18 +1902,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" -[[package]] -name = "rusty-fork" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" -dependencies = [ - "fnv", - "quick-error", - "tempfile", - "wait-timeout", -] - [[package]] name = "ryu" version = "1.0.9" @@ -2291,16 +2176,6 @@ dependencies = [ "unicode-xid", ] -[[package]] -name = "tempdir" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" -dependencies = [ - "rand 0.4.6", - "remove_dir_all", -] - [[package]] name = "tempfile" version = "3.3.0" diff --git a/Cargo.toml b/Cargo.toml index 3e44961..abf7551 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,6 @@ tempfile = "3.3" httpmock = "0.6" assert_cmd = "2.0" predicates = "2.1" -assay = "0.1.0" [profile.release] lto = true From 3230f9c2768673f11ed645ef0e12bdf8f976a138 Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 20:42:15 -0600 Subject: [PATCH 19/40] removed client param from logged_request --- src/banner/container.rs | 2 +- src/extractor/container.rs | 2 +- src/filters/init.rs | 3 +-- src/heuristics.rs | 7 ++----- src/scanner/requester.rs | 1 - src/utils.rs | 3 +-- 6 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/banner/container.rs b/src/banner/container.rs index 262bf20..bbd1af9 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -448,7 +448,7 @@ by Ben "epi" Risher {} ver: {}"#, let api_url = Url::parse(url)?; - let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone(), None).await?; + let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?; let body = result.text().await?; let json_response: Value = serde_json::from_str(&body)?; diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 02b767d..7d4b3ad 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -383,7 +383,7 @@ impl<'a> Extractor<'a> { // make the request and store the response let new_response = - logged_request(&new_url, DEFAULT_METHOD, None, self.handles.clone(), None).await?; + logged_request(&new_url, DEFAULT_METHOD, None, self.handles.clone()).await?; let new_ferox_response = FeroxResponse::from( new_response, diff --git a/src/filters/init.rs b/src/filters/init.rs index 41ccd48..13e6c44 100644 --- a/src/filters/init.rs +++ b/src/filters/init.rs @@ -72,8 +72,7 @@ pub async fn initialize(handles: Arc) -> Result<()> { let url = skip_fail!(Url::parse(similarity_filter)); // attempt to request the given url - let resp = - skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone(), None).await); + let resp = skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await); // if successful, create a filter based on the response's body let mut fr = FeroxResponse::from( diff --git a/src/heuristics.rs b/src/heuristics.rs index 37e899e..a297200 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -219,7 +219,6 @@ impl HeuristicTests { method, data, self.handles.clone(), - None, ) .await?; @@ -280,8 +279,7 @@ impl HeuristicTests { let url = FeroxUrl::from_string(target_url, self.handles.clone()); let request = skip_fail!(url.format("", None)); - let result = - logged_request(&request, DEFAULT_METHOD, None, self.handles.clone(), None).await; + let result = logged_request(&request, DEFAULT_METHOD, None, self.handles.clone()).await; match result { Ok(_) => { @@ -335,8 +333,7 @@ impl HeuristicTests { let url = FeroxUrl::from_string(&tgt, self.handles.clone()); let request = url.format("", None)?; - let result = - logged_request(&request, DEFAULT_METHOD, None, self.handles.clone(), None).await?; + let result = logged_request(&request, DEFAULT_METHOD, None, self.handles.clone()).await?; let ferox_response = FeroxResponse::from( result, diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index a8cd629..4172392 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -337,7 +337,6 @@ impl Requester { method.as_str(), Some(self.handles.config.data.as_slice()), self.handles.clone(), - None, ) .await?; diff --git a/src/utils.rs b/src/utils.rs index 15264f3..e78c1ad 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -100,9 +100,8 @@ pub async fn logged_request( method: &str, data: Option<&[u8]>, handles: Arc, - client: Option<&Client>, ) -> Result { - let client = client.unwrap_or(&handles.config.client); + let client = &handles.config.client; let level = handles.config.output_level; let tx_stats = handles.stats.tx.clone(); From 7f0dcb6b46ac00c27dbff4913548f2d70b7ba8a0 Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 20:51:01 -0600 Subject: [PATCH 20/40] lint --- src/scanner/ferox_scanner.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index 8320198..3c1d214 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -75,7 +75,6 @@ impl FeroxScanner { log::info!("Starting scan against: {}", self.target_url); let mut scan_timer = Instant::now(); - // let mut dirlist_type = None; if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) { // check for robots.txt (cannot be in sub-directories, so limited to Initial) From 8d639a17e4f06b8e992828446863ad9f92fbb364 Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 15 Feb 2022 21:03:02 -0600 Subject: [PATCH 21/40] removed read_body param from FeroxResponse::from --- src/extractor/container.rs | 4 +--- src/extractor/tests.rs | 10 ++-------- src/filters/init.rs | 1 - src/heuristics.rs | 4 +--- src/response.rs | 22 ++++++---------------- src/scanner/requester.rs | 1 - src/utils.rs | 14 -------------- 7 files changed, 10 insertions(+), 46 deletions(-) diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 7d4b3ad..a696f4b 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -12,7 +12,7 @@ use crate::{ StatField::{LinksExtracted, TotalExpected}, }, url::FeroxUrl, - utils::{logged_request, make_request, should_deny_url, should_read_body}, + utils::{logged_request, make_request, should_deny_url}, ExtractionResult, DEFAULT_METHOD, }; use anyhow::{bail, Context, Result}; @@ -389,7 +389,6 @@ impl<'a> Extractor<'a> { new_response, url, DEFAULT_METHOD, - should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; @@ -565,7 +564,6 @@ impl<'a> Extractor<'a> { response, &self.url, DEFAULT_METHOD, - should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 2706916..3ef95c6 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -240,14 +240,8 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain() let (handles, _rx) = Handles::for_testing(None, None); let handles = Arc::new(handles); - let ferox_response = FeroxResponse::from( - response, - &srv.url(""), - DEFAULT_METHOD, - true, - OutputLevel::Default, - ) - .await; + let ferox_response = + FeroxResponse::from(response, &srv.url(""), DEFAULT_METHOD, OutputLevel::Default).await; let extractor = Extractor { links_regex: Regex::new(LINKFINDER_REGEX).unwrap(), diff --git a/src/filters/init.rs b/src/filters/init.rs index 13e6c44..604b552 100644 --- a/src/filters/init.rs +++ b/src/filters/init.rs @@ -79,7 +79,6 @@ pub async fn initialize(handles: Arc) -> Result<()> { resp, similarity_filter, DEFAULT_METHOD, - true, handles.config.output_level, ) .await; diff --git a/src/heuristics.rs b/src/heuristics.rs index a297200..706a76e 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -13,7 +13,7 @@ use crate::{ response::FeroxResponse, skip_fail, url::FeroxUrl, - utils::{ferox_print, fmt_err, logged_request, should_read_body, status_colorizer}, + utils::{ferox_print, fmt_err, logged_request, status_colorizer}, DEFAULT_METHOD, }; @@ -234,7 +234,6 @@ impl HeuristicTests { response, &target.target, method, - should_read_body(&self.handles.config), self.handles.config.output_level, ) .await; @@ -339,7 +338,6 @@ impl HeuristicTests { result, &url.target, DEFAULT_METHOD, - true, self.handles.config.output_level, ) .await; diff --git a/src/response.rs b/src/response.rs index e4084ef..6b08b00 100644 --- a/src/response.rs +++ b/src/response.rs @@ -209,7 +209,6 @@ impl FeroxResponse { response: Response, original_url: &str, method: &str, - read_body: bool, output_level: OutputLevel, ) -> Self { let url = response.url().clone(); @@ -217,21 +216,12 @@ impl FeroxResponse { let headers = response.headers().clone(); let content_length = response.content_length().unwrap_or(0); - let text = if read_body { - // .text() consumes the response, must be called last - // additionally, --extract-links is currently the only place we use the body of the - // response, so we forego the processing if not performing extraction - match response.text().await { - // await the response's body - Ok(text) => text, - Err(e) => { - log::warn!("Could not parse body from response: {}", e); - String::new() - } - } - } else { - String::new() - }; + // .text() consumes the response, must be called last + let text = response + .text() + .await + .with_context(|| "Could not parse body from response") + .unwrap_or_default(); let line_count = text.lines().count(); let word_count = text.lines().map(|s| s.split_whitespace().count()).sum(); diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index 4172392..43aa615 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -366,7 +366,6 @@ impl Requester { response, &self.target_url, method, - true, // lines/words never gets populated without true self.handles.config.output_level, ) .await; diff --git a/src/utils.rs b/src/utils.rs index e78c1ad..aa97907 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -490,20 +490,6 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String { filename } -/// check for runtime options that necessitate reading the response body -pub fn should_read_body(config: &Configuration) -> bool { - log::trace!("enter: should_read_body(running config...)"); - - let result = config.extract_links - || !config.filter_line_count.is_empty() - || !config.filter_word_count.is_empty() - || !config.filter_regex.is_empty() - || !config.filter_similar.is_empty(); - - log::trace!("exit: should_read_body -> {}", result); - result -} - #[cfg(test)] mod tests { use super::*; From d13bce226178fca007fc418dc8880d903158678e Mon Sep 17 00:00:00 2001 From: epi Date: Wed, 16 Feb 2022 17:16:12 -0600 Subject: [PATCH 22/40] implemented/tested logic for collecting backups --- src/event_handlers/outputs.rs | 274 +++++++++++++++++++++++++++------- tests/test_scanner.rs | 88 +++++++++++ 2 files changed, 312 insertions(+), 50 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index a855712..3c8cbc5 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -2,11 +2,13 @@ use super::Command::AddToUsizeField; use super::*; use anyhow::{Context, Result}; +use futures::future::{BoxFuture, FutureExt}; use tokio::sync::{mpsc, oneshot}; use crate::{ config::Configuration, progress::PROGRESS_PRINTER, + response::FeroxResponse, scanner::RESPONSES, send_command, skip_fail, statistics::StatField::ResourcesDiscovered, @@ -15,6 +17,7 @@ use crate::{ CommandReceiver, CommandSender, Joiner, }; use std::sync::Arc; +use url::Url; #[derive(Debug)] /// Container for terminal output transmitter @@ -185,56 +188,9 @@ impl TermOutHandler { while let Some(command) = self.receiver.recv().await { match command { - Command::Report(mut resp) => { - let contains_sentry = - self.config.status_codes.contains(&resp.status().as_u16()); - let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown - let should_process_response = contains_sentry && unknown_sentry; - - if should_process_response { - // print to stdout - ferox_print(&resp.as_str(), &PROGRESS_PRINTER); - - send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1)); - - if self.file_task.is_some() { - // -o used, need to send the report to be written out to disk - self.tx_file - .send(Command::Report(resp.clone())) - .with_context(|| { - fmt_err(&format!("Could not send {} to file handler", resp)) - })?; - } - } - log::trace!("report complete: {}", resp.url()); - - if self.config.replay_client.is_some() && should_process_response { - // replay proxy specified/client created and this response's status code is one that - // should be replayed; not using logged_request due to replay proxy client - make_request( - self.config.replay_client.as_ref().unwrap(), - resp.url(), - resp.method().as_str(), - None, - self.config.output_level, - &self.config, - tx_stats.clone(), - ) - .await - .with_context(|| "Could not replay request through replay proxy")?; - } - - if should_process_response { - // add response to RESPONSES for serialization in case of ctrl+c - // placed all by its lonesome like this so that RESPONSES can take ownership - // of the FeroxResponse - - // before ownership is transferred, there's no real reason to keep the body anymore - // so we can free that piece of data, reducing memory usage - resp.drop_text(); - - RESPONSES.insert(*resp); - } + Command::Report(resp) => { + // todo add enum to replace bool + self.process_response(tx_stats.clone(), resp, false).await?; } Command::Sync(sender) => { sender.send(true).unwrap_or_default(); @@ -251,6 +207,139 @@ impl TermOutHandler { log::trace!("exit: start"); Ok(()) } + + /// todo + fn process_response( + &self, + tx_stats: CommandSender, + mut resp: Box, + recursive_call: bool, + ) -> BoxFuture<'_, Result<()>> { + async move { + let contains_sentry = self.config.status_codes.contains(&resp.status().as_u16()); + let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown + let should_process_response = contains_sentry && unknown_sentry; + + if should_process_response { + // print to stdout + ferox_print(&resp.as_str(), &PROGRESS_PRINTER); + + send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1)); + + if self.file_task.is_some() { + // -o used, need to send the report to be written out to disk + self.tx_file + .send(Command::Report(resp.clone())) + .with_context(|| { + fmt_err(&format!("Could not send {} to file handler", resp)) + })?; + } + } + log::trace!("report complete: {}", resp.url()); + + if self.config.replay_client.is_some() && should_process_response { + // replay proxy specified/client created and this response's status code is one that + // should be replayed; not using logged_request due to replay proxy client + make_request( + self.config.replay_client.as_ref().unwrap(), + resp.url(), + resp.method().as_str(), + None, + self.config.output_level, + &self.config, + tx_stats.clone(), + ) + .await + .with_context(|| "Could not replay request through replay proxy")?; + } + + // todo update if statement to include --collect-backups + if should_process_response && !recursive_call { + let backup_urls = self.generate_backup_urls(&resp).await; + for backup_url in &backup_urls { + let backup_response = make_request( + &self.config.client, + backup_url, + resp.method().as_str(), + None, + self.config.output_level, + &self.config, + tx_stats.clone(), + ) + .await + .with_context(|| { + format!("Could not request backup of {}", resp.url().as_str()) + })?; + let mut ferox_response = FeroxResponse::from( + backup_response, + resp.url().as_str(), + resp.method().as_str(), + resp.output_level, + ) + .await; + self.process_response(tx_stats.clone(), Box::new(ferox_response), true) + .await?; + } + } + + if should_process_response { + // add response to RESPONSES for serialization in case of ctrl+c + // placed all by its lonesome like this so that RESPONSES can take ownership + // of the FeroxResponse + + // before ownership is transferred, there's no real reason to keep the body anymore + // so we can free that piece of data, reducing memory usage + resp.drop_text(); + + RESPONSES.insert(*resp); + } + log::trace!("exit: process_response"); + Ok(()) + } + .boxed() + } + + /// internal helper to stay DRY + fn add_new_url_to_vec(&self, url: &Url, new_name: &str, urls: &mut Vec) { + let mut new_url = url.clone(); + new_url.set_path(&new_name); + urls.push(new_url); + } + + /// todo + async fn generate_backup_urls(&self, response: &FeroxResponse) -> Vec { + // todo + let mut urls = vec![]; + let url = response.url(); + + // confirmed safe: see src/response.rs for comments + let filename = url.path_segments().unwrap().last().unwrap(); + + if !filename.is_empty() { + // append rules + for suffix in ["~", ".bak", ".bak2", ".old", ".1"] { + self.add_new_url_to_vec(&url, &format!("{}{}", filename, suffix), &mut urls); + } + + // vim swap rule + self.add_new_url_to_vec(&url, &format!(".{}.swp", filename), &mut urls); + + // replace original extension rule + let parts: Vec<_> = filename + .split('.') + // keep things like /.bash_history out of results + .filter(|part| !part.is_empty()) + .collect(); + + if parts.len() > 1 { + // filename + at least one extension, i.e. whatever.js becomes ["whatever", "js"] + self.add_new_url_to_vec(url, &format!("{}.bak", parts.first().unwrap()), &mut urls); + } + } + + // todo + urls + } } #[cfg(test)] @@ -286,4 +375,89 @@ mod tests { println!("{:?}", toh); tx.send(Command::Exit).unwrap(); } + + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] + /// when the feroxresponse's url contains an extension, there should be 7 urls returned + async fn generate_backup_urls_creates_correct_urls_when_extension_present() { + let (tx, rx) = mpsc::unbounded_channel::(); + let (tx_file, _) = mpsc::unbounded_channel::(); + let config = Arc::new(Configuration::new().unwrap()); + + let toh = TermOutHandler { + config, + file_task: None, + receiver: rx, + tx_file, + }; + + let expected: Vec<_> = vec![ + "derp.php~", + "derp.php.bak", + "derp.php.bak2", + "derp.php.old", + "derp.php.1", + ".derp.php.swp", + "derp.bak", + ]; + + let mut fr = FeroxResponse::default(); + fr.set_url("http://localhost/derp.php"); + + let urls = toh.generate_backup_urls(&fr).await; + + let paths: Vec<_> = urls + .iter() + .map(|url| url.path_segments().unwrap().last().unwrap()) + .collect(); + + assert_eq!(urls.len(), 7); + + for path in paths { + assert!(expected.contains(&path)); + } + + tx.send(Command::Exit).unwrap(); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] + /// when the feroxresponse's url doesn't contain an extension, there should be 6 urls returned + async fn generate_backup_urls_creates_correct_urls_when_extension_not_present() { + let (tx, rx) = mpsc::unbounded_channel::(); + let (tx_file, _) = mpsc::unbounded_channel::(); + let config = Arc::new(Configuration::new().unwrap()); + + let toh = TermOutHandler { + config, + file_task: None, + receiver: rx, + tx_file, + }; + + let expected: Vec<_> = vec![ + "derp~", + "derp.bak", + "derp.bak2", + "derp.old", + "derp.1", + ".derp.swp", + ]; + + let mut fr = FeroxResponse::default(); + fr.set_url("http://localhost/derp"); + + let urls = toh.generate_backup_urls(&fr).await; + + let paths: Vec<_> = urls + .iter() + .map(|url| url.path_segments().unwrap().last().unwrap()) + .collect(); + + assert_eq!(urls.len(), 6); + + for path in paths { + assert!(expected.contains(&path)); + } + + tx.send(Command::Exit).unwrap(); + } } diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 9c75fff..cdef22a 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -679,3 +679,91 @@ fn add_discovered_extension_updates_bars_and_stats() { assert!(contents.contains("extensions_collected: 1")); assert!(contents.contains("expected_per_scan: 6")); } + +#[test] +/// send a request to a 200 file, expect pre-configured backup collection rules to be applied +/// and then requested +fn collect_backups_makes_appropriate_requests() { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory(&["LICENSE.txt".to_string()], "wordlist").unwrap(); + + let mock = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt"); + then.status(200).body("this is a test"); + }); + + let tilde_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt~"); + then.status(200); + }); + + let bak_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt.bak"); + then.status(200); + }); + + let bak2_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt.bak2"); + then.status(200); + }); + + let old_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt.old"); + then.status(200); + }); + + let dot1_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt.1"); + then.status(200); + }); + + let replaced_bak_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.bak"); + then.status(200); + }); + + let vim_swap_backup = srv.mock(|when, then| { + when.method(GET).path("/.LICENSE.txt.swp"); + then.status(200); + }); + + // todo add double backup style tests for all variants + let tilde_double_backup = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.txt~~"); + then.status(404); + }); + + // todo add --collect-backups flag when available + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .unwrap(); + + // todo maybe add in some stdout checks + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE.txt").and(predicate::str::contains("/LICENSE.txt~")), + ); + // .and(predicate::str::contains("403")) + // .and(predicate::str::contains("53c")) + // .and(predicate::str::contains("14c")) + // .and(predicate::str::contains("0c")) + // .and(predicate::str::contains("ignored").count(2)) + // .and(predicate::str::contains("/ignored/LICENSE")), + // ); + + assert_eq!(mock.hits(), 1); + assert_eq!(tilde_backup.hits(), 1); + assert_eq!(tilde_double_backup.hits(), 0); // shouldn't request backups of backups + + assert_eq!(bak_backup.hits(), 1); + assert_eq!(bak2_backup.hits(), 1); + assert_eq!(old_backup.hits(), 1); + assert_eq!(dot1_backup.hits(), 1); + assert_eq!(replaced_bak_backup.hits(), 1); + assert_eq!(vim_swap_backup.hits(), 1); + + teardown_tmp_directory(tmp_dir); +} From 368035833c70e01b0181d358e8f79d0250a6d8b0 Mon Sep 17 00:00:00 2001 From: epi Date: Wed, 16 Feb 2022 20:44:07 -0600 Subject: [PATCH 23/40] fixed up implementation/removed todo items --- src/event_handlers/outputs.rs | 59 ++++++++++++++---- tests/test_scanner.rs | 110 ++++++++++++++++------------------ 2 files changed, 98 insertions(+), 71 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index 3c8cbc5..d040574 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -5,6 +5,7 @@ use anyhow::{Context, Result}; use futures::future::{BoxFuture, FutureExt}; use tokio::sync::{mpsc, oneshot}; +use crate::statistics::StatField::TotalExpected; use crate::{ config::Configuration, progress::PROGRESS_PRINTER, @@ -19,6 +20,16 @@ use crate::{ use std::sync::Arc; use url::Url; +#[derive(Debug, Copy)] +/// Simple enum for semantic clarity around calling expectations for `process_response` +enum ProcessResponseCall { + /// call should allow recursion + Recursive, + + /// call should not allow recursion + NotRecursive, +} + #[derive(Debug)] /// Container for terminal output transmitter pub struct TermOutHandle { @@ -189,8 +200,8 @@ impl TermOutHandler { while let Some(command) = self.receiver.recv().await { match command { Command::Report(resp) => { - // todo add enum to replace bool - self.process_response(tx_stats.clone(), resp, false).await?; + self.process_response(tx_stats.clone(), resp, ProcessResponseCall::Recursive) + .await?; } Command::Sync(sender) => { sender.send(true).unwrap_or_default(); @@ -208,13 +219,16 @@ impl TermOutHandler { Ok(()) } - /// todo + /// upon receiving a `FeroxResponse` from the mpsc, handle printing, sending to the replay + /// proxy, checking for backups of the `FeroxResponse`'s url, and tracking the response. fn process_response( &self, tx_stats: CommandSender, mut resp: Box, - recursive_call: bool, + call_type: ProcessResponseCall, ) -> BoxFuture<'_, Result<()>> { + log::trace!("enter: generate_backup_urls({:?})", response); + async move { let contains_sentry = self.config.status_codes.contains(&resp.status().as_u16()); let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown @@ -254,8 +268,12 @@ impl TermOutHandler { } // todo update if statement to include --collect-backups - if should_process_response && !recursive_call { + if should_process_response && matches!(call_type, ProcessResponseCall::Recursive) { let backup_urls = self.generate_backup_urls(&resp).await; + + // need to manually adjust stats + send_command!(tx_stats, AddToUsizeField(TotalExpected, backup_urls.len())); + for backup_url in &backup_urls { let backup_response = make_request( &self.config.client, @@ -270,15 +288,21 @@ impl TermOutHandler { .with_context(|| { format!("Could not request backup of {}", resp.url().as_str()) })?; - let mut ferox_response = FeroxResponse::from( + + let ferox_response = FeroxResponse::from( backup_response, resp.url().as_str(), resp.method().as_str(), resp.output_level, ) .await; - self.process_response(tx_stats.clone(), Box::new(ferox_response), true) - .await?; + + self.process_response( + tx_stats.clone(), + Box::new(ferox_response), + ProcessResponseCall::NotRecursive, + ) + .await?; } } @@ -306,9 +330,22 @@ impl TermOutHandler { urls.push(new_url); } - /// todo + /// given a `FeroxResponse`, generate either 6 or 7 urls that are likely backups of the + /// original. + /// + /// example: + /// original: LICENSE.txt + /// backups: + /// - LICENSE.txt~ + /// - LICENSE.txt.bak + /// - LICENSE.txt.bak2 + /// - LICENSE.txt.old + /// - LICENSE.txt.1 + /// - LICENSE.bak + /// - .LICENSE.txt.swp async fn generate_backup_urls(&self, response: &FeroxResponse) -> Vec { - // todo + log::trace!("enter: generate_backup_urls({:?})", response); + let mut urls = vec![]; let url = response.url(); @@ -337,7 +374,7 @@ impl TermOutHandler { } } - // todo + log::trace!("exit: generate_backup_urls -> {:?}", urls); urls } } diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index cdef22a..c06919f 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -687,51 +687,45 @@ fn collect_backups_makes_appropriate_requests() { let srv = MockServer::start(); let (tmp_dir, file) = setup_tmp_directory(&["LICENSE.txt".to_string()], "wordlist").unwrap(); - let mock = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt"); - then.status(200).body("this is a test"); - }); + let valid_paths = vec![ + "/LICENSE.txt", + "/LICENSE.txt~", + "/LICENSE.txt.bak", + "/LICENSE.txt.bak2", + "/LICENSE.txt.old", + "/LICENSE.txt.1", + "/LICENSE.bak", + "/.LICENSE.txt.swp", + ]; - let tilde_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt~"); - then.status(200); - }); + let valid_mocks: Vec<_> = valid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200).body("this is a valid test"); + }) + }) + .collect(); - let bak_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt.bak"); - then.status(200); - }); + let invalid_paths: Vec<_> = vec![ + "/LICENSE.txt~~", + "/LICENSE.txt.bak.bak", + "/LICENSE.txt.bak2.bak2", + "/LICENSE.txt.old.old", + "/LICENSE.txt.1.1", + "/..LICENSE.txt.swp.swp", + ]; - let bak2_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt.bak2"); - then.status(200); - }); - - let old_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt.old"); - then.status(200); - }); - - let dot1_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt.1"); - then.status(200); - }); - - let replaced_bak_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.bak"); - then.status(200); - }); - - let vim_swap_backup = srv.mock(|when, then| { - when.method(GET).path("/.LICENSE.txt.swp"); - then.status(200); - }); - - // todo add double backup style tests for all variants - let tilde_double_backup = srv.mock(|when, then| { - when.method(GET).path("/LICENSE.txt~~"); - then.status(404); - }); + let invalid_mocks: Vec<_> = invalid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200).body("this is an invalid test"); + }) + }) + .collect(); // todo add --collect-backups flag when available let cmd = Command::cargo_bin("feroxbuster") @@ -742,28 +736,24 @@ fn collect_backups_makes_appropriate_requests() { .arg(file.as_os_str()) .unwrap(); - // todo maybe add in some stdout checks cmd.assert().success().stdout( - predicate::str::contains("/LICENSE.txt").and(predicate::str::contains("/LICENSE.txt~")), + predicate::str::contains("/LICENSE.txt") + .and(predicate::str::contains("/LICENSE.txt~")) + .and(predicate::str::contains("/LICENSE.txt.bak")) + .and(predicate::str::contains("/LICENSE.txt.bak2")) + .and(predicate::str::contains("/LICENSE.txt.old")) + .and(predicate::str::contains("/LICENSE.txt.1")) + .and(predicate::str::contains("/LICENSE.bak")) + .and(predicate::str::contains("/.LICENSE.txt.swp")), ); - // .and(predicate::str::contains("403")) - // .and(predicate::str::contains("53c")) - // .and(predicate::str::contains("14c")) - // .and(predicate::str::contains("0c")) - // .and(predicate::str::contains("ignored").count(2)) - // .and(predicate::str::contains("/ignored/LICENSE")), - // ); - assert_eq!(mock.hits(), 1); - assert_eq!(tilde_backup.hits(), 1); - assert_eq!(tilde_double_backup.hits(), 0); // shouldn't request backups of backups + for valid_mock in valid_mocks { + assert_eq!(valid_mock.hits(), 1); + } - assert_eq!(bak_backup.hits(), 1); - assert_eq!(bak2_backup.hits(), 1); - assert_eq!(old_backup.hits(), 1); - assert_eq!(dot1_backup.hits(), 1); - assert_eq!(replaced_bak_backup.hits(), 1); - assert_eq!(vim_swap_backup.hits(), 1); + for invalid_mock in invalid_mocks { + assert_eq!(invalid_mock.hits(), 0); + } teardown_tmp_directory(tmp_dir); } From 02448e9834a9eb8243b10fd2264fa79eac7d8a2b Mon Sep 17 00:00:00 2001 From: epi Date: Wed, 16 Feb 2022 21:25:20 -0600 Subject: [PATCH 24/40] dirlist extraction gated behind -e --- src/event_handlers/outputs.rs | 4 +-- src/heuristics.rs | 20 +++++++++++++- src/scanner/ferox_scanner.rs | 51 +++++++++++++++++++---------------- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index d040574..e7f124d 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -20,7 +20,7 @@ use crate::{ use std::sync::Arc; use url::Url; -#[derive(Debug, Copy)] +#[derive(Debug, Copy, Clone)] /// Simple enum for semantic clarity around calling expectations for `process_response` enum ProcessResponseCall { /// call should allow recursion @@ -227,7 +227,7 @@ impl TermOutHandler { mut resp: Box, call_type: ProcessResponseCall, ) -> BoxFuture<'_, Result<()>> { - log::trace!("enter: generate_backup_urls({:?})", response); + log::trace!("enter: process_response({:?}, {:?})", resp, call_type); async move { let contains_sentry = self.config.status_codes.contains(&resp.status().as_u16()); diff --git a/src/heuristics.rs b/src/heuristics.rs index 706a76e..9b59569 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -348,7 +348,25 @@ impl HeuristicTests { let dirlist_type = self.detect_directory_listing(&html); if dirlist_type.is_some() { - log::debug!("directory listing heuristic detected: {:?}", dirlist_type); + // todo give some extra consideration to what colors should be used + // todo if this needs to go to a file, has to be sent to the handler + let msg = format!( + "{} {:>8} {:>9} {:>9} {:>9} {} => {}\n", + style("DIR").bright().blue(), + DEFAULT_METHOD, + "-", + "-", + "-", + ferox_response.url().as_str(), + style("Directory listing").green().bright(), + ); + ferox_print(&msg, &PROGRESS_PRINTER); + + log::info!( + "directory listing detected: {} ({:?})", + target_url, + dirlist_type.unwrap() + ); let result = DirListingResult { dir_list_type: dirlist_type, diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index 3c1d214..d41c524 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -129,37 +129,42 @@ impl FeroxScanner { let dirlist_result = dirlist_result.unwrap(); // at this point, we have a DirListingType, and it's not the None variant // which means we found directory listing based on the heuristic; now we need - // to process the links that are available - // Directory listing heuristic detection to not continue scanning + // to process the links that are available if --extract-links was used - let mut extractor = ExtractorBuilder::default() - .response(&dirlist_result.response) - .target(ExtractionTarget::DirectoryListing) - .url(&self.target_url) - .handles(self.handles.clone()) - .build()?; + if self.handles.config.extract_links { + let mut extractor = ExtractorBuilder::default() + .response(&dirlist_result.response) + .target(ExtractionTarget::DirectoryListing) + .url(&self.target_url) + .handles(self.handles.clone()) + .build()?; - let result = extractor.extract_from_dir_listing().await?; + let result = extractor.extract_from_dir_listing().await?; - extractor.request_links(result).await?; + extractor.request_links(result).await?; - log::trace!("exit: scan_url -> Directory listing heuristic"); + log::trace!("exit: scan_url -> Directory listing heuristic"); - self.handles.stats.send(AddToF64Field( - DirScanTimes, - scan_timer.elapsed().as_secs_f64(), - ))?; + self.handles.stats.send(AddToF64Field( + DirScanTimes, + scan_timer.elapsed().as_secs_f64(), + ))?; - self.handles.stats.send(SubtractFromUsizeField( - TotalExpected, - progress_bar.length() as usize, - ))?; + self.handles.stats.send(SubtractFromUsizeField( + TotalExpected, + progress_bar.length() as usize, + ))?; + } + + let mut message = format!("=> {}", style("Directory listing").blue().bright()); + + if !self.handles.config.extract_links { + message + .push_str(&format!(" (add {} to scan)", style("-e").bright().yellow())) + } progress_bar.reset_eta(); - progress_bar.finish_with_message(&format!( - "=> {}", - style("Directory listing").blue().bright() - )); + progress_bar.finish_with_message(&message); ferox_scan.finish()?; From 44693a3498e3912fef0fdcde5919c2305191984a Mon Sep 17 00:00:00 2001 From: epi Date: Thu, 17 Feb 2022 19:44:29 -0600 Subject: [PATCH 25/40] added cli/banner/tests etc... --- ferox-config.toml.example | 1 + shell_completions/_feroxbuster | 4 +++- shell_completions/_feroxbuster.ps1 | 4 +++- shell_completions/feroxbuster.bash | 2 +- shell_completions/feroxbuster.elv | 4 +++- src/banner/container.rs | 10 +++++++++ src/config/container.rs | 11 ++++++++++ src/config/tests.rs | 9 ++++++++ src/event_handlers/command.rs | 4 ++++ src/event_handlers/outputs.rs | 14 ++++++++++-- src/heuristics.rs | 34 +++++++++++++++++------------- src/message.rs | 6 +++--- src/parser.rs | 13 +++++++++--- src/scan_manager/tests.rs | 1 + tests/test_banner.rs | 31 +++++++++++++++++++++++++-- tests/test_scanner.rs | 2 +- 16 files changed, 120 insertions(+), 30 deletions(-) diff --git a/ferox-config.toml.example b/ferox-config.toml.example index b0cf807..378625c 100644 --- a/ferox-config.toml.example +++ b/ferox-config.toml.example @@ -30,6 +30,7 @@ # random_agent = false # redirects = true # insecure = true +# collect_backups = true # collect_extensions = true # extensions = ["php", "html"] # dont_collect = ["png", "gif", "jpg", "jpeg"] diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index 412c5f7..4cfba6c 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -90,8 +90,10 @@ _feroxbuster() { '--auto-bail[Automatically stop scanning when an excessive amount of errors are encountered]' \ '-D[Don'\''t auto-filter wildcard responses]' \ '--dont-filter[Don'\''t auto-filter wildcard responses]' \ -'-c[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ +'-E[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ '--collect-extensions[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ +'-B[Automatically request likely backup extensions for "found" urls]' \ +'--collect-backups[Automatically request likely backup extensions for "found" urls]' \ '(--silent)*-v[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \ '(--silent)*--verbosity[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \ '(-q --quiet)--silent[Only print URLs + turn off logging (good for piping a list of urls to other commands)]' \ diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index 2f10ad3..6c5576d 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -96,8 +96,10 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--auto-bail', 'auto-bail', [CompletionResultType]::ParameterName, 'Automatically stop scanning when an excessive amount of errors are encountered') [CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') [CompletionResult]::new('--dont-filter', 'dont-filter', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') - [CompletionResult]::new('-c', 'c', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') + [CompletionResult]::new('-E', 'E', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') [CompletionResult]::new('--collect-extensions', 'collect-extensions', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') + [CompletionResult]::new('-B', 'B', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls') + [CompletionResult]::new('--collect-backups', 'collect-backups', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls') [CompletionResult]::new('-v', 'v', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--verbosity', 'verbosity', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--silent', 'silent', [CompletionResultType]::ParameterName, 'Only print URLs + turn off logging (good for piping a list of urls to other commands)') diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index 12ee5cf..38ccbac 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -19,7 +19,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -c -I -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --dont-collect --verbosity --silent --quiet --json --output --debug-log" + opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -I -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --dont-collect --verbosity --silent --quiet --json --output --debug-log" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index e86a29e..c467423 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -93,8 +93,10 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --auto-bail 'Automatically stop scanning when an excessive amount of errors are encountered' cand -D 'Don''t auto-filter wildcard responses' cand --dont-filter 'Don''t auto-filter wildcard responses' - cand -c 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' + cand -E 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' cand --collect-extensions 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' + cand -B 'Automatically request likely backup extensions for "found" urls' + cand --collect-backups 'Automatically request likely backup extensions for "found" urls' cand -v 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --verbosity 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --silent 'Only print URLs + turn off logging (good for piping a list of urls to other commands)' diff --git a/src/banner/container.rs b/src/banner/container.rs index bbd1af9..915b79a 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -157,6 +157,9 @@ pub struct Banner { /// represents Configuration.dont_collect dont_collect: BannerEntry, + + /// represents Configuration.collect_backups + collect_backups: BannerEntry, } /// implementation of Banner @@ -360,6 +363,8 @@ impl Banner { "Collect Extensions", &config.collect_extensions.to_string(), ); + let collect_backups = + BannerEntry::new("🏦", "Collect Backups", &config.collect_backups.to_string()); Self { targets, @@ -401,6 +406,7 @@ impl Banner { time_limit, url_denylist, collect_extensions, + collect_backups, dont_collect, config: cfg, version: VERSION.to_string(), @@ -584,6 +590,10 @@ by Ben "epi" Risher {} ver: {}"#, writeln!(&mut writer, "{}", self.dont_collect)?; } + if config.collect_backups { + writeln!(&mut writer, "{}", self.collect_backups)?; + } + if !config.methods.is_empty() { writeln!(&mut writer, "{}", self.methods)?; } diff --git a/src/config/container.rs b/src/config/container.rs index d01afa8..dc7f323 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -275,6 +275,10 @@ pub struct Configuration { /// don't collect any of these extensions when --collect-extensions is used #[serde(default = "ignored_extensions")] pub dont_collect: Vec, + + /// Automatically request likely backup extensions on "found" urls + #[serde(default)] + pub collect_backups: bool, } impl Default for Configuration { @@ -320,6 +324,7 @@ impl Default for Configuration { extract_links: false, random_agent: false, collect_extensions: false, + collect_backups: false, save_state: true, proxy: String::new(), config: String::new(), @@ -377,6 +382,7 @@ impl Configuration { /// - **insecure**: `false` (don't be insecure, i.e. don't allow invalid certs) /// - **extensions**: `None` /// - **collect_extensions**: `false` + /// - **collect_backups**: `false` /// - **dont_collect**: [`DEFAULT_IGNORED_EXTENSIONS`](constant.DEFAULT_RESPONSE_CODES.html) /// - **methods**: [`DEFAULT_METHOD`](constant.DEFAULT_METHOD.html) /// - **data**: `None` @@ -721,6 +727,10 @@ impl Configuration { config.collect_extensions = true; } + if args.is_present("collect_backups") { + config.collect_backups = true; + } + if args.occurrences_of("verbosity") > 0 { // occurrences_of returns 0 if none are found; this is protected in // an if block for the same reason as the quiet option @@ -894,6 +904,7 @@ impl Configuration { update_if_not_default!(&mut conf.auto_bail, new.auto_bail, false); update_if_not_default!(&mut conf.auto_tune, new.auto_tune, false); update_if_not_default!(&mut conf.collect_extensions, new.collect_extensions, false); + update_if_not_default!(&mut conf.collect_backups, new.collect_backups, false); // use updated quiet/silent values to determine output level; same for requester policy conf.output_level = determine_output_level(conf.quiet, conf.silent); conf.requester_policy = determine_requester_policy(conf.auto_tune, conf.auto_bail); diff --git a/src/config/tests.rs b/src/config/tests.rs index 5c8508c..8721a64 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -30,6 +30,7 @@ fn setup_config_test() -> Configuration { resume_from = "/some/state/file" redirects = true insecure = true + collect_backups = true collect_extensions = true extensions = ["html", "php", "js"] dont_collect = ["png", "gif", "jpg", "jpeg"] @@ -97,6 +98,7 @@ fn default_configuration() { assert!(!config.extract_links); assert!(!config.insecure); assert!(!config.collect_extensions); + assert!(!config.collect_backups); assert!(config.regex_denylist.is_empty()); assert_eq!(config.queries, Vec::new()); assert_eq!(config.filter_size, Vec::::new()); @@ -302,6 +304,13 @@ fn config_reads_collect_extensions() { assert!(config.collect_extensions); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_collect_backups() { + let config = setup_config_test(); + assert!(config.collect_backups); +} + #[test] /// parse the test config and see that the value parsed is correct fn config_reads_extensions() { diff --git a/src/event_handlers/command.rs b/src/event_handlers/command.rs index 00d694f..5c91172 100644 --- a/src/event_handlers/command.rs +++ b/src/event_handlers/command.rs @@ -5,6 +5,7 @@ use tokio::sync::oneshot::Sender; use crate::response::FeroxResponse; use crate::{ + message::FeroxMessage, statistics::{StatError, StatField}, traits::FeroxFilter, }; @@ -69,6 +70,9 @@ pub enum Command { /// Notify event handler that a new extension has been seen AddDiscoveredExtension(String), + /// Write an arbitrary string to disk + WriteToDisk(Box), + /// Break out of the (infinite) mpsc receive loop Exit, } diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index e7f124d..314530a 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -104,6 +104,12 @@ impl FileOutHandler { Command::Report(response) => { skip_fail!(write_to(&*response, &mut file, self.config.json)); } + Command::WriteToDisk(message) => { + // todo consider making report accept dyn FeroxSerialize; would mean adding + // as_any/box_eq/PartialEq to the trait and then adding them to the + // implementing structs + skip_fail!(write_to(&*message, &mut file, self.config.json)); + } Command::Exit => { break; } @@ -267,8 +273,12 @@ impl TermOutHandler { .with_context(|| "Could not replay request through replay proxy")?; } - // todo update if statement to include --collect-backups - if should_process_response && matches!(call_type, ProcessResponseCall::Recursive) { + if self.config.collect_backups + && should_process_response + && matches!(call_type, ProcessResponseCall::Recursive) + { + // --collect-backups was used; the response is one we care about, and the function + // call came from the loop in `.start` (i.e. recursive was specified let backup_urls = self.generate_backup_urls(&resp).await; // need to manually adjust stats diff --git a/src/heuristics.rs b/src/heuristics.rs index 9b59569..044b9b2 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -5,6 +5,7 @@ use console::style; use scraper::{Html, Selector}; use uuid::Uuid; +use crate::message::FeroxMessage; use crate::{ config::OutputLevel, event_handlers::{Command, Handles}, @@ -348,25 +349,28 @@ impl HeuristicTests { let dirlist_type = self.detect_directory_listing(&html); if dirlist_type.is_some() { - // todo give some extra consideration to what colors should be used - // todo if this needs to go to a file, has to be sent to the handler + // folks that run things and step away/rely on logs need to be notified of directory + // listing, since they won't see the message on the bar; bastardizing FeroxMessage + // for ease of implementation. This could use a bit of polish at some point. let msg = format!( - "{} {:>8} {:>9} {:>9} {:>9} {} => {}\n", - style("DIR").bright().blue(), - DEFAULT_METHOD, - "-", - "-", - "-", - ferox_response.url().as_str(), - style("Directory listing").green().bright(), - ); - ferox_print(&msg, &PROGRESS_PRINTER); - - log::info!( - "directory listing detected: {} ({:?})", + "detected directory listing: {} ({:?})", target_url, dirlist_type.unwrap() ); + let ferox_msg = FeroxMessage { + kind: "log".to_string(), + message: msg.clone(), + level: "MSG".to_string(), + time_offset: 0.0, + module: "feroxbuster::heuristics".to_string(), + }; + self.handles + .output + .tx_file + .send(Command::WriteToDisk(Box::new(ferox_msg))) + .unwrap_or_default(); + + log::info!("{}", msg); let result = DirListingResult { dir_list_type: dirlist_type, diff --git a/src/message.rs b/src/message.rs index a5186c4..14e71ce 100644 --- a/src/message.rs +++ b/src/message.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::traits::FeroxSerialize; use crate::utils::fmt_err; -#[derive(Serialize, Deserialize, Default)] +#[derive(Serialize, Deserialize, Default, Debug)] /// Representation of a log entry, can be represented as a human readable string or JSON pub struct FeroxMessage { #[serde(rename = "type")] @@ -38,7 +38,7 @@ impl FeroxSerialize for FeroxMessage { "DEBUG" => ("DBG", Color::Yellow), "TRACE" => ("TRC", Color::Magenta), "WILDCARD" => ("WLD", Color::Cyan), - _ => ("UNK", Color::White), + _ => ("MSG", Color::White), }; format!( @@ -143,6 +143,6 @@ mod tests { assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("WLD")); msg.level = "UNKNOWN".to_string(); - assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("UNK")); + assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("MSG")); } } diff --git a/src/parser.rs b/src/parser.rs index 44eb9e2..7c25020 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -472,11 +472,18 @@ pub fn initialize() -> App<'static> { .help("Don't auto-filter wildcard responses") ).arg( Arg::new("collect_extensions") - .short('c') + .short('E') .long("collect-extensions") .takes_value(false) - .help_heading("Scan settings") + .help_heading("Dynamic collection settings") .help("Automatically discover extensions and add them to --extensions (unless they're in --dont-collect)") + ).arg( + Arg::new("collect_backups") + .short('B') + .long("collect-backups") + .takes_value(false) + .help_heading("Dynamic collection settings") + .help("Automatically request likely backup extensions for \"found\" urls") ).arg( Arg::new("dont_collect") .short('I') @@ -486,7 +493,7 @@ pub fn initialize() -> App<'static> { .multiple_values(true) .multiple_occurrences(true) .use_delimiter(true) - .help_heading("Scan settings") + .help_heading("Dynamic collection settings") .help( "File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)", ), diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index d899a18..c9671eb 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -455,6 +455,7 @@ fn feroxstates_feroxserialize_implementation() { r#""headers""#, r#""server":"nginx/1.16.1"#, r#""collect_extensions":true"#, + r#""collect_backups":false"#, r#""collected_extensions":["php"]"#, r#""dont_collect":["tif","tiff","ico","cur","bmp","webp","svg","png","jpg","jpeg","jfif","gif","avif","apng","pjpeg","pjp","mov","wav","mpg","mpeg","mp3","mp4","m4a","m4p","m4v","ogg","webm","ogv","oga","flac","aac","3gp","css","zip","xls","xml","gz","tgz"]"#, ] diff --git a/tests/test_banner.rs b/tests/test_banner.rs index 3e90757..5d9b921 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -1173,7 +1173,7 @@ fn banner_prints_collect_extensions_and_dont_collect_default() { .unwrap() .arg("--url") .arg("http://localhost") - .arg("-c") + .arg("--collect-extensions") .arg("--wordlist") .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() @@ -1202,7 +1202,7 @@ fn banner_prints_collect_extensions_and_dont_collect_with_input() { .unwrap() .arg("--url") .arg("http://localhost") - .arg("-c") + .arg("--collect-extensions") .arg("--dont-collect") .arg("pdf") .arg("xps") @@ -1225,3 +1225,30 @@ fn banner_prints_collect_extensions_and_dont_collect_with_input() { .and(predicate::str::contains("─┴─")), ); } + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + data body +fn banner_prints_collect_backups() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--collect-backups") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("─┴─")), + ); +} diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index c06919f..93e1a71 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -727,11 +727,11 @@ fn collect_backups_makes_appropriate_requests() { }) .collect(); - // todo add --collect-backups flag when available let cmd = Command::cargo_bin("feroxbuster") .unwrap() .arg("--url") .arg(srv.url("/")) + .arg("--collect-backups") .arg("--wordlist") .arg(file.as_os_str()) .unwrap(); From 5edd58a3f4de23a4c96f4354682da9d44032d367 Mon Sep 17 00:00:00 2001 From: epi Date: Thu, 17 Feb 2022 19:46:44 -0600 Subject: [PATCH 26/40] clippy --- src/event_handlers/outputs.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index 314530a..ace9c0d 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -336,7 +336,7 @@ impl TermOutHandler { /// internal helper to stay DRY fn add_new_url_to_vec(&self, url: &Url, new_name: &str, urls: &mut Vec) { let mut new_url = url.clone(); - new_url.set_path(&new_name); + new_url.set_path(new_name); urls.push(new_url); } @@ -365,11 +365,11 @@ impl TermOutHandler { if !filename.is_empty() { // append rules for suffix in ["~", ".bak", ".bak2", ".old", ".1"] { - self.add_new_url_to_vec(&url, &format!("{}{}", filename, suffix), &mut urls); + self.add_new_url_to_vec(url, &format!("{}{}", filename, suffix), &mut urls); } // vim swap rule - self.add_new_url_to_vec(&url, &format!(".{}.swp", filename), &mut urls); + self.add_new_url_to_vec(url, &format!(".{}.swp", filename), &mut urls); // replace original extension rule let parts: Vec<_> = filename From 2d5aeb444e742766cf5da37e272b8195326c3868 Mon Sep 17 00:00:00 2001 From: epi Date: Sat, 19 Feb 2022 15:11:22 -0600 Subject: [PATCH 27/40] added temp workaround for +proxy/-data problem --- src/scanner/requester.rs | 15 ++++++++------- src/utils.rs | 24 +++++++++++++++++++++++- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index 52ea9a8..a57302d 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -332,13 +332,14 @@ impl Requester { continue; } - let response = logged_request( - &url, - method.as_str(), - Some(self.handles.config.data.as_slice()), - self.handles.clone(), - ) - .await?; + let data = if self.handles.config.data.is_empty() { + None + } else { + Some(self.handles.config.data.as_slice()) + }; + + let response = + logged_request(&url, method.as_str(), data, self.handles.clone()).await?; if (should_tune || self.handles.config.auto_bail) && !atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst) diff --git a/src/utils.rs b/src/utils.rs index 18da1a4..5e05997 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -131,7 +131,7 @@ pub async fn make_request( client: &Client, url: &Url, method: &str, - data: Option<&[u8]>, + mut data: Option<&[u8]>, output_level: OutputLevel, config: &Configuration, tx_stats: UnboundedSender, @@ -142,8 +142,30 @@ pub async fn make_request( output_level, tx_stats ); + let tmp_workaround: Option<&[u8]> = Some(&[0xd_u8, 0xa]); // \r\n let mut request = client.request(Method::from_bytes(method.as_bytes())?, url.to_owned()); + + if (!config.proxy.is_empty() || config.replay_proxy.is_empty()) + && data.is_none() + && ["post", "put", "patch"].contains(&method.to_ascii_lowercase().as_str()) + { + // either --proxy or --replay-proxy was specified + // AND + // --data wasn't used + // AND + // the method is either post/put/patch (case insensitive) + // + // this combination of factors results in requests that are delayed for 10 seconds before + // being issued. The tracking issues are + // https://github.com/epi052/feroxbuster/issues/501 + // https://github.com/seanmonstar/reqwest/issues/1474 + // + // as a (hopefully temporary) workaround, we'll add \r\n to the body so that there's no + // delay + data = tmp_workaround; + } + if let Some(body_data) = data { request = request.body(body_data.to_vec()); } From c1132622cfc5a630d22ff762c853dfdd04aa2985 Mon Sep 17 00:00:00 2001 From: epi Date: Sat, 19 Feb 2022 15:19:09 -0600 Subject: [PATCH 28/40] replay proxy respects --data now --- src/event_handlers/outputs.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index ace9c0d..0ae253d 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -260,11 +260,17 @@ impl TermOutHandler { if self.config.replay_client.is_some() && should_process_response { // replay proxy specified/client created and this response's status code is one that // should be replayed; not using logged_request due to replay proxy client + let data = if self.handles.config.data.is_empty() { + None + } else { + Some(self.handles.config.data.as_slice()) + }; + make_request( self.config.replay_client.as_ref().unwrap(), resp.url(), resp.method().as_str(), - None, + data, self.config.output_level, &self.config, tx_stats.clone(), @@ -346,7 +352,7 @@ impl TermOutHandler { /// example: /// original: LICENSE.txt /// backups: - /// - LICENSE.txt~ + /// - LICENSE.txt~ /// - LICENSE.txt.bak /// - LICENSE.txt.bak2 /// - LICENSE.txt.old From ca4d8f0c52f1a0b83494dc0f4c23e9b9b09974f0 Mon Sep 17 00:00:00 2001 From: epi Date: Sat, 19 Feb 2022 17:28:19 -0600 Subject: [PATCH 29/40] replay proxy respects --data now --- src/event_handlers/outputs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index 0ae253d..304cbaf 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -260,10 +260,10 @@ impl TermOutHandler { if self.config.replay_client.is_some() && should_process_response { // replay proxy specified/client created and this response's status code is one that // should be replayed; not using logged_request due to replay proxy client - let data = if self.handles.config.data.is_empty() { + let data = if self.config.data.is_empty() { None } else { - Some(self.handles.config.data.as_slice()) + Some(self.config.data.as_slice()) }; make_request( From eed59e1da56e48a33127af08653369399897bdc0 Mon Sep 17 00:00:00 2001 From: epi Date: Sun, 27 Feb 2022 13:42:07 -0600 Subject: [PATCH 30/40] added nlp module --- src/lib.rs | 1 + src/nlp/constants.rs | 334 +++++++++++++++++++++++++++++++++++++++++++ src/nlp/document.rs | 200 ++++++++++++++++++++++++++ src/nlp/mod.rs | 11 ++ src/nlp/model.rs | 140 ++++++++++++++++++ src/nlp/term.rs | 105 ++++++++++++++ src/nlp/utils.rs | 158 ++++++++++++++++++++ 7 files changed, 949 insertions(+) create mode 100644 src/nlp/constants.rs create mode 100644 src/nlp/document.rs create mode 100644 src/nlp/mod.rs create mode 100644 src/nlp/model.rs create mode 100644 src/nlp/term.rs create mode 100644 src/nlp/utils.rs diff --git a/src/lib.rs b/src/lib.rs index 7a0061c..de9bbe8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ mod macros; mod url; mod response; mod message; +mod nlp; /// Alias for tokio::sync::mpsc::UnboundedSender pub(crate) type CommandSender = UnboundedSender; diff --git a/src/nlp/constants.rs b/src/nlp/constants.rs new file mode 100644 index 0000000..a276214 --- /dev/null +++ b/src/nlp/constants.rs @@ -0,0 +1,334 @@ +use lazy_static::lazy_static; +use regex::Regex; + +lazy_static! { + /// regular expression to match on words with numbers, underscores, and hyphens + pub(super) static ref BOUNDED_WORD_REGEX: Regex = Regex::new(r"\b[a-zA-Z0-9_-]+\b").unwrap(); +} + +/// collection of stop words from spaCy with small modifications +pub(super) static STOP_WORDS: [&str; 323] = [ + "'d", + "'ll", + "'m", + "'re", + "'s", + "'ve", + "a", + "about", + "above", + "across", + "after", + "afterwards", + "again", + "against", + "almost", + "alone", + "along", + "already", + "also", + "although", + "always", + "am", + "among", + "amongst", + "amount", + "an", + "and", + "another", + "any", + "anyhow", + "anyone", + "anything", + "anyway", + "anywhere", + "are", + "around", + "as", + "at", + "back", + "be", + "became", + "because", + "become", + "becomes", + "becoming", + "been", + "before", + "beforehand", + "behind", + "being", + "below", + "beside", + "besides", + "between", + "beyond", + "both", + "bottom", + "but", + "by", + "ca", + "call", + "can", + "cannot", + "could", + "did", + "do", + "does", + "doing", + "done", + "down", + "due", + "during", + "each", + "eight", + "either", + "eleven", + "else", + "elsewhere", + "empty", + "enough", + "even", + "ever", + "every", + "everyone", + "everything", + "everywhere", + "except", + "few", + "fifteen", + "fifty", + "first", + "five", + "for", + "former", + "formerly", + "forty", + "four", + "from", + "front", + "full", + "further", + "get", + "got", + "give", + "go", + "had", + "has", + "have", + "he", + "hence", + "her", + "here", + "hereafter", + "hereby", + "herein", + "hereupon", + "hers", + "herself", + "him", + "himself", + "his", + "how", + "however", + "hundred", + "i", + "if", + "in", + "indeed", + "into", + "is", + "it", + "its", + "itself", + "just", + "keep", + "last", + "latter", + "latterly", + "least", + "less", + "made", + "make", + "many", + "may", + "me", + "meanwhile", + "might", + "mine", + "more", + "moreover", + "most", + "mostly", + "move", + "much", + "must", + "my", + "myself", + "n't", + "name", + "namely", + "neither", + "never", + "nevertheless", + "next", + "nine", + "no", + "nobody", + "none", + "noone", + "nor", + "not", + "nothing", + "now", + "nowhere", + "n\u{2018}t", + "n\u{2019}t", + "of", + "off", + "often", + "on", + "once", + "one", + "only", + "onto", + "or", + "other", + "others", + "otherwise", + "our", + "ours", + "ourselves", + "out", + "over", + "own", + "part", + "per", + "perhaps", + "please", + "put", + "quite", + "rather", + "re", + "really", + "regarding", + "same", + "say", + "see", + "seem", + "seemed", + "seeming", + "seems", + "serious", + "several", + "she", + "should", + "side", + "since", + "six", + "sixty", + "so", + "some", + "somehow", + "someone", + "something", + "sometime", + "sometimes", + "somewhere", + "still", + "such", + "take", + "ten", + "than", + "that", + "the", + "their", + "them", + "themselves", + "then", + "thence", + "there", + "thereafter", + "thereby", + "therefore", + "therein", + "thereupon", + "these", + "they", + "third", + "this", + "those", + "though", + "three", + "through", + "throughout", + "thru", + "thus", + "to", + "together", + "too", + "toward", + "towards", + "twelve", + "twenty", + "two", + "under", + "unless", + "until", + "up", + "upon", + "used", + "using", + "various", + "very", + "via", + "was", + "we", + "well", + "were", + "what", + "whatever", + "when", + "whence", + "whenever", + "where", + "whereafter", + "whereas", + "whereby", + "wherein", + "whereupon", + "wherever", + "whether", + "which", + "while", + "whither", + "who", + "whoever", + "whole", + "whom", + "whose", + "why", + "will", + "with", + "within", + "without", + "would", + "yet", + "you", + "your", + "yours", + "yourself", + "yourselves", + "\u{2018}d", + "\u{2018}ll", + "\u{2018}m", + "\u{2018}re", + "\u{2018}s", + "\u{2018}ve", + "\u{2019}d", + "\u{2019}ll", + "\u{2019}m", + "\u{2019}re", + "\u{2019}s", + "\u{2019}ve", +]; diff --git a/src/nlp/document.rs b/src/nlp/document.rs new file mode 100644 index 0000000..1a6c871 --- /dev/null +++ b/src/nlp/document.rs @@ -0,0 +1,200 @@ +use super::term::{Term, TermMetaData}; +use super::utils::preprocess; +use scraper::{Html, Node, Selector}; +use std::collections::HashMap; + +/// data container representing a single document, in the nlp sense +#[derive(Debug, Default)] +pub struct Document { + /// collection of `Term`s and their associated metadata + pub terms: HashMap, + + /// number of terms contained within the document + number_of_terms: usize, +} + +impl Document { + /// create a new `Document` from the given string + pub fn new(text: &str) -> Self { + let mut document = Self::default(); + + let processed = preprocess(text); + + document.number_of_terms += processed.len(); + + for normalized in processed { + if normalized.len() > 2 { + document.add_term(&normalized) + } + } + document + } + + /// add a `Term` to the document if it's not already tracked, otherwise increment the number + /// of times the term has been seen + pub fn add_term(&mut self, word: &str) { + let term = Term::new(word); + + let metadata = self.terms.entry(term).or_insert_with(TermMetaData::new); + *metadata.count_mut() += 1; + } + + /// create a new `Document` from the given HTML string + pub fn from_html(raw_html: &str) -> Self { + let selector = Selector::parse("body").unwrap(); + + let html = Html::parse_document(raw_html); + + let text = html + .select(&selector) + .next() + .unwrap() + .descendants() + .filter_map(|node| { + if !node.value().is_text() && !node.value().is_comment() { + return None; + } + + // have a Text||Comment node, trim whitespace to test for all whitespace stuff + let trimmed = if node.value().is_text() { + node.value().as_text().unwrap().text.trim() + } else { + node.value().as_comment().unwrap().comment.trim() + }; + + if trimmed.is_empty() { + return None; + } + + // found a non-empty Text||Comment node, need to check its parent to determine if + // it's a

got worse on Wednesday.

"; + let doc = Document::from_html(html); + let keys = doc.terms().keys().map(|key| key.raw()).collect::>(); + + let expected = ["worse", "wednesday"]; + + assert_eq!(doc.number_of_terms(), 2); + + for key in keys { + assert!(expected.contains(&key)); + } + } } diff --git a/src/nlp/model.rs b/src/nlp/model.rs index 151f8a9..588d2a3 100644 --- a/src/nlp/model.rs +++ b/src/nlp/model.rs @@ -152,6 +152,8 @@ mod tests { model.add_document(d); } + assert_eq!(model.num_documents(), 4); + model.calculate_tf_idf_scores(); let non_zero_words = model.all_words(); diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index 83c17c9..b610125 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::AtomicBool; use std::{ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant}; use anyhow::{bail, Result}; @@ -31,6 +32,43 @@ lazy_static! { pub static ref RESPONSES: FeroxResponses = FeroxResponses::default(); // todo consider removing this } + +/// check to see if `pause_flag` is set to true. when true; enter a busy loop that only exits +/// by setting PAUSE_SCAN back to false +async fn check_for_user_input( + pause_flag: &AtomicBool, + scanned_urls: Arc, + handles: Arc, +) { + log::trace!( + "enter: check_for_user_input({:?}, SCANNED_URLS, HANDLES)", + pause_flag + ); + + // todo write a test or two for this function at some point... + if pause_flag.load(Ordering::Acquire) { + match scanned_urls.pause(true).await { + Some(MenuCmdResult::Url(url)) => { + // user wants to add a new url to be scanned, need to send + // it over to the event handler for processing + handles + .send_scan_command(Command::ScanNewUrl(url)) + .unwrap_or_else(|e| log::warn!("Could not add scan to scan queue: {}", e)) + } + Some(MenuCmdResult::NumCancelled(num_canx)) => { + if num_canx > 0 { + handles + .stats + .send(SubtractFromUsizeField(TotalExpected, num_canx)) + .unwrap_or_else(|e| log::warn!("Could not update overall scan bar: {}", e)); + } + } + _ => {} + } + } + log::trace!("exit: check_for_user_input"); +} + /// handles the main muscle movement of scanning a url pub struct FeroxScanner { /// handles to handlers and config @@ -69,6 +107,7 @@ impl FeroxScanner { } } + /// produces and awaits tasks (mp of mpsc); responsible for making requests async fn stream_requests( &self, looping_words: Arc>, @@ -76,6 +115,8 @@ impl FeroxScanner { scanned_urls: Arc, requester: Arc, ) { + log::trace!("enter: stream_requests(params too verbose to print)"); + let producers = stream::iter(looping_words.deref().to_owned()) .map(|word| { let pb = progress_bar.clone(); // progress bar is an Arc around internal state @@ -84,36 +125,11 @@ impl FeroxScanner { let handles_clone = self.handles.clone(); ( tokio::spawn(async move { - if PAUSE_SCAN.load(Ordering::Acquire) { - // for every word in the wordlist, check to see if PAUSE_SCAN is set to true - // when true; enter a busy loop that only exits by setting PAUSE_SCAN back - // to false - match scanned_urls_clone.pause(true).await { - Some(MenuCmdResult::Url(url)) => { - // user wants to add a new url to be scanned, need to send - // it over to the event handler for processing - handles_clone - .send_scan_command(Command::ScanNewUrl(url)) - .unwrap_or_else(|e| { - log::warn!("Could not add scan to scan queue: {}", e) - }) - } - Some(MenuCmdResult::NumCancelled(num_canx)) => { - if num_canx > 0 { - handles_clone - .stats - .send(SubtractFromUsizeField(TotalExpected, num_canx)) - .unwrap_or_else(|e| { - log::warn!( - "Could not update overall scan bar: {}", - e - ) - }); - } - } - _ => {} - } - } + // for every word in the wordlist, check to see if user has pressed enter + // in order to go into the interactive menu + check_for_user_input(&PAUSE_SCAN, scanned_urls_clone, handles_clone).await; + + // after checking for user input, send the request requester_clone .request(&word) .await @@ -139,6 +155,7 @@ impl FeroxScanner { log::trace!("awaiting scan producers"); producers.await; log::trace!("done awaiting scan producers"); + log::trace!("exit: stream_requests"); } /// Scan a given url using a given wordlist diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 93e1a71..547bb0a 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -757,3 +757,89 @@ fn collect_backups_makes_appropriate_requests() { teardown_tmp_directory(tmp_dir); } + +#[test] +/// send a request to 4 200 files, expect non-zero tf-idf rated words to be requested as well +fn collect_words_makes_appropriate_requests() { + let srv = MockServer::start(); + + let wordlist: Vec<_> = ["doc1", "doc2", "doc3", "doc4"] + .iter() + .map(|w| w.to_string()) + .collect(); + + let (tmp_dir, file) = setup_tmp_directory(&wordlist, "wordlist").unwrap(); + + srv.mock(|when, then| { + when.method(GET).path("/doc1"); + then.status(200) + .body("Air quality in the sunny island improved gradually throughout Wednesday."); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc2"); + then.status(200).body( + "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.", + ); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc3"); + then.status(200).body("The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island"); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc4"); + then.status(200) + .body("The air quality in Singapore got worse on Wednesday."); + }); + + let valid_paths = vec![ + "/gradually", + "/network", + "/hit", + "/located", + "/continued", + "/island", + "/worse", + "/monitored", + "/monitoring", + "/haze", + "/different", + "/stations", + "/sunny", + "/singapore", + "/improved", + "/parts", + "/wednesday", + ]; + + let valid_mocks: Vec<_> = valid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200).body("this is a valid test"); + }) + }) + .collect(); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--collect-words") + .arg("--wordlist") + .arg(file.as_os_str()) + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/doc1") + .and(predicate::str::contains("/doc2")) + .and(predicate::str::contains("/doc3")) + .and(predicate::str::contains("/doc4")), + ); + + for valid_mock in valid_mocks { + assert_eq!(valid_mock.hits(), 1); + } + + teardown_tmp_directory(tmp_dir); +} From 53d2076176b0e1d69cd336dd5c30d29ab6bee0ec Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:45:22 -0600 Subject: [PATCH 33/40] removed deprecated clap struct/methods --- src/config/container.rs | 2 +- src/parser.rs | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/config/container.rs b/src/config/container.rs index 1d4206e..2af4f9f 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -26,7 +26,7 @@ macro_rules! update_config_if_present { match $matches.value_of_t($arg_name) { Ok(value) => *$conf_val = value, // Update value Err(err) => { - if !matches!(err.kind, clap::ErrorKind::ArgumentNotFound) { + if !matches!(err.kind(), clap::ErrorKind::ArgumentNotFound) { // Do nothing if argument not found err.exit() // Exit with error on any other parse error } diff --git a/src/parser.rs b/src/parser.rs index ef65414..80337d8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,5 @@ use clap::{ - crate_authors, crate_description, crate_name, crate_version, App, Arg, ArgGroup, ValueHint, + crate_authors, crate_description, crate_name, crate_version, Arg, ArgGroup, Command, ValueHint, }; use lazy_static::lazy_static; use regex::Regex; @@ -25,8 +25,8 @@ lazy_static! { } /// Create and return an instance of [clap::App](https://docs.rs/clap/latest/clap/struct.App.html), i.e. the Command Line Interface's configuration -pub fn initialize() -> App<'static> { - let app = App::new(crate_name!()) +pub fn initialize() -> Command<'static> { + let app = Command::new(crate_name!()) .version(crate_version!()) .author(crate_authors!()) .about(crate_description!()); @@ -42,7 +42,7 @@ pub fn initialize() -> App<'static> { .required_unless_present_any(&["stdin", "resume_from"]) .help_heading("Target selection") .value_name("URL") - .use_delimiter(true) + .use_value_delimiter(true) .value_hint(ValueHint::Url) .help("The target URL (required, unless [--stdin || --resume-from] used)"), ) @@ -101,7 +101,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .requires("replay_proxy") .help_heading("Proxy settings") .help( @@ -138,7 +138,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "File extension(s) to search for (ex: -x php -x pdf js)", @@ -152,7 +152,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Which HTTP request method(s) should be sent (default: GET)", @@ -177,7 +177,7 @@ pub fn initialize() -> App<'static> { .help_heading("Request settings") .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help( "Specify HTTP headers to be used in each request (ex: -H Header:val -H 'stuff: things')", ), @@ -190,7 +190,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Specify HTTP cookies to be used in each request (ex: -b stuff=things)", @@ -204,7 +204,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Request's URL query parameters (ex: -Q token=stuff -Q secret=key)", @@ -229,7 +229,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request filters") .help("URL(s) or Regex Pattern(s) to exclude from recursion/scans"), ); @@ -246,7 +246,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)", @@ -260,7 +260,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages via regular expression matching on the response's body (ex: -X '^ignore me$')", @@ -274,7 +274,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular word count (ex: -W 312 -W 91,82)", @@ -288,7 +288,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular line count (ex: -N 20 -N 31,30)", @@ -302,7 +302,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out status codes (deny list) (ex: -C 200 -C 401)", @@ -316,7 +316,7 @@ pub fn initialize() -> App<'static> { .multiple_values(true) .multiple_occurrences(true) .value_hint(ValueHint::Url) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)", @@ -330,7 +330,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Status Codes to include (allow list) (default: 200 204 301 302 307 308 401 403 405)", @@ -499,7 +499,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Dynamic collection settings") .help( "File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)", From 1628ee86a3b6986d2650180b4391c827d4640ba1 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:51:59 -0600 Subject: [PATCH 34/40] added info log for collect words --- src/scanner/ferox_scanner.rs | 9 ++++++++- src/scanner/requester.rs | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index b610125..32790e6 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -279,9 +279,10 @@ impl FeroxScanner { if self.handles.config.collect_words { let new_words = TF_IDF.read().unwrap().all_words(); + let new_words_len = new_words.len(); let cur_length = progress_bar.length(); - let new_length = cur_length + new_words.len() as u64; + let new_length = cur_length + new_words_len as u64; progress_bar.set_length(new_length); @@ -290,6 +291,12 @@ impl FeroxScanner { .send(AddToUsizeField(TotalExpected, new_words.len())) .unwrap_or_default(); + log::info!( + "requesting {} collected words: {:?}...", + new_words_len, + &new_words[..new_words_len.min(3) as usize] + ); + self.stream_requests( Arc::new(new_words), progress_bar.clone(), diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index 2de81b6..2d01d97 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -405,7 +405,6 @@ impl Requester { } if self.handles.config.collect_words { - // todo think about before/after filtering, similar to recursion if let Ok(mut guard) = TF_IDF.write() { let doc = Document::from_html(ferox_response.text()); guard.add_document(doc); From e06e194f774f791c79ac02ec02cfd42732e68cee Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:52:13 -0600 Subject: [PATCH 35/40] fixed flaky test --- tests/test_scanner.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index 547bb0a..fb3c6f3 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -3,6 +3,8 @@ use assert_cmd::prelude::*; use httpmock::Method::GET; use httpmock::MockServer; use predicates::prelude::*; +use std::thread::sleep; +use std::time::Duration; use std::{process::Command, time}; use utils::{setup_tmp_directory, teardown_tmp_directory}; @@ -763,10 +765,12 @@ fn collect_backups_makes_appropriate_requests() { fn collect_words_makes_appropriate_requests() { let srv = MockServer::start(); - let wordlist: Vec<_> = ["doc1", "doc2", "doc3", "doc4"] - .iter() - .map(|w| w.to_string()) - .collect(); + let wordlist: Vec<_> = [ + "doc1", "doc2", "doc3", "doc4", "blah", "blah2", "blah3", "blah4", + ] + .iter() + .map(|w| w.to_string()) + .collect(); let (tmp_dir, file) = setup_tmp_directory(&wordlist, "wordlist").unwrap(); @@ -816,7 +820,7 @@ fn collect_words_makes_appropriate_requests() { .map(|&p| { srv.mock(|when, then| { when.method(GET).path(p); - then.status(200).body("this is a valid test"); + then.status(200); }) }) .collect(); @@ -825,19 +829,25 @@ fn collect_words_makes_appropriate_requests() { .unwrap() .arg("--url") .arg(srv.url("/")) + .arg("-vv") .arg("--collect-words") + .arg("-t") + .arg("1") .arg("--wordlist") .arg(file.as_os_str()) .unwrap(); + print!("{}", std::str::from_utf8(&cmd.stdout).unwrap().to_string()); + cmd.assert().success().stdout( predicate::str::contains("/doc1") .and(predicate::str::contains("/doc2")) .and(predicate::str::contains("/doc3")) .and(predicate::str::contains("/doc4")), ); - + sleep(Duration::new(2, 0)); for valid_mock in valid_mocks { + println!("mock: {}", valid_paths[valid_mock.id - 4]); assert_eq!(valid_mock.hits(), 1); } From 8214a2a35722cbc9db83293b32a3ef37e84179b9 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:52:23 -0600 Subject: [PATCH 36/40] bumped depenedencies --- Cargo.lock | 170 +++++++++++++++++++++++++++++++++++++++-------------- Cargo.toml | 64 ++++++++++---------- 2 files changed, 157 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 03c2154..cf6ed58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.53" +version = "1.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94a45b455c14666b85fc40a019e8ab9eb75e3a124e05494f5397122bc9eb06e0" +checksum = "159bb86af3a200e19a068f4224eae4c8bb2d0fa054c7e5d1cacd5cef95e684cd" [[package]] name = "ascii-canvas" @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "async-global-executor" -version = "2.0.2" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9586ec52317f36de58453159d48351bc244bc24ced3effc1fce22f3d48664af6" +checksum = "c026b7e44f1316b567ee750fea85103f87fcb80792b860e979f221259796ca0a" dependencies = [ "async-channel", "async-executor", @@ -112,9 +112,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b" +checksum = "e97a171d191782fba31bb902b14ad94e24a68145032b7eedf871ab0bc0d077b6" dependencies = [ "event-listener", ] @@ -317,9 +317,9 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6" [[package]] name = "cc" -version = "1.0.72" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" [[package]] name = "cfg-if" @@ -329,9 +329,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.0.14" +version = "3.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b63edc3f163b3c71ec8aa23f9bd6070f77edbf3d1d198b164afa90ff00e4ec62" +checksum = "ced1892c55c910c1219e98d6fc8d71f6bddba7905866ce740066d8bfea859312" dependencies = [ "atty", "bitflags", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "3.0.6" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678db4c39c013cc68b54d372bce2efc58e30a0337c497c9032fd196802df3bc3" +checksum = "df6f3613c0a3cddfd78b41b10203eb322cb29b600cbdf808a7d3db95691b8e25" dependencies = [ "clap", ] @@ -411,15 +411,15 @@ dependencies = [ [[package]] name = "crossterm" -version = "0.20.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebde6a9dd5e331cd6c6f48253254d117642c31653baa475e394657c59c1f7d" +checksum = "77b75a27dc8d220f1f8521ea69cd55a34d720a200ebb3a624d9aa19193d3b432" dependencies = [ "bitflags", "crossterm_winapi", "libc", - "mio", - "parking_lot", + "mio 0.7.14", + "parking_lot 0.12.0", "signal-hook", "signal-hook-mio", "winapi", @@ -427,9 +427,9 @@ dependencies = [ [[package]] name = "crossterm_winapi" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6966607622438301997d3dac0d2f6e9a90c68bb6bc1785ea98456ab93c0507" +checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" dependencies = [ "winapi", ] @@ -903,9 +903,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" +checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" dependencies = [ "cfg-if", "libc", @@ -1251,9 +1251,9 @@ checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" [[package]] name = "libc" -version = "0.2.118" +version = "0.2.119" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e509672465a0504304aa87f9f176f2b2b716ed8fb105ebe5c02dc6dce96a94" +checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" [[package]] name = "libnghttp2-sys" @@ -1356,6 +1356,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "mio" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2" +dependencies = [ + "libc", + "log", + "miow", + "ntapi", + "winapi", +] + [[package]] name = "miow" version = "0.3.7" @@ -1450,9 +1463,9 @@ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] name = "once_cell" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" [[package]] name = "openssl" @@ -1520,7 +1533,17 @@ checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ "instant", "lock_api", - "parking_lot_core", + "parking_lot_core 0.8.5", +] + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.1", ] [[package]] @@ -1537,6 +1560,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking_lot_core" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1792,9 +1828,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" dependencies = [ "bitflags", ] @@ -1805,7 +1841,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.4", + "getrandom 0.2.5", "redox_syscall", ] @@ -1880,9 +1916,9 @@ dependencies = [ [[package]] name = "rlimit" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc0bf25554376fd362f54332b8410a625c71f15445bca32ffdfdf4ec9ac91726" +checksum = "347703a5ae47adf1e693144157be231dde38c72bd485925cae7407ad3e52480b" dependencies = [ "libc", ] @@ -1985,9 +2021,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0486718e92ec9a68fbed73bb5ef687d71103b142595b406835649bebd33f72c7" +checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" [[package]] name = "serde" @@ -2069,7 +2105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29fd5867f1c4f2c5be079aee7a2adf1152ebb04a4bc4d341f504b7dece607ed4" dependencies = [ "libc", - "mio", + "mio 0.7.14", "signal-hook", ] @@ -2141,7 +2177,7 @@ checksum = "33994d0838dc2d152d17a62adf608a869b5e846b65b389af7f3dbc1de45c5b26" dependencies = [ "lazy_static", "new_debug_unreachable", - "parking_lot", + "parking_lot 0.11.2", "phf_shared 0.10.0", "precomputed-hash", "serde", @@ -2214,9 +2250,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ "winapi-util", ] @@ -2239,9 +2275,9 @@ checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" [[package]] name = "textwrap" -version = "0.14.2" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" dependencies = [ "terminal_size", ] @@ -2298,19 +2334,20 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.16.1" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c27a64b625de6d309e8c57716ba93021dccf1b3b5c97edd6d3dd2d2135afc0a" +checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" dependencies = [ "bytes", "libc", "memchr", - "mio", + "mio 0.8.0", "num_cpus", "once_cell", - "parking_lot", + "parking_lot 0.12.0", "pin-project-lite", "signal-hook-registry", + "socket2", "tokio-macros", "winapi", ] @@ -2404,9 +2441,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d8d93354fe2a8e50d5953f5ae2e47a3fc2ef03292e7ea46e3cc38f549525fb9" +checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f" dependencies = [ "cfg-if", "log", @@ -2503,7 +2540,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.4", + "getrandom 0.2.5", ] [[package]] @@ -2681,6 +2718,49 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" + +[[package]] +name = "windows_i686_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" + +[[package]] +name = "windows_i686_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" + [[package]] name = "winreg" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index abf7551..16364b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,45 +16,45 @@ build = "build.rs" maintenance = { status = "actively-developed" } [build-dependencies] -clap = {version = "3.0", features = ["wrap_help", "cargo"]} -clap_complete = "3.0" -regex = "1" -lazy_static = "1.4" -dirs = "4.0" +clap = { version = "3.1.5", features = ["wrap_help", "cargo"] } +clap_complete = "3.1.1" +regex = "1.5.4" +lazy_static = "1.4.0" +dirs = "4.0.0" [dependencies] -scraper = "0.12" -futures = { version = "0.3"} -tokio = { version = "1.16", features = ["full"] } -tokio-util = {version = "0.7", features = ["codec"]} -log = "0.4" -env_logger = "0.9" -reqwest = { version = "0.11", features = ["socks"] } -url = { version = "2.2", features = ["serde"]} # uses feature unification to add 'serde' to reqwest::Url -serde_regex = "1.1" -clap = {version = "3.0", features = ["wrap_help", "cargo"]} -lazy_static = "1.4" -toml = "0.5" -serde = { version = "1.0", features = ["derive", "rc"] } -serde_json = "1.0" -uuid = { version = "0.8", features = ["v4"] } +scraper = "0.12.0" +futures = "0.3.21" +tokio = { version = "1.17.0", features = ["full"] } +tokio-util = { version = "0.7.0", features = ["codec"] } +log = "0.4.14" +env_logger = "0.9.0" +reqwest = { version = "0.11.9", features = ["socks"] } +url = { version = "2.2.2", features = ["serde"] } # uses feature unification to add 'serde' to reqwest::Url +serde_regex = "1.1.0" +clap = { version = "3.1.5", features = ["wrap_help", "cargo"] } +lazy_static = "1.4.0" +toml = "0.5.8" +serde = { version = "1.0.136", features = ["derive", "rc"] } +serde_json = "1.0.79" +uuid = { version = "0.8.2", features = ["v4"] } indicatif = "0.15" -console = "0.15" -openssl = { version = "0.10", features = ["vendored"] } -dirs = "4.0" -regex = "1" -crossterm = "0.20" -rlimit = "0.6" # todo: watch for 1.0, adds windows rlimit -ctrlc = "3.2" +console = "0.15.0" +openssl = { version = "0.10.38", features = ["vendored"] } +dirs = "4.0.0" +regex = "1.5.4" +crossterm = "0.23.0" +rlimit = "0.7.0" +ctrlc = "3.2.1" fuzzyhash = "0.2.1" -anyhow = "1.0" +anyhow = "1.0.55" leaky-bucket = "0.10.0" # todo: upgrade, will take a little work/thought since api changed [dev-dependencies] -tempfile = "3.3" -httpmock = "0.6" -assert_cmd = "2.0" -predicates = "2.1" +tempfile = "3.3.0" +httpmock = "0.6.6" +assert_cmd = "2.0.4" +predicates = "2.1.1" [profile.release] lto = true From 7839118379dc515cdbbc1e44253ce8de25ebad68 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:52:45 -0600 Subject: [PATCH 37/40] added cargo make Makefile.toml --- Makefile.toml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 Makefile.toml diff --git a/Makefile.toml b/Makefile.toml new file mode 100644 index 0000000..1afc7d4 --- /dev/null +++ b/Makefile.toml @@ -0,0 +1,18 @@ +# composite tasks +[tasks.upgrade] +dependencies = ["upgrade-deps", "update"] + +# cleaning +[tasks.clean-state] +script = """ +rm ferox-*.state +""" + +# dependency management +[tasks.upgrade-deps] +command = "cargo" +args = ["upgrade", "--exclude", "indicatif", "leaky-bucket"] + +[tasks.update] +command = "cargo" +args = ["update"] From f814c4b223b53d71bbad691178557982869714d3 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 06:54:17 -0600 Subject: [PATCH 38/40] put back stripped comment --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 16364b1..9a81a8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,8 @@ tokio-util = { version = "0.7.0", features = ["codec"] } log = "0.4.14" env_logger = "0.9.0" reqwest = { version = "0.11.9", features = ["socks"] } -url = { version = "2.2.2", features = ["serde"] } # uses feature unification to add 'serde' to reqwest::Url +# uses feature unification to add 'serde' to reqwest::Url +url = { version = "2.2.2", features = ["serde"] } serde_regex = "1.1.0" clap = { version = "3.1.5", features = ["wrap_help", "cargo"] } lazy_static = "1.4.0" From cbbf9be6c9bbaa6f5fe49685f6ff359438427d60 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 21:15:43 -0600 Subject: [PATCH 39/40] added composite flags --- shell_completions/_feroxbuster | 4 + shell_completions/_feroxbuster.ps1 | 4 + shell_completions/feroxbuster.bash | 2 +- shell_completions/feroxbuster.elv | 4 + src/config/container.rs | 30 ++++++-- src/parser.rs | 30 ++++++++ tests/test_banner.rs | 115 +++++++++++++++++++++++++++++ tests/test_scanner.rs | 2 +- 8 files changed, 183 insertions(+), 8 deletions(-) diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index 4254cec..f863ac4 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -74,6 +74,10 @@ _feroxbuster() { '-V[Print version information]' \ '--version[Print version information]' \ '(-u --url)--stdin[Read url(s) from STDIN]' \ +'(-p --proxy -k --insecure --burp-replay)--burp[Set --proxy to http://127.0.0.1:8080 and set --insecure to true]' \ +'(-P --replay-proxy -k --insecure)--burp-replay[Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true]' \ +'--smart[Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true]' \ +'--thorough[Use the same settings as --smart and set --collect-extensions to true]' \ '-A[Use a random User-Agent]' \ '--random-agent[Use a random User-Agent]' \ '-f[Append / to each request'\''s URL]' \ diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index 8e3b228..5eaeadb 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -80,6 +80,10 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version information') [CompletionResult]::new('--version', 'version', [CompletionResultType]::ParameterName, 'Print version information') [CompletionResult]::new('--stdin', 'stdin', [CompletionResultType]::ParameterName, 'Read url(s) from STDIN') + [CompletionResult]::new('--burp', 'burp', [CompletionResultType]::ParameterName, 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true') + [CompletionResult]::new('--burp-replay', 'burp-replay', [CompletionResultType]::ParameterName, 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true') + [CompletionResult]::new('--smart', 'smart', [CompletionResultType]::ParameterName, 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true') + [CompletionResult]::new('--thorough', 'thorough', [CompletionResultType]::ParameterName, 'Use the same settings as --smart and set --collect-extensions to true') [CompletionResult]::new('-A', 'A', [CompletionResultType]::ParameterName, 'Use a random User-Agent') [CompletionResult]::new('--random-agent', 'random-agent', [CompletionResultType]::ParameterName, 'Use a random User-Agent') [CompletionResult]::new('-f', 'f', [CompletionResultType]::ParameterName, 'Append / to each request''s URL') diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index eb89374..a3a766a 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -19,7 +19,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --verbosity --silent --quiet --json --output --debug-log --no-state" + opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o --help --version --url --stdin --resume-from --burp --burp-replay --smart --thorough --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --verbosity --silent --quiet --json --output --debug-log --no-state" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index 36d7f06..4050129 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -77,6 +77,10 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand -V 'Print version information' cand --version 'Print version information' cand --stdin 'Read url(s) from STDIN' + cand --burp 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true' + cand --burp-replay 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true' + cand --smart 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true' + cand --thorough 'Use the same settings as --smart and set --collect-extensions to true' cand -A 'Use a random User-Agent' cand --random-agent 'Use a random User-Agent' cand -f 'Append / to each request''s URL' diff --git a/src/config/container.rs b/src/config/container.rs index 2af4f9f..c992e58 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -713,7 +713,7 @@ impl Configuration { config.output_level = OutputLevel::Quiet; } - if args.is_present("auto_tune") { + if args.is_present("auto_tune") || args.is_present("smart") || args.is_present("thorough") { config.auto_tune = true; config.requester_policy = RequesterPolicy::AutoTune; } @@ -731,15 +731,21 @@ impl Configuration { config.dont_filter = true; } - if args.is_present("collect_extensions") { + if args.is_present("collect_extensions") || args.is_present("thorough") { config.collect_extensions = true; } - if args.is_present("collect_backups") { + if args.is_present("collect_backups") + || args.is_present("smart") + || args.is_present("thorough") + { config.collect_backups = true; } - if args.is_present("collect_words") { + if args.is_present("collect_words") + || args.is_present("smart") + || args.is_present("thorough") + { config.collect_words = true; } @@ -757,7 +763,10 @@ impl Configuration { config.add_slash = true; } - if args.is_present("extract_links") { + if args.is_present("extract_links") + || args.is_present("smart") + || args.is_present("thorough") + { config.extract_links = true; } @@ -773,6 +782,14 @@ impl Configuration { update_config_if_present!(&mut config.user_agent, args, "user_agent"); update_config_if_present!(&mut config.timeout, args, "timeout"); + if args.is_present("burp") { + config.proxy = String::from("http://127.0.0.1:8080"); + } + + if args.is_present("burp_replay") { + config.replay_proxy = String::from("http://127.0.0.1:8080"); + } + if args.is_present("random_agent") { config.random_agent = true; } @@ -781,7 +798,8 @@ impl Configuration { config.redirects = true; } - if args.is_present("insecure") { + if args.is_present("insecure") || args.is_present("burp") || args.is_present("burp_replay") + { config.insecure = true; } diff --git a/src/parser.rs b/src/parser.rs index 80337d8..41a90d1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -65,6 +65,36 @@ pub fn initialize() -> Command<'static> { .takes_value(true), ); + ///////////////////////////////////////////////////////////////////// + // group - composite settings + ///////////////////////////////////////////////////////////////////// + let app = app + .arg( + Arg::new("burp") + .long("burp") + .help_heading("Composite settings") + .conflicts_with_all(&["proxy", "insecure", "burp_replay"]) + .help("Set --proxy to http://127.0.0.1:8080 and set --insecure to true"), + ) + .arg( + Arg::new("burp_replay") + .long("burp-replay") + .help_heading("Composite settings") + .conflicts_with_all(&["replay_proxy", "insecure"]) + .help("Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true"), + ) + .arg( + Arg::new("smart") + .long("smart") + .help_heading("Composite settings") + .help("Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true"), + ).arg( + Arg::new("thorough") + .long("thorough") + .help_heading("Composite settings") + .help("Use the same settings as --smart and set --collect-extensions to true"), + ); + ///////////////////////////////////////////////////////////////////// // group - proxy settings ///////////////////////////////////////////////////////////////////// diff --git a/tests/test_banner.rs b/tests/test_banner.rs index 69c1a6d..b6e0df8 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -1279,3 +1279,118 @@ fn banner_prints_collect_words() { .and(predicate::str::contains("─┴─")), ); } + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect words +fn banner_prints_all_composite_settings_smart() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--smart") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Words")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("Extract Links")) + .and(predicate::str::contains("Auto Tune")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect words +fn banner_prints_all_composite_settings_thorough() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--thorough") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Words")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("Extract Links")) + .and(predicate::str::contains("Auto Tune")) + .and(predicate::str::contains("─┴─")), + ); +} +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect words +fn banner_prints_all_composite_settings_burp() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--burp") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Proxy")) + .and(predicate::str::contains("Insecure")) + .and(predicate::str::contains("─┴─")), + ); +} +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect words +fn banner_prints_all_composite_settings_burp_replay() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--burp-replay") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Replay Proxy")) + .and(predicate::str::contains("Insecure")) + .and(predicate::str::contains("─┴─")), + ); +} diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index fb3c6f3..feb7141 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -837,7 +837,7 @@ fn collect_words_makes_appropriate_requests() { .arg(file.as_os_str()) .unwrap(); - print!("{}", std::str::from_utf8(&cmd.stdout).unwrap().to_string()); + print!("{}", std::str::from_utf8(&cmd.stdout).unwrap()); cmd.assert().success().stdout( predicate::str::contains("/doc1") From 86b17f226dd2a75eec6264854e1f2a176ac3c692 Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 4 Mar 2022 21:47:52 -0600 Subject: [PATCH 40/40] removed lint --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index de9bbe8..72258d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,5 @@ #![deny(clippy::all)] #![allow(clippy::mutex_atomic)] -// #![warn(clippy::pedantic, clippy::restriction, clippy::nursery, clippy::cargo)] use anyhow::Result; use reqwest::StatusCode; use std::collections::HashSet;