diff --git a/src/heuristics.rs b/src/heuristics.rs index ca7958c..20d5d6e 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -1,4 +1,5 @@ use std::sync::Arc; +use std::collections::HashMap; use anyhow::{bail, Result}; use scraper::{Html, Selector}; @@ -276,133 +277,183 @@ impl HeuristicTests { None }; - // 4 is due to the array in the nested for loop below - let mut responses = Vec::with_capacity(4); + // 6 is due to the array in the nested for loop below + let mut responses = Vec::with_capacity(6); + + let extensions = if self.handles.config.extensions.is_empty() { + vec!["".to_string()] + } else { + let mut exts: Vec = self + .handles + .config + .extensions + .iter() + .map(|ext| format!(".{}", ext)) + .collect(); + exts.push("".to_string()); + exts + }; // for every method, attempt to id its 404 response // // a good example of one where the GET/POST differ is on hackthebox: // - http://prd.m.rendering-api.interface.htb/api + // + // a good example of one where the heuristics return a 403 and a 404 (apache) + // as well as return two different types of 404s based on the file extension + // - http://10.10.11.198 (Encoding box in normal labs) + // + // both methods and extensions can elicit different responses from a given + // server, so both are considered when building auto-filter rules for method in self.handles.config.methods.iter() { - for (prefix, length) in [("", 1), ("", 3), (".htaccess", 1), ("admin", 1)] { - let path = format!("{prefix}{}", self.unique_string(length)); + for extension in extensions.iter() { + for (prefix, length) in [ + ("", 1), + ("", 3), + (".htaccess", 1), + (".htaccess", 3), + ("admin", 1), + ("admin", 3), + ] { + let path = format!("{prefix}{}{extension}", self.unique_string(length)); - let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone()); + let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone()); - let nonexistent_url = ferox_url.format(&path, slash)?; + let nonexistent_url = ferox_url.format(&path, slash)?; - // example requests: - // - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea - // - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f - // - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09 - // - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280 - let response = - logged_request(&nonexistent_url, method, data, self.handles.clone()).await; + // example requests: + // - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea + // - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f + // - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09 + // - http://localhost/.htaccess92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f + // - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280 + // - http://localhost/admin92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f + let response = + logged_request(&nonexistent_url, method, data, self.handles.clone()).await; - req_counter += 1; + req_counter += 1; - // continue to next on error - let response = skip_fail!(response); + // continue to next on error + let response = skip_fail!(response); - if !self - .handles - .config - .status_codes - .contains(&response.status().as_u16()) - { - // if the response code isn't one that's accepted via -s values, then skip to the next - // - // the default value for -s is all status codes, so unless the user says otherwise - // this won't fire + if !self + .handles + .config + .status_codes + .contains(&response.status().as_u16()) + { + // if the response code isn't one that's accepted via -s values, then skip to the next + // + // the default value for -s is all status codes, so unless the user says otherwise + // this won't fire + continue; + } + + let ferox_response = FeroxResponse::from( + response, + &ferox_url.target, + method, + self.handles.config.output_level, + ) + .await; + + responses.push(ferox_response); + } + + if responses.len() < 2 { + // don't have enough responses to make a determination, continue to next method + responses.clear(); continue; } - let ferox_response = FeroxResponse::from( - response, - &ferox_url.target, - method, + // check the responses for similarities on which we can filter, multiple may be returned + let Some((wildcard_filters, wildcard_responses)) = self.examine_404_like_responses(&responses) else { + // no match was found during analysis of responses + responses.clear(); + log::warn!("no match found for 404 responses"); + continue; + }; + + // report to the user, if appropriate + if matches!( self.handles.config.output_level, - ) - .await; + OutputLevel::Default | OutputLevel::Quiet + ) { + // sentry value to control whether or not to print the filter + // used because we only want to print the same filter once + let mut print_sentry; - responses.push(ferox_response); - } + if let Ok(filters) = self.handles.filters.data.filters.read() { + for new_wildcard in &wildcard_filters { + // reset the sentry for every new wildcard produced by examine_404_like_responses + print_sentry = true; - if responses.len() < 2 { - // don't have enough responses to make a determination, continue to next method - responses.clear(); - continue; - } + for other in filters.iter() { + if let Some(other_wildcard) = + other.as_any().downcast_ref::() + { + // check the new wildcard against all existing wildcards, if it was added + // on the cli or by a previous directory, don't print it + if new_wildcard.as_ref() == other_wildcard { + print_sentry = false; + break; + } + } + } - // Command::AddFilter, &str (bytes/words/lines), usize (i.e. length associated with the type) - let Some(filter) = self.examine_404_like_responses(&responses) else { - // no match was found during analysis of responses - responses.clear(); - continue; - }; - - // report to the user, if appropriate - if matches!( - self.handles.config.output_level, - OutputLevel::Default | OutputLevel::Quiet - ) { - // sentry value to control whether or not to print the filter - // used because we only want to print the same filter once - let mut print_sentry = true; - - if let Ok(filters) = self.handles.filters.data.filters.read() { - for other in filters.iter() { - if let Some(other_wildcard) = - other.as_any().downcast_ref::() - { - if &*filter == other_wildcard { - print_sentry = false; - break; + // if we're here, we've found a new wildcard that we didn't previously display, print it + if print_sentry { + ferox_print(&format!("{}", new_wildcard), &PROGRESS_PRINTER); } } } } - if print_sentry { - ferox_print(&format!("{}", filter), &PROGRESS_PRINTER); + // create the new filter + for wildcard in wildcard_filters { + self.handles.filters.send(Command::AddFilter(wildcard))?; } + + // if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line + // + // in addition, we'll create a similarity filter as a fallback + for resp in wildcard_responses { + let hash = SIM_HASHER.create_signature(preprocess(resp.text()).iter()); + + let sim_filter = SimilarityFilter { + hash, + original_url: resp.url().to_string(), + }; + + self.handles + .filters + .send(Command::AddFilter(Box::new(sim_filter)))?; + + if resp.is_directory() { + // response is either a 3XX with a Location header that matches url + '/' + // or it's a 2XX that ends with a '/' + // or it's a 403 that ends with a '/' + + // set the wildcard flag to true, so we can check it when preventing + // recursion in event_handlers/scans.rs + + // we'd need to clone the response to give ownership to the global list anyway + // so we'll also use that clone to set the wildcard flag + let mut cloned_resp = resp.clone(); + + cloned_resp.set_wildcard(true); + + // add the response to the global list of responses + RESPONSES.insert(cloned_resp); + + // function-internal magic number, indicates that we've detected a wildcard directory + req_counter += 100; + } + } + + // reset the responses for the next method, if it exists + responses.clear(); } - - // create the new filter - self.handles.filters.send(Command::AddFilter(filter))?; - - // if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line - // - // in addition, we'll create a similarity filter as a fallback - let hash = SIM_HASHER.create_signature(preprocess(responses[0].text()).iter()); - - let sim_filter = SimilarityFilter { - hash, - original_url: responses[0].url().to_string(), - }; - - self.handles - .filters - .send(Command::AddFilter(Box::new(sim_filter)))?; - - if responses[0].is_directory() { - // response is either a 3XX with a Location header that matches url + '/' - // or it's a 2XX that ends with a '/' - // or it's a 403 that ends with a '/' - - // set the wildcard flag to true, so we can check it when preventing - // recursion in event_handlers/scans.rs - responses[0].set_wildcard(true); - - // add the response to the global list of responses - RESPONSES.insert(responses[0].clone()); - - // function-internal magic number, indicates that we've detected a wildcard directory - req_counter += 100; - } - - // reset the responses for the next method, if it exists - responses.clear(); } log::trace!("exit: detect_404_like_responses"); @@ -416,96 +467,137 @@ impl HeuristicTests { Ok(Some(retval)) } - /// for all responses, examine chars/words/lines - /// if all responses respective lengths match each other, we can assume - /// that will remain true for subsequent non-existent urls + /// for all responses, group them by status code, then examine chars/words/lines. + /// if all responses' respective lengths within a status code grouping match + /// each other, we can assume that will remain true for subsequent non-existent urls /// - /// values are examined from most to least specific (content length, word count, line count) - fn examine_404_like_responses( + /// within a status code grouping, values are examined from most to + /// least specific (content length, word count, line count) + fn examine_404_like_responses<'a>( &self, - responses: &[FeroxResponse], - ) -> Option> { + responses: &'a [FeroxResponse], + ) -> Option<(Vec>, Vec<&'a FeroxResponse>)> { + // aside from word/line/byte counts, additional discriminators are status code + // extension, and request method. The request method and extension are handled by + // the caller, since they're part of the request and make up the nested for loops + // in detect_404_like_responses. + // + // The status code is handled here, since it's part of the response to catch cases + // where we have something like a 403 and a 404 + let mut size_sentry = true; let mut word_sentry = true; let mut line_sentry = true; - let method = responses[0].method(); - let status_code = responses[0].status(); - let content_length = responses[0].content_length(); - let word_count = responses[0].word_count(); - let line_count = responses[0].line_count(); + // returned vec of boxed wildcard filters + let mut wildcards = Vec::new(); - for response in &responses[1..] { - // if any of the responses differ in length, that particular - // response length type is no longer a candidate for filtering - if response.content_length() != content_length { - size_sentry = false; - } + // returned vec of ferox responses that are needed for additional + // analysis + let mut wild_responses = Vec::new(); - if response.word_count() != word_count { - word_sentry = false; - } + // mapping of grouped responses to status code + let mut grouped_responses = HashMap::new(); - if response.line_count() != line_count { - line_sentry = false; - } + // iterate over all responses and add each response to its + // corresponding status code group + for response in responses { + grouped_responses + .entry(response.status()) + .or_insert_with(Vec::new) + .push(response); } - if !size_sentry && !word_sentry && !line_sentry { - // none of the response lengths match, so we can't filter on any of them - return None; + // iterate over each grouped response and determine the most specific + // filter that can be applied to all responses in the group, i.e. + // start from byte count and work 'out' to line count + for (_, response_group) in &grouped_responses { + if response_group.len() < 2 { + // not enough responses to make a determination + continue; + } + + let method = response_group[0].method(); + let status_code = response_group[0].status(); + let content_length = response_group[0].content_length(); + let word_count = response_group[0].word_count(); + let line_count = response_group[0].line_count(); + + for response in &response_group[1..] { + // if any of the responses differ in length, that particular + // response length type is no longer a candidate for filtering + if response.content_length() != content_length { + size_sentry = false; + } + + if response.word_count() != word_count { + word_sentry = false; + } + + if response.line_count() != line_count { + line_sentry = false; + } + } + + if !size_sentry && !word_sentry && !line_sentry { + // none of the response lengths match, so we can't filter on any of them + continue; + } + + let mut wildcard = WildcardFilter { + content_length: None, + line_count: None, + word_count: None, + method: method.to_string(), + status_code: status_code.as_u16(), + dont_filter: self.handles.config.dont_filter, + }; + + match (size_sentry, word_sentry, line_sentry) { + (true, true, true) => { + // all three types of length match, so we can't filter on any of them + wildcard.content_length = Some(content_length); + wildcard.word_count = Some(word_count); + wildcard.line_count = Some(line_count); + } + (true, true, false) => { + // content length and word count match, so we can filter on either + wildcard.content_length = Some(content_length); + wildcard.word_count = Some(word_count); + } + (true, false, true) => { + // content length and line count match, so we can filter on either + wildcard.content_length = Some(content_length); + wildcard.line_count = Some(line_count); + } + (false, true, true) => { + // word count and line count match, so we can filter on either + wildcard.word_count = Some(word_count); + wildcard.line_count = Some(line_count); + } + (true, false, false) => { + // content length matches, so we can filter on that + wildcard.content_length = Some(content_length); + } + (false, true, false) => { + // word count matches, so we can filter on that + wildcard.word_count = Some(word_count); + } + (false, false, true) => { + // line count matches, so we can filter on that + wildcard.line_count = Some(line_count); + } + (false, false, false) => { + // none of the length types match, so we can't filter on any of them + unreachable!("no wildcard size matches; handled by the if statement above"); + } + }; + + wild_responses.push(response_group[0]); + wildcards.push(Box::new(wildcard)); } - let mut wildcard = WildcardFilter { - content_length: None, - line_count: None, - word_count: None, - method: method.to_string(), - status_code: status_code.as_u16(), - dont_filter: self.handles.config.dont_filter, - }; - - match (size_sentry, word_sentry, line_sentry) { - (true, true, true) => { - // all three types of length match, so we can't filter on any of them - wildcard.content_length = Some(content_length); - wildcard.word_count = Some(word_count); - wildcard.line_count = Some(line_count); - } - (true, true, false) => { - // content length and word count match, so we can filter on either - wildcard.content_length = Some(content_length); - wildcard.word_count = Some(word_count); - } - (true, false, true) => { - // content length and line count match, so we can filter on either - wildcard.content_length = Some(content_length); - wildcard.line_count = Some(line_count); - } - (false, true, true) => { - // word count and line count match, so we can filter on either - wildcard.word_count = Some(word_count); - wildcard.line_count = Some(line_count); - } - (true, false, false) => { - // content length matches, so we can filter on that - wildcard.content_length = Some(content_length); - } - (false, true, false) => { - // word count matches, so we can filter on that - wildcard.word_count = Some(word_count); - } - (false, false, true) => { - // line count matches, so we can filter on that - wildcard.line_count = Some(line_count); - } - (false, false, false) => { - // none of the length types match, so we can't filter on any of them - unreachable!("no wildcard size matches; handled by the if statement above"); - } - }; - - Some(Box::new(wildcard)) + Some((wildcards, wild_responses)) } } diff --git a/tests/test_heuristics.rs b/tests/test_heuristics.rs index 6696a03..c86df67 100644 --- a/tests/test_heuristics.rs +++ b/tests/test_heuristics.rs @@ -164,7 +164,7 @@ fn test_static_wildcard_request_found() -> Result<(), Box let mock = srv.mock(|when, then| { when.method(GET) - .path_matches(Regex::new("/[a-zA-Z0-9]{32}/").unwrap()); + .path_matches(Regex::new("/[.a-zA-Z0-9]{32,}/").unwrap()); then.status(200).body("this is a test"); }); @@ -188,7 +188,8 @@ fn test_static_wildcard_request_found() -> Result<(), Box .and(predicate::str::contains("1l")), ); - assert_eq!(mock.hits(), 1); + assert_eq!(mock.hits(), 6); + Ok(()) } @@ -305,11 +306,67 @@ fn heuristics_wildcard_test_with_two_static_wildcards_with_silent_enabled( .success() .stdout(predicate::str::contains(srv.url("/"))); - assert_eq!(mock.hits(), 4); + assert_eq!(mock.hits(), 6); assert_eq!(mock2.hits(), 1); Ok(()) } +#[test] +/// test finds a 404-like response that returns a 403 and a 403 directory should still be allowed +/// to be tested for recrusion +fn heuristics_wildcard_test_that_auto_filtering_403s_still_allows_for_recursion_into_403_directories( +) -> Result<(), Box> { + let srv = MockServer::start(); + + let super_long = String::from("92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f09d471004154b83bcc1c6ec49f6f"); + + let (tmp_dir, file) = + setup_tmp_directory(&["LICENSE".to_string(), super_long.clone()], "wordlist")?; + + srv.mock(|when, then| { + when.method(GET) + .path_matches(Regex::new("/.?[a-zA-Z0-9]{32,103}").unwrap()); + then.status(403) + .body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + }); + + srv.mock(|when, then| { + when.method(GET).path("/LICENSE/"); + then.status(403) + .body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + }); + + srv.mock(|when, then| { + when.method(GET).path(format!("/LICENSE/{}", super_long)); + then.status(200); + }); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--add-slash") + .unwrap(); + + teardown_tmp_directory(tmp_dir); + + cmd.assert().success().stdout( + predicate::str::contains("GET") + .and(predicate::str::contains( + "Auto-filtering found 404-like response and created new filter", + )) + .and(predicate::str::contains("403")) + .and(predicate::str::contains("1l")) + .and(predicate::str::contains("4w")) + .and(predicate::str::contains("46c")) + .and(predicate::str::contains(srv.url("/LICENSE/LICENSE/"))), + ); + + Ok(()) +} + // #[test] // /// test finds a static wildcard and reports as much to stdout and a file // fn heuristics_wildcard_test_with_two_static_wildcards_and_output_to_file() {