added extensions and status codes into auto filtering decision calculus

This commit is contained in:
epi
2023-04-11 18:29:12 -05:00
parent c7498a7695
commit e110f86f39
2 changed files with 330 additions and 181 deletions

View File

@@ -1,4 +1,5 @@
use std::sync::Arc;
use std::collections::HashMap;
use anyhow::{bail, Result};
use scraper::{Html, Selector};
@@ -276,133 +277,183 @@ impl HeuristicTests {
None
};
// 4 is due to the array in the nested for loop below
let mut responses = Vec::with_capacity(4);
// 6 is due to the array in the nested for loop below
let mut responses = Vec::with_capacity(6);
let extensions = if self.handles.config.extensions.is_empty() {
vec!["".to_string()]
} else {
let mut exts: Vec<String> = self
.handles
.config
.extensions
.iter()
.map(|ext| format!(".{}", ext))
.collect();
exts.push("".to_string());
exts
};
// for every method, attempt to id its 404 response
//
// a good example of one where the GET/POST differ is on hackthebox:
// - http://prd.m.rendering-api.interface.htb/api
//
// a good example of one where the heuristics return a 403 and a 404 (apache)
// as well as return two different types of 404s based on the file extension
// - http://10.10.11.198 (Encoding box in normal labs)
//
// both methods and extensions can elicit different responses from a given
// server, so both are considered when building auto-filter rules
for method in self.handles.config.methods.iter() {
for (prefix, length) in [("", 1), ("", 3), (".htaccess", 1), ("admin", 1)] {
let path = format!("{prefix}{}", self.unique_string(length));
for extension in extensions.iter() {
for (prefix, length) in [
("", 1),
("", 3),
(".htaccess", 1),
(".htaccess", 3),
("admin", 1),
("admin", 3),
] {
let path = format!("{prefix}{}{extension}", self.unique_string(length));
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
let nonexistent_url = ferox_url.format(&path, slash)?;
let nonexistent_url = ferox_url.format(&path, slash)?;
// example requests:
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
let response =
logged_request(&nonexistent_url, method, data, self.handles.clone()).await;
// example requests:
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
// - http://localhost/.htaccess92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
// - http://localhost/admin92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
let response =
logged_request(&nonexistent_url, method, data, self.handles.clone()).await;
req_counter += 1;
req_counter += 1;
// continue to next on error
let response = skip_fail!(response);
// continue to next on error
let response = skip_fail!(response);
if !self
.handles
.config
.status_codes
.contains(&response.status().as_u16())
{
// if the response code isn't one that's accepted via -s values, then skip to the next
//
// the default value for -s is all status codes, so unless the user says otherwise
// this won't fire
if !self
.handles
.config
.status_codes
.contains(&response.status().as_u16())
{
// if the response code isn't one that's accepted via -s values, then skip to the next
//
// the default value for -s is all status codes, so unless the user says otherwise
// this won't fire
continue;
}
let ferox_response = FeroxResponse::from(
response,
&ferox_url.target,
method,
self.handles.config.output_level,
)
.await;
responses.push(ferox_response);
}
if responses.len() < 2 {
// don't have enough responses to make a determination, continue to next method
responses.clear();
continue;
}
let ferox_response = FeroxResponse::from(
response,
&ferox_url.target,
method,
// check the responses for similarities on which we can filter, multiple may be returned
let Some((wildcard_filters, wildcard_responses)) = self.examine_404_like_responses(&responses) else {
// no match was found during analysis of responses
responses.clear();
log::warn!("no match found for 404 responses");
continue;
};
// report to the user, if appropriate
if matches!(
self.handles.config.output_level,
)
.await;
OutputLevel::Default | OutputLevel::Quiet
) {
// sentry value to control whether or not to print the filter
// used because we only want to print the same filter once
let mut print_sentry;
responses.push(ferox_response);
}
if let Ok(filters) = self.handles.filters.data.filters.read() {
for new_wildcard in &wildcard_filters {
// reset the sentry for every new wildcard produced by examine_404_like_responses
print_sentry = true;
if responses.len() < 2 {
// don't have enough responses to make a determination, continue to next method
responses.clear();
continue;
}
for other in filters.iter() {
if let Some(other_wildcard) =
other.as_any().downcast_ref::<WildcardFilter>()
{
// check the new wildcard against all existing wildcards, if it was added
// on the cli or by a previous directory, don't print it
if new_wildcard.as_ref() == other_wildcard {
print_sentry = false;
break;
}
}
}
// Command::AddFilter, &str (bytes/words/lines), usize (i.e. length associated with the type)
let Some(filter) = self.examine_404_like_responses(&responses) else {
// no match was found during analysis of responses
responses.clear();
continue;
};
// report to the user, if appropriate
if matches!(
self.handles.config.output_level,
OutputLevel::Default | OutputLevel::Quiet
) {
// sentry value to control whether or not to print the filter
// used because we only want to print the same filter once
let mut print_sentry = true;
if let Ok(filters) = self.handles.filters.data.filters.read() {
for other in filters.iter() {
if let Some(other_wildcard) =
other.as_any().downcast_ref::<WildcardFilter>()
{
if &*filter == other_wildcard {
print_sentry = false;
break;
// if we're here, we've found a new wildcard that we didn't previously display, print it
if print_sentry {
ferox_print(&format!("{}", new_wildcard), &PROGRESS_PRINTER);
}
}
}
}
if print_sentry {
ferox_print(&format!("{}", filter), &PROGRESS_PRINTER);
// create the new filter
for wildcard in wildcard_filters {
self.handles.filters.send(Command::AddFilter(wildcard))?;
}
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
//
// in addition, we'll create a similarity filter as a fallback
for resp in wildcard_responses {
let hash = SIM_HASHER.create_signature(preprocess(resp.text()).iter());
let sim_filter = SimilarityFilter {
hash,
original_url: resp.url().to_string(),
};
self.handles
.filters
.send(Command::AddFilter(Box::new(sim_filter)))?;
if resp.is_directory() {
// response is either a 3XX with a Location header that matches url + '/'
// or it's a 2XX that ends with a '/'
// or it's a 403 that ends with a '/'
// set the wildcard flag to true, so we can check it when preventing
// recursion in event_handlers/scans.rs
// we'd need to clone the response to give ownership to the global list anyway
// so we'll also use that clone to set the wildcard flag
let mut cloned_resp = resp.clone();
cloned_resp.set_wildcard(true);
// add the response to the global list of responses
RESPONSES.insert(cloned_resp);
// function-internal magic number, indicates that we've detected a wildcard directory
req_counter += 100;
}
}
// reset the responses for the next method, if it exists
responses.clear();
}
// create the new filter
self.handles.filters.send(Command::AddFilter(filter))?;
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
//
// in addition, we'll create a similarity filter as a fallback
let hash = SIM_HASHER.create_signature(preprocess(responses[0].text()).iter());
let sim_filter = SimilarityFilter {
hash,
original_url: responses[0].url().to_string(),
};
self.handles
.filters
.send(Command::AddFilter(Box::new(sim_filter)))?;
if responses[0].is_directory() {
// response is either a 3XX with a Location header that matches url + '/'
// or it's a 2XX that ends with a '/'
// or it's a 403 that ends with a '/'
// set the wildcard flag to true, so we can check it when preventing
// recursion in event_handlers/scans.rs
responses[0].set_wildcard(true);
// add the response to the global list of responses
RESPONSES.insert(responses[0].clone());
// function-internal magic number, indicates that we've detected a wildcard directory
req_counter += 100;
}
// reset the responses for the next method, if it exists
responses.clear();
}
log::trace!("exit: detect_404_like_responses");
@@ -416,96 +467,137 @@ impl HeuristicTests {
Ok(Some(retval))
}
/// for all responses, examine chars/words/lines
/// if all responses respective lengths match each other, we can assume
/// that will remain true for subsequent non-existent urls
/// for all responses, group them by status code, then examine chars/words/lines.
/// if all responses' respective lengths within a status code grouping match
/// each other, we can assume that will remain true for subsequent non-existent urls
///
/// values are examined from most to least specific (content length, word count, line count)
fn examine_404_like_responses(
/// within a status code grouping, values are examined from most to
/// least specific (content length, word count, line count)
fn examine_404_like_responses<'a>(
&self,
responses: &[FeroxResponse],
) -> Option<Box<WildcardFilter>> {
responses: &'a [FeroxResponse],
) -> Option<(Vec<Box<WildcardFilter>>, Vec<&'a FeroxResponse>)> {
// aside from word/line/byte counts, additional discriminators are status code
// extension, and request method. The request method and extension are handled by
// the caller, since they're part of the request and make up the nested for loops
// in detect_404_like_responses.
//
// The status code is handled here, since it's part of the response to catch cases
// where we have something like a 403 and a 404
let mut size_sentry = true;
let mut word_sentry = true;
let mut line_sentry = true;
let method = responses[0].method();
let status_code = responses[0].status();
let content_length = responses[0].content_length();
let word_count = responses[0].word_count();
let line_count = responses[0].line_count();
// returned vec of boxed wildcard filters
let mut wildcards = Vec::new();
for response in &responses[1..] {
// if any of the responses differ in length, that particular
// response length type is no longer a candidate for filtering
if response.content_length() != content_length {
size_sentry = false;
}
// returned vec of ferox responses that are needed for additional
// analysis
let mut wild_responses = Vec::new();
if response.word_count() != word_count {
word_sentry = false;
}
// mapping of grouped responses to status code
let mut grouped_responses = HashMap::new();
if response.line_count() != line_count {
line_sentry = false;
}
// iterate over all responses and add each response to its
// corresponding status code group
for response in responses {
grouped_responses
.entry(response.status())
.or_insert_with(Vec::new)
.push(response);
}
if !size_sentry && !word_sentry && !line_sentry {
// none of the response lengths match, so we can't filter on any of them
return None;
// iterate over each grouped response and determine the most specific
// filter that can be applied to all responses in the group, i.e.
// start from byte count and work 'out' to line count
for (_, response_group) in &grouped_responses {
if response_group.len() < 2 {
// not enough responses to make a determination
continue;
}
let method = response_group[0].method();
let status_code = response_group[0].status();
let content_length = response_group[0].content_length();
let word_count = response_group[0].word_count();
let line_count = response_group[0].line_count();
for response in &response_group[1..] {
// if any of the responses differ in length, that particular
// response length type is no longer a candidate for filtering
if response.content_length() != content_length {
size_sentry = false;
}
if response.word_count() != word_count {
word_sentry = false;
}
if response.line_count() != line_count {
line_sentry = false;
}
}
if !size_sentry && !word_sentry && !line_sentry {
// none of the response lengths match, so we can't filter on any of them
continue;
}
let mut wildcard = WildcardFilter {
content_length: None,
line_count: None,
word_count: None,
method: method.to_string(),
status_code: status_code.as_u16(),
dont_filter: self.handles.config.dont_filter,
};
match (size_sentry, word_sentry, line_sentry) {
(true, true, true) => {
// all three types of length match, so we can't filter on any of them
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, true, false) => {
// content length and word count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
}
(true, false, true) => {
// content length and line count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.line_count = Some(line_count);
}
(false, true, true) => {
// word count and line count match, so we can filter on either
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, false, false) => {
// content length matches, so we can filter on that
wildcard.content_length = Some(content_length);
}
(false, true, false) => {
// word count matches, so we can filter on that
wildcard.word_count = Some(word_count);
}
(false, false, true) => {
// line count matches, so we can filter on that
wildcard.line_count = Some(line_count);
}
(false, false, false) => {
// none of the length types match, so we can't filter on any of them
unreachable!("no wildcard size matches; handled by the if statement above");
}
};
wild_responses.push(response_group[0]);
wildcards.push(Box::new(wildcard));
}
let mut wildcard = WildcardFilter {
content_length: None,
line_count: None,
word_count: None,
method: method.to_string(),
status_code: status_code.as_u16(),
dont_filter: self.handles.config.dont_filter,
};
match (size_sentry, word_sentry, line_sentry) {
(true, true, true) => {
// all three types of length match, so we can't filter on any of them
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, true, false) => {
// content length and word count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
}
(true, false, true) => {
// content length and line count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.line_count = Some(line_count);
}
(false, true, true) => {
// word count and line count match, so we can filter on either
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, false, false) => {
// content length matches, so we can filter on that
wildcard.content_length = Some(content_length);
}
(false, true, false) => {
// word count matches, so we can filter on that
wildcard.word_count = Some(word_count);
}
(false, false, true) => {
// line count matches, so we can filter on that
wildcard.line_count = Some(line_count);
}
(false, false, false) => {
// none of the length types match, so we can't filter on any of them
unreachable!("no wildcard size matches; handled by the if statement above");
}
};
Some(Box::new(wildcard))
Some((wildcards, wild_responses))
}
}

View File

@@ -164,7 +164,7 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
let mock = srv.mock(|when, then| {
when.method(GET)
.path_matches(Regex::new("/[a-zA-Z0-9]{32}/").unwrap());
.path_matches(Regex::new("/[.a-zA-Z0-9]{32,}/").unwrap());
then.status(200).body("this is a test");
});
@@ -188,7 +188,8 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
.and(predicate::str::contains("1l")),
);
assert_eq!(mock.hits(), 1);
assert_eq!(mock.hits(), 6);
Ok(())
}
@@ -305,11 +306,67 @@ fn heuristics_wildcard_test_with_two_static_wildcards_with_silent_enabled(
.success()
.stdout(predicate::str::contains(srv.url("/")));
assert_eq!(mock.hits(), 4);
assert_eq!(mock.hits(), 6);
assert_eq!(mock2.hits(), 1);
Ok(())
}
#[test]
/// test finds a 404-like response that returns a 403 and a 403 directory should still be allowed
/// to be tested for recrusion
fn heuristics_wildcard_test_that_auto_filtering_403s_still_allows_for_recursion_into_403_directories(
) -> Result<(), Box<dyn std::error::Error>> {
let srv = MockServer::start();
let super_long = String::from("92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f09d471004154b83bcc1c6ec49f6f");
let (tmp_dir, file) =
setup_tmp_directory(&["LICENSE".to_string(), super_long.clone()], "wordlist")?;
srv.mock(|when, then| {
when.method(GET)
.path_matches(Regex::new("/.?[a-zA-Z0-9]{32,103}").unwrap());
then.status(403)
.body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
});
srv.mock(|when, then| {
when.method(GET).path("/LICENSE/");
then.status(403)
.body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
});
srv.mock(|when, then| {
when.method(GET).path(format!("/LICENSE/{}", super_long));
then.status(200);
});
let cmd = Command::cargo_bin("feroxbuster")
.unwrap()
.arg("--url")
.arg(srv.url("/"))
.arg("--wordlist")
.arg(file.as_os_str())
.arg("--add-slash")
.unwrap();
teardown_tmp_directory(tmp_dir);
cmd.assert().success().stdout(
predicate::str::contains("GET")
.and(predicate::str::contains(
"Auto-filtering found 404-like response and created new filter",
))
.and(predicate::str::contains("403"))
.and(predicate::str::contains("1l"))
.and(predicate::str::contains("4w"))
.and(predicate::str::contains("46c"))
.and(predicate::str::contains(srv.url("/LICENSE/LICENSE/"))),
);
Ok(())
}
// #[test]
// /// test finds a static wildcard and reports as much to stdout and a file
// fn heuristics_wildcard_test_with_two_static_wildcards_and_output_to_file() {