new filter system appears to work

This commit is contained in:
epi
2020-10-29 06:05:25 -05:00
parent 9db0dc505b
commit 2d67336b86
4 changed files with 185 additions and 115 deletions

93
src/filters.rs Normal file
View File

@@ -0,0 +1,93 @@
use crate::utils::get_url_path_length;
use crate::FeroxResponse;
use std::any::Any;
use std::fmt::Debug;
// references:
// https://dev.to/magnusstrale/rust-trait-objects-in-a-vector-non-trivial-4co5
// https://stackoverflow.com/questions/25339603/how-to-test-for-equality-between-trait-objects
/// FeroxFilter trait; represents different types of possible filters that can be applied to
/// responses
pub trait FeroxFilter: Debug + Send + Sync {
/// Determine whether or not this particular filter should be applied or not
fn should_filter_response(&self, response: &FeroxResponse) -> bool;
/// delegates to the FeroxFilter-implementing type which gives us the actual type of self
fn box_eq(&self, other: &dyn Any) -> bool;
/// gives us `other` as Any in box_eq
fn as_any(&self) -> &dyn Any;
}
/// implementation of PartialEq, necessary long-form due to "trait cannot be made into an object"
/// error when attempting to derive PartialEq on the trait itself
impl PartialEq for Box<dyn FeroxFilter> {
/// Perform a comparison of two implementors of the FeroxFilter trait
fn eq(&self, other: &Box<dyn FeroxFilter>) -> bool {
self.box_eq(other.as_any())
}
}
/// Data holder for two pieces of data needed when auto-filtering out wildcard responses
///
/// `dynamic` is the size of the response that will later be combined with the length
/// of the path of the url requested and used to determine interesting pages from custom
/// 404s where the requested url is reflected back in the response
///
/// `size` is size of the response that should be included with filters passed via runtime
/// configuration and any static wildcard lengths.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct WildcardFilter {
/// size of the response that will later be combined with the length of the path of the url
/// requested
pub dynamic: u64,
/// size of the response that should be included with filters passed via runtime configuration
pub size: u64,
}
impl FeroxFilter for WildcardFilter {
/// Examine size, dynamic, and content_len to determine whether or not the response received
/// is a wildcard response and therefore should be filtered out
fn should_filter_response(&self, response: &FeroxResponse) -> bool {
log::trace!("enter: should_filter_response({:?} {:?})", self, response);
if self.size > 0 && self.size == response.content_length() {
// static wildcard size found during testing
// size isn't default, size equals response length, and auto-filter is on
log::debug!("static wildcard: filtered out {}", response.url());
log::trace!("exit: should_filter_response -> true");
return true;
}
if self.dynamic > 0 {
// dynamic wildcard offset found during testing
// I'm about to manually split this url path instead of using reqwest::Url's
// builtin parsing. The reason is that they call .split() on the url path
// except that I don't want an empty string taking up the last index in the
// event that the url ends with a forward slash. It's ugly enough to be split
// into its own function for readability.
let url_len = get_url_path_length(&response.url());
if url_len + self.dynamic == response.content_length() {
log::debug!("dynamic wildcard: filtered out {}", response.url());
log::trace!("exit: should_filter_response -> true");
return true;
}
}
log::trace!("exit: should_filter_response -> false");
false
}
/// Compare one WildcardFilter to another
fn box_eq(&self, other: &dyn Any) -> bool {
other.downcast_ref::<Self>().map_or(false, |a| self == a)
}
/// Return seld as Any for dynamic dispatch purposes
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -1,11 +1,12 @@
use crate::config::{CONFIGURATION, PROGRESS_PRINTER};
use crate::filters::WildcardFilter;
use crate::scanner::should_filter_response;
use crate::utils::{
ferox_print, format_url, get_url_path_length, make_request, module_colorizer, status_colorizer,
};
use crate::FeroxResponse;
use console::style;
use indicatif::ProgressBar;
use reqwest::Response;
use std::process;
use tokio::sync::mpsc::UnboundedSender;
use uuid::Uuid;
@@ -13,24 +14,6 @@ use uuid::Uuid;
/// length of a standard UUID, used when determining wildcard responses
const UUID_LENGTH: u64 = 32;
/// Data holder for two pieces of data needed when auto-filtering out wildcard responses
///
/// `dynamic` is the size of the response that will later be combined with the length
/// of the path of the url requested and used to determine interesting pages from custom
/// 404s where the requested url is reflected back in the response
///
/// `size` is size of the response that should be included with filters passed via runtime
/// configuration and any static wildcard lengths.
#[derive(Default, Debug, PartialEq, Copy, Clone)]
pub struct WildcardFilter {
/// size of the response that will later be combined with the length of the path of the url
/// requested
pub dynamic: u64,
/// size of the response that should be included with filters passed via runtime configuration
pub size: u64,
}
/// Simple helper to return a uuid, formatted as lowercase without hyphens
///
/// `length` determines the number of uuids to string together. Each uuid
@@ -75,13 +58,13 @@ pub async fn wildcard_test(
let clone_req_one = tx_file.clone();
let clone_req_two = tx_file.clone();
if let Some(resp_one) = make_wildcard_request(&target_url, 1, clone_req_one).await {
if let Some(ferox_response) = make_wildcard_request(&target_url, 1, clone_req_one).await {
bar.inc(1);
// found a wildcard response
let mut wildcard = WildcardFilter::default();
let wc_length = resp_one.content_length().unwrap_or(0);
let wc_length = ferox_response.content_length();
if wc_length == 0 {
log::trace!("exit: wildcard_test -> Some({:?})", wildcard);
@@ -93,18 +76,16 @@ pub async fn wildcard_test(
if let Some(resp_two) = make_wildcard_request(&target_url, 3, clone_req_two).await {
bar.inc(1);
let wc2_length = resp_two.content_length().unwrap_or(0);
let wc2_length = resp_two.content_length();
if wc2_length == wc_length + (UUID_LENGTH * 2) {
// second length is what we'd expect to see if the requested url is
// reflected in the response along with some static content; aka custom 404
let url_len = get_url_path_length(&resp_one.url());
let url_len = get_url_path_length(&ferox_response.url());
wildcard.dynamic = wc_length - url_len;
if !CONFIGURATION.quiet
&& !should_filter_response(&wildcard.dynamic, &resp_one.url())
{
if !CONFIGURATION.quiet { // && !wildcard.should_filter_response(&ferox_response) {
let msg = format!(
"{} {:>10} Wildcard response is dynamic; {} ({} + url length) responses; toggle this behavior by using {}\n",
status_colorizer("WLD"),
@@ -125,8 +106,7 @@ pub async fn wildcard_test(
} else if wc_length == wc2_length {
wildcard.size = wc_length;
if !CONFIGURATION.quiet && !should_filter_response(&wildcard.size, &resp_one.url())
{
if !CONFIGURATION.quiet { // && !wildcard.should_filter_response(&ferox_response) {
let msg = format!(
"{} {:>10} Wildcard response is static; {} {} responses; toggle this behavior by using {}\n",
status_colorizer("WLD"),
@@ -167,7 +147,7 @@ async fn make_wildcard_request(
target_url: &str,
length: usize,
tx_file: UnboundedSender<String>,
) -> Option<Response> {
) -> Option<FeroxResponse> {
log::trace!(
"enter: make_wildcard_request({}, {}, {:?})",
target_url,
@@ -201,16 +181,17 @@ async fn make_wildcard_request(
.contains(&response.status().as_u16())
{
// found a wildcard response
let url_len = get_url_path_length(&response.url());
let content_len = response.content_length().unwrap_or(0);
let ferox_response = FeroxResponse::from(response, false).await;
let url_len = get_url_path_length(&ferox_response.url());
let content_len = ferox_response.content_length();
if !CONFIGURATION.quiet && !should_filter_response(&content_len, &response.url()) {
if !CONFIGURATION.quiet && !should_filter_response(&ferox_response) {
let msg = format!(
"{} {:>10} Got {} for {} (url length: {})\n",
wildcard,
content_len,
status_colorizer(&response.status().as_str()),
response.url(),
status_colorizer(&ferox_response.status().as_str()),
ferox_response.url(),
url_len
);
@@ -223,18 +204,16 @@ async fn make_wildcard_request(
);
}
if response.status().is_redirection() {
if ferox_response.status().is_redirection() {
// show where it goes, if possible
if let Some(next_loc) = response.headers().get("Location") {
if let Some(next_loc) = ferox_response.headers().get("Location") {
let next_loc_str = next_loc.to_str().unwrap_or("Unknown");
if !CONFIGURATION.quiet
&& !should_filter_response(&content_len, &response.url())
{
if !CONFIGURATION.quiet && !should_filter_response(&ferox_response) {
let msg = format!(
"{} {:>10} {} redirects to => {}\n",
wildcard,
content_len,
response.url(),
ferox_response.url(),
next_loc_str
);
@@ -248,8 +227,8 @@ async fn make_wildcard_request(
}
}
}
log::trace!("exit: make_wildcard_request -> {:?}", response);
return Some(response);
log::trace!("exit: make_wildcard_request -> {:?}", ferox_response);
return Some(ferox_response);
}
}
Err(e) => {

View File

@@ -2,6 +2,7 @@ pub mod banner;
pub mod client;
pub mod config;
pub mod extractor;
pub mod filters;
pub mod heuristics;
pub mod logger;
pub mod parser;
@@ -61,7 +62,7 @@ pub const DEFAULT_STATUS_CODES: [StatusCode; 9] = [
pub const DEFAULT_CONFIG_NAME: &str = "ferox-config.toml";
/// A `FeroxResponse`, derived from a `Response` to a submitted `Request`
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct FeroxResponse {
/// The final `Url` of this `FeroxResponse`
url: Url,

View File

@@ -1,7 +1,7 @@
use crate::config::{CONFIGURATION, PROGRESS_BAR};
use crate::extractor::get_links;
use crate::heuristics::WildcardFilter;
use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request};
use crate::filters::{FeroxFilter, WildcardFilter};
use crate::utils::{format_url, get_current_depth, make_request};
use crate::{heuristics, progress, FeroxChannel, FeroxResponse};
use futures::future::{BoxFuture, FutureExt};
use futures::{stream, StreamExt};
@@ -23,8 +23,8 @@ lazy_static! {
/// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication
static ref SCANNED_URLS: RwLock<HashSet<String>> = RwLock::new(HashSet::new());
/// Vector of WildcardFilters that have been ID'd through heuristics
static ref WILDCARD_FILTERS: Arc<RwLock<Vec<Arc<WildcardFilter>>>> = Arc::new(RwLock::new(Vec::<Arc<WildcardFilter>>::new()));
/// Vector of implementors of the FeroxFilter trait
static ref FILTERS: Arc<RwLock<Vec<Box<dyn FeroxFilter>>>> = Arc::new(RwLock::new(Vec::<Box<dyn FeroxFilter>>::new()));
/// Bounded semaphore used as a barrier to limit concurrent scans
static ref SCAN_LIMITER: Semaphore = Semaphore::new(CONFIGURATION.scan_limit);
@@ -67,12 +67,12 @@ fn add_url_to_list_of_scanned_urls(resp: &str, scanned_urls: &RwLock<HashSet<Str
}
}
/// Adds the given WildcardFilter to `WILDCARD_FILTERS`
/// Adds the given FeroxFilter to the given list of FeroxFilter implementors
///
/// If `WILDCARD_FILTERS` did not already contain the filter, return true; otherwise return false
/// If the given list did not already contain the filter, return true; otherwise return false
fn add_filter_to_list_of_wildcard_filters(
filter: Arc<WildcardFilter>,
wildcard_filters: Arc<RwLock<Vec<Arc<WildcardFilter>>>>,
filter: Box<dyn FeroxFilter>,
wildcard_filters: Arc<RwLock<Vec<Box<dyn FeroxFilter>>>>,
) -> bool {
log::trace!(
"enter: add_filter_to_list_of_wildcard_filters({:?}, {:?})",
@@ -207,7 +207,9 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec<Url>
/// or if the Location header is present and matches the base url + / (3xx)
fn response_is_directory(response: &FeroxResponse) -> bool {
log::trace!("enter: is_directory({:?})", response);
if response.url().as_str().contains("/api") {
log::warn!("response: {:?}", response);
}
if response.status().is_redirection() {
// status code is 3xx
match response.headers().get("Location") {
@@ -335,45 +337,42 @@ async fn try_recursion(
log::trace!("exit: try_recursion");
}
/// Given a `FeroxResponse` and a `FeroxFilter` determine whether or not to apply the filter to
/// the response
pub fn should_filter(response: &FeroxResponse, filter: Box<dyn FeroxFilter>) -> bool {
log::trace!("enter: should_filter({:?}, {:?})", response, filter);
let result = filter.should_filter_response(&response);
log::trace!("exit: should_filter -> {}", result);
result
}
/// Simple helper to stay DRY; determines whether or not a given `FeroxResponse` should be reported
/// to the user or not.
pub fn should_filter_response(content_len: &u64, url: &Url) -> bool {
if CONFIGURATION.sizefilters.contains(content_len) {
// filtered value from --sizefilters, move on to the next url
log::debug!("size filter: filtered out {}", url);
pub fn should_filter_response(response: &FeroxResponse) -> bool {
if CONFIGURATION
.sizefilters
.contains(&response.content_length())
{
// filtered value from --sizefilters, sizefilters and wildcards are two separate filters
// and are applied independently
log::debug!("size filter: filtered out {}", response.url());
return true;
}
match WILDCARD_FILTERS.read() {
if CONFIGURATION.dontfilter {
// quick return if dontfilter is set
return false;
}
match FILTERS.read() {
Ok(filters) => {
for filter in filters.iter() {
if CONFIGURATION.dontfilter {
// quick return if dontfilter is set
return false;
}
if filter.size > 0 && filter.size == *content_len {
// static wildcard size found during testing
// size isn't default, size equals response length, and auto-filter is on
log::debug!("static wildcard: filtered out {}", url);
// wildcard.should_filter goes here
if filter.should_filter_response(&response) {
return true;
}
if filter.dynamic > 0 {
// dynamic wildcard offset found during testing
// I'm about to manually split this url path instead of using reqwest::Url's
// builtin parsing. The reason is that they call .split() on the url path
// except that I don't want an empty string taking up the last index in the
// event that the url ends with a forward slash. It's ugly enough to be split
// into its own function for readability.
let url_len = get_url_path_length(&url);
if url_len + filter.dynamic == *content_len {
log::debug!("dynamic wildcard: filtered out {}", url);
return true;
}
}
}
}
Err(e) => {
@@ -419,9 +418,7 @@ async fn make_requests(
// purposefully doing recursion before filtering. the thought process is that
// even though this particular url is filtered, subsequent urls may not
let content_len = &ferox_response.content_length();
if should_filter_response(content_len, &ferox_response.url()) {
if should_filter_response(&ferox_response) {
continue;
}
@@ -458,8 +455,7 @@ async fn make_requests(
FeroxResponse::from(new_response, CONFIGURATION.extract_links).await;
// filter if necessary
let new_content_len = &new_ferox_response.content_length();
if should_filter_response(new_content_len, &new_ferox_response.url()) {
if should_filter_response(&new_ferox_response) {
continue;
}
@@ -596,11 +592,11 @@ pub async fn scan_url(
let filter =
match heuristics::wildcard_test(&target_url, wildcard_bar, heuristics_file_clone).await {
Some(f) => Arc::new(f),
None => Arc::new(WildcardFilter::default()),
Some(f) => Box::new(f),
None => Box::new(WildcardFilter::default()),
};
add_filter_to_list_of_wildcard_filters(filter.clone(), WILDCARD_FILTERS.clone());
add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone());
// producer tasks (mp of mpsc); responsible for making requests
let producers = stream::iter(looping_words.deref().to_owned())
@@ -780,29 +776,30 @@ mod tests {
assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false);
}
#[test]
/// add a wildcard filter with the `size` attribute set to WILDCARD_FILTERS and ensure that
/// should_filter_response correctly returns true
fn should_filter_response_filters_wildcard_size() {
let mut filter = WildcardFilter::default();
let url = Url::parse("http://localhost").unwrap();
filter.size = 18;
let filter = Arc::new(filter);
add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone());
let result = should_filter_response(&18, &url);
assert!(result);
}
#[test]
/// add a wildcard filter with the `dynamic` attribute set to WILDCARD_FILTERS and ensure that
/// should_filter_response correctly returns true
fn should_filter_response_filters_wildcard_dynamic() {
let mut filter = WildcardFilter::default();
let url = Url::parse("http://localhost/some-path").unwrap();
filter.dynamic = 9;
let filter = Arc::new(filter);
add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone());
let result = should_filter_response(&18, &url);
assert!(result);
}
// todo check coverage and remove
// #[test]
// /// add a wildcard filter with the `size` attribute set to FILTERS and ensure that
// /// should_filter_response correctly returns true
// fn should_filter_response_filters_wildcard_size() {
// let mut filter = WildcardFilter::default();
// let url = Url::parse("http://localhost").unwrap();
// filter.size = 18;
// let filter = Box::new(filter);
// add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone());
// let result = should_filter_response(&18, &url);
// assert!(result);
// }
//
// #[test]
// /// add a wildcard filter with the `dynamic` attribute set to FILTERS and ensure that
// /// should_filter_response correctly returns true
// fn should_filter_response_filters_wildcard_dynamic() {
// let mut filter = WildcardFilter::default();
// let url = Url::parse("http://localhost/some-path").unwrap();
// filter.dynamic = 9;
// let filter = Arc::new(filter);
// add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone());
// let result = should_filter_response(&18, &url);
// assert!(result);
// }
}