diff --git a/src/filters.rs b/src/filters.rs new file mode 100644 index 0000000..89c4bc2 --- /dev/null +++ b/src/filters.rs @@ -0,0 +1,93 @@ +use crate::utils::get_url_path_length; +use crate::FeroxResponse; +use std::any::Any; +use std::fmt::Debug; + +// references: +// https://dev.to/magnusstrale/rust-trait-objects-in-a-vector-non-trivial-4co5 +// https://stackoverflow.com/questions/25339603/how-to-test-for-equality-between-trait-objects + +/// FeroxFilter trait; represents different types of possible filters that can be applied to +/// responses +pub trait FeroxFilter: Debug + Send + Sync { + /// Determine whether or not this particular filter should be applied + fn should_filter_response(&self, response: &FeroxResponse) -> bool; + + /// delegates to the FeroxFilter-implementing type which gives us the actual type of self + fn box_eq(&self, other: &dyn Any) -> bool; + + /// gives us `other` as Any in box_eq + fn as_any(&self) -> &dyn Any; +} + +/// implementation of PartialEq, necessary long-form due to "trait cannot be made into an object" +/// error when attempting to derive PartialEq on the trait itself +impl PartialEq for Box { + /// Perform a comparison of two implementors of the FeroxFilter trait + fn eq(&self, other: &Box) -> bool { + self.box_eq(other.as_any()) + } +} + +/// Data holder for two pieces of data needed when auto-filtering out wildcard responses +/// +/// `dynamic` is the size of the response that will later be combined with the length +/// of the path of the url requested and used to determine interesting pages from custom +/// 404s where the requested url is reflected back in the response +/// +/// `size` is size of the response that should be included with filters passed via runtime +/// configuration and any static wildcard lengths. 
+#[derive(Debug, Default, Clone, PartialEq)] +pub struct WildcardFilter { + /// size of the response that will later be combined with the length of the path of the url + /// requested + pub dynamic: u64, + + /// size of the response that should be included with filters passed via runtime configuration + pub size: u64, +} + +impl FeroxFilter for WildcardFilter { + /// Examine size, dynamic, and content_len to determine whether or not the response received + /// is a wildcard response and therefore should be filtered out + fn should_filter_response(&self, response: &FeroxResponse) -> bool { + log::trace!("enter: should_filter_response({:?} {:?})", self, response); + + if self.size > 0 && self.size == response.content_length() { + // static wildcard size found during testing + // size isn't default, size equals response length, and auto-filter is on + log::debug!("static wildcard: filtered out {}", response.url()); + log::trace!("exit: should_filter_response -> true"); + return true; + } + + if self.dynamic > 0 { + // dynamic wildcard offset found during testing + + // I'm about to manually split this url path instead of using reqwest::Url's + // builtin parsing. The reason is that they call .split() on the url path + // except that I don't want an empty string taking up the last index in the + // event that the url ends with a forward slash. It's ugly enough to be split + // into its own function for readability. 
+ let url_len = get_url_path_length(&response.url()); + + if url_len + self.dynamic == response.content_length() { + log::debug!("dynamic wildcard: filtered out {}", response.url()); + log::trace!("exit: should_filter_response -> true"); + return true; + } + } + log::trace!("exit: should_filter_response -> false"); + false + } + + /// Compare one WildcardFilter to another + fn box_eq(&self, other: &dyn Any) -> bool { + other.downcast_ref::().map_or(false, |a| self == a) + } + + /// Return self as Any for dynamic dispatch purposes + fn as_any(&self) -> &dyn Any { + self + } +} diff --git a/src/heuristics.rs b/src/heuristics.rs index 1e0d7be..c67feaa 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -1,11 +1,12 @@ use crate::config::{CONFIGURATION, PROGRESS_PRINTER}; +use crate::filters::WildcardFilter; use crate::scanner::should_filter_response; use crate::utils::{ ferox_print, format_url, get_url_path_length, make_request, module_colorizer, status_colorizer, }; +use crate::FeroxResponse; use console::style; use indicatif::ProgressBar; -use reqwest::Response; use std::process; use tokio::sync::mpsc::UnboundedSender; use uuid::Uuid; @@ -13,24 +14,6 @@ use uuid::Uuid; /// length of a standard UUID, used when determining wildcard responses const UUID_LENGTH: u64 = 32; -/// Data holder for two pieces of data needed when auto-filtering out wildcard responses -/// -/// `dynamic` is the size of the response that will later be combined with the length -/// of the path of the url requested and used to determine interesting pages from custom -/// 404s where the requested url is reflected back in the response -/// -/// `size` is size of the response that should be included with filters passed via runtime -/// configuration and any static wildcard lengths. 
-#[derive(Default, Debug, PartialEq, Copy, Clone)] -pub struct WildcardFilter { - /// size of the response that will later be combined with the length of the path of the url - /// requested - pub dynamic: u64, - - /// size of the response that should be included with filters passed via runtime configuration - pub size: u64, -} - /// Simple helper to return a uuid, formatted as lowercase without hyphens /// /// `length` determines the number of uuids to string together. Each uuid @@ -75,13 +58,13 @@ pub async fn wildcard_test( let clone_req_one = tx_file.clone(); let clone_req_two = tx_file.clone(); - if let Some(resp_one) = make_wildcard_request(&target_url, 1, clone_req_one).await { + if let Some(ferox_response) = make_wildcard_request(&target_url, 1, clone_req_one).await { bar.inc(1); // found a wildcard response let mut wildcard = WildcardFilter::default(); - let wc_length = resp_one.content_length().unwrap_or(0); + let wc_length = ferox_response.content_length(); if wc_length == 0 { log::trace!("exit: wildcard_test -> Some({:?})", wildcard); @@ -93,18 +76,16 @@ pub async fn wildcard_test( if let Some(resp_two) = make_wildcard_request(&target_url, 3, clone_req_two).await { bar.inc(1); - let wc2_length = resp_two.content_length().unwrap_or(0); + let wc2_length = resp_two.content_length(); if wc2_length == wc_length + (UUID_LENGTH * 2) { // second length is what we'd expect to see if the requested url is // reflected in the response along with some static content; aka custom 404 - let url_len = get_url_path_length(&resp_one.url()); + let url_len = get_url_path_length(&ferox_response.url()); wildcard.dynamic = wc_length - url_len; - if !CONFIGURATION.quiet - && !should_filter_response(&wildcard.dynamic, &resp_one.url()) - { + if !CONFIGURATION.quiet { // && !wildcard.should_filter_response(&ferox_response) { let msg = format!( "{} {:>10} Wildcard response is dynamic; {} ({} + url length) responses; toggle this behavior by using {}\n", status_colorizer("WLD"), @@ 
-125,8 +106,7 @@ pub async fn wildcard_test( } else if wc_length == wc2_length { wildcard.size = wc_length; - if !CONFIGURATION.quiet && !should_filter_response(&wildcard.size, &resp_one.url()) - { + if !CONFIGURATION.quiet { // && !wildcard.should_filter_response(&ferox_response) { let msg = format!( "{} {:>10} Wildcard response is static; {} {} responses; toggle this behavior by using {}\n", status_colorizer("WLD"), @@ -167,7 +147,7 @@ async fn make_wildcard_request( target_url: &str, length: usize, tx_file: UnboundedSender, -) -> Option { +) -> Option { log::trace!( "enter: make_wildcard_request({}, {}, {:?})", target_url, @@ -201,16 +181,17 @@ async fn make_wildcard_request( .contains(&response.status().as_u16()) { // found a wildcard response - let url_len = get_url_path_length(&response.url()); - let content_len = response.content_length().unwrap_or(0); + let ferox_response = FeroxResponse::from(response, false).await; + let url_len = get_url_path_length(&ferox_response.url()); + let content_len = ferox_response.content_length(); - if !CONFIGURATION.quiet && !should_filter_response(&content_len, &response.url()) { + if !CONFIGURATION.quiet && !should_filter_response(&ferox_response) { let msg = format!( "{} {:>10} Got {} for {} (url length: {})\n", wildcard, content_len, - status_colorizer(&response.status().as_str()), - response.url(), + status_colorizer(&ferox_response.status().as_str()), + ferox_response.url(), url_len ); @@ -223,18 +204,16 @@ async fn make_wildcard_request( ); } - if response.status().is_redirection() { + if ferox_response.status().is_redirection() { // show where it goes, if possible - if let Some(next_loc) = response.headers().get("Location") { + if let Some(next_loc) = ferox_response.headers().get("Location") { let next_loc_str = next_loc.to_str().unwrap_or("Unknown"); - if !CONFIGURATION.quiet - && !should_filter_response(&content_len, &response.url()) - { + if !CONFIGURATION.quiet && !should_filter_response(&ferox_response) { let msg 
= format!( "{} {:>10} {} redirects to => {}\n", wildcard, content_len, - response.url(), + ferox_response.url(), next_loc_str ); @@ -248,8 +227,8 @@ async fn make_wildcard_request( } } } - log::trace!("exit: make_wildcard_request -> {:?}", response); - return Some(response); + log::trace!("exit: make_wildcard_request -> {:?}", ferox_response); + return Some(ferox_response); } } Err(e) => { diff --git a/src/lib.rs b/src/lib.rs index 73a92d4..8d92bc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod banner; pub mod client; pub mod config; pub mod extractor; +pub mod filters; pub mod heuristics; pub mod logger; pub mod parser; @@ -61,7 +62,7 @@ pub const DEFAULT_STATUS_CODES: [StatusCode; 9] = [ pub const DEFAULT_CONFIG_NAME: &str = "ferox-config.toml"; /// A `FeroxResponse`, derived from a `Response` to a submitted `Request` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FeroxResponse { /// The final `Url` of this `FeroxResponse` url: Url, diff --git a/src/scanner.rs b/src/scanner.rs index 0bd4931..3f1324c 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,7 +1,7 @@ use crate::config::{CONFIGURATION, PROGRESS_BAR}; use crate::extractor::get_links; -use crate::heuristics::WildcardFilter; -use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request}; +use crate::filters::{FeroxFilter, WildcardFilter}; +use crate::utils::{format_url, get_current_depth, make_request}; use crate::{heuristics, progress, FeroxChannel, FeroxResponse}; use futures::future::{BoxFuture, FutureExt}; use futures::{stream, StreamExt}; @@ -23,8 +23,8 @@ lazy_static! 
{ /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication static ref SCANNED_URLS: RwLock> = RwLock::new(HashSet::new()); - /// Vector of WildcardFilters that have been ID'd through heuristics - static ref WILDCARD_FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); + /// Vector of implementors of the FeroxFilter trait + static ref FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); /// Bounded semaphore used as a barrier to limit concurrent scans static ref SCAN_LIMITER: Semaphore = Semaphore::new(CONFIGURATION.scan_limit); @@ -67,12 +67,12 @@ fn add_url_to_list_of_scanned_urls(resp: &str, scanned_urls: &RwLock, - wildcard_filters: Arc>>>, + filter: Box, + wildcard_filters: Arc>>>, ) -> bool { log::trace!( "enter: add_filter_to_list_of_wildcard_filters({:?}, {:?})", @@ -207,7 +207,9 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec /// or if the Location header is present and matches the base url + / (3xx) fn response_is_directory(response: &FeroxResponse) -> bool { log::trace!("enter: is_directory({:?})", response); - + if response.url().as_str().contains("/api") { + log::warn!("response: {:?}", response); + } if response.status().is_redirection() { // status code is 3xx match response.headers().get("Location") { @@ -335,45 +337,42 @@ async fn try_recursion( log::trace!("exit: try_recursion"); } +/// Given a `FeroxResponse` and a `FeroxFilter` determine whether or not to apply the filter to +/// the response +pub fn should_filter(response: &FeroxResponse, filter: Box) -> bool { + log::trace!("enter: should_filter({:?}, {:?})", response, filter); + + let result = filter.should_filter_response(&response); + + log::trace!("exit: should_filter -> {}", result); + result +} + /// Simple helper to stay DRY; determines whether or not a given `FeroxResponse` should be reported /// to the user or not. 
-pub fn should_filter_response(content_len: &u64, url: &Url) -> bool { - if CONFIGURATION.sizefilters.contains(content_len) { - // filtered value from --sizefilters, move on to the next url - log::debug!("size filter: filtered out {}", url); +pub fn should_filter_response(response: &FeroxResponse) -> bool { + if CONFIGURATION + .sizefilters + .contains(&response.content_length()) + { + // filtered value from --sizefilters, sizefilters and wildcards are two separate filters + // and are applied independently + log::debug!("size filter: filtered out {}", response.url()); return true; } - match WILDCARD_FILTERS.read() { + if CONFIGURATION.dontfilter { + // quick return if dontfilter is set + return false; + } + + match FILTERS.read() { Ok(filters) => { for filter in filters.iter() { - if CONFIGURATION.dontfilter { - // quick return if dontfilter is set - return false; - } - - if filter.size > 0 && filter.size == *content_len { - // static wildcard size found during testing - // size isn't default, size equals response length, and auto-filter is on - log::debug!("static wildcard: filtered out {}", url); + // wildcard.should_filter goes here + if filter.should_filter_response(&response) { return true; } - - if filter.dynamic > 0 { - // dynamic wildcard offset found during testing - - // I'm about to manually split this url path instead of using reqwest::Url's - // builtin parsing. The reason is that they call .split() on the url path - // except that I don't want an empty string taking up the last index in the - // event that the url ends with a forward slash. It's ugly enough to be split - // into its own function for readability. - let url_len = get_url_path_length(&url); - - if url_len + filter.dynamic == *content_len { - log::debug!("dynamic wildcard: filtered out {}", url); - return true; - } - } } } Err(e) => { @@ -419,9 +418,7 @@ async fn make_requests( // purposefully doing recursion before filtering. 
the thought process is that // even though this particular url is filtered, subsequent urls may not - let content_len = &ferox_response.content_length(); - - if should_filter_response(content_len, &ferox_response.url()) { + if should_filter_response(&ferox_response) { continue; } @@ -458,8 +455,7 @@ async fn make_requests( FeroxResponse::from(new_response, CONFIGURATION.extract_links).await; // filter if necessary - let new_content_len = &new_ferox_response.content_length(); - if should_filter_response(new_content_len, &new_ferox_response.url()) { + if should_filter_response(&new_ferox_response) { continue; } @@ -596,11 +592,11 @@ pub async fn scan_url( let filter = match heuristics::wildcard_test(&target_url, wildcard_bar, heuristics_file_clone).await { - Some(f) => Arc::new(f), - None => Arc::new(WildcardFilter::default()), + Some(f) => Box::new(f), + None => Box::new(WildcardFilter::default()), }; - add_filter_to_list_of_wildcard_filters(filter.clone(), WILDCARD_FILTERS.clone()); + add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone()); // producer tasks (mp of mpsc); responsible for making requests let producers = stream::iter(looping_words.deref().to_owned()) @@ -780,29 +776,30 @@ mod tests { assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false); } - #[test] - /// add a wildcard filter with the `size` attribute set to WILDCARD_FILTERS and ensure that - /// should_filter_response correctly returns true - fn should_filter_response_filters_wildcard_size() { - let mut filter = WildcardFilter::default(); - let url = Url::parse("http://localhost").unwrap(); - filter.size = 18; - let filter = Arc::new(filter); - add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone()); - let result = should_filter_response(&18, &url); - assert!(result); - } - - #[test] - /// add a wildcard filter with the `dynamic` attribute set to WILDCARD_FILTERS and ensure that - /// should_filter_response correctly returns true - fn 
should_filter_response_filters_wildcard_dynamic() { - let mut filter = WildcardFilter::default(); - let url = Url::parse("http://localhost/some-path").unwrap(); - filter.dynamic = 9; - let filter = Arc::new(filter); - add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone()); - let result = should_filter_response(&18, &url); - assert!(result); - } + // todo check coverage and remove + // #[test] + // /// add a wildcard filter with the `size` attribute set to FILTERS and ensure that + // /// should_filter_response correctly returns true + // fn should_filter_response_filters_wildcard_size() { + // let mut filter = WildcardFilter::default(); + // let url = Url::parse("http://localhost").unwrap(); + // filter.size = 18; + // let filter = Box::new(filter); + // add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone()); + // let result = should_filter_response(&18, &url); + // assert!(result); + // } + // + // #[test] + // /// add a wildcard filter with the `dynamic` attribute set to FILTERS and ensure that + // /// should_filter_response correctly returns true + // fn should_filter_response_filters_wildcard_dynamic() { + // let mut filter = WildcardFilter::default(); + // let url = Url::parse("http://localhost/some-path").unwrap(); + // filter.dynamic = 9; + // let filter = Arc::new(filter); + // add_filter_to_list_of_wildcard_filters(filter, FILTERS.clone()); + // let result = should_filter_response(&18, &url); + // assert!(result); + // } }