diff --git a/Cargo.toml b/Cargo.toml index fb29529..e8c0b79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "1.0.4" +version = "1.1.0" authors = ["Ben 'epi' Risher "] license = "MIT" edition = "2018" diff --git a/README.md b/README.md index 14ffe29..b4f0a4a 100644 --- a/README.md +++ b/README.md @@ -88,22 +88,25 @@ Releases for multiple architectures can be found in the [Releases](https://githu #### Linux x86 ``` -wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86-linux-feroxbuster.zip +curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86-linux-feroxbuster.zip unzip x86-linux-feroxbuster.zip +chmod +x ./feroxbuster ./feroxbuster -V ``` #### Linux x86_64 ``` -wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-linux-feroxbuster.zip +curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-linux-feroxbuster.zip unzip x86_64-linux-feroxbuster.zip +chmod +x ./feroxbuster ./feroxbuster -V ``` #### MacOS x86_64 ``` -wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-macos-feroxbuster.zip +curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-macos-feroxbuster.zip unzip x86_64-macos-feroxbuster.zip +chmod +x ./feroxbuster ./feroxbuster -V ``` @@ -239,6 +242,11 @@ built-in defaults. - The same directory as the `feroxbuster` executable (per-user) - The user's current working directory (per-target) +> `CONFIG_DIR` is defined as the following: +> - Linux: `$XDG_CONFIG_HOME` or `$HOME/.config` e.g. `/home/bob/.config` +> - MacOS: `$HOME/Library/Application Support` e.g. `/Users/bob/Library/Application Support` +> - Windows: `{FOLDERID_RoamingAppData}` e.g. `C:\Users\Bob\AppData\Roaming` + If more than one valid configuration file is found, each one overwrites the values found previously. If no configuration file is found, nothing happens at this stage. 
diff --git a/src/extractor.rs b/src/extractor.rs index f623632..33c640c 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -1,6 +1,6 @@ +use crate::FeroxResponse; use lazy_static::lazy_static; use regex::Regex; -use reqwest::Response; use reqwest::Url; use std::collections::HashSet; @@ -83,20 +83,12 @@ fn add_link_to_set_of_links(link: &str, url: &Url, links: &mut HashSet) /// - homepage/assets/img/ /// - homepage/assets/ /// - homepage/ -pub async fn get_links(response: Response) -> HashSet { +pub async fn get_links(response: &FeroxResponse) -> HashSet { log::trace!("enter: get_links({})", response.url().as_str()); - let url = response.url().clone(); let mut links = HashSet::::new(); - let body = match response.text().await { - // await the response's body - Ok(text) => text, - Err(e) => { - log::error!("Could not parse body from response: {}", e); - return links; - } - }; + let body = response.text(); for capture in REGEX.captures_iter(&body) { // remove single & double quotes from both ends of the capture @@ -105,8 +97,10 @@ pub async fn get_links(response: Response) -> HashSet { match Url::parse(link) { Ok(absolute) => { - if absolute.domain() != url.domain() { - // domains are not the same, don't scan things that aren't part of the original + if absolute.domain() != response.url().domain() + || absolute.host() != response.url().host() + { + // domains/ips are not the same, don't scan things that aren't part of the original // target url continue; } @@ -118,7 +112,8 @@ pub async fn get_links(response: Response) -> HashSet { // - homepage/assets/img/ // - homepage/assets/ // - homepage/ - add_link_to_set_of_links(&sub_path, &url, &mut links); + log::debug!("Adding {} to {:?}", sub_path, links); + add_link_to_set_of_links(&sub_path, &response.url(), &mut links); } } Err(e) => { @@ -128,7 +123,8 @@ pub async fn get_links(response: Response) -> HashSet { if e.to_string().contains("relative URL without a base") { for sub_path in get_sub_paths_from_path(link) { 
// incrementally save all sub-paths that led to the relative url's resource - add_link_to_set_of_links(&sub_path, &url, &mut links); + log::debug!("Adding {} to {:?}", sub_path, links); + add_link_to_set_of_links(&sub_path, &response.url(), &mut links); } } else { // unexpected error has occurred @@ -145,6 +141,10 @@ pub async fn get_links(response: Response) -> HashSet { #[cfg(test)] mod tests { use super::*; + use crate::utils::make_request; + use httpmock::Method::GET; + use httpmock::{Mock, MockServer}; + use reqwest::Client; #[test] /// extract sub paths from the given url fragment; expect 4 sub paths and that all are @@ -236,4 +236,34 @@ mod tests { assert_eq!(links.len(), 0); assert!(links.is_empty()); } + + #[tokio::test(core_threads = 1)] + /// use make_request to generate a Response, and use the Response to test get_links; + /// the response will contain an absolute path to a domain that is not part of the scanned + /// domain; expect an empty set returned + async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain( + ) -> Result<(), Box> { + let srv = MockServer::start(); + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/some-path") + .return_status(200) + .return_body("\"http://defintely.not.a.thing.probably.com/homepage/assets/img/icons/handshake.svg\"") + .create_on(&srv); + + let client = Client::new(); + let url = Url::parse(&srv.url("/some-path")).unwrap(); + + let response = make_request(&client, &url).await.unwrap(); + + let ferox_response = FeroxResponse::from(response, true).await; + + let links = get_links(&ferox_response).await; + + assert!(links.is_empty()); + + assert_eq!(mock.times_called(), 1); + Ok(()) + } } diff --git a/src/heuristics.rs b/src/heuristics.rs index 266e558..1e0d7be 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -1,4 +1,5 @@ use crate::config::{CONFIGURATION, PROGRESS_PRINTER}; +use crate::scanner::should_filter_response; use crate::utils::{ ferox_print, format_url, 
get_url_path_length, make_request, module_colorizer, status_colorizer, }; @@ -20,7 +21,7 @@ const UUID_LENGTH: u64 = 32; /// /// `size` is size of the response that should be included with filters passed via runtime /// configuration and any static wildcard lengths. -#[derive(Default, Debug)] +#[derive(Default, Debug, PartialEq, Copy, Clone)] pub struct WildcardFilter { /// size of the response that will later be combined with the length of the path of the url /// requested @@ -99,11 +100,15 @@ pub async fn wildcard_test( // reflected in the response along with some static content; aka custom 404 let url_len = get_url_path_length(&resp_one.url()); - if !CONFIGURATION.quiet { + wildcard.dynamic = wc_length - url_len; + + if !CONFIGURATION.quiet + && !should_filter_response(&wildcard.dynamic, &resp_one.url()) + { let msg = format!( "{} {:>10} Wildcard response is dynamic; {} ({} + url length) responses; toggle this behavior by using {}\n", status_colorizer("WLD"), - wc_length - url_len, + wildcard.dynamic, style("auto-filtering").yellow(), style(wc_length - url_len).cyan(), style("--dontfilter").yellow() @@ -117,10 +122,11 @@ pub async fn wildcard_test( !CONFIGURATION.output.is_empty(), ); } - - wildcard.dynamic = wc_length - url_len; } else if wc_length == wc2_length { - if !CONFIGURATION.quiet { + wildcard.size = wc_length; + + if !CONFIGURATION.quiet && !should_filter_response(&wildcard.size, &resp_one.url()) + { let msg = format!( "{} {:>10} Wildcard response is static; {} {} responses; toggle this behavior by using {}\n", status_colorizer("WLD"), @@ -138,7 +144,6 @@ pub async fn wildcard_test( !CONFIGURATION.output.is_empty(), ); } - wildcard.size = wc_length; } } else { bar.inc(2); @@ -199,7 +204,7 @@ async fn make_wildcard_request( let url_len = get_url_path_length(&response.url()); let content_len = response.content_length().unwrap_or(0); - if !CONFIGURATION.quiet { + if !CONFIGURATION.quiet && !should_filter_response(&content_len, &response.url()) { let msg 
= format!( "{} {:>10} Got {} for {} (url length: {})\n", wildcard, @@ -221,31 +226,16 @@ async fn make_wildcard_request( if response.status().is_redirection() { // show where it goes, if possible if let Some(next_loc) = response.headers().get("Location") { - if let Ok(next_loc_str) = next_loc.to_str() { - if !CONFIGURATION.quiet { - let msg = format!( - "{} {:>10} {} redirects to => {}\n", - wildcard, - content_len, - response.url(), - next_loc_str - ); - - ferox_print(&msg, &PROGRESS_PRINTER); - - try_send_message_to_file( - &msg, - tx_file.clone(), - !CONFIGURATION.output.is_empty(), - ); - } - } else if !CONFIGURATION.quiet { + let next_loc_str = next_loc.to_str().unwrap_or("Unknown"); + if !CONFIGURATION.quiet + && !should_filter_response(&content_len, &response.url()) + { let msg = format!( - "{} {:>10} {} redirects to => {:?}\n", + "{} {:>10} {} redirects to => {}\n", wildcard, content_len, response.url(), - next_loc + next_loc_str ); ferox_print(&msg, &PROGRESS_PRINTER); diff --git a/src/lib.rs b/src/lib.rs index dcdd5e4..73a92d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,8 @@ pub mod reporter; pub mod scanner; pub mod utils; -use reqwest::StatusCode; +use reqwest::header::HeaderMap; +use reqwest::{Response, StatusCode, Url}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; /// Generic Result type to ease error handling in async contexts @@ -59,6 +60,118 @@ pub const DEFAULT_STATUS_CODES: [StatusCode; 9] = [ /// Expected location is in the same directory as the feroxbuster binary. 
pub const DEFAULT_CONFIG_NAME: &str = "ferox-config.toml"; +/// A `FeroxResponse`, derived from a `Response` to a submitted `Request` +#[derive(Debug)] +pub struct FeroxResponse { + /// The final `Url` of this `FeroxResponse` + url: Url, + + /// The `StatusCode` of this `FeroxResponse` + status: StatusCode, + + /// The full response text + text: String, + + /// The content-length of this response, if known + content_length: u64, + + /// The `Headers` of this `FeroxResponse` + headers: HeaderMap, +} + +/// `FeroxResponse` implementation +impl FeroxResponse { + /// Get the `StatusCode` of this `FeroxResponse` + pub fn status(&self) -> &StatusCode { + &self.status + } + + /// Get the final `Url` of this `FeroxResponse`. + pub fn url(&self) -> &Url { + &self.url + } + + /// Get the full response text + pub fn text(&self) -> &str { + &self.text + } + + /// Get the `Headers` of this `FeroxResponse` + pub fn headers(&self) -> &HeaderMap { + &self.headers + } + + /// Get the content-length of this response, if known + pub fn content_length(&self) -> u64 { + self.content_length + } + + /// Set `FeroxResponse`'s `url` attribute, has no effect if an error occurs + pub fn set_url(&mut self, url: &str) { + match Url::parse(&url) { + Ok(url) => { + self.url = url; + } + Err(e) => { + log::error!("Could not parse {} into a Url: {}", url, e); + } + }; + } + + /// Make a reasonable guess at whether the response is a file or not + /// + /// Examines the last part of a path to determine if it has an obvious extension + /// i.e. 
http://localhost/some/path/stuff.js where stuff.js indicates a file + /// + /// Additionally, inspects query parameters, as they're also often indicative of a file + pub fn is_file(&self) -> bool { + let has_extension = match self.url.path_segments() { + Some(path) => { + if let Some(last) = path.last() { + last.contains('.') // last segment has some sort of extension, probably + } else { + false + } + } + None => false, + }; + + self.url.query_pairs().count() > 0 || has_extension + } + + /// Create a new `FeroxResponse` from the given `Response` + pub async fn from(response: Response, read_body: bool) -> Self { + let url = response.url().clone(); + let status = response.status(); + let headers = response.headers().clone(); + let content_length = response.content_length().unwrap_or(0); + + let text = if read_body { + // .text() consumes the response, must be called last + // additionally, --extract-links is currently the only place we use the body of the + // response, so we forego the processing if not performing extraction + match response.text().await { + // await the response's body + Ok(text) => text, + Err(e) => { + log::error!("Could not parse body from response: {}", e); + String::new() + } + } + } else { + String::new() + }; + + FeroxResponse { + url, + status, + content_length, + text, + headers, + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/main.rs b/src/main.rs index d26dfa3..7c8369c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,8 @@ use feroxbuster::config::{CONFIGURATION, PROGRESS_PRINTER}; use feroxbuster::scanner::scan_url; use feroxbuster::utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer}; -use feroxbuster::{banner, heuristics, logger, reporter, FeroxResult}; +use feroxbuster::{banner, heuristics, logger, reporter, FeroxResponse, FeroxResult}; use futures::StreamExt; -use reqwest::Response; use std::collections::HashSet; use std::fs::File; use std::io::{BufRead, BufReader}; @@ -38,7 +37,13 @@ 
fn get_unique_words_from_wordlist(path: &str) -> FeroxResult let mut words = HashSet::new(); for line in reader.lines() { - words.insert(line?); + let result = line?; + + if result.starts_with('#') || result.is_empty() { + continue; + } + + words.insert(result); } log::trace!( @@ -52,7 +57,7 @@ fn get_unique_words_from_wordlist(path: &str) -> FeroxResult /// Determine whether it's a single url scan or urls are coming from stdin, then scan as needed async fn scan( targets: Vec, - tx_term: UnboundedSender, + tx_term: UnboundedSender, tx_file: UnboundedSender, ) -> FeroxResult<()> { log::trace!("enter: scan({:?}, {:?}, {:?})", targets, tx_term, tx_file); diff --git a/src/reporter.rs b/src/reporter.rs index f3d234a..8465ad7 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1,8 +1,7 @@ use crate::config::{CONFIGURATION, PROGRESS_PRINTER}; use crate::utils::{ferox_print, status_colorizer}; -use crate::FeroxChannel; +use crate::{FeroxChannel, FeroxResponse}; use console::strip_ansi_codes; -use reqwest::Response; use std::io::Write; use std::sync::{Arc, Once, RwLock}; use std::{fs, io}; @@ -41,14 +40,14 @@ pub fn initialize( output_file: &str, save_output: bool, ) -> ( - UnboundedSender, + UnboundedSender, UnboundedSender, JoinHandle<()>, Option>, ) { log::trace!("enter: initialize({}, {})", output_file, save_output); - let (tx_rpt, rx_rpt): FeroxChannel = mpsc::unbounded_channel(); + let (tx_rpt, rx_rpt): FeroxChannel = mpsc::unbounded_channel(); let (tx_file, rx_file): FeroxChannel = mpsc::unbounded_channel(); let file_clone = tx_file.clone(); @@ -81,7 +80,7 @@ pub fn initialize( /// The consumer simply receives responses and prints them if they meet the given /// reporting criteria async fn spawn_terminal_reporter( - mut resp_chan: UnboundedReceiver, + mut resp_chan: UnboundedReceiver, file_chan: UnboundedSender, save_output: bool, ) { @@ -107,7 +106,7 @@ async fn spawn_terminal_reporter( // 200 3280 https://localhost.com/FAQ "{} {:>10} {}\n", status, - 
resp.content_length().unwrap_or(0), + resp.content_length(), resp.url() ) }; diff --git a/src/scanner.rs b/src/scanner.rs index 2f786af..f999bb7 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,11 +1,12 @@ use crate::config::{CONFIGURATION, PROGRESS_BAR}; +use crate::extractor::get_links; use crate::heuristics::WildcardFilter; use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request}; -use crate::{heuristics, progress, FeroxChannel}; +use crate::{heuristics, progress, FeroxChannel, FeroxResponse}; use futures::future::{BoxFuture, FutureExt}; use futures::{stream, StreamExt}; use lazy_static::lazy_static; -use reqwest::{Response, Url}; +use reqwest::Url; use std::collections::HashSet; use std::convert::TryInto; use std::ops::Deref; @@ -20,6 +21,9 @@ static CALL_COUNT: AtomicUsize = AtomicUsize::new(0); lazy_static! { /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication static ref SCANNED_URLS: RwLock> = RwLock::new(HashSet::new()); + + /// Vector of WildcardFilters that have been ID'd through heuristics + static ref WILDCARD_FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); } /// Adds the given url to `SCANNED_URLS` @@ -59,6 +63,42 @@ fn add_url_to_list_of_scanned_urls(resp: &str, scanned_urls: &RwLock, + wildcard_filters: Arc>>>, +) -> bool { + log::trace!( + "enter: add_filter_to_list_of_wildcard_filters({:?}, {:?})", + filter, + wildcard_filters + ); + + match wildcard_filters.write() { + Ok(mut filters) => { + // If the set did not contain the assigned filter, true is returned. + // If the set did contain the assigned filter, false is returned. 
+ if filters.contains(&filter) { + log::trace!("exit: add_filter_to_list_of_wildcard_filters -> false"); + return false; + } + + filters.push(filter); + + log::trace!("exit: add_filter_to_list_of_wildcard_filters -> true"); + true + } + Err(e) => { + // poisoned lock + log::error!("Set of wildcard filters poisoned: {}", e); + log::trace!("exit: add_filter_to_list_of_wildcard_filters -> false"); + false + } + } +} + /// Spawn a single consumer task (sc side of mpsc) /// /// The consumer simply receives Urls and scans them @@ -66,7 +106,7 @@ fn spawn_recursion_handler( mut recursion_channel: UnboundedReceiver, wordlist: Arc>, base_depth: usize, - tx_term: UnboundedSender, + tx_term: UnboundedSender, tx_file: UnboundedSender, ) -> BoxFuture<'static, Vec>> { log::trace!( @@ -160,7 +200,7 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec /// /// handles 2xx and 3xx responses by either checking if the url ends with a / (2xx) /// or if the Location header is present and matches the base url + / (3xx) -fn response_is_directory(response: &Response) -> bool { +fn response_is_directory(response: &FeroxResponse) -> bool { log::trace!("enter: is_directory({:?})", response); if response.status().is_redirection() { @@ -240,7 +280,7 @@ fn reached_max_depth(url: &Url, base_depth: usize, max_depth: usize) -> bool { /// /// When a recursion opportunity is found, the new url is sent across the recursion channel async fn try_recursion( - response: &Response, + response: &FeroxResponse, base_depth: usize, transmitter: UnboundedSender, ) { @@ -290,6 +330,54 @@ async fn try_recursion( log::trace!("exit: try_recursion"); } +/// Simple helper to stay DRY; determines whether or not a given `FeroxResponse` should be reported +/// to the user or not. 
+pub fn should_filter_response(content_len: &u64, url: &Url) -> bool { + if CONFIGURATION.sizefilters.contains(content_len) { + // filtered value from --sizefilters, move on to the next url + log::debug!("size filter: filtered out {}", url); + return true; + } + + match WILDCARD_FILTERS.read() { + Ok(filters) => { + for filter in filters.iter() { + if CONFIGURATION.dontfilter { + // quick return if dontfilter is set + return false; + } + + if filter.size > 0 && filter.size == *content_len { + // static wildcard size found during testing + // size isn't default, size equals response length, and auto-filter is on + log::debug!("static wildcard: filtered out {}", url); + return true; + } + + if filter.dynamic > 0 { + // dynamic wildcard offset found during testing + + // I'm about to manually split this url path instead of using reqwest::Url's + // builtin parsing. The reason is that they call .split() on the url path + // except that I don't want an empty string taking up the last index in the + // event that the url ends with a forward slash. It's ugly enough to be split + // into its own function for readability. 
+ let url_len = get_url_path_length(&url); + + if url_len + filter.dynamic == *content_len { + log::debug!("dynamic wildcard: filtered out {}", url); + return true; + } + } + } + } + Err(e) => { + log::error!("{}", e); + } + } + false +} + /// Wrapper for [make_request](fn.make_request.html) /// /// Handles making multiple requests based on the presence of extensions @@ -299,9 +387,8 @@ async fn make_requests( target_url: &str, word: &str, base_depth: usize, - filter: Arc, dir_chan: UnboundedSender, - report_chan: UnboundedSender, + report_chan: UnboundedSender, ) { log::trace!( "enter: make_requests({}, {}, {}, {:?}, {:?})", @@ -316,61 +403,117 @@ async fn make_requests( for url in urls { if let Ok(response) = make_request(&CONFIGURATION.client, &url).await { - // response came back without error + // response came back without error, convert it to FeroxResponse + let ferox_response = FeroxResponse::from(response, CONFIGURATION.extract_links).await; // do recursion if appropriate - if !CONFIGURATION.norecursion && response_is_directory(&response) { - try_recursion(&response, base_depth, dir_chan.clone()).await; + if !CONFIGURATION.norecursion { + try_recursion(&ferox_response, base_depth, dir_chan.clone()).await; } // purposefully doing recursion before filtering. 
the thought process is that // even though this particular url is filtered, subsequent urls may not - let content_len = &response.content_length().unwrap_or(0); + let content_len = &ferox_response.content_length(); - if CONFIGURATION.sizefilters.contains(content_len) { - // filtered value from --sizefilters, move on to the next url - log::debug!("size filter: filtered out {}", response.url()); + if should_filter_response(content_len, &ferox_response.url()) { continue; } - if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter { - // static wildcard size found during testing - // size isn't default, size equals response length, and auto-filter is on - log::debug!("static wildcard: filtered out {}", response.url()); - continue; - } + if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() { + let new_links = get_links(&ferox_response).await; - if filter.dynamic > 0 && !CONFIGURATION.dontfilter { - // dynamic wildcard offset found during testing + for new_link in new_links { + let unknown = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS); - // I'm about to manually split this url path instead of using reqwest::Url's - // builtin parsing. The reason is that they call .split() on the url path - // except that I don't want an empty string taking up the last index in the - // event that the url ends with a forward slash. It's ugly enough to be split - // into its own function for readability. - let url_len = get_url_path_length(&response.url()); + if !unknown { + // not unknown, i.e. 
we've seen the url before and don't need to scan again + continue; + } - if url_len + filter.dynamic == *content_len { - log::debug!("dynamic wildcard: filtered out {}", response.url()); - continue; + // create a url based on the given command line options, continue on error + let new_url = match format_url( + &new_link, + &"", + CONFIGURATION.addslash, + &CONFIGURATION.queries, + None, + ) { + Ok(url) => url, + Err(_) => continue, + }; + + // make the request and store the response + let new_response = match make_request(&CONFIGURATION.client, &new_url).await { + Ok(resp) => resp, + Err(_) => continue, + }; + + let mut new_ferox_response = + FeroxResponse::from(new_response, CONFIGURATION.extract_links).await; + + // filter if necessary + let new_content_len = &new_ferox_response.content_length(); + if should_filter_response(new_content_len, &new_ferox_response.url()) { + continue; + } + + if new_ferox_response.is_file() { + // very likely a file, simply request and report + log::debug!( + "Singular extraction: {} ({})", + new_ferox_response.url(), + new_ferox_response.status().as_str(), + ); + + send_report(report_chan.clone(), new_ferox_response); + + continue; + } + + if !CONFIGURATION.norecursion { + log::debug!( + "Recursive extraction: {} ({})", + new_ferox_response.url(), + new_ferox_response.status().as_str() + ); + + if new_ferox_response.status().is_success() + && !new_ferox_response.url().as_str().ends_with('/') + { + // since all of these are 2xx, recursion is only attempted if the + // url ends in a /. I am actually ok with adding the slash and not + // adding it, as both have merit. 
Leaving it in for now to see how + // things turn out (current as of: v1.1.0) + new_ferox_response.set_url(&format!("{}/", new_ferox_response.url())); + } + + try_recursion(&new_ferox_response, base_depth, dir_chan.clone()).await; + } } } // everything else should be reported - match report_chan.send(response) { - Ok(_) => { - log::debug!("sent {}/{} over reporting channel", &target_url, &word); - } - Err(e) => { - log::error!("wtf: {}", e); - } - } + send_report(report_chan.clone(), ferox_response); } } log::trace!("exit: make_requests"); } +/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel` +fn send_report(report_sender: UnboundedSender, response: FeroxResponse) { + log::trace!("enter: send_report({:?}, {:?}", report_sender, response); + + match report_sender.send(response) { + Ok(_) => {} + Err(e) => { + log::error!("{}", e); + } + } + + log::trace!("exit: send_report"); +} + /// Scan a given url using a given wordlist /// /// This is the primary entrypoint for the scanner @@ -378,7 +521,7 @@ pub async fn scan_url( target_url: &str, wordlist: Arc>, base_depth: usize, - tx_term: UnboundedSender, + tx_term: UnboundedSender, tx_file: UnboundedSender, ) { log::trace!( @@ -439,18 +582,17 @@ pub async fn scan_url( None => Arc::new(WildcardFilter::default()), }; + add_filter_to_list_of_wildcard_filters(filter.clone(), WILDCARD_FILTERS.clone()); + // producer tasks (mp of mpsc); responsible for making requests let producers = stream::iter(looping_words.deref().to_owned()) .map(|word| { - let wc_filter = filter.clone(); let txd = tx_dir.clone(); let txr = tx_term.clone(); let pb = progress_bar.clone(); // progress bar is an Arc around internal state let tgt = target_url.to_string(); // done to satisfy 'static lifetime below ( - tokio::spawn(async move { - make_requests(&tgt, &word, base_depth, wc_filter, txd, txr).await - }), + tokio::spawn(async move { make_requests(&tgt, &word, base_depth, txd, txr).await }), pb, ) }) @@ -616,4 
+758,30 @@ mod tests { assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false); } + + #[test] + /// add a wildcard filter with the `size` attribute set to WILDCARD_FILTERS and ensure that + /// should_filter_response correctly returns true + fn should_filter_response_filters_wildcard_size() { + let mut filter = WildcardFilter::default(); + let url = Url::parse("http://localhost").unwrap(); + filter.size = 18; + let filter = Arc::new(filter); + add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone()); + let result = should_filter_response(&18, &url); + assert!(result); + } + + #[test] + /// add a wildcard filter with the `dynamic` attribute set to WILDCARD_FILTERS and ensure that + /// should_filter_response correctly returns true + fn should_filter_response_filters_wildcard_dynamic() { + let mut filter = WildcardFilter::default(); + let url = Url::parse("http://localhost/some-path").unwrap(); + filter.dynamic = 9; + let filter = Arc::new(filter); + add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone()); + let result = should_filter_response(&18, &url); + assert!(result); + } } diff --git a/src/utils.rs b/src/utils.rs index fe0bff6..048cfbd 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -160,7 +160,11 @@ pub fn format_url( // // the transforms that occur here will need to keep this in mind, i.e. add a slash to preserve // the current directory sent as part of the url - let url = if !url.ends_with('/') { + let url = if word.is_empty() { + // v1.0.6: added during --extract-links feature implementation to support creating urls + // that were extracted from response bodies, i.e. 
http://localhost/some/path/js/main.js + url.to_string() + } else if !url.ends_with('/') { format!("{}/", url) } else { url.to_string() diff --git a/tests/test_extractor.rs b/tests/test_extractor.rs new file mode 100644 index 0000000..f88f229 --- /dev/null +++ b/tests/test_extractor.rs @@ -0,0 +1,229 @@ +mod utils; +use assert_cmd::prelude::*; +use httpmock::Method::GET; +use httpmock::{Mock, MockServer}; +use predicates::prelude::*; +use std::process::Command; +use utils::{setup_tmp_directory, teardown_tmp_directory}; + +#[test] +/// send a request to a page that contains an absolute link, --extract-links should find the link +/// and make a request to the new link +fn extractor_finds_absolute_url() -> Result<(), Box> { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?; + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/LICENSE") + .return_status(200) + .return_body(&srv.url("'/homepage/assets/img/icons/handshake.svg'")) + .create_on(&srv); + + let mock_two = Mock::new() + .expect_method(GET) + .expect_path("/homepage/assets/img/icons/handshake.svg") + .return_status(200) + .create_on(&srv); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains( + "/homepage/assets/img/icons/handshake.svg", + )), + ); + + assert_eq!(mock.times_called(), 1); + assert_eq!(mock_two.times_called(), 1); + teardown_tmp_directory(tmp_dir); + Ok(()) +} + +#[test] +/// send a request to a page that contains an absolute link to another domain, scanner should not +/// follow +fn extractor_finds_absolute_url_to_different_domain() -> Result<(), Box> { + let srv = MockServer::start(); + let (tmp_dir, file) = 
setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?; + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/LICENSE") + .return_status(200) + .return_body("\"http://localhost/homepage/assets/img/icons/handshake.svg\"") + .create_on(&srv); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains( + "/homepage/assets/img/icons/handshake.svg", + )) + .not(), + ); + + assert_eq!(mock.times_called(), 1); + teardown_tmp_directory(tmp_dir); + Ok(()) +} + +#[test] +/// send a request to a page that contains a relative link, should follow +fn extractor_finds_relative_url() -> Result<(), Box> { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?; + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/LICENSE") + .return_status(200) + .return_body("\"/homepage/assets/img/icons/handshake.svg\"") + .create_on(&srv); + + let mock_two = Mock::new() + .expect_method(GET) + .expect_path("/homepage/assets/img/icons/handshake.svg") + .return_status(200) + .create_on(&srv); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains( + "/homepage/assets/img/icons/handshake.svg", + )), + ); + + assert_eq!(mock.times_called(), 1); + assert_eq!(mock_two.times_called(), 1); + teardown_tmp_directory(tmp_dir); + Ok(()) +} + +#[test] +/// send a request to a page that contains a relative link, follow it, and find the same link again +/// should follow then filter +fn 
extractor_finds_same_relative_url_twice() -> Result<(), Box> { + let srv = MockServer::start(); + let (tmp_dir, file) = + setup_tmp_directory(&["LICENSE".to_string(), "README".to_string()], "wordlist")?; + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/LICENSE") + .return_status(200) + .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\"")) + .create_on(&srv); + + let mock_two = Mock::new() + .expect_method(GET) + .expect_path("/README") + .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\"")) + .return_status(200) + .create_on(&srv); + + let mock_three = Mock::new() + .expect_method(GET) + .expect_path("/homepage/assets/img/icons/handshake.svg") + .return_status(200) + .create_on(&srv); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains( + "/homepage/assets/img/icons/handshake.svg", + )), + ); + + assert_eq!(mock.times_called(), 1); + assert_eq!(mock_two.times_called(), 1); + assert_eq!(mock_three.times_called(), 1); + teardown_tmp_directory(tmp_dir); + Ok(()) +} + +#[test] +/// send a request to a page that contains an absolute link that leads to a page with a sizefilter +/// that should filter it out, expect not to see the second response reported +fn extractor_finds_filtered_content() -> Result<(), Box> { + let srv = MockServer::start(); + let (tmp_dir, file) = + setup_tmp_directory(&["LICENSE".to_string(), "README".to_string()], "wordlist")?; + + let mock = Mock::new() + .expect_method(GET) + .expect_path("/LICENSE") + .return_status(200) + .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\"")) + .create_on(&srv); + + let mock_two = Mock::new() + .expect_method(GET) + .expect_path("/homepage/assets/img/icons/handshake.svg") + 
.return_body("im a little teapot") + .return_status(200) + .create_on(&srv); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .arg("--sizefilter") + .arg("18") + .unwrap(); + + cmd.assert().success().stdout( + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains( + "/homepage/assets/img/icons/handshake.svg", + )) + .not(), + ); + + assert_eq!(mock.times_called(), 1); + assert_eq!(mock_two.times_called(), 1); + teardown_tmp_directory(tmp_dir); + Ok(()) +}