diff --git a/Cargo.toml b/Cargo.toml index d48eaaf..34ca299 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "0.8", features = ["v4"] } indicatif = "0.15" -console = "0.13" +console = "0.14" openssl = { version = "0.10", features = ["vendored"] } dirs = "3.0" regex = "1" diff --git a/src/banner.rs b/src/banner.rs index 74a8fdc..1408137 100644 --- a/src/banner.rs +++ b/src/banner.rs @@ -1,9 +1,11 @@ use crate::config::{Configuration, CONFIGURATION}; +use crate::statistics::StatCommand; use crate::utils::{make_request, status_colorizer}; use console::{style, Emoji}; use reqwest::{Client, Url}; use serde_json::Value; use std::io::Write; +use tokio::sync::mpsc::UnboundedSender; /// macro helper to abstract away repetitive string formatting macro_rules! format_banner_entry_helper { @@ -67,7 +69,12 @@ enum UpdateStatus { /// ex: v1.1.0 /// /// Returns `UpdateStatus` -async fn needs_update(client: &Client, url: &str, bin_version: &str) -> UpdateStatus { +async fn needs_update( + client: &Client, + url: &str, + bin_version: &str, + tx_stats: UnboundedSender, +) -> UpdateStatus { log::trace!("enter: needs_update({:?}, {})", client, url); let unknown = UpdateStatus::Unknown; @@ -81,7 +88,7 @@ async fn needs_update(client: &Client, url: &str, bin_version: &str) -> UpdateSt } }; - if let Ok(response) = make_request(&client, &api_url).await { + if let Ok(response) = make_request(&client, &api_url, tx_stats).await { let body = response.text().await.unwrap_or_default(); let json_response: Value = serde_json::from_str(&body).unwrap_or_default(); @@ -137,8 +144,13 @@ fn format_emoji(emoji: &str) -> String { /// Prints the banner to stdout. /// /// Only prints those settings which are either always present, or passed in by the user. -pub async fn initialize(targets: &[String], config: &Configuration, version: &str, mut writer: W) -where +pub async fn initialize( + targets: &[String], + config: &Configuration, + version: &str, + mut writer: W, + tx_stats: UnboundedSender, +) where W: Write, { let artwork = format!( @@ -150,7 +162,7 @@ by Ben "epi" Risher {} ver: {}"#, Emoji("🤓", &format!("{:<2}", "\u{0020}")), version ); - let status = needs_update(&CONFIGURATION.client, UPDATE_URL, version).await; + let status = needs_update(&CONFIGURATION.client, UPDATE_URL, version, tx_stats).await; let top = "───────────────────────────┬──────────────────────"; let addl_section = "──────────────────────────────────────────────────"; diff --git a/src/extractor.rs b/src/extractor.rs index 408a2ef..c028fb7 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -1,3 +1,4 @@ +use crate::statistics::StatCommand; use crate::{ client, config::{Configuration, CONFIGURATION}, @@ -9,6 +10,7 @@ use lazy_static::lazy_static; use regex::Regex; use reqwest::Url; use std::collections::HashSet; +use tokio::sync::mpsc::UnboundedSender; /// Regular expression used in [LinkFinder](https://github.com/GerbenJavado/LinkFinder) /// @@ -172,8 +174,15 @@ fn add_all_sub_paths(url_path: &str, response: &FeroxResponse, mut links: &mut H /// - create a new Url object based on cli options/args /// - check if the new Url has already been seen/scanned -> None /// - make a request to the new Url ? -> Some(response) : None -pub async fn request_feroxresponse_from_new_link(url: &str) -> Option { - log::trace!("enter: request_feroxresponse_from_new_link({})", url); +pub async fn request_feroxresponse_from_new_link( + url: &str, + tx_stats: UnboundedSender, +) -> Option { + log::trace!( + "enter: request_feroxresponse_from_new_link({}, {:?})", + url, + tx_stats + ); // create a url based on the given command line options, return None on error let new_url = match format_url( @@ -197,7 +206,7 @@ pub async fn request_feroxresponse_from_new_link(url: &str) -> Option resp, Err(_) => { log::trace!("exit: request_feroxresponse_from_new_link -> None"); @@ -221,8 +230,16 @@ pub async fn request_feroxresponse_from_new_link(url: &str) -> Option Option { - log::trace!("enter: get_robots_file({})", base_url); +pub async fn request_robots_txt( + base_url: &str, + config: &Configuration, + tx_stats: UnboundedSender, +) -> Option { + log::trace!( + "enter: get_robots_file({}, Configuration, {:?})", + base_url, + tx_stats + ); // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something // similar; to account for that, create a client that will follow redirects, regardless of @@ -248,7 +265,7 @@ pub async fn request_robots_txt(base_url: &str, config: &Configuration) -> Optio if let Ok(mut url) = Url::parse(base_url) { url.set_path("/robots.txt"); // overwrite existing path with /robots.txt - if let Ok(response) = make_request(&client, &url).await { + if let Ok(response) = make_request(&client, &url, tx_stats).await { let ferox_response = FeroxResponse::from(response, true).await; log::trace!("exit: get_robots_file -> {}", ferox_response); @@ -267,11 +284,15 @@ pub async fn request_robots_txt(base_url: &str, config: &Configuration) -> Optio /// http://localhost/stuff/things /// this function requests: /// http://localhost/robots.txt -pub async fn extract_robots_txt(base_url: &str, config: &Configuration) -> HashSet { +pub async fn extract_robots_txt( + base_url: &str, + config: &Configuration, + tx_stats: UnboundedSender, +) -> HashSet { log::trace!("enter: extract_robots_txt({}, CONFIGURATION)", base_url); let mut links = HashSet::new(); - if let Some(response) = request_robots_txt(&base_url, &config).await { + if let Some(response) = request_robots_txt(&base_url, &config, tx_stats.clone()).await { for capture in ROBOTS_REGEX.captures_iter(response.text.as_str()) { if let Some(new_path) = capture.name("url_path") { if let Ok(mut new_url) = Url::parse(base_url) { @@ -290,9 +311,12 @@ pub async fn extract_robots_txt(base_url: &str, config: &Configuration) -> HashS mod tests { use super::*; use crate::utils::make_request; + use crate::utils::update_stat; + use crate::FeroxChannel; use httpmock::Method::GET; use httpmock::MockServer; use reqwest::Client; + use tokio::sync::mpsc; #[test] /// extract sub paths from the given url fragment; expect 4 sub paths and that all are @@ -402,13 +426,16 @@ mod tests { let client = Client::new(); let url = Url::parse(&srv.url("/some-path")).unwrap(); + let (tx, _): FeroxChannel = mpsc::unbounded_channel(); - let response = make_request(&client, &url).await.unwrap(); + let response = make_request(&client, &url, tx.clone()).await.unwrap(); let ferox_response = FeroxResponse::from(response, true).await; let links = get_links(&ferox_response).await; + update_stat!(tx, StatCommand::Exit); + assert!(links.is_empty()); assert_eq!(mock.hits(), 1); diff --git a/src/heuristics.rs b/src/heuristics.rs index 35ed7e1..3291c2d 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -1,7 +1,8 @@ use crate::{ config::{CONFIGURATION, PROGRESS_PRINTER}, filters::WildcardFilter, - scanner::{should_filter_response, STATS}, + scanner::should_filter_response, + statistics::{StatCommand, StatError}, utils::{ferox_print, format_url, get_url_path_length, make_request, status_colorizer}, FeroxResponse, }; @@ -40,12 +41,14 @@ pub async fn wildcard_test( target_url: &str, bar: ProgressBar, tx_term: UnboundedSender, + tx_stats: UnboundedSender, ) -> Option { log::trace!( - "enter: wildcard_test({:?}, {:?}, {:?})", + "enter: wildcard_test({:?}, {:?}, {:?}, {:?})", target_url, bar, - tx_term + tx_term, + tx_stats ); if CONFIGURATION.dont_filter { @@ -54,10 +57,14 @@ pub async fn wildcard_test( return None; } - let tx_clone_one = tx_term.clone(); - let tx_clone_two = tx_term.clone(); + let tx_term_mwcr1 = tx_term.clone(); + let tx_term_mwcr2 = tx_term.clone(); + let tx_stats_mwcr1 = tx_stats.clone(); + let tx_stats_mwcr2 = tx_stats.clone(); - if let Some(ferox_response) = make_wildcard_request(&target_url, 1, tx_clone_one).await { + if let Some(ferox_response) = + make_wildcard_request(&target_url, 1, tx_term_mwcr1, tx_stats_mwcr1).await + { bar.inc(1); // found a wildcard response @@ -72,7 +79,9 @@ pub async fn wildcard_test( // content length of wildcard is non-zero, perform additional tests: // make a second request, with a known-sized (64) longer request - if let Some(resp_two) = make_wildcard_request(&target_url, 3, tx_clone_two).await { + if let Some(resp_two) = + make_wildcard_request(&target_url, 3, tx_term_mwcr2, tx_stats_mwcr2).await + { bar.inc(1); let wc2_length = resp_two.content_length(); @@ -138,12 +147,14 @@ async fn make_wildcard_request( target_url: &str, length: usize, tx_file: UnboundedSender, + tx_stats: UnboundedSender, ) -> Option { log::trace!( - "enter: make_wildcard_request({}, {}, {:?})", + "enter: make_wildcard_request({}, {}, {:?}, {:?})", target_url, length, - tx_file + tx_file, + tx_stats, ); let unique_str = unique_string(length); @@ -163,7 +174,13 @@ async fn make_wildcard_request( } }; - match make_request(&CONFIGURATION.client, &nonexistent.to_owned()).await { + match make_request( + &CONFIGURATION.client, + &nonexistent.to_owned(), + tx_stats.clone(), + ) + .await + { Ok(response) => { if CONFIGURATION .status_codes @@ -199,8 +216,15 @@ async fn make_wildcard_request( /// In the event that no sites can be reached, the program will exit. /// /// Any urls that are found to be alive are returned to the caller. -pub async fn connectivity_test(target_urls: &[String]) -> Vec { - log::trace!("enter: connectivity_test({:?})", target_urls); +pub async fn connectivity_test( + target_urls: &[String], + tx_stats: UnboundedSender, +) -> Vec { + log::trace!( + "enter: connectivity_test({:?}, {:?})", + target_urls, + tx_stats + ); let mut good_urls = vec![]; @@ -214,16 +238,16 @@ pub async fn connectivity_test(target_urls: &[String]) -> Vec { ) { Ok(url) => url, Err(e) => { + tx_stats + .send(StatCommand::AddError(StatError::UrlFormat)) + .unwrap_or_default(); log::error!("{}", e); continue; } }; - match make_request(&CONFIGURATION.client, &request).await { - Ok(response) => { - let ferox_response = FeroxResponse::from(response, false).await; - STATS.update(&ferox_response); - ferox_print(&format!("FUCK YEA: {:?}", *STATS), &PROGRESS_PRINTER); + match make_request(&CONFIGURATION.client, &request, tx_stats.clone()).await { + Ok(_) => { good_urls.push(target_url.to_owned()); } Err(e) => { diff --git a/src/main.rs b/src/main.rs index 9786572..9994f7e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,6 @@ use crossterm::event::{self, Event, KeyCode}; +#[macro_use(update_stat)] +extern crate feroxbuster; use feroxbuster::{ banner, config::{CONFIGURATION, PROGRESS_BAR, PROGRESS_PRINTER}, @@ -8,15 +10,16 @@ use feroxbuster::{ reporter, scan_manager::{self, PAUSE_SCAN}, scanner::{self, scan_url, send_report, RESPONSES, SCANNED_URLS}, + statistics::{self, StatCommand}, utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer}, FeroxError, FeroxResponse, FeroxResult, FeroxSerialize, SLEEP_DURATION, VERSION, }; #[cfg(not(target_os = "windows"))] use feroxbuster::{utils::set_open_file_limit, DEFAULT_OPEN_FILE_LIMIT}; use futures::StreamExt; -use std::convert::TryInto; use std::{ collections::HashSet, + convert::TryInto, fs::File, io::{stderr, BufRead, BufReader}, process, @@ -102,6 +105,7 @@ async fn scan( mut targets: Vec, tx_term: UnboundedSender, tx_file: UnboundedSender, + tx_stats: UnboundedSender, ) -> FeroxResult<()> { log::trace!("enter: scan({:?}, {:?}, {:?})", targets, tx_term, tx_file); // cloning an Arc is cheap (it's basically a pointer into the heap) @@ -117,7 +121,7 @@ async fn scan( return Err(Box::new(err)); } - scanner::initialize(words.len(), &CONFIGURATION).await; + scanner::initialize(words.len(), &CONFIGURATION, tx_stats.clone()).await; if CONFIGURATION.resumed { if let Ok(scans) = SCANNED_URLS.scans.lock() { @@ -148,11 +152,16 @@ async fn scan( for target in targets.clone() { // modifying the targets vector, so we can't have a reference to it while we borrow // it as mutable; thus the clone - let robots_links = extract_robots_txt(&target, &CONFIGURATION).await; + let robots_links = extract_robots_txt(&target, &CONFIGURATION, tx_stats.clone()).await; for robot_link in robots_links { // create a url based on the given command line options, continue on error - let ferox_response = match request_feroxresponse_from_new_link(&robot_link).await { + let ferox_response = match request_feroxresponse_from_new_link( + &robot_link, + tx_stats.clone(), + ) + .await + { Some(resp) => resp, None => continue, }; @@ -182,6 +191,7 @@ async fn scan( let word_clone = words.clone(); let term_clone = tx_term.clone(); let file_clone = tx_file.clone(); + let stats_clone = tx_stats.clone(); let task = tokio::spawn(async move { let base_depth = get_current_depth(&target); @@ -192,6 +202,7 @@ async fn scan( num_targets, term_clone, file_clone, + stats_clone, ) .await; }); @@ -280,8 +291,10 @@ async fn wrapped_main() { let save_output = !CONFIGURATION.output.is_empty(); // was -o used? + let (stats, tx_stats, stats_handle) = statistics::initialize(); + let (tx_term, tx_file, term_handle, file_handle) = - reporter::initialize(&CONFIGURATION.output, save_output); + reporter::initialize(&CONFIGURATION.output, save_output, tx_stats.clone()); // get targets from command line or stdin let targets = match get_targets().await { @@ -289,7 +302,16 @@ async fn wrapped_main() { Err(e) => { // should only happen in the event that there was an error reading from stdin log::error!("{} {}", module_colorizer("main::get_targets"), e); - clean_up(tx_term, term_handle, tx_file, file_handle, save_output).await; + clean_up( + tx_term, + term_handle, + tx_file, + file_handle, + tx_stats, + stats_handle, + save_output, + ) + .await; return; } }; @@ -297,19 +319,42 @@ async fn wrapped_main() { if !CONFIGURATION.quiet { // only print banner if -q isn't used let std_stderr = stderr(); // std::io::stderr - banner::initialize(&targets, &CONFIGURATION, &VERSION, std_stderr).await; + banner::initialize( + &targets, + &CONFIGURATION, + &VERSION, + std_stderr, + tx_stats.clone(), + ) + .await; } // discard non-responsive targets - let live_targets = heuristics::connectivity_test(&targets).await; + let live_targets = heuristics::connectivity_test(&targets, tx_stats.clone()).await; if live_targets.is_empty() { - clean_up(tx_term, term_handle, tx_file, file_handle, save_output).await; + clean_up( + tx_term, + term_handle, + tx_file, + file_handle, + tx_stats, + stats_handle, + save_output, + ) + .await; return; } // kick off a scan against any targets determined to be responsive - match scan(live_targets, tx_term.clone(), tx_file.clone()).await { + match scan( + live_targets, + tx_term.clone(), + tx_file.clone(), + tx_stats.clone(), + ) + .await + { Ok(_) => { log::info!("All scans complete!"); } @@ -318,12 +363,30 @@ async fn wrapped_main() { &format!("{} while scanning: {}", status_colorizer("Error"), e), &PROGRESS_PRINTER, ); - clean_up(tx_term, term_handle, tx_file, file_handle, save_output).await; + clean_up( + tx_term, + term_handle, + tx_file, + file_handle, + tx_stats, + stats_handle, + save_output, + ) + .await; process::exit(1); } }; - clean_up(tx_term, term_handle, tx_file, file_handle, save_output).await; + clean_up( + tx_term, + term_handle, + tx_file, + file_handle, + tx_stats, + stats_handle, + save_output, + ) + .await; log::trace!("exit: main"); } @@ -335,14 +398,18 @@ async fn clean_up( term_handle: JoinHandle<()>, tx_file: UnboundedSender, file_handle: Option>, + tx_stats: UnboundedSender, + stats_handle: JoinHandle<()>, save_output: bool, ) { log::trace!( - "enter: clean_up({:?}, {:?}, {:?}, {:?}, {})", + "enter: clean_up({:?}, {:?}, {:?}, {:?}, {:?}, {:?}, {})", tx_term, term_handle, tx_file, file_handle, + tx_stats, + stats_handle, save_output ); @@ -364,6 +431,10 @@ async fn clean_up( // we drop the file transmitter every time, because it's created no matter what drop(tx_file); + log::trace!("tx_stats: {:?}", tx_stats); + update_stat!(tx_stats, StatCommand::Exit); // send exit command and await the end of the future + stats_handle.await.unwrap_or_default(); + log::trace!("dropped file output handler's transmitter"); if save_output { // but we only await if -o was specified diff --git a/src/reporter.rs b/src/reporter.rs index fc5a5ea..66694e5 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1,6 +1,7 @@ +use crate::statistics::StatCommand; use crate::{ config::{CONFIGURATION, PROGRESS_PRINTER}, - scanner::{RESPONSES, SCANNED_URLS}, + scanner::RESPONSES, utils::{ferox_print, make_request, open_file}, FeroxChannel, FeroxResponse, FeroxSerialize, }; @@ -42,21 +43,28 @@ pub fn get_cached_file_handle(filename: &str) -> Option, ) -> ( UnboundedSender, UnboundedSender, JoinHandle<()>, Option>, ) { - log::trace!("enter: initialize({}, {})", output_file, save_output); + log::trace!( + "enter: initialize({}, {}, {:?})", + output_file, + save_output, + tx_stats + ); let (tx_rpt, rx_rpt): FeroxChannel = mpsc::unbounded_channel(); let (tx_file, rx_file): FeroxChannel = mpsc::unbounded_channel(); let file_clone = tx_file.clone(); - let term_reporter = - tokio::spawn(async move { spawn_terminal_reporter(rx_rpt, file_clone, save_output).await }); + let term_reporter = tokio::spawn(async move { + spawn_terminal_reporter(rx_rpt, file_clone, tx_stats.clone(), save_output).await + }); let file_reporter = if save_output { // -o used, need to spawn the thread for writing to disk @@ -85,12 +93,14 @@ pub fn initialize( async fn spawn_terminal_reporter( mut resp_chan: UnboundedReceiver, file_chan: UnboundedSender, + tx_stats: UnboundedSender, save_output: bool, ) { log::trace!( - "enter: spawn_terminal_reporter({:?}, {:?}, {})", + "enter: spawn_terminal_reporter({:?}, {:?}, {:?}, {})", resp_chan, file_chan, + tx_stats, save_output ); @@ -122,7 +132,13 @@ async fn spawn_terminal_reporter( if CONFIGURATION.replay_client.is_some() && should_process_response { // replay proxy specified/client created and this response's status code is one that // should be replayed - match make_request(CONFIGURATION.replay_client.as_ref().unwrap(), &resp.url()).await { + match make_request( + CONFIGURATION.replay_client.as_ref().unwrap(), + &resp.url(), + tx_stats.clone(), + ) + .await + { Ok(_) => {} Err(e) => { log::error!("{}", e); diff --git a/src/scan_manager.rs b/src/scan_manager.rs index 9793d6b..26dd9a8 100644 --- a/src/scan_manager.rs +++ b/src/scan_manager.rs @@ -6,7 +6,6 @@ use crate::{ parser::TIMESPEC_REGEX, progress, scanner::{NUMBER_OF_REQUESTS, RESPONSES, SCANNED_URLS}, - statistics::Stats, FeroxResponse, FeroxSerialize, SLEEP_DURATION, }; use console::style; diff --git a/src/scanner.rs b/src/scanner.rs index 8e65c4b..dd9e545 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,5 +1,6 @@ +use crate::statistics::StatCommand; use crate::{ - config::{Configuration, CONFIGURATION, PROGRESS_PRINTER}, + config::{Configuration, CONFIGURATION}, extractor::{get_links, request_feroxresponse_from_new_link}, filters::{ FeroxFilter, LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter, @@ -8,7 +9,7 @@ use crate::{ heuristics, scan_manager::{FeroxResponses, FeroxScans, PAUSE_SCAN}, statistics::Stats, - utils::{ferox_print, format_url, get_current_depth, make_request}, + utils::{format_url, get_current_depth, make_request}, FeroxChannel, FeroxResponse, SIMILARITY_THRESHOLD, }; use futures::{ @@ -105,15 +106,17 @@ fn spawn_recursion_handler( num_targets: usize, tx_term: UnboundedSender, tx_file: UnboundedSender, + tx_stats: UnboundedSender, ) -> BoxFuture<'static, Vec>> { log::trace!( - "enter: spawn_recursion_handler({:?}, wordlist[{} words...], {}, {}, {:?}, {:?})", + "enter: spawn_recursion_handler({:?}, wordlist[{} words...], {}, {}, {:?}, {:?}, {:?})", recursion_channel, wordlist.len(), base_depth, num_targets, tx_term, - tx_file + tx_file, + tx_stats ); let boxed_future = async move { @@ -131,6 +134,7 @@ fn spawn_recursion_handler( let term_clone = tx_term.clone(); let file_clone = tx_file.clone(); + let stats_clone = tx_stats.clone(); let resp_clone = resp.clone(); let list_clone = wordlist.clone(); @@ -142,6 +146,7 @@ fn spawn_recursion_handler( num_targets, term_clone, file_clone, + stats_clone, ) .await }); @@ -360,26 +365,25 @@ async fn make_requests( base_depth: usize, dir_chan: UnboundedSender, report_chan: UnboundedSender, + tx_stats: UnboundedSender, ) { log::trace!( - "enter: make_requests({}, {}, {}, {:?}, {:?})", + "enter: make_requests({}, {}, {}, {:?}, {:?}, {:?})", target_url, word, base_depth, dir_chan, - report_chan + report_chan, + tx_stats ); let urls = create_urls(&target_url, &word, &CONFIGURATION.extensions); for url in urls { - if let Ok(response) = make_request(&CONFIGURATION.client, &url).await { + if let Ok(response) = make_request(&CONFIGURATION.client, &url, tx_stats.clone()).await { // response came back without error, convert it to FeroxResponse let ferox_response = FeroxResponse::from(response, true).await; - STATS.update(&ferox_response); - ferox_print(&format!("FUCK YEA: {:?}", *STATS), &PROGRESS_PRINTER); - // do recursion if appropriate if !CONFIGURATION.no_recursion { try_recursion(&ferox_response, base_depth, dir_chan.clone()).await; @@ -396,11 +400,15 @@ async fn make_requests( let new_links = get_links(&ferox_response).await; for new_link in new_links { - let mut new_ferox_response = - match request_feroxresponse_from_new_link(&new_link).await { - Some(resp) => resp, - None => continue, - }; + let mut new_ferox_response = match request_feroxresponse_from_new_link( + &new_link, + tx_stats.clone(), + ) + .await + { + Some(resp) => resp, + None => continue, + }; // filter if necessary if should_filter_response(&new_ferox_response) { @@ -467,15 +475,17 @@ pub async fn scan_url( num_targets: usize, tx_term: UnboundedSender, tx_file: UnboundedSender, + tx_stats: UnboundedSender, ) { log::trace!( - "enter: scan_url({:?}, wordlist[{} words...], {}, {}, {:?}, {:?})", + "enter: scan_url({:?}, wordlist[{} words...], {}, {}, {:?}, {:?}, {:?})", target_url, wordlist.len(), base_depth, num_targets, tx_term, - tx_file + tx_file, + tx_stats ); log::info!("Starting scan against: {}", target_url); @@ -518,8 +528,10 @@ pub async fn scan_url( // Arc clones to be passed around to the various scans let wildcard_bar = progress_bar.clone(); let heuristics_term_clone = tx_term.clone(); + let heuristics_stats_clone = tx_stats.clone(); let recurser_term_clone = tx_term.clone(); let recurser_file_clone = tx_file.clone(); + let recurser_stats_clone = tx_stats.clone(); let recurser_words = wordlist.clone(); let looping_words = wordlist.clone(); @@ -531,16 +543,23 @@ pub async fn scan_url( num_targets, recurser_term_clone, recurser_file_clone, + recurser_stats_clone, ) .await }); // add any wildcard filters to `FILTERS` - let filter = - match heuristics::wildcard_test(&target_url, wildcard_bar, heuristics_term_clone).await { - Some(f) => Box::new(f), - None => Box::new(WildcardFilter::default()), - }; + let filter = match heuristics::wildcard_test( + &target_url, + wildcard_bar, + heuristics_term_clone, + heuristics_stats_clone, + ) + .await + { + Some(f) => Box::new(f), + None => Box::new(WildcardFilter::default()), + }; add_filter_to_list_of_ferox_filters(filter, FILTERS.clone()); @@ -549,6 +568,7 @@ pub async fn scan_url( .map(|word| { let txd = tx_dir.clone(); let txr = tx_term.clone(); + let txs = tx_stats.clone(); let pb = progress_bar.clone(); // progress bar is an Arc around internal state let tgt = target_url.to_string(); // done to satisfy 'static lifetime below ( @@ -561,7 +581,7 @@ pub async fn scan_url( // todo change to true when issue #107 is resolved SCANNED_URLS.pause(false).await; } - make_requests(&tgt, &word, base_depth, txd, txr).await + make_requests(&tgt, &word, base_depth, txd, txr, txs).await }), pb, ) @@ -603,8 +623,17 @@ pub async fn scan_url( /// Perform steps necessary to run scans that only need to be performed once (warming up the /// engine, as it were) -pub async fn initialize(num_words: usize, config: &Configuration) { - log::trace!("enter: initialize({}, {:?})", num_words, config,); +pub async fn initialize( + num_words: usize, + config: &Configuration, + tx_stats: UnboundedSender, +) { + log::trace!( + "enter: initialize({}, {:?}, {:?})", + num_words, + config, + tx_stats + ); // number of requests only needs to be calculated once, and then can be reused let num_reqs_expected: u64 = if config.extensions.is_empty() { @@ -679,7 +708,7 @@ pub async fn initialize(num_words: usize, config: &Configuration) { // url as-is based on input, ignores user-specified url manipulation options (add-slash etc) if let Ok(url) = format_url(&similarity_filter, &"", false, &Vec::new(), None) { // attempt to request the given url - if let Ok(resp) = make_request(&CONFIGURATION.client, &url).await { + if let Ok(resp) = make_request(&CONFIGURATION.client, &url, tx_stats.clone()).await { // if successful, create a filter based on the response's body let fr = FeroxResponse::from(resp, true).await; diff --git a/src/statistics.rs b/src/statistics.rs index dafc640..a4a8921 100644 --- a/src/statistics.rs +++ b/src/statistics.rs @@ -1,48 +1,324 @@ // todo needs to be serializable and added to scan save/resume/output // todo consider batch size for stats update/display (if display is used) -use crate::FeroxResponse; +// todo are there more metrics to capture? +// - domains redirected to? +// - number of links extracted vs busted? +// - number of borked urls? +// todo integration test that hits some/all of the errors in make_request + +use crate::{config::PROGRESS_PRINTER, FeroxChannel}; use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tokio::sync::mpsc; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio::sync::mpsc::UnboundedSender; +use tokio::task::JoinHandle; -#[derive(Debug)] -/// todo -pub enum StatError { - /// todo - Four_Oh_Three, - /// todo - Timeout, +/// Wrapper to save me from writing Ordering::Relaxed a bajillion times +/// +/// default is to increment by 1, second arg can be used to increment by a different value +macro_rules! atomic_increment { + ($metric:expr) => { + $metric.fetch_add(1, Ordering::Relaxed); + }; + + ($metric:expr, $value:expr) => { + $metric.fetch_add($value, Ordering::Relaxed); + }; } /// Data collection of statistics related to a scan #[derive(Default, Serialize, Deserialize, Debug)] pub struct Stats { /// tracker for number of timeouts seen by the client - pub timeouts: AtomicUsize, + timeouts: AtomicUsize, + + /// tracker for total number of requests sent by the client + requests: AtomicUsize, + + /// tracker for total number of requests expected to send if the scan runs to completion + /// + /// Note: this is a per-scan expectation; `expected_requests * current # of scans` would be + /// indicative of the current expectation at any given time, but is a moving target. + expected_requests: AtomicUsize, + + /// tracker for total number of errors encountered by the client + errors: AtomicUsize, + + /// tracker for overall number of 2xx status codes seen by the client + successes: AtomicUsize, + + /// tracker for overall number of 3xx status codes seen by the client + redirects: AtomicUsize, + + /// tracker for overall number of 4xx status codes seen by the client + client_errors: AtomicUsize, + + /// tracker for overall number of 5xx status codes seen by the client + server_errors: AtomicUsize, /// tracker for overall number of 403s seen by the client - four_oh_threes: AtomicUsize, - - /// tracker for overall number of 408s seen by the client - request_timeouts: AtomicUsize, - - /// tracker for overall number of 504s seen by the client - gateway_timeouts: AtomicUsize, + status_403s: AtomicUsize, } +/// implementation of statistics data collection struct impl Stats { - pub fn update(&self, response: &FeroxResponse) { - match response.status { - StatusCode::FORBIDDEN => { - self.four_oh_threes.fetch_add(1, Ordering::SeqCst); + /// increment `requests` field by one + fn add_request(&self) { + atomic_increment!(self.requests); + } + + /// create a new Stats object with an expected number of requests + /// + /// Note: this is a per-scan expectation; `expected_requests * current # of scans` would be + /// indicative of the current expectation at any given time, but is a moving target. + pub fn new(expected: usize) -> Self { + let stats = Self::default(); + atomic_increment!(stats.expected_requests, expected); + stats + } + + /// Inspect the given `StatError` and increment the appropriate fields + /// + /// Implies incrementing: + /// - requests + /// - errors + pub fn add_error(&self, error: StatError) { + self.add_request(); + atomic_increment!(self.errors); + + match error { + StatError::Timeout => { + atomic_increment!(self.timeouts); } - StatusCode::REQUEST_TIMEOUT => { - self.request_timeouts.fetch_add(1, Ordering::SeqCst); + StatError::Status403 => { + atomic_increment!(self.status_403s); + atomic_increment!(self.client_errors); } - StatusCode::GATEWAY_TIMEOUT => { - self.gateway_timeouts.fetch_add(1, Ordering::SeqCst); + _ => { + // todo implement the rest of the errors } - _ => {} + } + } + + /// Inspect the given `StatusCode` and increment the appropriate fields + /// + /// Implies incrementing: + /// - requests + /// - status_403s (when code is 403) + /// - errors (when code is [45]xx) + pub fn add_status_code(&self, status: StatusCode) { + self.add_request(); + + if status.is_success() { + atomic_increment!(self.successes); + } else if status.is_redirection() { + atomic_increment!(self.redirects); + } else if status.is_client_error() { + atomic_increment!(self.errors); + atomic_increment!(self.client_errors); + } else if status.is_server_error() { + atomic_increment!(self.errors); + atomic_increment!(self.server_errors); + } + // todo consider else / other status codes etc... + + if matches!(status, StatusCode::FORBIDDEN) { + atomic_increment!(self.status_403s); } } } + +#[derive(Debug)] +/// Enum variants used to inform the `StatCommand` protocol what `Stats` fields should be updated +pub enum StatError { + /// Represents a 403 response code + Status403, + + /// Represents a timeout error + Timeout, + + /// Represents a URL formatting error + UrlFormat, + + /// Represents an error encountered during redirection + Redirection, + + /// Represents an error encountered during connection + Connection, + + /// Represents an error resulting from the client's request + Request, + + /// Represents any other error not explicitly defined above + Other, +} + +/// Protocol definition for updating a Stats object via mpsc +pub enum StatCommand { + /// Add one to the total number of requests + AddRequest, + + /// Add one to the proper field(s) based on the given `StatError` + AddError(StatError), + + /// Add one to the proper field(s) based on the given `StatusCode` + AddStatus(StatusCode), + + /// Break out of the (infinite) mpsc receive loop + Exit, +} + +/// Spawn a single consumer task (sc side of mpsc) +/// +/// The consumer simply receives `StatCommands` and updates the given `Stats` object as appropriate +pub async fn spawn_statistics_handler( + mut stats_channel: UnboundedReceiver, + stats: Arc, +) { + while let Some(command) = stats_channel.recv().await { + match command as StatCommand { + StatCommand::AddError(err) => { + stats.add_error(err); + } + StatCommand::AddStatus(status) => { + stats.add_status_code(status); + } + StatCommand::AddRequest => stats.add_request(), + StatCommand::Exit => break, + } + } + + // todo remove or do something cool with it + PROGRESS_PRINTER.println(format!("{:?}", *stats)); +} + +/// Initialize new `Stats` object and the sc side of an mpsc channel that is responsible for +/// updates to the aforementioned object. +pub fn initialize() -> (Arc, UnboundedSender, JoinHandle<()>) { + let stats_tracker = Arc::new(Stats::default()); + let cloned = stats_tracker.clone(); + let (tx_stats, rx_stats): FeroxChannel = mpsc::unbounded_channel(); + let stats_thread = + tokio::spawn(async move { spawn_statistics_handler(rx_stats, cloned).await }); + + (stats_tracker, tx_stats, stats_thread) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// simple helper to reduce code reuse + fn setup_stats_test() -> (Arc, UnboundedSender, JoinHandle<()>) { + initialize() + } + + /// another helper to stay DRY; must be called after any sent commands and before any checks + /// performed against the Stats object + async fn teardown_stats_test(sender: UnboundedSender, handle: JoinHandle<()>) { + // send exit and await, once the await completes, stats should be updated + sender.send(StatCommand::Exit).unwrap_or_default(); + handle.await.unwrap(); + } + + #[tokio::test(core_threads = 1)] + /// when sent StatCommand::Exit, function should exit its while loop (runs forever otherwise) + async fn statistics_handler_exits() { + let (_, sender, handle) = setup_stats_test(); + + sender.send(StatCommand::Exit).unwrap_or_default(); + + handle.await.unwrap(); // blocks on the handler's while loop + + assert!(true); // if we've made it here, the test has succeeded + } + + #[tokio::test(core_threads = 1)] + /// when sent StatCommand::IncrementRequest, stats object should reflect the change + async fn statistics_handler_increments_requests() { + let (stats, tx, handle) = setup_stats_test(); + + tx.send(StatCommand::AddRequest).unwrap_or_default(); + tx.send(StatCommand::AddRequest).unwrap_or_default(); + tx.send(StatCommand::AddRequest).unwrap_or_default(); + + teardown_stats_test(tx, handle).await; + + assert_eq!(stats.requests.load(Ordering::Relaxed), 3); + } + + #[tokio::test(core_threads = 1)] + /// when sent StatCommand::IncrementRequest, stats object should reflect the change + /// + /// incrementing a 403 (tracked in status_403s) should also increment: + /// - errors + /// - requests + /// - client_errors + async fn statistics_handler_increments_403() { + let (stats, tx, handle) = setup_stats_test(); + + let err = StatCommand::AddError(StatError::Status403); + let err2 = StatCommand::AddError(StatError::Status403); + + tx.send(err).unwrap_or_default(); + tx.send(err2).unwrap_or_default(); + + teardown_stats_test(tx, handle).await; + + assert_eq!(stats.errors.load(Ordering::Relaxed), 2); + assert_eq!(stats.requests.load(Ordering::Relaxed), 2); + assert_eq!(stats.status_403s.load(Ordering::Relaxed), 2); + assert_eq!(stats.client_errors.load(Ordering::Relaxed), 2); + } + + #[tokio::test(core_threads = 1)] + /// when sent StatCommand::IncrementRequest, stats object should reflect the change + /// + /// incrementing a 403 (tracked in status_403s) should also increment: + /// - errors + /// - requests + /// - client_errors + async fn statistics_handler_increments_403_via_status_code() { + let (stats, tx, handle) = setup_stats_test(); + + let err = StatCommand::AddStatus(reqwest::StatusCode::FORBIDDEN); + let err2 = StatCommand::AddStatus(reqwest::StatusCode::FORBIDDEN); + + tx.send(err).unwrap_or_default(); + tx.send(err2).unwrap_or_default(); + + teardown_stats_test(tx, handle).await; + + assert_eq!(stats.errors.load(Ordering::Relaxed), 2); + assert_eq!(stats.requests.load(Ordering::Relaxed), 2); + assert_eq!(stats.status_403s.load(Ordering::Relaxed), 2); + assert_eq!(stats.client_errors.load(Ordering::Relaxed), 2); + } + + #[test] + /// when Stats::add_error receives StatError::Timeout, it should increment the following: + /// - timeouts + /// - requests + /// - errors + fn stats_increments_timeouts() { + let stats = Stats::default(); + stats.add_error(StatError::Timeout); + stats.add_error(StatError::Timeout); + stats.add_error(StatError::Timeout); + stats.add_error(StatError::Timeout); + + assert_eq!(stats.errors.load(Ordering::Relaxed), 4); + assert_eq!(stats.requests.load(Ordering::Relaxed), 4); + assert_eq!(stats.timeouts.load(Ordering::Relaxed), 4); + } + + #[test] + /// when Stats::new is called, the value is properly assigned to expected_requests + fn stats_new_sets_expected_requests() { + let stats = Stats::new(42); + assert_eq!(stats.expected_requests.load(Ordering::Relaxed), 42); + } +} diff --git a/src/utils.rs b/src/utils.rs index f8eeb17..4f0dbc7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,7 @@ +use crate::statistics::{ + StatCommand::{self, AddError, AddRequest, AddStatus}, + StatError::{Connection, Other, Redirection, Request, Timeout}, +}; use crate::{ config::{CONFIGURATION, PROGRESS_PRINTER}, FeroxError, FeroxResult, @@ -10,6 +14,7 @@ use rlimit::{getrlimit, setrlimit, Resource, Rlim}; use std::convert::TryInto; use std::sync::{Arc, RwLock}; use std::{fs, io}; +use tokio::sync::mpsc::UnboundedSender; /// Given the path to a file, open the file in append mode (create it if it doesn't exist) and /// return a reference to the file that is buffered and locked @@ -208,7 +213,7 @@ pub fn format_url( // the transforms that occur here will need to keep this in mind, i.e. add a slash to preserve // the current directory sent as part of the url let url = if word.is_empty() { - // v1.0.6: added during --extract-links feature inplementation to support creating urls + // v1.0.6: added during --extract-links feature implementation to support creating urls // that were extracted from response bodies, i.e. http://localhost/some/path/js/main.js url.to_string() } else if !url.ends_with('/') { @@ -262,38 +267,68 @@ pub fn format_url( } } +#[macro_export] +/// wrapper to improve code readability +macro_rules! update_stat { + ($tx:expr, $value:expr) => { + $tx.send($value).unwrap_or_default(); + }; +} + /// Initiate request to the given `Url` using `Client` -pub async fn make_request(client: &Client, url: &Url) -> FeroxResult { +pub async fn make_request( + client: &Client, + url: &Url, + tx_stats: UnboundedSender, +) -> FeroxResult { log::trace!("enter: make_request(CONFIGURATION.Client, {})", url); match client.get(url.to_owned()).send().await { - Ok(resp) => { - log::trace!("exit: make_request -> {:?}", resp); - Ok(resp) - } Err(e) => { + let mut log_level = log::Level::Error; + log::trace!("exit: make_request -> {}", e); - if e.to_string().contains("operation timed out") { + if e.is_timeout() { // only warn for timeouts, while actual errors are still left as errors - log::warn!("Error while making request: {}", e); + log_level = log::Level::Warn; + update_stat!(tx_stats, AddError(Timeout)); } else if e.is_redirect() { if let Some(last_redirect) = e.url() { // get where we were headed (last_redirect) and where we came from (url) let fancy_message = format!("{} !=> {}", url, last_redirect); let report = if let Some(msg_status) = e.status() { + update_stat!(tx_stats, AddStatus(msg_status)); create_report_string(msg_status.as_str(), "-1", "-1", "-1", &fancy_message) } else { create_report_string("UNK", "-1", "-1", "-1", &fancy_message) }; + update_stat!(tx_stats, AddError(Redirection)); + ferox_print(&report, &PROGRESS_PRINTER) }; + } else if e.is_connect() { + update_stat!(tx_stats, AddError(Connection)); + } else if e.is_request() { + update_stat!(tx_stats, AddError(Request)); } else { - log::error!("Error while making request: {}", e); + update_stat!(tx_stats, AddError(Other)); } + + if matches!(log_level, log::Level::Error) { + log::error!("Error while making request: {}", e); + } else { + log::warn!("Error while making request: {}", e); + } + Err(Box::new(e)) } + Ok(resp) => { + log::trace!("exit: make_request -> {:?}", resp); + update_stat!(tx_stats, AddRequest); + Ok(resp) + } } }