From 1b9963c96d4c5732c52deb1de69819820bc164ba Mon Sep 17 00:00:00 2001 From: epi Date: Mon, 4 Jan 2021 16:49:40 -0600 Subject: [PATCH] implemented logic for resume_scan with statistics support --- src/config.rs | 28 +++- src/extractor.rs | 24 +++- src/main.rs | 20 +-- src/scan_manager.rs | 18 +-- src/statistics.rs | 344 ++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 396 insertions(+), 38 deletions(-) diff --git a/src/config.rs b/src/config.rs index 20f665d..ab08318 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,23 +1,23 @@ use crate::{ - utils::{module_colorizer, status_colorizer}, - scan_manager::resume_scan, client, parser, progress::{add_bar, BarType}, - FeroxSerialize, DEFAULT_CONFIG_NAME, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION + scan_manager::resume_scan, + utils::{module_colorizer, status_colorizer}, + FeroxSerialize, DEFAULT_CONFIG_NAME, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION, }; use clap::{value_t, ArgMatches}; use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget}; use lazy_static::lazy_static; use reqwest::{Client, StatusCode}; use serde::{Deserialize, Serialize}; +#[cfg(not(test))] +use std::process::exit; use std::{ collections::HashMap, - env::{current_dir, current_exe}, + env::{current_dir, current_exe}, fs::read_to_string, path::PathBuf, }; -#[cfg(not(test))] -use std::process::exit; lazy_static! { /// Global configuration state @@ -228,6 +228,10 @@ pub struct Configuration { #[serde(default)] pub resumed: bool, + /// Resume scan from this file + #[serde(default)] + pub resume_from: String, + /// Whether or not a scan's current state should be saved when user presses Ctrl+C /// /// Not configurable from CLI; can only be set from a config file @@ -329,6 +333,7 @@ impl Default for Configuration { debug_log: String::new(), target_url: String::new(), time_limit: String::new(), + resume_from: String::new(), replay_proxy: String::new(), queries: Vec::new(), extensions: Vec::new(), @@ -522,6 +527,7 @@ impl Configuration { update_config_if_present!(&mut config.output, args, "output", String); update_config_if_present!(&mut config.debug_log, args, "debug_log", String); update_config_if_present!(&mut config.time_limit, args, "time_limit", String); + update_config_if_present!(&mut config.resume_from, args, "resume_from", String); if let Some(arg) = args.values_of("status_codes") { config.status_codes = arg @@ -801,6 +807,7 @@ impl Configuration { update_if_not_default!(&mut conf.scan_limit, new.scan_limit, 0); update_if_not_default!(&mut conf.replay_proxy, new.replay_proxy, ""); update_if_not_default!(&mut conf.debug_log, new.debug_log, ""); + update_if_not_default!(&mut conf.resume_from, new.resume_from, ""); update_if_not_default!(&mut conf.json, new.json, false); update_if_not_default!(&mut conf.timeout, new.timeout, timeout()); @@ -900,6 +907,7 @@ mod tests { time_limit = "10m" output = "/some/otherpath" debug_log = "/yet/anotherpath" + resume_from = "/some/state/file" redirects = true insecure = true extensions = ["html", "php", "js"] @@ -934,6 +942,7 @@ mod tests { assert_eq!(config.proxy, String::new()); assert_eq!(config.target_url, String::new()); assert_eq!(config.time_limit, String::new()); + assert_eq!(config.resume_from, String::new()); assert_eq!(config.debug_log, String::new()); assert_eq!(config.config, String::new()); assert_eq!(config.replay_proxy, String::new()); @@ -1176,6 +1185,13 @@ mod tests { assert_eq!(config.time_limit, "10m"); } + #[test] + /// parse the test config and see that the value parsed is correct + fn config_reads_resume_from() { + let config = setup_config_test(); + assert_eq!(config.resume_from, "/some/state/file"); + } + #[test] /// parse the test config and see that the values parsed are correct fn config_reads_headers() { diff --git a/src/extractor.rs b/src/extractor.rs index 71e3056..6e25982 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -148,8 +148,18 @@ pub async fn get_links( } } + let num_extensions = CONFIGURATION.extensions.len(); + let multiplier = if num_extensions > 0 { + num_extensions + } else { + 1 + }; + update_stat!(tx_stats, UpdateUsizeField(LinksExtracted, links.len())); - update_stat!(tx_stats, UpdateUsizeField(TotalExpected, links.len())); + update_stat!( + tx_stats, + UpdateUsizeField(TotalExpected, links.len() * multiplier) + ); log::trace!("exit: get_links -> {:?}", links); @@ -321,8 +331,18 @@ pub async fn extract_robots_txt( } } + let num_extensions = CONFIGURATION.extensions.len(); + let multiplier = if num_extensions > 0 { + num_extensions + } else { + 1 + }; + update_stat!(tx_stats, UpdateUsizeField(LinksExtracted, links.len())); - update_stat!(tx_stats, UpdateUsizeField(TotalExpected, links.len())); + update_stat!( + tx_stats, + UpdateUsizeField(TotalExpected, links.len() * multiplier) + ); log::trace!("exit: extract_robots_txt -> {:?}", links); links diff --git a/src/main.rs b/src/main.rs index 4842b77..bfe3e41 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use feroxbuster::{ scanner::{self, scan_url, send_report, RESPONSES, SCANNED_URLS}, statistics::{ self, - StatCommand::{self, UpdateUsizeField}, + StatCommand::{self, CreateBar, LoadStats, UpdateUsizeField}, StatField::InitialTargets, Stats, }, @@ -143,9 +143,17 @@ async fn scan( // - scanner initialized (this sent expected requests per directory to the stats thread, which // having been set, makes it so the progress bar doesn't flash as full before anything has // even happened - update_stat!(tx_stats, StatCommand::CreateBar); + update_stat!(tx_stats, CreateBar); if CONFIGURATION.resumed { + update_stat!(tx_stats, LoadStats(CONFIGURATION.resume_from.clone())); + + if let Ok(responses) = RESPONSES.responses.read() { + for response in responses.iter() { + PROGRESS_PRINTER.println(response.as_str()); + } + } + if let Ok(scans) = SCANNED_URLS.scans.lock() { for scan in scans.iter() { if let Ok(locked_scan) = scan.lock() { @@ -154,19 +162,13 @@ async fn scan( let pb = add_bar( &locked_scan.url, words.len().try_into().unwrap_or_default(), - BarType::Default, + BarType::Message, ); pb.finish(); } } } } - - if let Ok(responses) = RESPONSES.responses.read() { - for response in responses.iter() { - PROGRESS_PRINTER.println(response.as_str()); - } - } } if CONFIGURATION.extract_links { diff --git a/src/scan_manager.rs b/src/scan_manager.rs index 27c94b8..ef6bb8a 100644 --- a/src/scan_manager.rs +++ b/src/scan_manager.rs @@ -74,7 +74,7 @@ pub struct FeroxScan { pub scan_type: ScanType, /// Number of requests to populate the progress bar with - num_requests: u64, + pub num_requests: u64, /// Whether or not this scan has completed pub complete: bool, @@ -193,6 +193,7 @@ impl Serialize for FeroxScan { state.serialize_field("url", &self.url)?; state.serialize_field("scan_type", &self.scan_type)?; state.serialize_field("complete", &self.complete)?; + state.serialize_field("num_requests", &self.num_requests)?; state.end() } @@ -235,6 +236,11 @@ impl<'de> Deserialize<'de> for FeroxScan { scan.url = url.to_string(); } } + "num_requests" => { + if let Some(num_requests) = value.as_u64() { + scan.num_requests = num_requests; + } + } _ => {} } } @@ -450,13 +456,11 @@ impl FeroxScans { scan_type: ScanType, stats: Arc, ) -> (bool, Arc>) { + let num_requests = stats.expected_per_scan.load(Ordering::Relaxed) as u64; + let bar = match scan_type { ScanType::Directory => { - let progress_bar = add_bar( - &url, - stats.expected_per_scan.load(Ordering::Relaxed) as u64, - BarType::Default, - ); + let progress_bar = add_bar(&url, num_requests, BarType::Default); progress_bar.reset_elapsed(); @@ -465,8 +469,6 @@ impl FeroxScans { ScanType::File => None, }; - let num_requests = stats.expected_per_scan.load(Ordering::Relaxed) as u64; - let ferox_scan = FeroxScan::new(&url, scan_type, num_requests, bar); // If the set did not contain the scan, true is returned. diff --git a/src/statistics.rs b/src/statistics.rs index 98c82fa..fe2021f 100644 --- a/src/statistics.rs +++ b/src/statistics.rs @@ -1,7 +1,4 @@ // todo integration test that hits some/all of the errors in make_request -// todo resume_scan should repopulate statistics if possible or at least update an already existing Stats -// todo logic for determining if tuning is required - use crate::{ config::CONFIGURATION, progress::{add_bar, BarType}, @@ -12,6 +9,8 @@ use console::style; use indicatif::ProgressBar; use reqwest::StatusCode; use serde::{Deserialize, Serialize}; +use std::fs::File; +use std::io::BufReader; use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, Mutex, @@ -303,10 +302,17 @@ impl Stats { atomic_increment!(self.expected_per_scan, value); } StatField::TotalScans => { + let num_extensions = CONFIGURATION.extensions.len(); + let multiplier = if num_extensions > 0 { + num_extensions + } else { + 1 + }; + atomic_increment!(self.total_scans, value); atomic_increment!( self.total_expected, - value * self.expected_per_scan.load(Ordering::Relaxed) + value * self.expected_per_scan.load(Ordering::Relaxed) * multiplier ); } StatField::TotalExpected => { @@ -328,6 +334,69 @@ impl Stats { StatField::InitialTargets => { atomic_increment!(self.initial_targets, value); } + StatField::Requests => { + atomic_increment!(self.requests, value); + } + StatField::UrlFormatErrors => { + atomic_increment!(self.url_format_errors, value); + } + StatField::Errors => { + atomic_increment!(self.errors, value); + } + StatField::Timeouts => { + atomic_increment!(self.timeouts, value); + } + StatField::Successes => { + atomic_increment!(self.successes, value); + } + StatField::Redirects => { + atomic_increment!(self.redirects, value); + } + StatField::ClientErrors => { + atomic_increment!(self.client_errors, value); + } + StatField::ServerErrors => { + atomic_increment!(self.server_errors, value); + } + StatField::Status403s => { + atomic_increment!(self.status_403s, value); + } + StatField::Status200s => { + atomic_increment!(self.status_200s, value); + } + StatField::Status301s => { + atomic_increment!(self.status_301s, value); + } + StatField::Status302s => { + atomic_increment!(self.status_302s, value); + } + StatField::Status401s => { + atomic_increment!(self.status_401s, value); + } + StatField::Status429s => { + atomic_increment!(self.status_429s, value); + } + StatField::Status500s => { + atomic_increment!(self.status_500s, value); + } + StatField::Status503s => { + atomic_increment!(self.status_503s, value); + } + StatField::Status504s => { + atomic_increment!(self.status_504s, value); + } + StatField::Status508s => { + atomic_increment!(self.status_508s, value); + } + StatField::RedirectionErrors => { + atomic_increment!(self.redirection_errors, value); + } + StatField::ConnectionErrors => { + atomic_increment!(self.connection_errors, value); + } + StatField::RequestErrors => { + atomic_increment!(self.request_errors, value); + } _ => {} // f64 fields } } @@ -382,6 +451,9 @@ pub enum StatCommand { /// Save a `Stats` object to disk using `reporter::get_cached_file_handle` Save, + /// Load a `Stats` object from disk + LoadStats(String), + /// Break out of the (infinite) mpsc receive loop Exit, } @@ -416,6 +488,69 @@ pub enum StatField { /// Translates to `initial_targets` InitialTargets, + /// Translates to `url_format_errors` + UrlFormatErrors, + + /// Translates to `requests` + Requests, + + /// Translates to `errors` + Errors, + + /// Translates to `timeouts` + Timeouts, + + /// Translates to `successes` + Successes, + + /// Translates to `redirects` + Redirects, + + /// Translates to `client_errors` + ClientErrors, + + /// Translates to `server_errors` + ServerErrors, + + /// Translates to `status_403s` + Status403s, + + /// Translates to `status_200s` + Status200s, + + /// Translates to `status_301s` + Status301s, + + /// Translates to `status_302s` + Status302s, + + /// Translates to `status_401s` + Status401s, + + /// Translates to `status_429s` + Status429s, + + /// Translates to `status_500s` + Status500s, + + /// Translates to `status_503s` + Status503s, + + /// Translates to `status_504s` + Status504s, + + /// Translates to `status_508s` + Status508s, + + /// Translates to `redirection_errors` + RedirectionErrors, + + /// Translates to `connection_errors` + ConnectionErrors, + + /// Translates to `request_errors` + RequestErrors, + /// Translates to `directory_scan_times`; assumes a single append to the vector DirScanTimes, } @@ -424,13 +559,15 @@ pub enum StatField { /// /// The consumer simply receives `StatCommands` and updates the given `Stats` object as appropriate pub async fn spawn_statistics_handler( - mut stats_channel: UnboundedReceiver, + mut rx_stats: UnboundedReceiver, stats: Arc, + tx_stats: UnboundedSender, ) { log::trace!( - "enter: spawn_statistics_handler({:?}, {:?})", - stats_channel, - stats + "enter: spawn_statistics_handler({:?}, {:?}, {:?})", + rx_stats, + stats, + tx_stats ); // will be updated later via StatCommand; delay is for banner to print first @@ -438,8 +575,7 @@ pub async fn spawn_statistics_handler( let start = Instant::now(); - while let Some(command) = stats_channel.recv().await { - log::info!("command: {:?}", command); + while let Some(command) = rx_stats.recv().await { match command as StatCommand { StatCommand::AddError(err) => { stats.add_error(err); @@ -465,6 +601,9 @@ pub async fn spawn_statistics_handler( BarType::Total, ); } + StatCommand::LoadStats(filename) => { + load_stats(&filename, tx_stats.clone()); + } StatCommand::Exit => break, } @@ -489,16 +628,195 @@ pub async fn spawn_statistics_handler( log::trace!("exit: spawn_statistics_handler") } +/// Given a `Stats` object, send update directives over the given `StatCommand` transmitter +fn update_stats(stats: Stats, tx_stats: UnboundedSender) { + // total runtime skipped; makes no sense here as the scan has never completed + // expected_per_scan skipped as it's already updated from scanner::initialize + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Timeouts, atomic_load!(stats.timeouts)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Requests, atomic_load!(stats.requests)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Errors, atomic_load!(stats.errors)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Successes, atomic_load!(stats.successes)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Redirects, atomic_load!(stats.redirects)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::ClientErrors, atomic_load!(stats.client_errors)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::ServerErrors, atomic_load!(stats.server_errors)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::LinksExtracted, + atomic_load!(stats.links_extracted) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status200s, atomic_load!(stats.status_200s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status403s, atomic_load!(stats.status_403s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status301s, atomic_load!(stats.status_301s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status302s, atomic_load!(stats.status_302s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status401s, atomic_load!(stats.status_401s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status429s, atomic_load!(stats.status_429s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status500s, atomic_load!(stats.status_500s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status503s, atomic_load!(stats.status_503s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status504s, atomic_load!(stats.status_504s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::Status508s, atomic_load!(stats.status_508s)) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::WildcardsFiltered, + atomic_load!(stats.wildcards_filtered) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::ResponsesFiltered, + atomic_load!(stats.responses_filtered) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::ResourcesDiscovered, + atomic_load!(stats.resources_discovered) + ) + ); + + if let Ok(scan_times) = stats.directory_scan_times.lock() { + for scan_time in scan_times.iter() { + update_stat!( + tx_stats, + StatCommand::UpdateF64Field(StatField::DirScanTimes, *scan_time) + ); + } + } + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::UrlFormatErrors, + atomic_load!(stats.url_format_errors) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::RedirectionErrors, + atomic_load!(stats.redirection_errors) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField( + StatField::ConnectionErrors, + atomic_load!(stats.connection_errors) + ) + ); + + update_stat!( + tx_stats, + StatCommand::UpdateUsizeField(StatField::RequestErrors, atomic_load!(stats.request_errors)) + ); +} + +/// Populate a `Stats` object from a json entry written to disk when handling a Ctrl+c +/// +/// This is only ever called when resuming a scan from disk +pub fn load_stats(filename: &str, tx_stats: UnboundedSender) { + if let Ok(file) = File::open(filename) { + let reader = BufReader::new(file); + let state: serde_json::Value = serde_json::from_reader(reader).unwrap(); + + if let Some(state_stats) = state.get("statistics") { + if let Ok(deser_stats) = serde_json::from_value::(state_stats.clone()) { + update_stats(deser_stats, tx_stats); + } + } + } +} + /// Initialize new `Stats` object and the sc side of an mpsc channel that is responsible for /// updates to the aforementioned object. pub fn initialize() -> (Arc, UnboundedSender, JoinHandle<()>) { log::trace!("enter: initialize"); let stats_tracker = Arc::new(Stats::new()); - let cloned = stats_tracker.clone(); + let stats_cloned = stats_tracker.clone(); let (tx_stats, rx_stats): FeroxChannel = mpsc::unbounded_channel(); - let stats_thread = - tokio::spawn(async move { spawn_statistics_handler(rx_stats, cloned).await }); + let tx_stats_cloned = tx_stats.clone(); + let stats_thread = tokio::spawn(async move { + spawn_statistics_handler(rx_stats, stats_cloned, tx_stats_cloned).await + }); log::trace!( "exit: initialize -> ({:?}, {:?}, {:?})",