From d9c99913d3746d2e29f9acc0e106def06d8e349d Mon Sep 17 00:00:00 2001 From: epi Date: Tue, 26 Jan 2021 06:55:17 -0600 Subject: [PATCH] all todo done; wildcard filter default changed to u64::MAX --- src/event_handlers/command.rs | 5 +- src/event_handlers/scans.rs | 52 +++++++---- src/extractor/builder.rs | 82 +++--------------- src/extractor/container.rs | 80 ++++++++--------- src/extractor/mod.rs | 2 +- src/filters/wildcard.rs | 17 +++- src/heuristics.rs | 63 +++++++------- src/main.rs | 41 ++++----- src/scan_manager.rs | 109 ++++++++++++++--------- src/scanner.rs | 157 ++++++++++++---------------------- src/utils.rs | 1 - 11 files changed, 270 insertions(+), 339 deletions(-) diff --git a/src/event_handlers/command.rs b/src/event_handlers/command.rs index 9cfdb9f..544ff8e 100644 --- a/src/event_handlers/command.rs +++ b/src/event_handlers/command.rs @@ -41,14 +41,11 @@ pub enum Command { /// Send a `FeroxResponse` to the output handler for reporting Report(Box), - /// Send a url to be scanned (in the context of recursion), use sender to notify main when done - ScanUrl(String, Sender), - /// Send a group of urls to be scanned (only used for the urls passed in explicitly by the user) ScanInitialUrls(Vec), /// Determine whether or not recursion is appropriate, given a FeroxResponse, if so start a scan - TryRecursion(FeroxResponse), + TryRecursion(Box), /// Send a pointer to the wordlist to the recursion handler UpdateWordlist(Arc>), diff --git a/src/event_handlers/scans.rs b/src/event_handlers/scans.rs index 27c7a29..89039eb 100644 --- a/src/event_handlers/scans.rs +++ b/src/event_handlers/scans.rs @@ -2,7 +2,6 @@ use super::command::Command::UpdateUsizeField; use super::*; use crate::utils::get_url_depth; use crate::{ - config::CONFIGURATION, scan_manager::{FeroxScan, FeroxScans, ScanOrder}, scanner::scan_url, statistics::StatField::TotalScans, @@ -55,6 +54,9 @@ pub struct ScanHandler { /// group of scans that need to be joined tasks: Vec>, + /// Maximum recursion depth, a depth of 0 is infinite recursion + max_depth: usize, + /// depths associated with the initial targets provided by the user depths: Vec<(String, usize)>, } @@ -62,11 +64,17 @@ pub struct ScanHandler { /// implementation of event handler for filters impl ScanHandler { /// create new event handler - pub fn new(data: Arc, handles: Arc, receiver: CommandReceiver) -> Self { + pub fn new( + data: Arc, + handles: Arc, + max_depth: usize, + receiver: CommandReceiver, + ) -> Self { Self { data, handles, receiver, + max_depth, tasks: Vec::new(), depths: Vec::new(), wordlist: std::sync::Mutex::new(None), @@ -84,13 +92,13 @@ impl ScanHandler { /// Initialize new `FeroxScans` and the sc side of an mpsc channel that is responsible for /// updates to the aforementioned object. - pub fn initialize(handles: Arc) -> (Joiner, ScanHandle) { + pub fn initialize(handles: Arc, max_depth: usize) -> (Joiner, ScanHandle) { log::trace!("enter: initialize"); let data = Arc::new(FeroxScans::default()); let (tx, rx): FeroxChannel = mpsc::unbounded_channel(); - let mut handler = Self::new(data.clone(), handles, rx); + let mut handler = Self::new(data.clone(), handles, max_depth, rx); let task = tokio::spawn(async move { handler.start().await }); @@ -109,10 +117,6 @@ impl ScanHandler { while let Some(command) = self.receiver.recv().await { match command { - Command::ScanUrl(url, sender) => { - self.ordered_scan_url(vec![url], ScanOrder::Latest).await?; - sender.send(true).expect("oneshot channel failed"); - } Command::ScanInitialUrls(targets) => { self.ordered_scan_url(targets, ScanOrder::Initial).await?; } @@ -134,6 +138,9 @@ impl ScanHandler { Command::TryRecursion(response) => { self.try_recursion(response).await?; } + Command::Sync(sender) => { + sender.send(true).unwrap_or_default(); + } _ => {} // no other commands needed for RecursionHandler } } @@ -155,21 +162,30 @@ impl ScanHandler { /// wrapper around scanning a url to stay DRY async fn ordered_scan_url(&mut self, targets: Vec, order: ScanOrder) -> Result<()> { - for target in targets { - let (unknown, scan) = self - .data - .add_directory_scan(&target, self.handles.stats.data.clone()); + log::trace!("enter: ordered_scan_url({:?}, {:?})", targets, order); - if !unknown { - // not unknown, i.e. we've seen the url before and don't need to scan again + for target in targets { + if self.data.contains(&target) && matches!(order, ScanOrder::Latest) { + // FeroxScans knows about this url and scan isn't an Initial scan + // initial scans are skipped because when resuming from a .state file, the scans + // will already be populated in FeroxScans, so we need to not skip kicking off + // their scans continue; } + let scan = if let Some(ferox_scan) = self.data.get_scan_by_url(&target) { + ferox_scan // scan already known + } else { + self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan + }; + let list = self.get_wordlist()?; log::info!("scan handler received {} - beginning scan", target); if matches!(order, ScanOrder::Initial) { + // keeps track of the initial targets' scan depths in order to enforce the + // maximum recursion depth on any identified sub-directories self.depths.push((target.clone(), get_url_depth(&target))); } @@ -187,13 +203,14 @@ impl ScanHandler { self.tasks.push(scan.clone()); } + + log::trace!("exit: ordered_scan_url"); Ok(()) } - async fn try_recursion(&mut self, response: FeroxResponse) -> Result<()> { + async fn try_recursion(&mut self, response: Box) -> Result<()> { log::trace!("enter: try_recursion({:?})", response,); - // todo get depth from self.depths let mut base_depth = 1_usize; for (base_url, base_url_depth) in &self.depths { @@ -202,8 +219,7 @@ impl ScanHandler { } } - // todo remove CONFIG dependence, maybe in init - if response.reached_max_depth(base_depth, CONFIGURATION.depth) { + if response.reached_max_depth(base_depth, self.max_depth) { // at or past recursion depth return Ok(()); } diff --git a/src/extractor/builder.rs b/src/extractor/builder.rs index 5b78f53..e4b078e 100644 --- a/src/extractor/builder.rs +++ b/src/extractor/builder.rs @@ -1,5 +1,5 @@ use super::*; -use crate::CommandSender; +use crate::event_handlers::Handles; use anyhow::{bail, Result}; /// Regular expression used in [LinkFinder](https://github.com/GerbenJavado/LinkFinder) @@ -31,26 +31,11 @@ pub struct ExtractorBuilder<'a> { /// Response from which to extract links url: String, - /// Whether or not to try recursion + /// current configuration config: Option<&'a Configuration>, - /// transmitter to the mpsc that handles statistics gathering - tx_stats: Option, - - /// transmitter to the mpsc that handles recursive scan calls - tx_recursion: Option, - - /// transmitter to the mpsc that handles reporting information to the user - tx_reporter: Option, - - /// list of urls that will be added to when new urls are extracted - scanned_urls: Option>, - - /// depth at which the scan was started - depth: Option, - - /// copy of Stats object - stats: Option>, + /// Handles object to house the underlying mpsc transmitters + handles: Option>, /// type of extraction to be performed target: ExtractionTarget, @@ -66,12 +51,7 @@ impl<'a> ExtractorBuilder<'a> { response: Some(response), url: "".to_string(), config: None, - tx_stats: None, - tx_recursion: None, - tx_reporter: None, - scanned_urls: None, - depth: None, - stats: None, + handles: None, target: ExtractionTarget::ResponseBody, } } @@ -84,12 +64,7 @@ impl<'a> ExtractorBuilder<'a> { response: None, url: url.to_string(), config: None, - tx_stats: None, - tx_recursion: None, - tx_reporter: None, - scanned_urls: None, - depth: None, - stats: None, + handles: None, target: ExtractionTarget::RobotsTxt, } } @@ -100,41 +75,9 @@ impl<'a> ExtractorBuilder<'a> { self } - /// builder call to set `tx_recursion` - pub fn recursion_transmitter(&mut self, tx_recursion: CommandSender) -> &mut Self { - // todo change to scans_transmitter or w/e same on struct; don't bother, going to make extractor take a Handles object later anyway - self.tx_recursion = Some(tx_recursion); - self - } - - /// builder call to set `tx_stats` - pub fn stats_transmitter(&mut self, tx_stats: CommandSender) -> &mut Self { - self.tx_stats = Some(tx_stats); - self - } - - /// builder call to set `tx_reporter` - pub fn reporter_transmitter(&mut self, tx_reporter: CommandSender) -> &mut Self { - // todo change to outputs or w/e same on struct; don't bother, going to make extractor take a Handles object later anyway - self.tx_reporter = Some(tx_reporter); - self - } - - /// builder call to set `scanned_urls` - pub fn scanned_urls(&mut self, scanned_urls: Arc) -> &mut Self { - self.scanned_urls = Some(scanned_urls); - self - } - - /// builder call to set `stats` - pub fn stats(&mut self, stats: Arc) -> &mut Self { - self.stats = Some(stats); - self - } - - /// builder call to set `depth` - pub fn depth(&mut self, depth: usize) -> &mut Self { - self.depth = Some(depth); + /// builder call to set `handles` + pub fn handles(&mut self, handles: Arc) -> &mut Self { + self.handles = Some(handles); self } @@ -156,12 +99,7 @@ impl<'a> ExtractorBuilder<'a> { }, url: self.url.to_owned(), config: self.config.unwrap(), - tx_stats: self.tx_stats.as_ref().unwrap().clone(), - tx_recursion: self.tx_recursion.as_ref().unwrap().clone(), - tx_reporter: self.tx_reporter.as_ref().unwrap().clone(), - scanned_urls: self.scanned_urls.as_ref().unwrap().clone(), - depth: self.depth.unwrap(), - stats: self.stats.as_ref().unwrap().clone(), + handles: self.handles.as_ref().unwrap().clone(), target: self.target, }) } diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 7e7fb84..25202ee 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -1,12 +1,12 @@ use super::*; +use crate::event_handlers::Command; +use crate::scan_manager::ScanOrder; use crate::{ client, - event_handlers::Command::UpdateUsizeField, + event_handlers::{Command::UpdateUsizeField, Handles}, scanner::send_report, - send_command, statistics::StatField::{LinksExtracted, TotalExpected}, utils::{format_url, make_request}, - CommandSender, }; use anyhow::{bail, Context, Result}; use reqwest::{StatusCode, Url}; @@ -40,23 +40,8 @@ pub struct Extractor<'a> { /// Whether or not to try recursion pub(super) config: &'a Configuration, - /// transmitter to the mpsc that handles statistics gathering - pub(super) tx_stats: CommandSender, - - /// transmitter to the mpsc that handles recursive scan calls - pub(super) tx_recursion: CommandSender, - - /// transmitter to the mpsc that handles reporting information to the user - pub(super) tx_reporter: CommandSender, - - /// list of urls that will be added to when new urls are extracted - pub(super) scanned_urls: Arc, - - /// depth at which the scan was started - pub(super) depth: usize, - - /// copy of Stats object - pub(super) stats: Arc, + /// Handles object to house the underlying mpsc transmitters + pub(super) handles: Arc, /// type of extraction to be performed pub(super) target: ExtractionTarget, @@ -77,6 +62,8 @@ impl<'a> Extractor<'a> { RecursionStatus::Recursive }; + let scanned_urls = self.handles.ferox_scans()?; + for link in links { let mut resp = match self.request_link(&link).await { Ok(resp) => resp, @@ -84,19 +71,22 @@ impl<'a> Extractor<'a> { }; // filter if necessary - // if should_filter_response(&resp, self.tx_stats.clone()) { - // continue; - // } - // todo this needs to be reimplemented + if self + .handles + .filters + .data + .should_filter_response(&resp, self.handles.stats.tx.clone()) + { + continue; + } if resp.is_file() { // very likely a file, simply request and report log::debug!("Extracted file: {}", resp); - self.scanned_urls - .add_file_scan(&resp.url().to_string(), self.stats.clone()); + scanned_urls.add_file_scan(&resp.url().to_string(), ScanOrder::Latest); - send_report(self.tx_reporter.clone(), resp); + send_report(self.handles.output.tx.clone(), resp); continue; } @@ -118,8 +108,8 @@ impl<'a> Extractor<'a> { resp.set_url(&format!("{}/", resp.url())); } - // try_recursion(&resp, self.depth, self.tx_recursion.clone()).await; - // todo needs to be sent across to scans handler + self.handles + .send_scan_command(Command::TryRecursion(Box::new(resp)))?; } } Ok(()) @@ -177,7 +167,7 @@ impl<'a> Extractor<'a> { } } - self.update_stats(links.len()); + self.update_stats(links.len())?; log::trace!("exit: get_links -> {:?}", links); @@ -296,14 +286,11 @@ impl<'a> Extractor<'a> { self.config.add_slash, &self.config.queries, None, - self.tx_stats.clone(), + self.handles.stats.tx.clone(), )?; + let scanned_urls = self.handles.ferox_scans()?; - if self - .scanned_urls - .get_scan_by_url(&new_url.to_string()) - .is_some() - { + if scanned_urls.get_scan_by_url(&new_url.to_string()).is_some() { //we've seen the url before and don't need to scan again log::trace!("exit: request_link -> None"); bail!("previously seen url"); @@ -311,7 +298,7 @@ impl<'a> Extractor<'a> { // make the request and store the response let new_response = - make_request(&self.config.client, &new_url, self.tx_stats.clone()).await?; + make_request(&self.config.client, &new_url, self.handles.stats.tx.clone()).await?; let new_ferox_response = FeroxResponse::from(new_response, true).await; @@ -345,7 +332,7 @@ impl<'a> Extractor<'a> { } } - self.update_stats(links.len()); + self.update_stats(links.len())?; log::trace!("exit: extract_robots_txt -> {:?}", links); Ok(links) @@ -385,7 +372,7 @@ impl<'a> Extractor<'a> { let mut url = Url::parse(&self.url)?; url.set_path("/robots.txt"); // overwrite existing path with /robots.txt - let response = make_request(&client, &url, self.tx_stats.clone()).await?; + let response = make_request(&client, &url, self.handles.stats.tx.clone()).await?; let ferox_response = FeroxResponse::from(response, true).await; log::trace!("exit: get_robots_file -> {}", ferox_response); @@ -393,13 +380,16 @@ impl<'a> Extractor<'a> { } /// update total number of links extracted and expected responses - fn update_stats(&self, num_links: usize) { + fn update_stats(&self, num_links: usize) -> Result<()> { let multiplier = self.config.extensions.len().max(1); - send_command!(self.tx_stats, UpdateUsizeField(LinksExtracted, num_links)); - send_command!( - self.tx_stats, - UpdateUsizeField(TotalExpected, num_links * multiplier) - ); + self.handles + .stats + .send(UpdateUsizeField(LinksExtracted, num_links))?; + self.handles + .stats + .send(UpdateUsizeField(TotalExpected, num_links * multiplier))?; + + Ok(()) } } diff --git a/src/extractor/mod.rs b/src/extractor/mod.rs index 8ceef62..0231c93 100644 --- a/src/extractor/mod.rs +++ b/src/extractor/mod.rs @@ -8,6 +8,6 @@ pub use self::builder::ExtractionTarget; pub use self::builder::ExtractorBuilder; pub use self::container::Extractor; -use crate::{config::Configuration, scan_manager::FeroxScans, statistics::Stats, FeroxResponse}; +use crate::{config::Configuration, FeroxResponse}; use regex::Regex; use std::sync::Arc; diff --git a/src/filters/wildcard.rs b/src/filters/wildcard.rs index ed46df5..6fa4164 100644 --- a/src/filters/wildcard.rs +++ b/src/filters/wildcard.rs @@ -8,7 +8,7 @@ use super::*; /// /// `size` is size of the response that should be included with filters passed via runtime /// configuration and any static wildcard lengths. -#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct WildcardFilter { /// size of the response that will later be combined with the length of the path of the url /// requested @@ -18,6 +18,17 @@ pub struct WildcardFilter { pub size: u64, } +/// implement default that populates both values with u64::MAX +impl Default for WildcardFilter { + /// populate both values with u64::MAX + fn default() -> Self { + Self { + size: u64::MAX, + dynamic: u64::MAX, + } + } +} + /// implementation of FeroxFilter for WildcardFilter impl FeroxFilter for WildcardFilter { /// Examine size, dynamic, and content_len to determine whether or not the response received @@ -33,7 +44,7 @@ impl FeroxFilter for WildcardFilter { return false; } - if self.size > 0 && self.size == response.content_length() { + if self.size != u64::MAX && self.size == response.content_length() { // static wildcard size found during testing // size isn't default, size equals response length, and auto-filter is on log::debug!("static wildcard: filtered out {}", response.url()); @@ -41,7 +52,7 @@ impl FeroxFilter for WildcardFilter { return true; } - if self.dynamic > 0 { + if self.dynamic != u64::MAX { // dynamic wildcard offset found during testing // I'm about to manually split this url path instead of using reqwest::Url's diff --git a/src/heuristics.rs b/src/heuristics.rs index ff9cd62..8ca3916 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -1,12 +1,14 @@ use crate::{ config::{CONFIGURATION, PROGRESS_PRINTER}, - event_handlers::Command, + event_handlers::{Command, Handles}, filters::WildcardFilter, utils::{ferox_print, format_url, get_url_path_length, make_request, status_colorizer}, - CommandSender, FeroxResponse, + FeroxResponse, }; +use anyhow::Result; use console::style; use indicatif::ProgressBar; +use std::sync::Arc; use tokio::sync::mpsc::UnboundedSender; use uuid::Uuid; @@ -39,31 +41,22 @@ fn unique_string(length: usize) -> String { pub async fn wildcard_test( target_url: &str, bar: ProgressBar, - tx_term: CommandSender, - tx_stats: CommandSender, -) -> Option { + handles: Arc, +) -> Result<()> { log::trace!( - "enter: wildcard_test({:?}, {:?}, {:?}, {:?})", + "enter: wildcard_test({:?}, {:?}, {:?})", target_url, bar, - tx_term, - tx_stats + handles, ); if CONFIGURATION.dont_filter { // early return, dont_filter scans don't need tested log::trace!("exit: wildcard_test -> None"); - return None; + return Ok(()); } - let tx_term_mwcr1 = tx_term.clone(); - let tx_term_mwcr2 = tx_term.clone(); - let tx_stats_mwcr1 = tx_stats.clone(); - let tx_stats_mwcr2 = tx_stats.clone(); - - if let Some(ferox_response) = - make_wildcard_request(&target_url, 1, tx_term_mwcr1, tx_stats_mwcr1).await - { + if let Some(ferox_response) = make_wildcard_request(&target_url, 1, handles.clone()).await { bar.inc(1); // found a wildcard response @@ -73,14 +66,15 @@ pub async fn wildcard_test( if wc_length == 0 { log::trace!("exit: wildcard_test -> Some({:?})", wildcard); - return Some(wildcard); + handles + .filters + .send(Command::AddFilter(Box::new(wildcard)))?; + return Ok(()); } // content length of wildcard is non-zero, perform additional tests: // make a second request, with a known-sized (64) longer request - if let Some(resp_two) = - make_wildcard_request(&target_url, 3, tx_term_mwcr2, tx_stats_mwcr2).await - { + if let Some(resp_two) = make_wildcard_request(&target_url, 3, handles.clone()).await { bar.inc(1); let wc2_length = resp_two.content_length(); @@ -129,11 +123,14 @@ pub async fn wildcard_test( } log::trace!("exit: wildcard_test -> Some({:?})", wildcard); - return Some(wildcard); + handles + .filters + .send(Command::AddFilter(Box::new(wildcard)))?; + return Ok(()); } log::trace!("exit: wildcard_test -> None"); - None + Ok(()) } /// Generates a uuid and appends it to the given target url. The reasoning is that the randomly @@ -145,15 +142,13 @@ pub async fn wildcard_test( async fn make_wildcard_request( target_url: &str, length: usize, - tx_term: CommandSender, - tx_stats: CommandSender, + handles: Arc, ) -> Option { log::trace!( - "enter: make_wildcard_request({}, {}, {:?}, {:?})", + "enter: make_wildcard_request({}, {}, {:?})", target_url, length, - tx_term, - tx_stats, + handles ); let unique_str = unique_string(length); @@ -164,7 +159,7 @@ async fn make_wildcard_request( CONFIGURATION.add_slash, &CONFIGURATION.queries, None, - tx_stats.clone(), + handles.stats.tx.clone(), ) { Ok(url) => url, Err(e) => { @@ -177,7 +172,7 @@ async fn make_wildcard_request( match make_request( &CONFIGURATION.client, &nonexistent.to_owned(), - tx_stats.clone(), + handles.stats.tx.clone(), ) .await { @@ -191,8 +186,12 @@ async fn make_wildcard_request( ferox_response.wildcard = true; if !CONFIGURATION.quiet - // && !should_filter_response(&ferox_response, tx_stats.clone()) // todo this needs to be reimplemented - && tx_term + && !handles + .filters + .data + .should_filter_response(&ferox_response, handles.stats.tx.clone()) + && handles + .output .send(Command::Report(Box::new(ferox_response.clone()))) .is_err() { diff --git a/src/main.rs b/src/main.rs index 547d9af..9030b11 100644 --- a/src/main.rs +++ b/src/main.rs @@ -130,13 +130,12 @@ async fn scan(targets: Vec, handles: Arc) -> Result<()> { handles.stats.sync().await?; if CONFIGURATION.resumed { - let from_here = CONFIGURATION.resume_from.clone(); - handles.stats.send(LoadStats(from_here))?; - + // display what has already been completed scanned_urls.print_known_responses(); scanned_urls.print_completed_bars(words.len())?; } + log::debug!("sending {:?} to be scanned as initial targets", targets); handles.send_scan_command(ScanInitialUrls(targets))?; log::trace!("exit: scan"); @@ -146,7 +145,7 @@ async fn scan(targets: Vec, handles: Arc) -> Result<()> { /// Get targets from either commandline or stdin, pass them back to the caller as a Result async fn get_targets(handles: Arc) -> Result> { - log::trace!("enter: get_targets"); + log::trace!("enter: get_targets({:?})", handles); let mut targets = vec![]; @@ -166,7 +165,7 @@ async fn get_targets(handles: Arc) -> Result> { if let Ok(scans) = ferox_scans.scans.read() { for scan in scans.iter() { - // SCANNED_URLS gets deserialized scans added to it at program start if --resume-from + // ferox_scans gets deserialized scans added to it at program start if --resume-from // is used, so scans that aren't marked complete still need to be scanned if scan.is_complete() { // this one's already done, ignore it @@ -210,7 +209,7 @@ async fn wrapped_main() -> Result<()> { // bundle up all the disparate handles and JoinHandles (tasks) let handles = Arc::new(Handles::new(stats_handle, filters_handle, out_handle)); - let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone()); + let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone(), CONFIGURATION.depth); handles.scan_handle(scan_handle); // set's the ScanHandle after Handles initialization @@ -220,11 +219,9 @@ async fn wrapped_main() -> Result<()> { if !CONFIGURATION.time_limit.is_empty() { // --time-limit value not an empty string, need to kick off the thread that enforces // the limit - - let max_time_stats = handles.stats.data.clone(); - + let time_handles = handles.clone(); tokio::spawn(async move { - scan_manager::start_max_time_thread(&CONFIGURATION.time_limit, max_time_stats).await + scan_manager::start_max_time_thread(&CONFIGURATION.time_limit, time_handles).await }); } @@ -238,11 +235,21 @@ async fn wrapped_main() -> Result<()> { if CONFIGURATION.save_state { // start the ctrl+c handler - scan_manager::initialize(handles.stats.data.clone()); + scan_manager::initialize(handles.clone()); + } + + if CONFIGURATION.resumed { + let scanned_urls = handles.ferox_scans()?; + let from_here = CONFIGURATION.resume_from.clone(); + + // populate FeroxScans object with previously seen scans + scanned_urls.add_serialized_scans(&from_here)?; + + // populate Stats object with previously known statistics + handles.stats.send(LoadStats(from_here))?; } // get targets from command line or stdin - // todo a bunch of fucking functions needs SCANNED_URLS replaced let targets = match get_targets(handles.clone()).await { Ok(t) => t, Err(e) => { @@ -259,9 +266,9 @@ async fn wrapped_main() -> Result<()> { let mut banner = Banner::new(&targets, &CONFIGURATION); // only interested in the side-effect that sets banner.update_status - let _ = banner + banner .check_for_updates(&CONFIGURATION.client, UPDATE_URL, handles.stats.tx.clone()) - .await; + .await?; if banner.print_to(std_stderr, &CONFIGURATION).is_err() { clean_up(handles, tasks).await?; @@ -288,7 +295,6 @@ async fn wrapped_main() -> Result<()> { } // kick off a scan against any targets determined to be responsive - match scan(live_targets, handles.clone()).await { Ok(_) => {} Err(e) => { @@ -297,11 +303,6 @@ async fn wrapped_main() -> Result<()> { } } - // todo known things not working: overall bar lags behind other bars (seems ok, keep an eye on it during this branch) - // todo known things not working: confirm multi target from stdin works - // todo known things not working: confirm same # of requests seen in burp as reported - // todo known things not working: scan cancel menu is hard fkn broke - clean_up(handles, tasks).await?; log::trace!("exit: wrapped_main"); diff --git a/src/scan_manager.rs b/src/scan_manager.rs index 3ac570b..5a84e6a 100644 --- a/src/scan_manager.rs +++ b/src/scan_manager.rs @@ -26,13 +26,14 @@ use tokio::{ }; use uuid::Uuid; +use crate::event_handlers::Handles; use crate::utils::fmt_err; use crate::utils::write_to; use crate::{ config::{Configuration, CONFIGURATION, PROGRESS_BAR, PROGRESS_PRINTER}, parser::TIMESPEC_REGEX, progress::{add_bar, BarType}, - scanner::{RESPONSES, SCANNED_URLS}, + scanner::RESPONSES, statistics::Stats, utils::open_file, FeroxResponse, FeroxSerialize, SLEEP_DURATION, @@ -90,6 +91,9 @@ pub struct FeroxScan { /// The type of scan pub scan_type: ScanType, + /// The order in which the scan was received + pub scan_order: ScanOrder, + /// Number of requests to populate the progress bar with pub num_requests: u64, @@ -114,6 +118,7 @@ impl Default for FeroxScan { task: sync::Mutex::new(None), // tokio mutex status: Mutex::new(ScanStatus::default()), num_requests: 0, + scan_order: ScanOrder::Latest, url: String::new(), progress_bar: Mutex::new(None), scan_type: ScanType::File, @@ -190,12 +195,14 @@ impl FeroxScan { pub fn new( url: &str, scan_type: ScanType, + scan_order: ScanOrder, num_requests: u64, pb: Option, ) -> Arc { Arc::new(Self { url: url.to_string(), scan_type, + scan_order, num_requests, progress_bar: Mutex::new(pb), ..Default::default() @@ -232,7 +239,7 @@ impl FeroxScan { /// await a task's completion, similar to a thread's join; perform necessary bookkeeping pub async fn join(&self) { - log::trace!("enter join({:?})", self); + log::debug!("enter join({:?})", self); let mut guard = self.task.lock().await; if guard.is_some() { @@ -243,7 +250,7 @@ impl FeroxScan { } } - log::trace!("exit join({:?})", self); + // log::trace!("exit join({:?})", self); } } @@ -511,6 +518,10 @@ pub struct FeroxScans { /// menu used for providing a way for users to cancel a scan menu: Menu, + + /// number of requests expected per scan (mirrors the same on Stats); used for initializing + /// progress bars and feroxscans + bar_length: Mutex, } /// Serialize implementation for FeroxScans @@ -565,6 +576,31 @@ impl FeroxScans { sentry } + /// load serialized FeroxScan(s) into this FeroxScans + pub fn add_serialized_scans(&self, filename: &str) -> Result<()> { + log::trace!("enter: add_serialized_scans({})", filename); + let file = File::open(filename)?; + + let reader = BufReader::new(file); + let state: serde_json::Value = serde_json::from_reader(reader)?; + + if let Some(scans) = state.get("scans") { + if let Some(arr_scans) = scans.as_array() { + for scan in arr_scans { + let deser_scan: FeroxScan = + serde_json::from_value(scan.clone()).unwrap_or_default(); + // need to determine if it's complete and based on that create a progress bar + // populate it accordingly based on completion + log::debug!("added: {}", deser_scan); + self.insert(Arc::new(deser_scan)); + } + } + } + + log::trace!("exit: add_serialized_scans"); + Ok(()) + } + /// Simple check for whether or not a FeroxScan is contained within the inner container based /// on the given URL pub fn contains(&self, url: &str) -> bool { @@ -613,13 +649,10 @@ impl FeroxScans { }; for (i, scan) in scans.iter().enumerate() { - if scan.task.lock().await.is_none() { - // no JoinHandle associated with this FeroxScan, meaning it was an original - // target passed in via either -u or --stdin - // todo check this assumption, as we swap out the task with None once joined + if matches!(scan.scan_order, ScanOrder::Initial) || scan.task.try_lock().is_err() { + // original target passed in via either -u or --stdin continue; } - self.menu.println(&format!("fdaf {}", scan)); if matches!(scan.scan_type, ScanType::Directory) { // we're only interested in displaying directory scans, as those are @@ -749,6 +782,13 @@ impl FeroxScans { } } + /// set the bar length of FeroxScans + pub fn set_bar_length(&self, bar_length: u64) { + if let Ok(mut guard) = self.bar_length.lock() { + *guard = bar_length; + } + } + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` /// /// If `FeroxScans` did not already contain the scan, return true; otherwise return false @@ -758,14 +798,17 @@ impl FeroxScans { &self, url: &str, scan_type: ScanType, - stats: Arc, + scan_order: ScanOrder, ) -> (bool, Arc) { - // todo eventually this should live on the struct and remove need ofr stats being passed in - let num_requests = stats.expected_per_scan() as u64; + let bar_length = if let Ok(guard) = self.bar_length.lock() { + *guard + } else { + 0 + }; let bar = match scan_type { ScanType::Directory => { - let progress_bar = add_bar(&url, num_requests, BarType::Default); + let progress_bar = add_bar(&url, bar_length, BarType::Default); progress_bar.reset_elapsed(); @@ -774,7 +817,7 @@ impl FeroxScans { ScanType::File => None, }; - let ferox_scan = FeroxScan::new(&url, scan_type, num_requests, bar); + let ferox_scan = FeroxScan::new(&url, scan_type, scan_order, bar_length, bar); // If the set did not contain the scan, true is returned. // If the set did contain the scan, false is returned. @@ -788,8 +831,8 @@ impl FeroxScans { /// If `FeroxScans` did not already contain the scan, return true; otherwise return false /// /// Also return a reference to the new `FeroxScan` - pub fn add_directory_scan(&self, url: &str, stats: Arc) -> (bool, Arc) { - self.add_scan(&url, ScanType::Directory, stats) + pub fn add_directory_scan(&self, url: &str, scan_order: ScanOrder) -> (bool, Arc) { + self.add_scan(&url, ScanType::Directory, scan_order) } /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a File Scan @@ -797,8 +840,8 @@ impl FeroxScans { /// If `FeroxScans` did not already contain the scan, return true; otherwise return false /// /// Also return a reference to the new `FeroxScan` - pub fn add_file_scan(&self, url: &str, stats: Arc) -> (bool, Arc) { - self.add_scan(&url, ScanType::File, stats) + pub fn add_file_scan(&self, url: &str, scan_order: ScanOrder) -> (bool, Arc) { + self.add_scan(&url, ScanType::File, scan_order) } pub fn has_active_scans(&self) -> bool { @@ -893,7 +936,7 @@ impl FeroxResponses { #[derive(Serialize, Debug)] pub struct FeroxState { /// Known scans - scans: &'static FeroxScans, + scans: Arc, /// Current running config config: &'static Configuration, @@ -923,7 +966,7 @@ impl FeroxSerialize for FeroxState { /// that representation to seconds and then wait for those seconds to elapse. Once that period /// of time has elapsed, kill all currently running scans and dump a state file to disk that can /// be used to resume any unfinished scan. -pub async fn start_max_time_thread(time_spec: &str, stats: Arc) { +pub async fn start_max_time_thread(time_spec: &str, handles: Arc) { log::trace!("enter: start_max_time_thread({})", time_spec); // as this function has already made it through the parser, which calls is_match on @@ -955,7 +998,7 @@ pub async fn start_max_time_thread(time_spec: &str, stats: Arc) { #[cfg(test)] panic!(stats); #[cfg(not(test))] - let _ = sigint_handler(stats); + let _ = sigint_handler(handles); } log::error!( @@ -965,8 +1008,8 @@ pub async fn start_max_time_thread(time_spec: &str, stats: Arc) { } /// Writes the current state of the program to disk (if save_state is true) and then exits -fn sigint_handler(stats: Arc) -> Result<()> { - log::trace!("enter: sigint_handler({:?})", stats); +fn sigint_handler(handles: Arc) -> Result<()> { + log::trace!("enter: sigint_handler({:?})", handles); let ts = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); @@ -993,9 +1036,9 @@ fn sigint_handler(stats: Arc) -> Result<()> { let state = FeroxState { config: &CONFIGURATION, - scans: &SCANNED_URLS, + scans: handles.ferox_scans()?, responses: &RESPONSES, - statistics: stats, + statistics: handles.stats.data.clone(), }; let state_file = open_file(&filename); @@ -1008,11 +1051,11 @@ fn sigint_handler(stats: Arc) -> Result<()> { } /// Initialize the ctrl+c handler that saves scan state to disk -pub fn initialize(stats: Arc) { - log::trace!("enter: initialize({:?})", stats); +pub fn initialize(handles: Arc) { + log::trace!("enter: initialize({:?})", handles); let result = ctrlc::set_handler(move || { - let _ = sigint_handler(stats.clone()); + let _ = sigint_handler(handles.clone()); }); if result.is_err() { @@ -1058,18 +1101,6 @@ pub fn resume_scan(filename: &str) -> Configuration { } } - if let Some(scans) = state.get("scans") { - if let Some(arr_scans) = scans.as_array() { - for scan in arr_scans { - let deser_scan: FeroxScan = - serde_json::from_value(scan.clone()).unwrap_or_default(); - // need to determine if it's complete and based on that create a progress bar - // populate it accordingly based on completion - SCANNED_URLS.insert(Arc::new(deser_scan)); - } - } - } - log::trace!("exit: resume_scan -> {:?}", config); config } diff --git a/src/scanner.rs b/src/scanner.rs index 8248295..8b91eb1 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -6,14 +6,12 @@ use crate::{ }, extractor::ExtractorBuilder, filters::{ - LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter, WildcardFilter, - WordsFilter, + LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter, WordsFilter, }, heuristics, - scan_manager::{FeroxResponses, FeroxScans, ScanOrder, ScanStatus, PAUSE_SCAN}, + scan_manager::{FeroxResponses, ScanOrder, ScanStatus, PAUSE_SCAN}, statistics::StatField::{DirScanTimes, ExpectedPerScan}, - traits::FeroxFilter, - utils::{fmt_err, format_url, get_url_depth, make_request}, + utils::{fmt_err, format_url, make_request}, CommandSender, FeroxResponse, SIMILARITY_THRESHOLD, }; use anyhow::{bail, Result}; @@ -25,22 +23,12 @@ use reqwest::Url; #[cfg(not(test))] use std::process::exit; use std::{ - collections::HashSet, - convert::TryInto, - ops::Deref, - sync::atomic::Ordering, - sync::{Arc, RwLock}, + collections::HashSet, convert::TryInto, ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant, }; use tokio::sync::{mpsc::UnboundedSender, Semaphore}; lazy_static! { - /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication - pub static ref SCANNED_URLS: FeroxScans = FeroxScans::default(); - - /// Vector of implementors of the FeroxFilter trait - static ref FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); - /// Vector of FeroxResponse objects pub static ref RESPONSES: FeroxResponses = FeroxResponses::default(); @@ -105,12 +93,11 @@ fn create_urls( /// Makes multiple requests based on the presence of extensions /// /// Attempts recursion when appropriate and sends Responses to the output handler for processing -async fn make_requests(target_url: &str, word: &str, base_depth: usize, handles: Arc) { +async fn make_requests(target_url: &str, word: &str, handles: Arc) -> Result<()> { log::trace!( - "enter: make_requests({}, {}, {}, {:?})", + "enter: make_requests({}, {}, {:?})", target_url, word, - base_depth, handles ); @@ -121,56 +108,42 @@ async fn make_requests(target_url: &str, word: &str, base_depth: usize, handles: handles.stats.tx.clone(), ); - let scanned_urls = handles.ferox_scans().expect("Could not get FeroxScans"); - // todo abstract away, and by that i mean that extractor and try_recursion should either take - // Handles or be put into a struct somewhere - let tx_scans = handles.scans.read().unwrap().as_ref().unwrap().tx.clone(); - for url in urls { - if let Ok(response) = - make_request(&CONFIGURATION.client, &url, handles.stats.tx.clone()).await - { - // response came back without error, convert it to FeroxResponse - let ferox_response = FeroxResponse::from(response, true).await; + let response = make_request(&CONFIGURATION.client, &url, handles.stats.tx.clone()).await?; - // do recursion if appropriate - if !CONFIGURATION.no_recursion { - tx_scans - .send(Command::TryRecursion(ferox_response.clone())) - .unwrap_or_else(|e| log::warn!("Could not send {} for recursion: {}", url, e)); - } + // response came back without error, convert it to FeroxResponse + let ferox_response = FeroxResponse::from(response, true).await; - // purposefully doing recursion before filtering. the thought process is that - // even though this particular url is filtered, subsequent urls may not - if handles - .filters - .data - .should_filter_response(&ferox_response, handles.stats.tx.clone()) - { - continue; - } - - if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() { - // todo extractor should probably just take Handles - let extractor = ExtractorBuilder::with_response(&ferox_response) - .depth(base_depth) - .config(&CONFIGURATION) - .recursion_transmitter(tx_scans.clone()) - .stats_transmitter(handles.stats.tx.clone()) - .reporter_transmitter(handles.output.tx.clone()) - .scanned_urls(scanned_urls.clone()) - .stats(handles.stats.data.clone()) - .build() - .unwrap(); // todo change once this function returns Result - - let _ = extractor.extract().await; - } - - // everything else should be reported - send_report(handles.output.tx.clone(), ferox_response); + // do recursion if appropriate + if !CONFIGURATION.no_recursion { + handles.send_scan_command(Command::TryRecursion(Box::new(ferox_response.clone())))?; } + + // purposefully doing recursion before filtering. the thought process is that + // even though this particular url is filtered, subsequent urls may not + if handles + .filters + .data + .should_filter_response(&ferox_response, handles.stats.tx.clone()) + { + continue; + } + + if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() { + let extractor = ExtractorBuilder::with_response(&ferox_response) + .config(&CONFIGURATION) + .handles(handles.clone()) + .build()?; + + extractor.extract().await?; + } + + // everything else should be reported + send_report(handles.output.tx.clone(), ferox_response); } + log::trace!("exit: make_requests"); + Ok(()) } /// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel` @@ -204,8 +177,6 @@ pub async fn scan_url( handles ); - let depth = get_url_depth(&target_url); // todo - log::info!("Starting scan against: {}", target_url); let scan_timer = Instant::now(); @@ -213,24 +184,17 @@ pub async fn scan_url( if matches!(order, ScanOrder::Initial) && CONFIGURATION.extract_links { // only grab robots.txt on the initial scan_url calls. all fresh dirs will be passed // to try_recursion - // todo Extractor should just take Handles let extractor = ExtractorBuilder::with_url(target_url) - .depth(depth) .config(&CONFIGURATION) - // todo abstract the call here, or just leave it til i put handles in extractor instead - .recursion_transmitter(handles.scans.read().unwrap().as_ref().unwrap().tx.clone()) - .stats_transmitter(handles.stats.tx.clone()) - .reporter_transmitter(handles.output.tx.clone()) - .scanned_urls(handles.ferox_scans()?) - .stats(handles.stats.data.clone()) + .handles(handles.clone()) .build()?; let _ = extractor.extract().await; } - let ferox_scans = handles.ferox_scans()?; + let scanned_urls = handles.ferox_scans()?; - let ferox_scan = match ferox_scans.get_scan_by_url(&target_url) { + let ferox_scan = match scanned_urls.get_scan_by_url(&target_url) { Some(scan) => { scan.set_status(ScanStatus::Running)?; scan @@ -251,33 +215,12 @@ pub async fn scan_url( // waits until an outstanding permit is dropped. At this point, the freed permit is assigned // to the caller. let permit = SCAN_LIMITER.acquire().await; - // todo can be moved to scan handler, just acquire before calling scan // Arc clones to be passed around to the various scans let wildcard_bar = progress_bar.clone(); let looping_words = wordlist.clone(); - // add any wildcard filters to `FILTERS` - // todo if you want to remove the 0-based skipping of wildcards, this needs addressed - // todo wildcard_test should take handles probably? idk, could see tradeoff between memsize - // of two clones vs the handles clone - - // todo should take handles - let filter = match heuristics::wildcard_test( - &target_url, - wildcard_bar, - handles.output.tx.clone(), - handles.stats.tx.clone(), - ) - .await - { - Some(f) => Box::new(f), - None => Box::new(WildcardFilter::default()), - }; - - handles.filters.send(AddFilter(filter))?; - - let scanned_urls = handles.ferox_scans()?; + heuristics::wildcard_test(&target_url, wildcard_bar, handles.clone()).await?; // producer tasks (mp of mpsc); responsible for making requests let producers = stream::iter(looping_words.deref().to_owned()) @@ -294,7 +237,7 @@ pub async fn scan_url( // to false scanned_urls_clone.pause(true).await; } - make_requests(&tgt, &word, depth, handles_clone).await + make_requests(&tgt, &word, handles_clone).await }), pb, ) @@ -352,13 +295,19 @@ pub async fn initialize( total.try_into()? }; + { + // no real reason to keep the arc around beyond this call + let scans = handles.ferox_scans()?; + scans.set_bar_length(num_reqs_expected); + } + // tell Stats object about the number of expected requests handles.stats.send(UpdateUsizeField( ExpectedPerScan, num_reqs_expected as usize, ))?; - // add any status code filters to `FILTERS` (-C|--filter-status) + // add any status code filters to filters handler's FeroxFilters (-C|--filter-status) for code_filter in &config.filter_status { let filter = StatusCodeFilter { filter_code: *code_filter, @@ -367,7 +316,7 @@ pub async fn initialize( handles.filters.send(AddFilter(boxed_filter))?; } - // add any line count filters to `FILTERS` (-N|--filter-lines) + // add any line count filters to filters handler's FeroxFilters (-N|--filter-lines) for lines_filter in &config.filter_line_count { let filter = LinesFilter { line_count: *lines_filter, @@ -376,7 +325,7 @@ pub async fn initialize( handles.filters.send(AddFilter(boxed_filter))?; } - // add any line count filters to `FILTERS` (-W|--filter-words) + // add any line count filters to filters handler's FeroxFilters (-W|--filter-words) for words_filter in &config.filter_word_count { let filter = WordsFilter { word_count: *words_filter, @@ -385,7 +334,7 @@ pub async fn initialize( handles.filters.send(AddFilter(boxed_filter))?; } - // add any line count filters to `FILTERS` (-S|--filter-size) + // add any line count filters to filters handler's FeroxFilters (-S|--filter-size) for size_filter in &config.filter_size { let filter = SizeFilter { content_length: *size_filter, @@ -394,7 +343,7 @@ pub async fn initialize( handles.filters.send(AddFilter(boxed_filter))?; } - // add any regex filters to `FILTERS` (-X|--filter-regex) + // add any regex filters to filters handler's FeroxFilters (-X|--filter-regex) for regex_filter in &config.filter_regex { let raw = regex_filter; let compiled = match Regex::new(&raw) { @@ -416,7 +365,7 @@ pub async fn initialize( handles.filters.send(AddFilter(boxed_filter))?; } - // add any similarity filters to `FILTERS` (--filter-similar-to) + // add any similarity filters to filters handler's FeroxFilters (--filter-similar-to) for similarity_filter in &config.filter_similar { // url as-is based on input, ignores user-specified url manipulation options (add-slash etc) if let Ok(url) = format_url( diff --git a/src/utils.rs b/src/utils.rs index 210f4c2..1fbfb81 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -49,7 +49,6 @@ pub fn open_file(filename: &str) -> Result> { /// /// returns 0 on error and relative urls pub fn get_url_depth(target: &str) -> usize { - // todo move ot scanner struct (i.e. once scanner or scan_manager is rewritten) log::trace!("enter: get_url_depth({})", target); let target = normalize_url(target);