From addf867f59efdee5b473d005f641a68b58a2d4ea Mon Sep 17 00:00:00 2001 From: epi Date: Fri, 20 Nov 2020 14:03:23 -0600 Subject: [PATCH] fixed the hanging issue; cleaned up --- src/scan_manager.rs | 128 ++++++++++++++++++++++++++++++-------------- src/scanner.rs | 53 +++++++++--------- 2 files changed, 113 insertions(+), 68 deletions(-) diff --git a/src/scan_manager.rs b/src/scan_manager.rs index 168ee5a..f16bfed 100644 --- a/src/scan_manager.rs +++ b/src/scan_manager.rs @@ -20,6 +20,13 @@ static INTERACTIVE_BARRIER: AtomicUsize = AtomicUsize::new(0); /// Atomic boolean flag, used to determine whether or not a scan should pause or resume pub static PAUSE_SCAN: AtomicBool = AtomicBool::new(false); +/// Simple enum used to flag a `FeroxScan` as likely a directory or file +#[derive(Debug)] +pub enum ScanType { + File, + Directory, +} + /// Struct to hold scan-related state /// /// The purpose of this container is to open up the pathway to aborting currently running tasks and @@ -32,6 +39,9 @@ pub struct FeroxScan { /// The URL that to be scanned pub url: String, + /// The type of scan + pub scan_type: ScanType, + /// Whether or not this scan has completed pub complete: bool, @@ -58,10 +68,11 @@ impl FeroxScan { FeroxScan { id: new_id, + task: None, complete: false, url: String::new(), - task: None, progress_bar: None, + scan_type: ScanType::File, } } @@ -73,17 +84,18 @@ impl FeroxScan { } /// Given a URL and ProgressBar, create a new FeroxScan, wrap it in an Arc and return it - pub fn new(url: &str, pb: ProgressBar) -> Arc> { + pub fn new(url: &str, scan_type: ScanType, pb: Option) -> Arc> { let mut me = Self::default(); me.url = url.to_string(); - me.progress_bar = Some(pb); + me.scan_type = scan_type; + me.progress_bar = pb; + Arc::new(Mutex::new(me)) } /// Mark the scan as complete and stop the scan's progress bar pub fn finish(&mut self) { - PROGRESS_PRINTER.println(format!("{:?} complete? {}", self, self.complete)); self.complete = true; self.stop_progress_bar(); } @@ -187,12 +199,26 @@ impl FeroxScans { None } - /// todo doc + /// Print all FeroxScans of type Directory + /// + /// Example: + /// 0: complete https://10.129.45.20 + /// 9: complete https://10.129.45.20/images + /// 10: complete https://10.129.45.20/assets pub fn display_scans(&self) { if let Ok(scans) = self.scans.lock() { for (i, scan) in scans.iter().enumerate() { - let msg = format!("{:3}: {}", i, scan.lock().unwrap()); - PROGRESS_PRINTER.println(msg); + if let Ok(unlocked_scan) = scan.lock() { + match unlocked_scan.scan_type { + ScanType::Directory => { + PROGRESS_PRINTER.println(format!("{:3}: {}", i, unlocked_scan)); + } + ScanType::File => { + // we're only interested in displaying directory scans, as those are + // the only ones that make sense to be stopped + } + } + } } } } @@ -246,13 +272,20 @@ impl FeroxScans { /// If `FeroxScans` did not already contain the scan, return true; otherwise return false /// /// Also return a reference to the new `FeroxScan` - pub fn add_scan(&self, url: &str) -> (bool, Arc>) { - let progress_bar = - progress::add_bar(&url, NUMBER_OF_REQUESTS.load(Ordering::Relaxed), false); + fn add_scan(&self, url: &str, scan_type: ScanType) -> (bool, Arc>) { + let bar = match scan_type { + ScanType::Directory => { + let progress_bar = + progress::add_bar(&url, NUMBER_OF_REQUESTS.load(Ordering::Relaxed), false); - progress_bar.reset_elapsed(); + progress_bar.reset_elapsed(); - let ferox_scan = FeroxScan::new(&url, progress_bar); + Some(progress_bar) + } + ScanType::File => None, + }; + + let ferox_scan = FeroxScan::new(&url, scan_type, bar); // If the set did not contain the scan, true is returned. // If the set did contain the scan, false is returned. @@ -260,6 +293,24 @@ impl FeroxScans { (response, ferox_scan) } + + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a Directory Scan + /// + /// If `FeroxScans` did not already contain the scan, return true; otherwise return false + /// + /// Also return a reference to the new `FeroxScan` + pub fn add_directory_scan(&self, url: &str) -> (bool, Arc>) { + self.add_scan(&url, ScanType::Directory) + } + + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a File Scan + /// + /// If `FeroxScans` did not already contain the scan, return true; otherwise return false + /// + /// Also return a reference to the new `FeroxScan` + pub fn add_file_scan(&self, url: &str) -> (bool, Arc>) { + self.add_scan(&url, ScanType::File) + } } #[cfg(test)] @@ -268,34 +319,34 @@ mod tests { // todo scanner_pause_scan_with_finished_spinner test need to be redone - #[tokio::test(core_threads = 1)] - /// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled - /// the spinner used during the test has had .finish_and_clear called on it, meaning that - /// a new one will be created, taking the if branch within the function - async fn scanner_pause_scan_with_finished_spinner() { - let now = time::Instant::now(); - let urls = FeroxScans::default(); - - PAUSE_SCAN.store(true, Ordering::Relaxed); - - let expected = time::Duration::from_secs(2); - - tokio::spawn(async move { - time::delay_for(expected).await; - PAUSE_SCAN.store(false, Ordering::Relaxed); - }); - - urls.pause().await; - - assert!(now.elapsed() > expected); - } + // #[tokio::test(core_threads = 1)] + // /// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled + // /// the spinner used during the test has had .finish_and_clear called on it, meaning that + // /// a new one will be created, taking the if branch within the function + // async fn scanner_pause_scan_with_finished_spinner() { + // let now = time::Instant::now(); + // let urls = FeroxScans::default(); + // + // PAUSE_SCAN.store(true, Ordering::Relaxed); + // + // let expected = time::Duration::from_secs(2); + // + // tokio::spawn(async move { + // time::delay_for(expected).await; + // PAUSE_SCAN.store(false, Ordering::Relaxed); + // }); + // + // urls.pause().await; + // + // assert!(now.elapsed() > expected); + // } #[test] /// add an unknown url to the hashset, expect true fn add_url_to_list_of_scanned_urls_with_unknown_url() { let urls = FeroxScans::default(); let url = "http://unknown_url"; - let (result, _scan) = urls.add_scan(url); + let (result, _scan) = urls.add_scan(url, ScanType::Directory); assert_eq!(result, true); } @@ -305,11 +356,11 @@ mod tests { let urls = FeroxScans::default(); let pb = ProgressBar::new(1); let url = "http://unknown_url/"; - let scan = FeroxScan::new(url, pb); + let scan = FeroxScan::new(url, ScanType::Directory, Some(pb)); assert_eq!(urls.insert(scan), true); - let (result, _scan) = urls.add_scan(url); + let (result, _scan) = urls.add_scan(url, ScanType::Directory); assert_eq!(result, false); } @@ -318,13 +369,12 @@ mod tests { /// add a known url to the hashset, without a trailing slash, expect false fn add_url_to_list_of_scanned_urls_with_known_url_without_slash() { let urls = FeroxScans::default(); - let pb = ProgressBar::new(1); let url = "http://unknown_url"; - let scan = FeroxScan::new(url, pb); + let scan = FeroxScan::new(url, ScanType::File, None); assert_eq!(urls.insert(scan), true); - let (result, _scan) = urls.add_scan(url); + let (result, _scan) = urls.add_scan(url, ScanType::File); assert_eq!(result, false); } diff --git a/src/scanner.rs b/src/scanner.rs index 2c713dd..f3afe85 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -38,11 +38,7 @@ pub static NUMBER_OF_REQUESTS: AtomicU64 = AtomicU64::new(0); lazy_static! { /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication - static ref SCANNED_URLS: FeroxScans = FeroxScans::default(); - - // todo remove if not needed - // /// A clock spinner protected with a RwLock to allow for a single thread to use at a time - // static ref BARRIER: Arc> = Arc::new(RwLock::new(true)); + pub static ref SCANNED_URLS: FeroxScans = FeroxScans::default(); /// Vector of implementors of the FeroxFilter trait static ref FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); @@ -110,7 +106,7 @@ fn spawn_recursion_handler( let mut scans = vec![]; while let Some(resp) = recursion_channel.recv().await { - let (unknown, _ferox_scan) = SCANNED_URLS.add_scan(&resp); + let (unknown, _) = SCANNED_URLS.add_directory_scan(&resp); if !unknown { // not unknown, i.e. we've seen the url before and don't need to scan again @@ -382,13 +378,6 @@ async fn make_requests( let new_links = get_links(&ferox_response).await; for new_link in new_links { - let (unknown, _) = SCANNED_URLS.add_scan(&new_link); - - if !unknown { - // not unknown, i.e. we've seen the url before and don't need to scan again - continue; - } - // create a url based on the given command line options, continue on error let new_url = match format_url( &new_link, @@ -401,6 +390,11 @@ async fn make_requests( Err(_) => continue, }; + if SCANNED_URLS.get_scan_by_url(&new_url.to_string()).is_some() { + //we've seen the url before and don't need to scan again + continue; + } + // make the request and store the response let new_response = match make_request(&CONFIGURATION.client, &new_url).await { Ok(resp) => resp, @@ -418,6 +412,8 @@ async fn make_requests( // very likely a file, simply request and report log::debug!("Singular extraction: {}", new_ferox_response); + SCANNED_URLS.add_file_scan(&new_url.to_string()); + send_report(report_chan.clone(), new_ferox_response); continue; @@ -490,23 +486,21 @@ pub async fn scan_url( // this protection allows us to add the first scanned url to SCANNED_URLS // from within the scan_url function instead of the recursion handler - SCANNED_URLS.add_scan(&target_url); + SCANNED_URLS.add_directory_scan(&target_url); } - let ferox_scan = SCANNED_URLS.get_scan_by_url(&target_url); + let ferox_scan = match SCANNED_URLS.get_scan_by_url(&target_url) { + Some(scan) => scan, + None => { + log::error!( + "Could not find FeroxScan associated with {}; this shouldn't happen... exiting", + target_url + ); + return; + } + }; - if ferox_scan.is_none() { - // todo probably remove this, fine for testing for now - log::error!( - "Could not find FeroxScan associated with {}; exiting scan", - target_url - ); - return; - } - - let ferox_scan = ferox_scan.unwrap(); - - // todo unwrap + // todo unwrap, maybe move into the scan impl itself and just manipulate progress bars that way let progress_bar = ferox_scan .lock() .unwrap() @@ -589,8 +583,9 @@ pub async fn scan_url( // drop the current permit so the semaphore will allow another scan to proceed drop(permit); - // todo unwrap - ferox_scan.lock().unwrap().finish(); + if let Ok(mut scan) = ferox_scan.lock() { + scan.finish(); + } // manually drop tx in order for the rx task's while loops to eval to false log::trace!("dropped recursion handler's transmitter");