From 692db93048e23bc8db7561f01aa35ed9e2ea3461 Mon Sep 17 00:00:00 2001 From: epi Date: Wed, 19 Apr 2023 06:57:36 -0500 Subject: [PATCH] clippy/tests and added logic to wait for link extraction if done --- src/extractor/tests.rs | 17 +++++++++-------- src/heuristics.rs | 6 +++--- src/progress.rs | 8 +++----- src/scan_manager/tests.rs | 8 ++++---- src/scanner/ferox_scanner.rs | 15 +++++++++++++-- src/scanner/requester.rs | 7 ++++++- 6 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 50bcad2..68221f5 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -1,4 +1,5 @@ use super::builder::{LINKFINDER_REGEX, ROBOTS_TXT_REGEX, URL_CHARS_REGEX}; +use super::container::request_link; use super::*; use crate::config::{Configuration, OutputLevel}; use crate::scan_manager::ScanOrder; @@ -360,13 +361,13 @@ async fn request_link_happy_path() -> Result<()> { then.status(200).body("this is a test"); }); - let r_resp = ROBOTS_EXT.request_link(&srv.url("/login.php")).await?; - let b_resp = BODY_EXT.request_link(&srv.url("/login.php")).await?; + let r_resp = request_link(&srv.url("/login.php"), ROBOTS_EXT.handles.clone()).await?; + let b_resp = request_link(&srv.url("/login.php"), BODY_EXT.handles.clone()).await?; - assert!(matches!(r_resp.status(), &StatusCode::OK)); - assert!(matches!(b_resp.status(), &StatusCode::OK)); - assert_eq!(r_resp.content_length(), 14); - assert_eq!(b_resp.content_length(), 14); + assert!(matches!(r_resp.status(), StatusCode::OK)); + assert!(matches!(b_resp.status(), StatusCode::OK)); + assert_eq!(r_resp.content_length().unwrap(), 14); + assert_eq!(b_resp.content_length().unwrap(), 14); assert_eq!(mock.hits(), 2); Ok(()) } @@ -390,8 +391,8 @@ async fn request_link_bails_on_seen_url() -> Result<()> { let robots = setup_extractor(ExtractionTarget::RobotsTxt, scans.clone()); let body = setup_extractor(ExtractionTarget::ResponseBody, scans); - let r_resp = robots.request_link(&served).await; - let b_resp = body.request_link(&served).await; + let r_resp = request_link(&served, robots.handles.clone()).await; + let b_resp = request_link(&served, body.handles.clone()).await; assert!(r_resp.is_err()); assert!(b_resp.is_err()); diff --git a/src/heuristics.rs b/src/heuristics.rs index d9d3213..a4e0cce 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -354,12 +354,12 @@ impl HeuristicTests { method, self.handles.config.output_level, ) - .await + .await, ) })) - .await // await gives vector of options containing feroxresponses + .await // await gives vector of options containing feroxresponses .into_iter() - .filter_map(|f| f) // strip out the none values + .flatten() // strip out the none values .collect::>(); if responses.len() < 2 { diff --git a/src/progress.rs b/src/progress.rs index d7c21dd..349fb61 100644 --- a/src/progress.rs +++ b/src/progress.rs @@ -63,7 +63,7 @@ pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar { "{:#}", HumanDuration(Duration::from_millis( (state.elapsed().as_millis() - * (len as u128 - pos as u128) + * ((len as u128).checked_sub(pos as u128).unwrap_or(1)) .checked_div(pos as u128) .unwrap_or(1)) as u64 )) @@ -90,13 +90,11 @@ pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar { BarType::Quiet => style.template("Scanning: {prefix}").unwrap(), }; - let progress_bar = PROGRESS_BAR.add( + PROGRESS_BAR.add( ProgressBar::new(length) .with_style(style) .with_prefix(prefix.to_string()), - ); - - progress_bar + ) } #[cfg(test)] diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index b0d039c..b549935 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -72,7 +72,7 @@ fn add_url_to_list_of_scanned_urls_with_known_url() { url, ScanType::Directory, ScanOrder::Latest, - pb.length(), + pb.length().unwrap(), OutputLevel::Default, Some(pb), ); @@ -94,7 +94,7 @@ fn stop_progress_bar_stops_bar() { url, ScanType::Directory, ScanOrder::Latest, - pb.length(), + pb.length().unwrap(), OutputLevel::Default, Some(pb), ); @@ -152,7 +152,7 @@ async fn call_display_scans() { url, ScanType::Directory, ScanOrder::Latest, - pb.length(), + pb.length().unwrap(), OutputLevel::Default, Some(pb), ); @@ -160,7 +160,7 @@ async fn call_display_scans() { url_two, ScanType::Directory, ScanOrder::Latest, - pb_two.length(), + pb_two.length().unwrap(), OutputLevel::Default, Some(pb_two), ); diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index e533596..6404708 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -203,6 +203,9 @@ impl FeroxScanner { log::info!("Starting scan against: {}", self.target_url); let mut scan_timer = Instant::now(); + // every time we extract links we'll need to await the task to make sure + // it completes before the scan ends + let mut extraction_tasks = Vec::new(); if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) { // check for robots.txt (cannot be in sub-directories, so limited to Initial) @@ -213,7 +216,7 @@ impl FeroxScanner { .build()?; let result = extractor.extract().await?; - extractor.request_links(result).await?; + extraction_tasks.push(extractor.request_links(result).await?) } let scanned_urls = self.handles.ferox_scans()?; @@ -265,7 +268,7 @@ impl FeroxScanner { let result = extractor.extract_from_dir_listing().await?; - extractor.request_links(result).await?; + extraction_tasks.push(extractor.request_links(result).await?); log::trace!("exit: scan_url -> Directory listing heuristic"); @@ -291,6 +294,10 @@ impl FeroxScanner { } if !self.handles.config.force_recursion { + for handle in extraction_tasks.into_iter().flatten() { + _ = handle.await; + } + progress_bar.reset_eta(); progress_bar.finish_with_message(message); @@ -374,6 +381,10 @@ impl FeroxScanner { scan_timer.elapsed().as_secs_f64(), ))?; + for handle in extraction_tasks.into_iter().flatten() { + _ = handle.await; + } + ferox_scan.finish()?; log::trace!("exit: scan_url"); diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index 28da79e..acdd03d 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -490,6 +490,7 @@ impl Requester { .target(ExtractionTarget::ResponseBody) .response(&ferox_response) .handles(self.handles.clone()) + .url(self.ferox_scan.url()) .build()?; let new_links: HashSet<_>; @@ -513,7 +514,11 @@ impl Requester { } if !new_links.is_empty() { - extractor.request_links(new_links).await?; + let extraction_task = extractor.request_links(new_links).await?; + + if let Some(task) = extraction_task { + _ = task.await; + } } }