clippy/tests and added logic to wait for link extraction if done

This commit is contained in:
epi
2023-04-19 06:57:36 -05:00
parent 233cf99907
commit 692db93048
6 changed files with 38 additions and 23 deletions

View File

@@ -1,4 +1,5 @@
use super::builder::{LINKFINDER_REGEX, ROBOTS_TXT_REGEX, URL_CHARS_REGEX};
use super::container::request_link;
use super::*;
use crate::config::{Configuration, OutputLevel};
use crate::scan_manager::ScanOrder;
@@ -360,13 +361,13 @@ async fn request_link_happy_path() -> Result<()> {
then.status(200).body("this is a test");
});
let r_resp = ROBOTS_EXT.request_link(&srv.url("/login.php")).await?;
let b_resp = BODY_EXT.request_link(&srv.url("/login.php")).await?;
let r_resp = request_link(&srv.url("/login.php"), ROBOTS_EXT.handles.clone()).await?;
let b_resp = request_link(&srv.url("/login.php"), BODY_EXT.handles.clone()).await?;
assert!(matches!(r_resp.status(), &StatusCode::OK));
assert!(matches!(b_resp.status(), &StatusCode::OK));
assert_eq!(r_resp.content_length(), 14);
assert_eq!(b_resp.content_length(), 14);
assert!(matches!(r_resp.status(), StatusCode::OK));
assert!(matches!(b_resp.status(), StatusCode::OK));
assert_eq!(r_resp.content_length().unwrap(), 14);
assert_eq!(b_resp.content_length().unwrap(), 14);
assert_eq!(mock.hits(), 2);
Ok(())
}
@@ -390,8 +391,8 @@ async fn request_link_bails_on_seen_url() -> Result<()> {
let robots = setup_extractor(ExtractionTarget::RobotsTxt, scans.clone());
let body = setup_extractor(ExtractionTarget::ResponseBody, scans);
let r_resp = robots.request_link(&served).await;
let b_resp = body.request_link(&served).await;
let r_resp = request_link(&served, robots.handles.clone()).await;
let b_resp = request_link(&served, body.handles.clone()).await;
assert!(r_resp.is_err());
assert!(b_resp.is_err());

View File

@@ -354,12 +354,12 @@ impl HeuristicTests {
method,
self.handles.config.output_level,
)
.await
.await,
)
}))
.await // await gives vector of options containing feroxresponses
.await // await gives vector of options containing feroxresponses
.into_iter()
.filter_map(|f| f) // strip out the none values
.flatten() // strip out the none values
.collect::<Vec<_>>();
if responses.len() < 2 {

View File

@@ -63,7 +63,7 @@ pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar {
"{:#}",
HumanDuration(Duration::from_millis(
(state.elapsed().as_millis()
* (len as u128 - pos as u128)
* ((len as u128).checked_sub(pos as u128).unwrap_or(1))
.checked_div(pos as u128)
.unwrap_or(1)) as u64
))
@@ -90,13 +90,11 @@ pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar {
BarType::Quiet => style.template("Scanning: {prefix}").unwrap(),
};
let progress_bar = PROGRESS_BAR.add(
PROGRESS_BAR.add(
ProgressBar::new(length)
.with_style(style)
.with_prefix(prefix.to_string()),
);
progress_bar
)
}
#[cfg(test)]

View File

@@ -72,7 +72,7 @@ fn add_url_to_list_of_scanned_urls_with_known_url() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -94,7 +94,7 @@ fn stop_progress_bar_stops_bar() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -152,7 +152,7 @@ async fn call_display_scans() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -160,7 +160,7 @@ async fn call_display_scans() {
url_two,
ScanType::Directory,
ScanOrder::Latest,
pb_two.length(),
pb_two.length().unwrap(),
OutputLevel::Default,
Some(pb_two),
);

View File

@@ -203,6 +203,9 @@ impl FeroxScanner {
log::info!("Starting scan against: {}", self.target_url);
let mut scan_timer = Instant::now();
// every time we extract links we'll need to await the task to make sure
// it completes before the scan ends
let mut extraction_tasks = Vec::new();
if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) {
// check for robots.txt (cannot be in sub-directories, so limited to Initial)
@@ -213,7 +216,7 @@ impl FeroxScanner {
.build()?;
let result = extractor.extract().await?;
extractor.request_links(result).await?;
extraction_tasks.push(extractor.request_links(result).await?)
}
let scanned_urls = self.handles.ferox_scans()?;
@@ -265,7 +268,7 @@ impl FeroxScanner {
let result = extractor.extract_from_dir_listing().await?;
extractor.request_links(result).await?;
extraction_tasks.push(extractor.request_links(result).await?);
log::trace!("exit: scan_url -> Directory listing heuristic");
@@ -291,6 +294,10 @@ impl FeroxScanner {
}
if !self.handles.config.force_recursion {
for handle in extraction_tasks.into_iter().flatten() {
_ = handle.await;
}
progress_bar.reset_eta();
progress_bar.finish_with_message(message);
@@ -374,6 +381,10 @@ impl FeroxScanner {
scan_timer.elapsed().as_secs_f64(),
))?;
for handle in extraction_tasks.into_iter().flatten() {
_ = handle.await;
}
ferox_scan.finish()?;
log::trace!("exit: scan_url");

View File

@@ -490,6 +490,7 @@ impl Requester {
.target(ExtractionTarget::ResponseBody)
.response(&ferox_response)
.handles(self.handles.clone())
.url(self.ferox_scan.url())
.build()?;
let new_links: HashSet<_>;
@@ -513,7 +514,11 @@ impl Requester {
}
if !new_links.is_empty() {
extractor.request_links(new_links).await?;
let extraction_task = extractor.request_links(new_links).await?;
if let Some(task) = extraction_task {
_ = task.await;
}
}
}