diff --git a/src/banner/container.rs b/src/banner/container.rs index 59a6d71..2f94970 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -188,13 +188,13 @@ impl Banner { targets.push(BannerEntry::new("🎯", "Target Url", target)); } - for denied_url in &config.url_denylist { - url_denylist.push(BannerEntry::new( - "🚫", - "Don't Scan Url", - denied_url.as_str(), - )); - } + // for denied_url in &config.url_denylist { + // url_denylist.push(BannerEntry::new( + // "🚫", + // "Don't Scan Url", + // denied_url.as_str(), + // )); + // } for denied_regex in &config.regex_denylist { url_denylist.push(BannerEntry::new( diff --git a/src/config/container.rs b/src/config/container.rs index 8cd3bc4..aeeb71a 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -13,6 +13,7 @@ use clap::{parser::ValueSource, ArgMatches}; use regex::Regex; use reqwest::{Client, Method, StatusCode, Url}; use serde::{Deserialize, Serialize}; +use std::sync::{Arc, RwLock}; use std::{ collections::HashMap, env::{current_dir, current_exe}, @@ -284,7 +285,7 @@ pub struct Configuration { /// URLs that should never be scanned/recursed into #[serde(default)] - pub url_denylist: Vec, + pub url_denylist: Arc>>, /// URLs that should never be scanned/recursed into based on a regular expression #[serde(with = "serde_regex", default)] @@ -372,7 +373,7 @@ impl Default for Configuration { data: Vec::new(), filter_size: Vec::new(), filter_regex: Vec::new(), - url_denylist: Vec::new(), + url_denylist: Arc::new(RwLock::new(Vec::new())), regex_denylist: Vec::new(), filter_line_count: Vec::new(), filter_word_count: Vec::new(), @@ -668,7 +669,9 @@ impl Configuration { match Url::parse(denier.trim_end_matches('/')) { Ok(absolute) => { // denier is an absolute url and can be parsed as such - config.url_denylist.push(absolute); + if let Ok(mut guard) = config.url_denylist.write() { + guard.push(absolute); + } } Err(err) => { // there are some expected errors that happen when we try to parse a url @@ -991,7 +994,12 @@ impl Configuration { update_if_not_default!(&mut conf.extensions, new.extensions, Vec::::new()); update_if_not_default!(&mut conf.methods, new.methods, methods()); update_if_not_default!(&mut conf.data, new.data, Vec::::new()); - update_if_not_default!(&mut conf.url_denylist, new.url_denylist, Vec::::new()); + // update_if_not_default!( + // &mut conf.url_denylist, + // new.url_denylist, + // Arc::new(RwLock::new(Vec::::new())) + // ); + // todo: fix this above if !new.regex_denylist.is_empty() { // cant use the update_if_not_default macro due to the following error // diff --git a/src/config/tests.rs b/src/config/tests.rs index 778ece4..1e7b344 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -3,6 +3,7 @@ use super::*; use crate::{traits::FeroxSerialize, DEFAULT_CONFIG_NAME}; use regex::Regex; use reqwest::Url; +use std::sync::{Arc, RwLock}; use std::{collections::HashMap, fs::write}; use tempfile::TempDir; @@ -109,7 +110,10 @@ fn default_configuration() { assert_eq!(config.extensions, Vec::::new()); assert_eq!(config.methods, vec!["GET"]); assert_eq!(config.data, Vec::::new()); - assert_eq!(config.url_denylist, Vec::::new()); + assert_eq!( + config.url_denylist, + Arc::new(RwLock::new(Vec::::new())) + ); assert_eq!(config.dont_collect, ignored_extensions()); assert_eq!(config.filter_regex, Vec::::new()); assert_eq!(config.filter_similar, Vec::::new()); diff --git a/src/event_handlers/scans.rs b/src/event_handlers/scans.rs index 8329c20..a71a5b1 100644 --- a/src/event_handlers/scans.rs +++ b/src/event_handlers/scans.rs @@ -312,8 +312,14 @@ impl ScanHandler { /// wrapper around scanning a url to stay DRY async fn ordered_scan_url(&mut self, targets: Vec, order: ScanOrder) -> Result<()> { log::trace!("enter: ordered_scan_url({:?}, {:?})", targets, order); - let should_test_deny = !self.handles.config.url_denylist.is_empty() - || !self.handles.config.regex_denylist.is_empty(); + + let url_denylist_is_not_empty = if let Ok(guard) = self.handles.config.url_denylist.read() { + !guard.is_empty() + } else { + false + }; + let should_test_deny = + url_denylist_is_not_empty || !self.handles.config.regex_denylist.is_empty(); for target in targets { if self.data.contains(&target) && matches!(order, ScanOrder::Latest) { diff --git a/src/extractor/container.rs b/src/extractor/container.rs index e7f5e1f..348b38f 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -430,8 +430,13 @@ impl<'a> Extractor<'a> { bail!("previously seen url"); } - if (!self.handles.config.url_denylist.is_empty() - || !self.handles.config.regex_denylist.is_empty()) + let url_denylist_is_not_empty = if let Ok(guard) = self.handles.config.url_denylist.read() { + !guard.is_empty() + } else { + false + }; + + if (url_denylist_is_not_empty || !self.handles.config.regex_denylist.is_empty()) && should_deny_url(&new_url, self.handles.clone())? { // can't allow a denied url to be requested diff --git a/src/heuristics.rs b/src/heuristics.rs index 66d6275..b1cb479 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -360,6 +360,21 @@ impl HeuristicTests { .filters .send(Command::AddFilter(Box::new(sim_filter)))?; + if let Ok(mut guard) = self.handles.config.url_denylist.write() { + use reqwest::Url; + // todo need to get the parent + log::warn!("FOUND 404 LIKE RESPONSE: {:?}", responses[0].url()); + let derp = responses[0].url().as_str(); + let mut new_url = Url::parse(derp).unwrap(); + new_url.path_segments_mut().unwrap().pop(); + // responses[0].url().set_path(path_segments.as_str()); + log::warn!( + "ADDING PARENT OF 404 LIKE RESPONSE: {:?}", + new_url + ); + guard.push(new_url); + } + // reset the responses for the next method, if it exists responses.clear(); diff --git a/src/main.rs b/src/main.rs index a81dcd1..734b1c5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -182,13 +182,15 @@ async fn get_targets(handles: Arc) -> Result> { ); } } - for denier in &handles.config.url_denylist { - if denier.as_str().trim_end_matches('/') == target.trim_end_matches('/') { - bail!( - "The url '{}' matches {}; the scan will never start", - denier, - target - ); + if let Ok(guard) = handles.config.url_denylist.read() { + for denier in guard.iter() { + if denier.as_str().trim_end_matches('/') == target.trim_end_matches('/') { + bail!( + "The url '{}' matches {}; the scan will never start", + denier, + target + ); + } } } diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index bc69a8c..6e5c909 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -353,8 +353,14 @@ impl Requester { let urls = FeroxUrl::from_string(&self.target_url, self.handles.clone()) .formatted_urls(word, collected)?; - let should_test_deny = !self.handles.config.url_denylist.is_empty() - || !self.handles.config.regex_denylist.is_empty(); + let url_denylist_is_not_empty = if let Ok(guard) = self.handles.config.url_denylist.read() { + !guard.is_empty() + } else { + false + }; + + let should_test_deny = + url_denylist_is_not_empty || !self.handles.config.regex_denylist.is_empty(); for url in urls { for method in self.handles.config.methods.iter() { diff --git a/src/utils.rs b/src/utils.rs index 754b2e5..6e685b1 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -484,12 +484,14 @@ pub fn should_deny_url(url: &Url, handles: Arc) -> Result { // the given url and any url to which it's compared let normed_url = Url::parse(url.to_string().trim_end_matches('/'))?; - for denier in &handles.config.url_denylist { - // note to self: it may seem as though we can use regex only for --dont-scan, however, in - // doing so, we lose the ability to block a parent directory while scanning a child - if let Ok(should_deny) = should_deny_absolute(&normed_url, denier, handles.clone()) { - if should_deny { - return Ok(true); + if let Ok(guard) = handles.config.url_denylist.read() { + for denier in guard.iter() { + // note to self: it may seem as though we can use regex only for --dont-scan, however, in + // doing so, we lose the ability to block a parent directory while scanning a child + if let Ok(should_deny) = should_deny_absolute(&normed_url, denier, handles.clone()) { + if should_deny { + return Ok(true); + } } } }