broke absolute path denial into its own function; tests pass

This commit is contained in:
epi
2021-10-01 06:23:44 -05:00
parent 28fa90b093
commit 28769b5028

View File

@@ -1,6 +1,7 @@
use anyhow::{bail, Context, Result};
use console::{strip_ansi_codes, style, user_attended};
use indicatif::ProgressBar;
use regex::Regex;
use reqwest::{Client, Response, StatusCode, Url};
#[cfg(not(target_os = "windows"))]
use rlimit::{getrlimit, setrlimit, Resource};
@@ -288,6 +289,94 @@ where
Ok(())
}
/// todo
fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>) -> Result<bool> {
log::trace!(
"enter: should_deny_absolute({}, {:?})",
url_to_test.as_str(),
denier.as_str(),
);
// simplest case is an exact match, check for it first
if url_to_test == denier {
log::trace!("exit: should_deny_absolute -> true");
return Ok(true);
}
match (url_to_test.host(), denier.host()) {
// .host() will return an enum with ipv4|6 or domain and is comparable
// whereas .domain() returns None for ip addresses
(Some(normed_host), Some(denier_host)) => {
if normed_host != denier_host {
// domains don't even match
return Ok(false);
}
}
_ => {
// one or the other couldn't determine the host value, which probably means
// it's not suitable for further comparison
return Ok(false);
}
}
let tested_host = url_to_test.host().unwrap(); // match above will catch errors
// at this point, we have a matching set of ips or domain names. now we can process the
// url path. The goal is to determine whether the given url's path is a subpath of any
// url in the deny list, for example
// GIVEN URL URL DENY LIST USER-SPECIFIED URLS TO SCAN
// http://some.domain/stuff/things, [http://some.domain/stuff], [http://some.domain] => true
// http://some.domain/stuff/things, [http://some.domain/stuff/things], [http://some.domain] => true
// http://some.domain/stuff/things, [http://some.domain/api], [http://some.domain] => false
// the examples above are all pretty obvious, the kicker comes when the blocking url's
// path is a parent to a scanned url
// http://some.domain/stuff/things, [http://some.domain/], [http://some.domain/stuff] => false
// http://some.domain/api, [http://some.domain/], [http://some.domain/stuff] => true
// we want to deny all children of the parent, unless that child is a child of a scan
// we specified through -u(s) or --stdin
let deny_path = denier.path();
let tested_path = url_to_test.path();
if tested_path.starts_with(deny_path) {
// at this point, we know that the given normalized path is a sub-path of the
// current deny-url, now we just need to check to see if this deny-url is a parent
// to a scanned url that is also a parent of the given url
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
.with_context(|| format!("Could not parse {} as a url", ferox_scan))?;
if let Some(scan_host) = scanner.host() {
// same domain/ip check we perform on the denier above
if tested_host != scan_host {
// domains don't even match, keep on keepin' on...
continue;
}
} else {
// couldn't process .host from scanner
continue;
};
let scan_path = scanner.path();
if scan_path.starts_with(deny_path) && tested_path.starts_with(scan_path) {
// user-specified scan url is a sub-path of the deny-urls's path AND the
// url to check is a sub-path of the user-specified scan url
//
// the assumption is the user knew what they wanted and we're going to give
// the scanned url precedence, even though it's a sub-path
log::trace!("exit: should_deny_absolute -> false");
return Ok(false);
}
}
log::trace!("exit: should_deny_absolute -> true");
return Ok(true);
}
log::trace!("exit: should_deny_absolute -> false");
Ok(false)
}
/// determines whether or not a given url should be denied based on the user-supplied --dont-scan
/// flag
pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
@@ -297,91 +386,127 @@ pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
handles.config.url_denylist,
handles.ferox_scans()?
);
// normalization for comparison is to remove the trailing / if one exists, this is done for
// the given url and any url to which it's compared
let normed_url = Url::parse(url.to_string().trim_end_matches('/'))?;
for deny_url in &handles.config.url_denylist {
// parse the denying url for easier comparison
let denier = Url::parse(deny_url.trim_end_matches('/'))
.with_context(|| format!("Could not parse {} as a url", deny_url))?;
// simplest case is an exact match, check for it first
if normed_url == denier {
log::trace!("exit: should_deny_url -> true");
return Ok(true);
}
match (normed_url.host(), denier.host()) {
// .host() will return an enum with ipv4|6 or domain and is comparable
// whereas .domain() returns None for ip addresses
(Some(normed_host), Some(denier_host)) => {
if normed_host != denier_host {
// domains don't even match, keep on keepin' on...
continue;
let denier = match Url::parse(deny_url.trim_end_matches('/')) {
// todo consider breaking this out into a function, it's reused in
// todo /home/epi/PycharmProjects/feroxbuster/src/extractor/container.rs:L157
Ok(abs_denier) => {
// url is an absolute url and can be parsed as such
match should_deny_absolute(&normed_url, &abs_denier, handles.clone()) {
// should_deny_absolute returns bool, if it's true, we found should deny
// this url and no further processing is needed; otherwise, need to continue
// to the next item in the deny_list
Ok(true) => return Ok(true),
_ => continue,
}
}
_ => {
// one or the other couldn't determine the host value, which probably means
// it's not suitable for further comparison
continue;
}
}
let normed_host = normed_url.host().unwrap(); // match above will catch errors
// at this point, we have a matching set of ips or domain names. now we can process the
// url path. The goal is to determine whether the given url's path is a subpath of any
// url in the deny list, for example
// GIVEN URL URL DENY LIST USER-SPECIFIED URLS TO SCAN
// http://some.domain/stuff/things, [http://some.domain/stuff], [http://some.domain] => true
// http://some.domain/stuff/things, [http://some.domain/stuff/things], [http://some.domain] => true
// http://some.domain/stuff/things, [http://some.domain/api], [http://some.domain] => false
// the examples above are all pretty obvious, the kicker comes when the blocking url's
// path is a parent to a scanned url
// http://some.domain/stuff/things, [http://some.domain/], [http://some.domain/stuff] => false
// http://some.domain/api, [http://some.domain/], [http://some.domain/stuff] => true
// we want to deny all children of the parent, unless that child is a child of a scan
// we specified through -u(s) or --stdin
let deny_path = denier.path();
let norm_path = normed_url.path();
if norm_path.starts_with(deny_path) {
// at this point, we know that the given normalized path is a sub-path of the
// current deny-url, now we just need to check to see if this deny-url is a parent
// to a scanned url that is also a parent of the given url
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
.with_context(|| format!("Could not parse {} as a url", ferox_scan))?;
if let Some(scan_host) = scanner.host() {
// same domain/ip check we perform on the denier above
if normed_host != scan_host {
// domains don't even match, keep on keepin' on...
continue;
Err(e) => {
// this is the expected error that happens when we try to parse a url fragment
// ex: Url::parse("/login") -> Err("relative URL without a base")
if e.to_string().contains("relative URL without a base") {
// url without a base from the --dont-scan flag; we're going to assume
// that the input is a regular expression to be parsed. The possibility
// exists that the user rolled their face across the keyboard and we're
// dealing with the results, but we'll attempt to work with what we're given
if let Ok(regex_denier) = Regex::new(deny_url) {
// todo figure out a way to split out normal url comparisons from regex
}
} else {
// couldn't process .host from scanner
// unexpected error has occurred
log::warn!("Could not parse given url denier: {}", e);
handles.stats.send(AddError(Other)).unwrap_or_default();
continue;
};
let scan_path = scanner.path();
if scan_path.starts_with(deny_path) && norm_path.starts_with(scan_path) {
// user-specified scan url is a sub-path of the deny-urls's path AND the
// url to check is a sub-path of the user-specified scan url
//
// the assumption is the user knew what they wanted and we're going to give
// the scanned url precedence, even though it's a sub-path
log::trace!("exit: should_deny_url -> false");
return Ok(false);
}
}
log::trace!("exit: should_deny_url -> true");
return Ok(true);
}
};
// parse the denying url for easier comparison
// let denier = Url::parse(deny_url.trim_end_matches('/'))
// .with_context(|| format!("Could not parse {} as a url", deny_url))?;
// // simplest case is an exact match, check for it first
// if normed_url == denier {
// log::trace!("exit: should_deny_url -> true");
// return Ok(true);
// }
//
// match (normed_url.host(), denier.host()) {
// // .host() will return an enum with ipv4|6 or domain and is comparable
// // whereas .domain() returns None for ip addresses
// (Some(normed_host), Some(denier_host)) => {
// if normed_host != denier_host {
// // domains don't even match, keep on keepin' on...
// continue;
// }
// }
// _ => {
// // one or the other couldn't determine the host value, which probably means
// // it's not suitable for further comparison
// continue;
// }
// }
//
// let normed_host = normed_url.host().unwrap(); // match above will catch errors
// // at this point, we have a matching set of ips or domain names. now we can process the
// // url path. The goal is to determine whether the given url's path is a subpath of any
// // url in the deny list, for example
// // GIVEN URL URL DENY LIST USER-SPECIFIED URLS TO SCAN
// // http://some.domain/stuff/things, [http://some.domain/stuff], [http://some.domain] => true
// // http://some.domain/stuff/things, [http://some.domain/stuff/things], [http://some.domain] => true
// // http://some.domain/stuff/things, [http://some.domain/api], [http://some.domain] => false
// // the examples above are all pretty obvious, the kicker comes when the blocking url's
// // path is a parent to a scanned url
// // http://some.domain/stuff/things, [http://some.domain/], [http://some.domain/stuff] => false
// // http://some.domain/api, [http://some.domain/], [http://some.domain/stuff] => true
// // we want to deny all children of the parent, unless that child is a child of a scan
// // we specified through -u(s) or --stdin
//
// let deny_path = denier.path();
// let norm_path = normed_url.path();
//
// if norm_path.starts_with(deny_path) {
// // at this point, we know that the given normalized path is a sub-path of the
// // current deny-url, now we just need to check to see if this deny-url is a parent
// // to a scanned url that is also a parent of the given url
// for ferox_scan in handles.ferox_scans()?.get_active_scans() {
// let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
// .with_context(|| format!("Could not parse {} as a url", ferox_scan))?;
//
// if let Some(scan_host) = scanner.host() {
// // same domain/ip check we perform on the denier above
// if normed_host != scan_host {
// // domains don't even match, keep on keepin' on...
// continue;
// }
// } else {
// // couldn't process .host from scanner
// continue;
// };
//
// let scan_path = scanner.path();
//
// if scan_path.starts_with(deny_path) && norm_path.starts_with(scan_path) {
// // user-specified scan url is a sub-path of the deny-urls's path AND the
// // url to check is a sub-path of the user-specified scan url
// //
// // the assumption is the user knew what they wanted and we're going to give
// // the scanned url precedence, even though it's a sub-path
// log::trace!("exit: should_deny_url -> false");
// return Ok(false);
// }
// }
// log::trace!("exit: should_deny_url -> true");
// return Ok(true);
// }
}
// made it to the end of the deny list unscathed, return false, indicating we should not deny
// this particular url
log::trace!("exit: should_deny_url -> false");
Ok(false)
}