mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-05-22 20:31:13 -03:00
* bumped version * added cli option to parser * added banner entry * fixed state file with colon on windows * tweaked banner name for scoped url * fixed test with new In-Scope Url banner name * added STATE_FILENAME env var to control state file name/location * added ferox config example * initial implementation complete * updated ci/cd to add components to fmt/clippy configs * clippy * made subdomain detection a bit more robust * --request-file correctly sets scope values * added debug windows build * fixed failing test
1296 lines
48 KiB
Rust
1296 lines
48 KiB
Rust
use anyhow::{bail, Context, Result};
|
|
use console::{strip_ansi_codes, style, user_attended};
|
|
use indicatif::ProgressBar;
|
|
use regex::Regex;
|
|
use reqwest::{Client, Method, Response, StatusCode, Url};
|
|
#[cfg(not(target_os = "windows"))]
|
|
use rlimit::{getrlimit, setrlimit, Resource};
|
|
use std::{
|
|
error::Error,
|
|
fs,
|
|
io::{self, BufWriter, Write},
|
|
sync::Arc,
|
|
time::Duration,
|
|
time::{SystemTime, UNIX_EPOCH},
|
|
};
|
|
use tokio::sync::{mpsc::UnboundedSender, oneshot};
|
|
|
|
use crate::{
|
|
config::Configuration,
|
|
config::OutputLevel,
|
|
event_handlers::{
|
|
Command::{self, AddError, AddStatus},
|
|
Handles,
|
|
},
|
|
progress::PROGRESS_PRINTER,
|
|
response::FeroxResponse,
|
|
send_command,
|
|
statistics::StatError::{Certificate, Connection, Other, Redirection, Request, Timeout},
|
|
traits::FeroxSerialize,
|
|
USER_AGENTS,
|
|
};
|
|
|
|
/// monotonically increasing counter; `make_request` uses it (modulo `USER_AGENTS.len()`) to pick the next user agent when `config.random_agent` is set
|
|
static mut USER_AGENT_CTR: usize = 0;
|
|
|
|
/// detects certificate-related errors by analyzing the error chain
|
|
fn is_certificate_error(error: &reqwest::Error) -> bool {
|
|
let full_error = format!("{error:?}").to_lowercase();
|
|
let error_msg = error.to_string().to_lowercase();
|
|
|
|
// check the main error message first
|
|
if error_msg.contains("certificate verify failed")
|
|
|| error_msg.contains("self-signed certificate")
|
|
|| error_msg.contains("certificate has expired")
|
|
|| error_msg.contains("hostname mismatch")
|
|
|| error_msg.contains("certificate")
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// check the full debug representation for OpenSSL patterns
|
|
if full_error.contains("ssl routines")
|
|
|| full_error.contains("certificate verify failed")
|
|
|| full_error.contains("self-signed certificate")
|
|
|| full_error.contains("certificate has expired")
|
|
|| full_error.contains("hostname mismatch")
|
|
|| full_error.contains("tls_post_process_server_certificate")
|
|
|| full_error.contains("certificate")
|
|
|| full_error.contains("cert")
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// walk the error source chain to find underlying TLS/certificate errors
|
|
let mut source = error.source();
|
|
while let Some(err) = source {
|
|
let source_msg = err.to_string().to_lowercase();
|
|
|
|
// check for specific OpenSSL certificate error patterns
|
|
if source_msg.contains("ssl routines")
|
|
|| source_msg.contains("certificate verify failed")
|
|
|| source_msg.contains("self-signed certificate")
|
|
|| source_msg.contains("certificate has expired")
|
|
|| source_msg.contains("hostname mismatch")
|
|
|| source_msg.contains("unable to get local issuer certificate")
|
|
|| source_msg.contains("certificate is not yet valid")
|
|
|| source_msg.contains("invalid certificate")
|
|
|| source_msg.contains("unknown ca")
|
|
|| source_msg.contains("certificate")
|
|
|| source_msg.contains("cert")
|
|
|| source_msg.contains("tls")
|
|
|| source_msg.contains("ssl")
|
|
{
|
|
return true;
|
|
}
|
|
|
|
source = err.source();
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Given the path to a file, open the file in append mode (create it if it doesn't exist) and
|
|
/// return a reference to the buffered file
|
|
pub fn open_file(filename: &str) -> Result<BufWriter<fs::File>> {
|
|
log::trace!("enter: open_file({filename})");
|
|
|
|
let file = fs::OpenOptions::new() // std fs
|
|
.create(true)
|
|
.append(true)
|
|
.open(filename)
|
|
.with_context(|| fmt_err(&format!("Could not open {filename}")))?;
|
|
|
|
let writer = BufWriter::new(file); // std io
|
|
|
|
log::trace!("exit: open_file -> {writer:?}");
|
|
Ok(writer)
|
|
}
|
|
|
|
/// Takes in a string and examines the first character to return a color version of the same string
|
|
pub fn status_colorizer(status: &str) -> String {
|
|
match status.chars().next() {
|
|
Some('1') => style(status).blue().to_string(), // informational
|
|
Some('2') => style(status).green().to_string(), // success
|
|
Some('3') => style(status).yellow().to_string(), // redirects
|
|
Some('4') => style(status).red().to_string(), // client error
|
|
Some('5') => style(status).red().to_string(), // server error
|
|
Some('W') => style(status).cyan().to_string(), // wildcard
|
|
Some('E') => style(status).red().to_string(), // error
|
|
_ => status.to_string(), // ¯\_(ツ)_/¯
|
|
}
|
|
}
|
|
|
|
/// simple wrapper to stay DRY
|
|
pub fn fmt_err(msg: &str) -> String {
|
|
format!("{}: {}", status_colorizer("ERROR"), msg)
|
|
}
|
|
|
|
/// Returns the current system time as (fractional) seconds elapsed since the unix epoch.
///
/// If the system clock reports a time before the epoch, `0.0` is returned.
pub fn timestamp() -> f64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // whole seconds plus the sub-second nanoseconds expressed as a fraction
        .map_or(0.0, |since_the_epoch| since_the_epoch.as_secs_f64())
}
|
|
|
|
/// given a FeroxResponse, send a TryRecursion command
|
|
///
|
|
/// moved to utils to allow for calls from extractor and scanner
|
|
pub(crate) async fn send_try_recursion_command(
|
|
handles: Arc<Handles>,
|
|
response: FeroxResponse,
|
|
) -> Result<()> {
|
|
// make the response mutable so we can drop the body before
|
|
// sending it over the mpsc
|
|
let mut response = response;
|
|
response.drop_text();
|
|
|
|
handles.send_scan_command(Command::TryRecursion(Box::new(response)))?;
|
|
let (tx, rx) = oneshot::channel::<bool>();
|
|
handles.send_scan_command(Command::Sync(tx))?;
|
|
rx.await?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Takes in a string and colors it using console::style
|
|
///
|
|
/// mainly putting this here in case i want to change the color later, making any changes easy
|
|
pub fn module_colorizer(modname: &str) -> String {
|
|
style(modname).cyan().to_string()
|
|
}
|
|
|
|
/// Simple helper to abstract away the check for an attached terminal.
|
|
///
|
|
/// If a terminal is attached, progress bars are visible and the progress bar is used to print
|
|
/// to stderr. The progress bar must be used when bars are visible in order to not jack up any
|
|
/// progress bar output (the bar knows how to print above itself)
|
|
///
|
|
/// If a terminal is not attached, `msg` is printed to stdout, with its ansi
|
|
/// color codes stripped.
|
|
///
|
|
/// additionally, provides a location for future printing options (no color, etc) to be handled
|
|
pub fn ferox_print(msg: &str, bar: &ProgressBar) {
|
|
if user_attended() {
|
|
bar.println(msg);
|
|
} else {
|
|
let stripped = strip_ansi_codes(msg);
|
|
println!("{stripped}");
|
|
}
|
|
}
|
|
|
|
/// wrapper for make_request used to pass error/response codes to FeroxScans for per-scan stats
|
|
/// tracking of information related to auto-tune/bail
|
|
pub async fn logged_request(
|
|
url: &Url,
|
|
method: &str,
|
|
data: Option<&[u8]>,
|
|
handles: Arc<Handles>,
|
|
) -> Result<Response> {
|
|
let client = &handles.config.client;
|
|
let level = handles.config.output_level;
|
|
let tx_stats = handles.stats.tx.clone();
|
|
|
|
let response = make_request(client, url, method, data, level, &handles.config, tx_stats).await;
|
|
|
|
let scans = handles.ferox_scans()?;
|
|
match response {
|
|
Ok(resp) => {
|
|
match resp.status() {
|
|
StatusCode::TOO_MANY_REQUESTS | StatusCode::FORBIDDEN => {
|
|
scans.increment_status_code(url.as_str(), resp.status());
|
|
}
|
|
_ => {}
|
|
}
|
|
Ok(resp)
|
|
}
|
|
Err(e) => {
|
|
log::warn!("err: {e:?}");
|
|
scans.increment_error(url.as_str());
|
|
bail!(e)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Initiate request to the given `Url` using `Client`
|
|
pub async fn make_request(
|
|
client: &Client,
|
|
url: &Url,
|
|
method: &str,
|
|
mut data: Option<&[u8]>,
|
|
output_level: OutputLevel,
|
|
config: &Configuration,
|
|
tx_stats: UnboundedSender<Command>,
|
|
) -> Result<Response> {
|
|
log::trace!(
|
|
"enter: make_request(Configuration::Client, {url}, {output_level:?}, {tx_stats:?})"
|
|
);
|
|
let tmp_workaround: Option<&[u8]> = Some(&[0xd_u8, 0xa]); // \r\n
|
|
|
|
let mut request = client.request(Method::from_bytes(method.as_bytes())?, url.to_owned());
|
|
|
|
if (!config.proxy.is_empty() || !config.replay_proxy.is_empty())
|
|
&& data.is_none()
|
|
&& ["post", "put", "patch"].contains(&method.to_ascii_lowercase().as_str())
|
|
{
|
|
// either --proxy or --replay-proxy was specified
|
|
// AND
|
|
// --data wasn't used
|
|
// AND
|
|
// the method is either post/put/patch (case insensitive)
|
|
//
|
|
// this combination of factors results in requests that are delayed for 10 seconds before
|
|
// being issued. The tracking issues are
|
|
// https://github.com/epi052/feroxbuster/issues/501
|
|
// https://github.com/seanmonstar/reqwest/issues/1474
|
|
//
|
|
// as a (hopefully temporary) workaround, we'll add \r\n to the body so that there's no
|
|
// delay
|
|
data = tmp_workaround;
|
|
}
|
|
|
|
if let Some(body_data) = data {
|
|
request = request.body(body_data.to_vec());
|
|
}
|
|
|
|
if config.random_agent {
|
|
let index = unsafe {
|
|
USER_AGENT_CTR += 1;
|
|
USER_AGENT_CTR % USER_AGENTS.len()
|
|
};
|
|
|
|
let user_agent = USER_AGENTS[index];
|
|
|
|
request = request.header("User-Agent", user_agent);
|
|
}
|
|
|
|
match request.send().await {
|
|
Err(e) => {
|
|
log::trace!("exit: make_request -> {e}");
|
|
|
|
if e.is_timeout() {
|
|
send_command!(tx_stats, AddError(Timeout));
|
|
} else if e.is_redirect() {
|
|
if let Some(last_redirect) = e.url() {
|
|
// get where we were headed (last_redirect) and where we came from (url)
|
|
let fancy_message = format!(
|
|
"{} !=> {} ({})",
|
|
url,
|
|
last_redirect,
|
|
style("too many redirects").red(),
|
|
);
|
|
|
|
let msg_status = match e.status() {
|
|
Some(status) => status.to_string(),
|
|
None => "ERR".to_string(),
|
|
};
|
|
|
|
let report = create_report_string(
|
|
&msg_status,
|
|
method,
|
|
"-1",
|
|
"-1",
|
|
"-1",
|
|
&fancy_message,
|
|
output_level,
|
|
);
|
|
|
|
send_command!(tx_stats, AddError(Redirection));
|
|
|
|
ferox_print(&report, &PROGRESS_PRINTER)
|
|
};
|
|
} else if is_certificate_error(&e) {
|
|
log::warn!("Certificate error detected: {e}");
|
|
send_command!(tx_stats, AddError(Certificate));
|
|
bail!(":SSL: {e}");
|
|
} else if e.is_connect() {
|
|
send_command!(tx_stats, AddError(Connection));
|
|
} else if e.is_request() {
|
|
send_command!(tx_stats, AddError(Request));
|
|
} else {
|
|
send_command!(tx_stats, AddError(Other));
|
|
}
|
|
|
|
log::warn!("Error while making request: {e}");
|
|
bail!("{}", e)
|
|
}
|
|
Ok(resp) => {
|
|
log::trace!("exit: make_request -> {resp:?}");
|
|
send_command!(tx_stats, AddStatus(resp.status()));
|
|
Ok(resp)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Helper to create the standard line for output to file/terminal
|
|
///
|
|
/// example output:
|
|
/// 200 127l 283w 4134c http://localhost/faq
|
|
pub fn create_report_string(
|
|
status: &str,
|
|
method: &str,
|
|
line_count: &str,
|
|
word_count: &str,
|
|
content_length: &str,
|
|
url: &str,
|
|
output_level: OutputLevel,
|
|
) -> String {
|
|
if matches!(output_level, OutputLevel::Silent) {
|
|
// --silent used, just need the url
|
|
format!("{url}\n")
|
|
} else {
|
|
// normal printing with status and sizes
|
|
let color_status = status_colorizer(status);
|
|
if status.contains("MSG") {
|
|
format!(
|
|
"{color_status} {method:>8} {line_count:>9} {word_count:>9} {content_length:>9} {url}\n"
|
|
)
|
|
} else {
|
|
format!(
|
|
"{color_status} {method:>8} {line_count:>8}l {word_count:>8}w {content_length:>8}c {url}\n"
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Attempts to set the soft limit for the RLIMIT_NOFILE resource
|
|
///
|
|
/// RLIMIT_NOFILE is the maximum number of file descriptors that can be opened by this process
|
|
///
|
|
/// The soft limit is the value that the kernel enforces for the corresponding resource.
|
|
/// The hard limit acts as a ceiling for the soft limit: an unprivileged process may set only its
|
|
/// soft limit to a value in the range from 0 up to the hard limit, and (irreversibly) lower its
|
|
/// hard limit.
|
|
///
|
|
/// A child process created via fork(2) inherits its parent's resource limits. Resource limits are
|
|
/// per-process attributes that are shared by all of the threads in a process.
|
|
///
|
|
/// Based on the above information, no attempt is made to restore the limit to its pre-scan value
|
|
/// as the adjustment made here is only valid for the scan itself (and any child processes, of which
|
|
/// there are none).
|
|
#[cfg(not(target_os = "windows"))]
|
|
pub fn set_open_file_limit(limit: u64) -> bool {
|
|
log::trace!("enter: set_open_file_limit");
|
|
|
|
if let Ok((soft, hard)) = getrlimit(Resource::NOFILE) {
|
|
if hard > limit {
|
|
// our default open file limit is less than the current hard limit, this means we can
|
|
// set the soft limit to our default
|
|
|
|
if setrlimit(Resource::NOFILE, limit, hard).is_ok() {
|
|
log::debug!("set open file descriptor limit to {limit}");
|
|
|
|
log::trace!("exit: set_open_file_limit -> {}", true);
|
|
return true;
|
|
}
|
|
} else if soft != hard {
|
|
// hard limit is lower than our default, the next best option is to set the soft limit as
|
|
// high as the hard limit will allow
|
|
if setrlimit(Resource::NOFILE, hard, hard).is_ok() {
|
|
log::debug!("set open file descriptor limit to {limit}");
|
|
|
|
log::trace!("exit: set_open_file_limit -> {}", true);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// failed to set a new limit, as limit adjustments are a 'nice to have', we'll just log
|
|
// and move along
|
|
log::warn!("could not set open file descriptor limit to {limit}");
|
|
|
|
log::trace!("exit: set_open_file_limit -> {}", false);
|
|
false
|
|
}
|
|
|
|
/// Given a string and a reference to a locked buffered file, write the contents and flush
|
|
/// the buffer to disk.
|
|
pub fn write_to<T>(
|
|
value: &T,
|
|
file: &mut io::BufWriter<fs::File>,
|
|
convert_to_json: bool,
|
|
) -> Result<()>
|
|
where
|
|
T: FeroxSerialize,
|
|
{
|
|
// note to future self: adding logging of anything other than error to this function
|
|
// is a bad idea. we call this function while processing records generated by the logger.
|
|
// If we then call log::... while already processing some logging output, it results in
|
|
// the second log entry being injected into the first.
|
|
|
|
let contents = if convert_to_json {
|
|
value.as_json()?
|
|
} else {
|
|
value.as_str()
|
|
};
|
|
|
|
let contents = strip_ansi_codes(&contents);
|
|
|
|
let written = file.write(contents.as_bytes())?;
|
|
|
|
if written > 0 {
|
|
// this function is used within async functions/loops, so i'm flushing so that in
|
|
// the event of a ctrl+c or w/e results seen so far are saved instead of left lying
|
|
// around in the buffer
|
|
file.flush()?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// determine if a url should be denied based on the given absolute url
|
|
fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>) -> Result<bool> {
|
|
log::trace!(
|
|
"enter: should_deny_absolute({}, {:?})",
|
|
url_to_test.as_str(),
|
|
denier.as_str(),
|
|
);
|
|
|
|
// simplest case is an exact match, check for it first
|
|
if url_to_test == denier {
|
|
log::trace!("exit: should_deny_absolute -> true");
|
|
return Ok(true);
|
|
}
|
|
|
|
match (url_to_test.host(), denier.host()) {
|
|
// .host() will return an enum with ipv4|6 or domain and is comparable
|
|
// whereas .domain() returns None for ip addresses
|
|
(Some(normed_host), Some(denier_host)) => {
|
|
if normed_host != denier_host {
|
|
// domains don't even match
|
|
return Ok(false);
|
|
}
|
|
}
|
|
_ => {
|
|
// one or the other couldn't determine the host value, which probably means
|
|
// it's not suitable for further comparison
|
|
return Ok(false);
|
|
}
|
|
}
|
|
|
|
let tested_host = url_to_test.host().unwrap(); // match above will catch errors
|
|
|
|
// at this point, we have a matching set of ips or domain names. now we can process the
|
|
// url path. The goal is to determine whether the given url's path is a subpath of any
|
|
// url in the deny list, for example
|
|
// GIVEN URL URL DENY LIST USER-SPECIFIED URLS TO SCAN
|
|
// http://some.domain/stuff/things, [http://some.domain/stuff], [http://some.domain] => true
|
|
// http://some.domain/stuff/things, [http://some.domain/stuff/things], [http://some.domain] => true
|
|
// http://some.domain/stuff/things, [http://some.domain/api], [http://some.domain] => false
|
|
// the examples above are all pretty obvious, the kicker comes when the blocking url's
|
|
// path is a parent to a scanned url
|
|
// http://some.domain/stuff/things, [http://some.domain/], [http://some.domain/stuff] => false
|
|
// http://some.domain/api, [http://some.domain/], [http://some.domain/stuff] => true
|
|
// we want to deny all children of the parent, unless that child is a child of a scan
|
|
// we specified through -u(s) or --stdin
|
|
|
|
let deny_path = denier.path();
|
|
let tested_path = url_to_test.path();
|
|
|
|
if tested_path.starts_with(deny_path) {
|
|
// at this point, we know that the given normalized path is a sub-path of the
|
|
// current deny-url, now we just need to check to see if this deny-url is a parent
|
|
// to a scanned url that is also a parent of the given url
|
|
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
|
|
let scanner = parse_url_with_raw_path(ferox_scan.url().trim_end_matches('/'))
|
|
.with_context(|| format!("Could not parse {ferox_scan} as a url"))?;
|
|
|
|
// by calling the new parse_url_with_raw_path, and reaching this point without an
|
|
// error, we know we have an authority and therefore a host. leaving the code
|
|
// below, but we should never hit the else condition. leaving it in so if we find
|
|
// a case where i'm mistaken, we'll know about it and can address it
|
|
|
|
if let Some(scan_host) = scanner.host() {
|
|
// same domain/ip check we perform on the denier above
|
|
if tested_host != scan_host {
|
|
// domains don't even match, keep on keepin' on...
|
|
continue;
|
|
}
|
|
} else {
|
|
// couldn't process .host from scanner
|
|
unreachable!("should_deny_absolute: scanner.host() returned None, which shouldn't be possible");
|
|
};
|
|
|
|
let scan_path = scanner.path();
|
|
|
|
if scan_path.starts_with(deny_path) && tested_path.starts_with(scan_path) {
|
|
// user-specified scan url is a sub-path of the deny-urls's path AND the
|
|
// url to check is a sub-path of the user-specified scan url
|
|
//
|
|
// the assumption is the user knew what they wanted and we're going to give
|
|
// the scanned url precedence, even though it's a sub-path
|
|
log::trace!("exit: should_deny_absolute -> false");
|
|
return Ok(false);
|
|
}
|
|
}
|
|
log::trace!("exit: should_deny_absolute -> true");
|
|
return Ok(true);
|
|
}
|
|
|
|
log::trace!("exit: should_deny_absolute -> false");
|
|
Ok(false)
|
|
}
|
|
|
|
/// determine if a url should be denied based on the given regular expression
|
|
///
|
|
/// the regex ONLY matches against the PATH of the url (not the scheme, host, port, etc)
|
|
fn should_deny_regex(url_to_test: &Url, denier: &Regex) -> bool {
|
|
log::trace!(
|
|
"enter: should_deny_regex({}, {})",
|
|
url_to_test.as_str(),
|
|
denier,
|
|
);
|
|
|
|
let result = denier.is_match(url_to_test.as_str());
|
|
|
|
log::trace!("exit: should_deny_regex -> {result}");
|
|
result
|
|
}
|
|
|
|
/// determines whether or not a given url should be denied based on the user-supplied --dont-scan
|
|
/// flag
|
|
pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
|
|
log::trace!(
|
|
"enter: should_deny_url({}, {:?}, {:?})",
|
|
url.as_str(),
|
|
handles.config.url_denylist,
|
|
handles.ferox_scans()?
|
|
);
|
|
|
|
// normalization for comparison is to remove the trailing / if one exists, this is done for
|
|
// the given url and any url to which it's compared
|
|
let normed_url = parse_url_with_raw_path(url.to_string().trim_end_matches('/'))?;
|
|
|
|
for denier in &handles.config.url_denylist {
|
|
// note to self: it may seem as though we can use regex only for --dont-scan, however, in
|
|
// doing so, we lose the ability to block a parent directory while scanning a child
|
|
if let Ok(should_deny) = should_deny_absolute(&normed_url, denier, handles.clone()) {
|
|
if should_deny {
|
|
return Ok(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
for denier in &handles.config.regex_denylist {
|
|
if should_deny_regex(&normed_url, denier) {
|
|
return Ok(true);
|
|
}
|
|
}
|
|
|
|
// made it to the end of the deny lists unscathed, return false, indicating we should not deny
|
|
// this particular url
|
|
log::trace!("exit: should_deny_url -> false");
|
|
Ok(false)
|
|
}
|
|
|
|
/// given a url and filename-suffix, return a unique filename comprised of the slugified url,
|
|
/// current unix timestamp and suffix
|
|
///
|
|
/// ex: ferox-http_telsa_com-1606947491.state
|
|
pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String {
|
|
log::trace!("enter: slugify({url:?}, {prefix:?}, {suffix:?})");
|
|
|
|
let ts = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.unwrap_or_else(|_| Duration::from_secs(0))
|
|
.as_secs();
|
|
|
|
let altered_prefix = if !prefix.is_empty() {
|
|
format!("{prefix}-")
|
|
} else {
|
|
String::new()
|
|
};
|
|
|
|
let slug = url.replace("://", "_").replace(['/', '.', ':'], "_");
|
|
|
|
let filename = format!("{altered_prefix}{slug}-{ts}.{suffix}");
|
|
|
|
log::trace!("exit: slugify -> {filename}");
|
|
filename
|
|
}
|
|
|
|
/// This function takes a url string and returns a `url::Url`
|
|
///
|
|
/// It is primarily used to detect url paths that `url::Url::parse` will
|
|
/// silently transform, such as /path/../file.html -> /file.html
|
|
///
|
|
/// # Warning
|
|
///
|
|
/// In the instance of a url with encoded path traversal strings, such as
|
|
/// /path/%2e%2e/file.html, the underlying `url::Url::parse` will
|
|
/// further encode the %-signs and return /path/%252e%252e/file.html
|
|
pub fn parse_url_with_raw_path(url: &str) -> Result<Url> {
|
|
log::trace!("enter: parse_url_with_raw_path({url})");
|
|
|
|
let parsed = Url::parse(url)?;
|
|
|
|
if !parsed.has_authority() {
|
|
// parsed correctly, but no authority, meaning mailto: or tel: or
|
|
// some other url that we don't care about
|
|
bail!("url to parse has no authority and is therefore invalid");
|
|
}
|
|
|
|
// thanks to @devx00: the possibility exists for Url to return true for
|
|
// has_authority, but not have a host/port, so we'll check for that
|
|
// and bail if it's the case
|
|
if parsed.host().is_none() {
|
|
bail!("url to parse doesn't have a host");
|
|
}
|
|
|
|
// we have a valid url, the next step is to check the path and see if it's
|
|
// something that url::Url::parse would silently transform
|
|
//
|
|
// i.e. if the path is /path/../file.html, url::Url::parse will transform it
|
|
// to /file.html, which is not what we want
|
|
|
|
let farthest_right_authority_part;
|
|
|
|
// we want to find the farthest right authority component, which is the
|
|
// component that is the furthest right in the url that is part of the
|
|
// authority
|
|
//
|
|
// per RFC 3986, the authority is defined as:
|
|
// - authority = [ userinfo "@" ] host [ ":" port ]
|
|
//
|
|
// so the farthest right authority component is either the port or the host
|
|
//
|
|
// i.e. in http://example.com:80/path/file.html, the farthest right authority
|
|
// component is :80
|
|
//
|
|
// in http://example.com/path/file.html, the farthest right authority component
|
|
// is example.com
|
|
//
|
|
// the farthest right authority component is used to split the url into two
|
|
// parts: the part before the authority and the part after the authority
|
|
if let Some(port) = parsed.port() {
|
|
// if the url has a port, then the farthest right authority component is
|
|
// the port
|
|
farthest_right_authority_part = format!(":{port}");
|
|
} else if parsed.has_host() {
|
|
// if the url has a host, then the farthest right authority component is
|
|
// the host
|
|
farthest_right_authority_part = parsed.host_str().unwrap().to_owned();
|
|
} else {
|
|
// if the url has neither a port nor a host, then the url is invalid
|
|
// and we can't do anything with it, but i don't think this is possible
|
|
unreachable!("url has an authority, but has neither a port nor a host");
|
|
}
|
|
|
|
// split the original url string into two parts: the part before the authority and the part
|
|
// after the authority (i.e. the path + query + fragment)
|
|
|
|
let Some((_, after_authority)) = url.split_once(&farthest_right_authority_part) else {
|
|
// if we can't split the url string into two parts, then the url doesn't conform to our
|
|
// expectations, and we can't continue processing it, so we'll return the parsed url
|
|
return Ok(parsed);
|
|
};
|
|
|
|
// when there is a port, but it matches the default port for the scheme,
|
|
// url::Url::parse will mark the port as None, giving us a
|
|
// `after_authority` that looks something like this:
|
|
// - :80/path/file.html
|
|
let after_authority = after_authority
|
|
.replacen(":80", "", 1)
|
|
.replacen(":443", "", 1);
|
|
|
|
// snippets from rfc-3986:
|
|
//
|
|
// foo://example.com:8042/over/there?name=ferret#nose
|
|
// \_/ \______________/\_________/ \_________/ \__/
|
|
// | | | | |
|
|
// scheme authority path query fragment
|
|
//
|
|
// The path component is terminated
|
|
// by the first question mark ("?") or number sign ("#") character, or
|
|
// by the end of the URI.
|
|
//
|
|
// The query component is indicated by the first question
|
|
// mark ("?") character and terminated by a number sign ("#") character
|
|
// or by the end of the URI.
|
|
let (path, _discarded) = after_authority
|
|
.split_once('?')
|
|
// if there isn't a '?', try to remove a fragment
|
|
.unwrap_or_else(|| {
|
|
// if there isn't a '#', return (original, empty)
|
|
after_authority
|
|
.split_once('#')
|
|
.unwrap_or((&after_authority, ""))
|
|
});
|
|
|
|
// at this point, we have the path, all by itself
|
|
|
|
// each of the following is a string that we can expect url::Url::parse to
|
|
// transform. The variety is to ensure we cover most common path traversal
|
|
// encodings
|
|
let transformation_detectors = [
|
|
// ascii
|
|
"..",
|
|
// single url encoded
|
|
"%2e%2e",
|
|
// double url encoded
|
|
"%25%32%65%25%32%65",
|
|
// utf-8 encoded
|
|
"%c0%ae%c0%ae",
|
|
"%e0%40%ae%e0%40%ae",
|
|
"%c0ae%c0ae",
|
|
// 16 bit shenanigans
|
|
"%uff0e%uff0e",
|
|
"%u002e%u002e",
|
|
];
|
|
|
|
let parsing_will_transform_path = transformation_detectors
|
|
.iter()
|
|
.any(|detector| path.to_lowercase().contains(detector));
|
|
|
|
if !parsing_will_transform_path {
|
|
// there's no string in the path of the url that will trigger a transformation
|
|
// so, we can return it as-is
|
|
return Ok(parsed);
|
|
}
|
|
|
|
// if we reach this point, the path contains a string that will trigger a transformation
|
|
// so we need to manually create a Url that doesn't have the transformation
|
|
// and return that
|
|
//
|
|
// special thanks to github user @lavafroth for this workaround
|
|
|
|
let mut hacked_url = if path.ends_with('/') {
|
|
// from_file_path silently strips trailing slashes, and
|
|
// from_directory_path adds them, so we'll choose the appropriate
|
|
// constructor based on the presence of a path's trailing slash
|
|
|
|
// according to from_file_path docs:
|
|
// from_file_path returns `Err` if the given path is not absolute or,
|
|
// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
|
|
//
|
|
// since we parsed out a valid url path, we know it is absolute, so on non-windows
|
|
// platforms, we can safely unwrap. On windows, we need to fix up the path
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
let path = format!("\\/IGNOREME{path}");
|
|
Url::from_directory_path(path).unwrap()
|
|
}
|
|
#[cfg(not(target_os = "windows"))]
|
|
Url::from_directory_path(path).unwrap()
|
|
} else {
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
let path = format!("\\/IGNOREME{path}");
|
|
Url::from_file_path(path).unwrap()
|
|
}
|
|
#[cfg(not(target_os = "windows"))]
|
|
Url::from_file_path(path).unwrap()
|
|
};
|
|
|
|
// host must be set first, otherwise multiple components may return Err
|
|
hacked_url.set_host(parsed.host_str())?;
|
|
// scheme/port/username/password can fail, but in this instance, we know they won't
|
|
hacked_url.set_scheme(parsed.scheme()).unwrap();
|
|
hacked_url.set_port(parsed.port()).unwrap();
|
|
hacked_url.set_username(parsed.username()).unwrap();
|
|
hacked_url.set_password(parsed.password()).unwrap();
|
|
// query/fragment can't fail
|
|
hacked_url.set_query(parsed.query());
|
|
hacked_url.set_fragment(parsed.fragment());
|
|
|
|
log::trace!("exit: parse_url_with_raw_path -> {hacked_url}");
|
|
Ok(hacked_url)
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::config::Configuration;
|
|
use crate::scan_manager::{FeroxScans, ScanOrder};
|
|
|
|
#[test]
|
|
/// parse_url_with_raw_path with javascript:// should not throw an unimplemented! error
|
|
fn utils_parse_url_with_raw_path_javascript() {
|
|
let url = "javascript://";
|
|
let parsed = parse_url_with_raw_path(url);
|
|
assert!(parsed.is_err());
|
|
assert!(parsed
|
|
.unwrap_err()
|
|
.to_string()
|
|
.contains("url to parse doesn't have a host"));
|
|
}
|
|
|
|
#[test]
/// multiple tests for parse_url_with_raw_path
///
/// the cases are table-driven: urls that must come back untouched, inputs that must be
/// rejected, default/non-default port handling, and percent-encoded path segments
fn utils_parse_url_with_raw_path() {
    // each of these is returned exactly as given: ../.. is preserved, and a trailing slash is
    // neither added nor removed
    for url in [
        "https://www.google.com/../../stuff",
        "https://www.google.com/../../stuff/",
        "https://www.google.com/stuff",
        "https://www.google.com/stuff/",
    ] {
        let parsed = parse_url_with_raw_path(url).unwrap();
        assert_eq!(parsed.as_str(), url);
    }

    // mailto, a relative url, and an absolute path without a host are all errors
    for url in ["mailto:user@example.com", "../../stuff", "/../../stuff"] {
        assert!(
            parse_url_with_raw_path(url).is_err(),
            "expected an error when parsing {url}"
        );
    }

    // default ports are parsed correctly (i.e. not reported as an explicit port)
    for url in [
        "http://example.com:80/path/file.html",
        "https://example.com:443/path/file.html",
    ] {
        let parsed = parse_url_with_raw_path(url).unwrap();
        assert!(parsed.port().is_none(), "unexpected port in {parsed}");
        assert_eq!(parsed.host_str(), Some("example.com"));
    }

    // non-default ports are parsed correctly and survive serialization
    for url in [
        "http://example.com:8080/path/file.html",
        "https://example.com:4433/path/file.html",
    ] {
        let parsed = parse_url_with_raw_path(url).unwrap();
        assert!(parsed.port().is_some(), "missing port in {parsed}");
        assert_eq!(parsed.as_str(), url);
    }

    // different encodings are respected if found in doubles
    //
    // note that the % sign is encoded as %25, so every expected value below is the input with
    // each % in the path replaced by %25; credentials must be left intact
    for (url, expected) in [
        (
            "http://user:pass@example.com/%2e%2e/stuff.php",
            "http://user:pass@example.com/%252e%252e/stuff.php",
        ),
        (
            "http://user:pass@example.com/%25%32%65%25%32%65/stuff.php",
            "http://user:pass@example.com/%2525%2532%2565%2525%2532%2565/stuff.php",
        ),
        (
            "http://user:pass@example.com/%c0%ae%c0%ae/stuff.php",
            "http://user:pass@example.com/%25c0%25ae%25c0%25ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%e0%40%ae%e0%40%ae/stuff.php",
            "http://user:pass@example.com/%25e0%2540%25ae%25e0%2540%25ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%c0ae%c0ae/stuff.php",
            "http://user:pass@example.com/%25c0ae%25c0ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%uff0e%uff0e/stuff.php",
            "http://user:pass@example.com/%25uff0e%25uff0e/stuff.php",
        ),
        (
            "http://user:pass@example.com/%u002e%u002e/stuff.php",
            "http://user:pass@example.com/%25u002e%25u002e/stuff.php",
        ),
    ] {
        let parsed = parse_url_with_raw_path(url).unwrap();
        assert_eq!(parsed.username(), "user", "wrong username for {url}");
        assert_eq!(parsed.password(), Some("pass"), "wrong password for {url}");
        assert_eq!(parsed.as_str(), expected);
    }
}
|
|
|
|
#[cfg(not(target_os = "windows"))]
mod nix_only_tests {
    use super::*;

    #[test]
    /// requesting one less than the current hard NOFILE limit is reported as a success
    fn utils_set_open_file_limit_with_low_requested_limit() {
        let (_soft, hard_limit) = getrlimit(Resource::NOFILE).unwrap();
        let requested = hard_limit - 1;

        assert!(set_open_file_limit(requested));
    }

    #[test]
    /// requesting one more than the current hard NOFILE limit is reported as a success
    fn utils_set_open_file_limit_with_high_requested_limit() {
        let (_soft, hard_limit) = getrlimit(Resource::NOFILE).unwrap();
        let (requested, lowered_soft) = (hard_limit + 1, hard_limit - 1);

        // push the soft limit below the hard limit so that soft != hard when
        // set_open_file_limit runs, which is the logic branch under test
        setrlimit(Resource::NOFILE, lowered_soft, hard_limit).unwrap();

        assert!(set_open_file_limit(requested));
    }

    #[test]
    /// set_open_file_limit should fail (return false) when hard == soft
    fn utils_set_open_file_limit_with_fails_when_both_limits_are_equal() {
        let (_soft, hard_limit) = getrlimit(Resource::NOFILE).unwrap();

        // raise the soft limit up to the hard limit so that soft == hard, which is the
        // failure logic branch under test
        setrlimit(Resource::NOFILE, hard_limit, hard_limit).unwrap();

        let limit_was_set = set_open_file_limit(hard_limit);
        assert!(!limit_was_set);
    }
}
|
|
|
|
#[test]
/// a "500" status comes back from status_colorizer styled red
fn status_colorizer_uses_red_for_500s() {
    let expected = style("500").red().to_string();
    assert_eq!(status_colorizer("500"), expected);
}
|
|
|
|
#[test]
/// a "400" status comes back from status_colorizer styled red
fn status_colorizer_uses_red_for_400s() {
    let expected = style("400").red().to_string();
    assert_eq!(status_colorizer("400"), expected);
}
|
|
|
|
#[test]
/// the literal "ERROR" status comes back from status_colorizer styled red
fn status_colorizer_uses_red_for_errors() {
    let expected = style("ERROR").red().to_string();
    assert_eq!(status_colorizer("ERROR"), expected);
}
|
|
|
|
#[test]
/// the wildcard marker "WLD" comes back from status_colorizer styled cyan
fn status_colorizer_uses_cyan_for_wildcards() {
    let expected = style("WLD").cyan().to_string();
    assert_eq!(status_colorizer("WLD"), expected);
}
|
|
|
|
#[test]
/// a "100" status comes back from status_colorizer styled blue
fn status_colorizer_uses_blue_for_100s() {
    let expected = style("100").blue().to_string();
    assert_eq!(status_colorizer("100"), expected);
}
|
|
|
|
#[test]
/// a "200" status comes back from status_colorizer styled green
fn status_colorizer_uses_green_for_200s() {
    let expected = style("200").green().to_string();
    assert_eq!(status_colorizer("200"), expected);
}
|
|
|
|
#[test]
/// a "300" status comes back from status_colorizer styled yellow
fn status_colorizer_uses_yellow_for_300s() {
    let expected = style("300").yellow().to_string();
    assert_eq!(status_colorizer("300"), expected);
}
|
|
|
|
#[test]
/// input that isn't a recognized status is handed back by status_colorizer without styling
fn status_colorizer_returns_as_is() {
    let unrecognized = "farfignewton";
    assert_eq!(status_colorizer(unrecognized), unrecognized.to_string());
}
|
|
|
|
#[test]
/// the denier shares the tested url's host and its path equals the tested url's path without
/// the trailing slash; expect true (the url is denied)
fn should_deny_url_blocks_when_denier_is_exact_match() {
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/denied").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(denied);
}
|
|
|
|
#[test]
/// the denier has the same path as the tested url but lives on a different host
/// (dev.testdomain.com vs testdomain.com); expect false
fn should_deny_url_doesnt_compare_mismatched_domains() {
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let denier = Url::parse("https://dev.testdomain.com/denied").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(!denied);
}
|
|
|
|
#[test]
/// the denier is a url from which no host can be checked (a unix socket path), which results
/// in no comparison; expect false
fn should_deny_url_doesnt_compare_non_domains() {
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let hostless_denier = Url::parse("unix:/run/foo.socket").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![hostless_denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(!denied);
}
|
|
|
|
#[test]
/// the scanned url lives on a different host (dev.testdomain.com) than the tested url, while
/// the denier shares the tested url's host and is a parent of its path
///
/// because the denier is a parent to the tested, even tho the scanned doesn't compare, the
/// result is still true
fn should_deny_url_doesnt_compare_mismatched_domains_in_scanned() {
    let scanned = "https://dev.testdomain.com/denied";
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/").unwrap();

    // the only registered scan is on the mismatched host
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan(scanned, ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(denied);
}
|
|
|
|
#[test]
/// the scanned url is one from which no host can be checked (a unix socket path), while the
/// denier shares the tested url's host and is a parent of its path; expect false
///
/// note: adding parse_url_with_raw_path changed the behavior of this test, it used to return
/// true, now it returns false. see the note in should_deny_absolute and the unreachable!
/// call block to see why
///
/// this test stays in place to document that behavior change and to catch regressions in the
/// new expected behavior
fn should_deny_url_doesnt_compare_non_domains_in_scanned() {
    let hostless_scan = "unix:/run/foo.socket";
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/").unwrap();

    // the only registered scan has no host to compare against
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan(hostless_scan, ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(!denied);
}
|
|
|
|
#[test]
/// the tested url (/api/denied/) is a sub-path of the denier (/api) and the scanned url (the
/// site root) is not; expect true
fn should_deny_url_blocks_child() {
    let target = Url::parse("https://testdomain.com/api/denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/api").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(denied);
}
|
|
|
|
#[test]
/// the tested url (/not-denied/) is not a sub-path of the denier (/api), and neither is the
/// scanned url (the site root); expect false
fn should_deny_url_doesnt_block_non_child() {
    let target = Url::parse("https://testdomain.com/not-denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/api").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(!denied);
}
|
|
|
|
#[test]
/// the denier is the site root and the scanned url is /api; the tested url (/stuff/) is a
/// sub-path of the denier but not of the scanned url; expect true
fn should_deny_url_blocks_child_when_scan_url_isnt_parent() {
    let target = Url::parse("https://testdomain.com/stuff/").unwrap();
    let denier = Url::parse("https://testdomain.com/").unwrap();

    // the registered scan (/api) is not a parent of the tested url
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan(
        "https://testdomain.com/api",
        ScanOrder::Initial,
        scan_handles,
    );

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(denied);
}
|
|
|
|
#[test]
/// the denier is the site root and the scanned url is /api; the tested url
/// (/api/not-denied/) is a sub-path of both the denier and the scanned url; expect false
fn should_deny_url_doesnt_block_child_when_scan_url_is_parent() {
    let target = Url::parse("https://testdomain.com/api/not-denied/").unwrap();
    let denier = Url::parse("https://testdomain.com/").unwrap();

    // the registered scan (/api) is a parent of the tested url
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan(
        "https://testdomain.com/api",
        ScanOrder::Initial,
        scan_handles,
    );

    let mut cfg = Configuration::new().unwrap();
    cfg.url_denylist = vec![denier];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(!denied);
}
|
|
|
|
#[test]
/// the denier is a regular expression (`/deni.*`) that matches within the path of the tested
/// url; expect true
fn should_deny_url_blocks_urls_based_on_regex_in_path() {
    let target = Url::parse("https://testdomain.com/denied/").unwrap();
    let path_pattern = Regex::new("/deni.*").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    // only the regex denylist is populated for this test
    let mut cfg = Configuration::new().unwrap();
    cfg.regex_denylist = vec![path_pattern];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    let denied = should_deny_url(&target, handles).unwrap();
    assert!(denied);
}
|
|
|
|
#[test]
/// the denier is a regular expression (`http:`) aimed at the scheme of the url; the http url
/// is expected to be denied while the otherwise identical https url is not
fn should_deny_url_blocks_urls_based_on_regex_in_scheme() {
    let http_target = Url::parse("http://testdomain.com/denied/").unwrap();
    let https_target = Url::parse("https://testdomain.com/denied/").unwrap();
    let scheme_pattern = Regex::new("http:").unwrap();

    // register the site root as the initial directory scan
    let known_scans = Arc::new(FeroxScans::default());
    let scan_handles = Arc::new(Handles::for_testing(None, None).0);
    known_scans.add_directory_scan("https://testdomain.com/", ScanOrder::Initial, scan_handles);

    // only the regex denylist is populated for this test
    let mut cfg = Configuration::new().unwrap();
    cfg.regex_denylist = vec![scheme_pattern];

    let handles = Arc::new(Handles::for_testing(Some(known_scans), Some(Arc::new(cfg))).0);

    // the https url is checked first, using a second reference to the same handles
    let https_denied = should_deny_url(&https_target, Arc::clone(&handles)).unwrap();
    assert!(!https_denied);

    let http_denied = should_deny_url(&http_target, handles).unwrap();
    assert!(http_denied);
}
|
|
}
|