Files
feroxbuster/src/extractor/tests.rs

406 lines
14 KiB
Rust
Raw Normal View History

use super::builder::{LINKFINDER_REGEX, ROBOTS_TXT_REGEX, URL_CHARS_REGEX};
use super::container::request_link;
2021-01-16 08:07:38 -06:00
use super::*;
use crate::config::{Configuration, OutputLevel};
use crate::scan_manager::ScanOrder;
use crate::{
event_handlers::Handles, scan_manager::FeroxScans, utils::make_request, Command, FeroxChannel,
DEFAULT_METHOD,
};
2021-01-16 08:07:38 -06:00
use anyhow::Result;
use httpmock::{Method::GET, MockServer};
2021-01-16 08:07:38 -06:00
use lazy_static::lazy_static;
use reqwest::{Client, StatusCode, Url};
2021-01-16 08:07:38 -06:00
use std::collections::HashSet;
use tokio::sync::mpsc;
lazy_static! {
/// Extractor for testing robots.txt
2021-01-24 07:37:38 -06:00
static ref ROBOTS_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::RobotsTxt, Arc::new(FeroxScans::default()));
2021-01-16 08:07:38 -06:00
/// Extractor for testing response bodies
2021-01-24 07:37:38 -06:00
static ref BODY_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ResponseBody, Arc::new(FeroxScans::default()));
2021-01-16 08:07:38 -06:00
2022-01-15 00:58:27 -05:00
/// Extractor for testing paring html
static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::DirectoryListing, Arc::new(FeroxScans::default()));
2022-01-15 00:58:27 -05:00
2021-01-16 08:07:38 -06:00
/// FeroxResponse for Extractor
static ref RESPONSE: FeroxResponse = get_test_response();
}
2021-01-16 14:17:44 -06:00
/// constructor for the default FeroxResponse used during testing
2021-01-16 08:07:38 -06:00
fn get_test_response() -> FeroxResponse {
let mut resp = FeroxResponse::default();
resp.set_text("nulla pharetra diam sit amet nisl suscipit adipiscing bibendum est");
resp
2021-01-16 08:07:38 -06:00
}
/// creates a single extractor that can be used to test standalone functions
2021-01-24 07:37:38 -06:00
fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc<FeroxScans>) -> Extractor<'static> {
2021-02-01 15:09:30 -06:00
let mut builder = ExtractorBuilder::default();
let builder = match target {
ExtractionTarget::ResponseBody => builder
.target(ExtractionTarget::ResponseBody)
.response(&RESPONSE),
ExtractionTarget::RobotsTxt => builder
.url("http://localhost")
.target(ExtractionTarget::RobotsTxt),
ExtractionTarget::DirectoryListing => builder
2022-01-15 00:58:27 -05:00
.url("http://localhost")
.target(ExtractionTarget::DirectoryListing),
2021-01-16 08:07:38 -06:00
};
2021-02-01 15:09:30 -06:00
let config = Arc::new(Configuration::new().unwrap());
let handles = Arc::new(Handles::for_testing(Some(scanned_urls), Some(config)).0);
builder.handles(handles).build().unwrap()
2021-01-16 08:07:38 -06:00
}
#[test]
/// extract sub paths from the given url fragment; expect 4 sub paths and that all are
/// in the expected array
fn extractor_get_sub_paths_from_path_with_multiple_paths() {
let path = "homepage/assets/img/icons/handshake.svg";
2021-07-30 16:14:25 -05:00
let r_paths = ROBOTS_EXT.get_sub_paths_from_path(path);
let b_paths = BODY_EXT.get_sub_paths_from_path(path);
2021-01-16 08:07:38 -06:00
let expected = vec![
"homepage/",
"homepage/assets/",
"homepage/assets/img/",
"homepage/assets/img/icons/",
"homepage/assets/img/icons/handshake.svg",
];
assert_eq!(r_paths.len(), expected.len());
assert_eq!(b_paths.len(), expected.len());
for expected_path in expected {
2021-06-17 16:13:18 -05:00
assert!(r_paths.contains(&expected_path.to_string()));
assert!(b_paths.contains(&expected_path.to_string()));
2021-01-16 08:07:38 -06:00
}
}
#[test]
/// extract sub paths from the given url fragment; expect 2 sub paths and that all are
/// in the expected array. the fragment is wrapped in slashes to ensure no empty strings are
/// returned
fn extractor_get_sub_paths_from_path_with_enclosing_slashes() {
let path = "/homepage/assets/";
2021-07-30 16:14:25 -05:00
let r_paths = ROBOTS_EXT.get_sub_paths_from_path(path);
let b_paths = BODY_EXT.get_sub_paths_from_path(path);
2021-01-16 08:07:38 -06:00
let expected = vec!["homepage/", "homepage/assets"];
assert_eq!(r_paths.len(), expected.len());
assert_eq!(b_paths.len(), expected.len());
for expected_path in expected {
2021-06-17 16:13:18 -05:00
assert!(r_paths.contains(&expected_path.to_string()));
assert!(b_paths.contains(&expected_path.to_string()));
2021-01-16 08:07:38 -06:00
}
}
#[test]
/// extract sub paths from the given url fragment; expect 1 sub path, no forward slashes are
/// included
fn extractor_get_sub_paths_from_path_with_only_a_word() {
let path = "homepage";
2021-07-30 16:14:25 -05:00
let r_paths = ROBOTS_EXT.get_sub_paths_from_path(path);
let b_paths = BODY_EXT.get_sub_paths_from_path(path);
2021-01-16 08:07:38 -06:00
let expected = vec!["homepage"];
assert_eq!(r_paths.len(), expected.len());
assert_eq!(b_paths.len(), expected.len());
for expected_path in expected {
2021-06-17 16:13:18 -05:00
assert!(r_paths.contains(&expected_path.to_string()));
assert!(b_paths.contains(&expected_path.to_string()));
2021-01-16 08:07:38 -06:00
}
}
#[test]
/// extract sub paths from the given url fragment; expect 1 sub path, forward slash removed
fn extractor_get_sub_paths_from_path_with_an_absolute_word() {
let path = "/homepage";
2021-07-30 16:14:25 -05:00
let r_paths = ROBOTS_EXT.get_sub_paths_from_path(path);
let b_paths = BODY_EXT.get_sub_paths_from_path(path);
2021-01-16 08:07:38 -06:00
let expected = vec!["homepage"];
assert_eq!(r_paths.len(), expected.len());
assert_eq!(b_paths.len(), expected.len());
for expected_path in expected {
2021-06-17 16:13:18 -05:00
assert!(r_paths.contains(&expected_path.to_string()));
assert!(b_paths.contains(&expected_path.to_string()));
2021-01-16 08:07:38 -06:00
}
}
2021-01-16 08:07:38 -06:00
#[test]
/// test that an ExtractorBuilder without a FeroxResponse and without a URL bails
fn extractor_builder_bails_when_neither_required_field_is_set() {
let handles = Arc::new(Handles::for_testing(None, None).0);
2021-01-16 08:07:38 -06:00
2021-02-01 15:09:30 -06:00
let extractor = ExtractorBuilder::default()
.url("")
.target(ExtractionTarget::RobotsTxt)
.handles(handles)
2021-01-16 08:07:38 -06:00
.build();
assert!(extractor.is_err());
}
#[test]
/// Extractor with a non-base url bails
fn extractor_with_non_base_url_bails() -> Result<()> {
let mut links = HashSet::<String>::new();
let link = "admin";
let handles = Arc::new(Handles::for_testing(None, None).0);
2021-02-01 15:09:30 -06:00
let extractor = ExtractorBuilder::default()
.url("\\\\\\")
.handles(handles)
2021-02-01 15:09:30 -06:00
.target(ExtractionTarget::RobotsTxt)
.build()?;
let result = extractor.add_link_to_set_of_links(link, &mut links);
assert!(result.is_err());
Ok(())
}
2021-01-16 08:07:38 -06:00
#[test]
/// test that a full url and fragment are joined correctly, then added to the given list
/// i.e. the happy path
fn extractor_add_link_to_set_of_links_happy_path() {
let mut r_links = HashSet::<String>::new();
let r_link = "admin";
let mut b_links = HashSet::<String>::new();
let b_link = "shmadmin";
assert_eq!(r_links.len(), 0);
ROBOTS_EXT
.add_link_to_set_of_links(r_link, &mut r_links)
.unwrap();
assert_eq!(r_links.len(), 1);
assert!(r_links.contains("http://localhost/admin"));
2021-01-16 08:07:38 -06:00
assert_eq!(b_links.len(), 0);
BODY_EXT
.add_link_to_set_of_links(b_link, &mut b_links)
.unwrap();
assert_eq!(b_links.len(), 1);
assert!(b_links.contains("http://localhost/shmadmin"));
2021-01-16 08:07:38 -06:00
}
#[test]
/// test that an invalid path fragment doesn't add anything to the set of links
fn extractor_add_link_to_set_of_links_with_non_base_url() {
let mut links = HashSet::<String>::new();
let link = "\\\\\\\\";
assert_eq!(links.len(), 0);
assert!(ROBOTS_EXT
.add_link_to_set_of_links(link, &mut links)
.is_err());
assert!(BODY_EXT.add_link_to_set_of_links(link, &mut links).is_err());
assert_eq!(links.len(), 0);
assert!(links.is_empty());
}
#[test]
/// test for filtering queries and fragments
fn normalize_url_path_filters_queries_and_fragments() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let extractor = ExtractorBuilder::default()
.url("doesnt matter")
.target(ExtractionTarget::RobotsTxt)
.handles(handles)
.build()
.unwrap();
let test_strings = [
"over/there?name=ferret#nose",
"over/there?name=ferret",
"over/there#nose",
"over/there",
"over/there?name#nose",
"over/there?name",
" over/there?name=ferret#nose ",
"over/there?name=ferret ",
" over/there#nose",
];
test_strings.iter().for_each(|&ts| {
let normed = extractor.normalize_url_path(ts);
assert_eq!(normed, "over/there");
});
}
2021-01-16 08:07:38 -06:00
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
/// use make_request to generate a Response, and use the Response to test get_links;
/// the response will contain an absolute path to a domain that is not part of the scanned
/// domain; expect an empty set returned
async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain() -> Result<()> {
2021-01-17 13:46:54 -06:00
let (tx_stats, _): FeroxChannel<Command> = mpsc::unbounded_channel();
2021-01-16 08:07:38 -06:00
let srv = MockServer::start();
let mock = srv.mock(|when, then| {
when.method(GET).path("/some-path");
then.status(200).body(
"\"http://definitely.not.a.thing.probably.com/homepage/assets/img/icons/handshake.svg\"",
2021-01-16 08:07:38 -06:00
);
});
let client = Client::new();
let url = Url::parse(&srv.url("/some-path")).unwrap();
let config = Configuration::new().unwrap();
let response = make_request(
&client,
&url,
2021-12-23 18:10:02 +03:00
DEFAULT_METHOD,
2021-12-30 19:18:51 +03:00
None,
OutputLevel::Default,
&config,
tx_stats.clone(),
)
.await
.unwrap();
let (handles, _rx) = Handles::for_testing(None, None);
2021-01-16 08:07:38 -06:00
let handles = Arc::new(handles);
let ferox_response =
FeroxResponse::from(response, &srv.url(""), DEFAULT_METHOD, OutputLevel::Default).await;
2021-01-16 08:07:38 -06:00
2021-01-16 11:50:23 -06:00
let extractor = Extractor {
links_regex: Regex::new(LINKFINDER_REGEX).unwrap(),
robots_regex: Regex::new(ROBOTS_TXT_REGEX).unwrap(),
url_regex: Regex::new(URL_CHARS_REGEX).unwrap(),
2021-01-16 11:50:23 -06:00
response: Some(&ferox_response),
url: String::new(),
target: ExtractionTarget::ResponseBody,
handles: handles.clone(),
2021-01-16 11:50:23 -06:00
};
2021-01-16 08:07:38 -06:00
let links = extractor.extract_from_body().await?;
2021-01-16 08:07:38 -06:00
assert!(links.is_empty());
assert_eq!(mock.hits(), 1);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
/// test that /robots.txt is correctly requested given a base url (happy path)
async fn request_robots_txt_without_proxy() -> Result<()> {
let handles = Arc::new(Handles::for_testing(None, None).0);
2021-01-16 08:07:38 -06:00
let srv = MockServer::start();
let mock = srv.mock(|when, then| {
when.method(GET).path("/robots.txt");
then.status(200).body("this is a test");
});
2021-01-16 11:50:23 -06:00
let extractor = Extractor {
links_regex: Regex::new(LINKFINDER_REGEX).unwrap(),
robots_regex: Regex::new(ROBOTS_TXT_REGEX).unwrap(),
url_regex: Regex::new(URL_CHARS_REGEX).unwrap(),
2021-01-16 11:50:23 -06:00
response: None,
url: srv.url("/api/users/stuff/things"),
target: ExtractionTarget::RobotsTxt,
handles,
2021-01-16 11:50:23 -06:00
};
2021-01-16 08:07:38 -06:00
let resp = extractor.make_extract_request("/robots.txt").await?;
2021-01-16 08:07:38 -06:00
assert!(matches!(resp.status(), &StatusCode::OK));
2023-02-15 19:39:27 -06:00
println!("{resp}");
2021-01-16 08:07:38 -06:00
assert_eq!(resp.content_length(), 14);
assert_eq!(mock.hits(), 1);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
/// test that /robots.txt is correctly requested given a base url (happy path) when a proxy is used
async fn request_robots_txt_with_proxy() -> Result<()> {
let handles = Arc::new(Handles::for_testing(None, None).0);
2021-01-28 15:02:37 -06:00
let mut config = Configuration::new()?;
2021-01-16 08:07:38 -06:00
let srv = MockServer::start();
let mock = srv.mock(|when, then| {
when.method(GET).path("/robots.txt");
then.status(200).body("this is also a test");
});
// note: the proxy doesn't actually do anything other than hit a different code branch
// in this unit test; it would however have an effect on an integration test
config.proxy = srv.url("/ima-proxy");
config.no_recursion = true;
2021-01-16 08:07:38 -06:00
2021-02-01 15:09:30 -06:00
let extractor = ExtractorBuilder::default()
.url(&srv.url("/api/different/path"))
.target(ExtractionTarget::RobotsTxt)
.handles(handles)
2021-01-16 08:07:38 -06:00
.build()?;
let resp = extractor.make_extract_request("/robots.txt").await?;
2021-01-16 08:07:38 -06:00
assert!(matches!(resp.status(), &StatusCode::OK));
assert_eq!(resp.content_length(), 19);
assert_eq!(mock.hits(), 1);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2021-01-24 07:37:38 -06:00
/// request_link's happy path, expect back a FeroxResponse
async fn request_link_happy_path() -> Result<()> {
2021-01-16 08:07:38 -06:00
let srv = MockServer::start();
let mock = srv.mock(|when, then| {
when.method(GET).path("/login.php");
then.status(200).body("this is a test");
});
let r_resp = request_link(&srv.url("/login.php"), ROBOTS_EXT.handles.clone()).await?;
let b_resp = request_link(&srv.url("/login.php"), BODY_EXT.handles.clone()).await?;
2021-01-16 08:07:38 -06:00
assert!(matches!(r_resp.status(), StatusCode::OK));
assert!(matches!(b_resp.status(), StatusCode::OK));
assert_eq!(r_resp.content_length().unwrap(), 14);
assert_eq!(b_resp.content_length().unwrap(), 14);
2021-01-16 08:07:38 -06:00
assert_eq!(mock.hits(), 2);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2021-01-24 07:37:38 -06:00
/// request_link should bail in the event that the url is already in scanned_urls
async fn request_link_bails_on_seen_url() -> Result<()> {
2021-01-16 08:07:38 -06:00
let url = "/unique-for-this-test.php";
let srv = MockServer::start();
let served = srv.url(url);
let mock = srv.mock(|when, then| {
when.method(GET).path(url);
then.status(200)
.body("this is a unique test, don't reuse the endpoint");
});
2021-01-24 07:37:38 -06:00
let scans = Arc::new(FeroxScans::default());
scans.add_file_scan(
&served,
ScanOrder::Latest,
Arc::new(Handles::for_testing(None, None).0),
);
2021-01-24 07:37:38 -06:00
let robots = setup_extractor(ExtractionTarget::RobotsTxt, scans.clone());
let body = setup_extractor(ExtractionTarget::ResponseBody, scans);
2021-01-16 08:07:38 -06:00
let r_resp = request_link(&served, robots.handles.clone()).await;
let b_resp = request_link(&served, body.handles.clone()).await;
2021-01-16 08:07:38 -06:00
assert!(r_resp.is_err());
assert!(b_resp.is_err());
assert_eq!(mock.hits(), 0); // function exits before requests can happen
Ok(())
}