added integration test for robots.txt extraction

This commit is contained in:
epi
2020-12-19 09:20:06 -06:00
parent f7ef202849
commit 086c9808a3
3 changed files with 77 additions and 11 deletions

View File

@@ -7,7 +7,7 @@ use feroxbuster::{
progress::add_bar,
reporter,
scan_manager::{self, PAUSE_SCAN},
scanner::{self, scan_url, RESPONSES, SCANNED_URLS},
scanner::{self, scan_url, send_report, RESPONSES, SCANNED_URLS},
utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer},
FeroxError, FeroxResponse, FeroxResult, FeroxSerialize, SLEEP_DURATION, VERSION,
};
@@ -157,18 +157,20 @@ async fn scan(
None => continue,
};
let (unknown, _) = if ferox_response.is_file() {
SCANNED_URLS.add_file_scan(&robot_link)
if ferox_response.is_file() {
SCANNED_URLS.add_file_scan(&robot_link);
send_report(tx_term.clone(), ferox_response);
} else {
SCANNED_URLS.add_directory_scan(&robot_link)
};
let (unknown, _) = SCANNED_URLS.add_directory_scan(&robot_link);
if !unknown {
// not unknown, i.e. we've seen the url before and don't need to scan again
continue;
if !unknown {
// known directory; can skip (unlikely)
continue;
}
// unknown directory; add to targets for scanning
targets.push(robot_link);
}
targets.push(robot_link);
}
}
}

View File

@@ -433,7 +433,7 @@ async fn make_requests(
}
/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel`
fn send_report(report_sender: UnboundedSender<FeroxResponse>, response: FeroxResponse) {
pub fn send_report(report_sender: UnboundedSender<FeroxResponse>, response: FeroxResponse) {
log::trace!("enter: send_report({:?}, {}", report_sender, response);
match report_sender.send(response) {

View File

@@ -219,3 +219,67 @@ fn extractor_finds_filtered_content() -> Result<(), Box<dyn std::error::Error>>
teardown_tmp_directory(tmp_dir);
Ok(())
}
#[test]
/// serve a robots.txt with a file and and a folder link contained within it. ferox should
/// find both links and request each one. Additionally, a scan should start with the directory
/// link found, meaning the wordlist will be thrown at the sub directory
fn extractor_finds_robots_txt_links_and_displays_files_or_scans_directories() {
let srv = MockServer::start();
let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist").unwrap();
let mock = srv.mock(|when, then| {
when.method(GET).path("/LICENSE");
then.status(200).body("im a little teapot"); // 18
});
let mock_two = srv.mock(|when, then| {
when.method(GET).path("/robots.txt");
then.status(200).body(
r#"
User-agent: *
Crawl-delay: 10
# CSS, JS, Images
Allow: /misc/*.css$
Disallow: /misc/stupidfile.php
Disallow: /disallowed-subdir/
"#,
);
});
let mock_file = srv.mock(|when, then| {
when.method(GET).path("/misc/stupidfile.php");
then.status(200).body("im a little teapot too"); // 22
});
let mock_dir = srv.mock(|when, then| {
when.method(GET).path("/disallowed-subdir/LICENSE");
then.status(200).body("i too, am a container for tea"); // 29
});
let cmd = Command::cargo_bin("feroxbuster")
.unwrap()
.arg("--url")
.arg(srv.url("/"))
.arg("--wordlist")
.arg(file.as_os_str())
.arg("--extract-links")
.unwrap();
cmd.assert().success().stdout(
predicate::str::contains("/LICENSE") // 2 directories contain LICENSE
.count(2)
.and(predicate::str::contains("18c"))
.and(predicate::str::contains("/misc/stupidfile.php"))
.and(predicate::str::contains("22c"))
.and(predicate::str::contains("/disallowed-subdir/LICENSE"))
.and(predicate::str::contains("29c"))
.and(predicate::str::contains("200").count(3)),
);
assert_eq!(mock.hits(), 1);
assert_eq!(mock_dir.hits(), 1);
assert_eq!(mock_two.hits(), 1);
assert_eq!(mock_file.hits(), 1);
teardown_tmp_directory(tmp_dir);
}