moved functions related to scan management into their own module

This commit is contained in:
epi
2020-11-12 15:00:49 -06:00
parent 171238b71d
commit b00a47e5e5
5 changed files with 331 additions and 349 deletions

View File

@@ -8,22 +8,13 @@ pub mod logger;
pub mod parser;
pub mod progress;
pub mod reporter;
pub mod scan_manager;
pub mod scanner;
pub mod utils;
use indicatif::ProgressBar;
use reqwest::{
header::HeaderMap,
{Response, StatusCode, Url},
};
use std::{
cmp::PartialEq,
error, fmt,
sync::{Arc, Mutex},
};
use reqwest::{header::HeaderMap, Response, StatusCode, Url};
use std::{error, fmt};
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::task::JoinHandle;
use uuid::Uuid;
/// Generic Result type to ease error handling in async contexts
pub type FeroxResult<T> = std::result::Result<T, Box<dyn error::Error + Send + Sync + 'static>>;
@@ -203,180 +194,6 @@ impl FeroxResponse {
}
}
/// Struct to hold scan-related state
///
/// The purpose of this container is to open up the pathway to aborting currently running tasks and
/// serialization of all scan state into a state file in order to resume scans that were cut short
#[derive(Debug)]
struct FeroxScan {
/// UUID that uniquely ID's the scan
pub id: String,
/// The URL that to be scanned
pub url: String,
/// Whether or not this scan has completed
pub complete: bool,
/// The spawned tokio task performing this scan
pub task: Option<JoinHandle<()>>,
/// The progress bar associated with this scan
pub progress_bar: Option<ProgressBar>,
}
/// Implementation of FeroxScan
impl FeroxScan {
/// Stop a currently running scan
pub fn abort(&self) {
if let Some(_task) = &self.task {
// task.abort(); todo uncomment once upgraded to tokio 0.3
}
self.stop_progress_bar();
}
/// Create a default FeroxScan, populates ID with a new UUID
fn default() -> Self {
let new_id = Uuid::new_v4().to_simple().to_string();
FeroxScan {
id: new_id,
complete: false,
url: String::new(),
task: None,
progress_bar: None,
}
}
/// Simple helper to call .finish on the scan's progress bar
fn stop_progress_bar(&self) {
if let Some(pb) = &self.progress_bar {
pb.finish();
}
}
/// Given a URL and ProgressBar, create a new FeroxScan, wrap it in an Arc and return it
pub fn new(url: &str, pb: ProgressBar) -> Arc<Mutex<Self>> {
let mut me = Self::default();
me.url = utils::normalize_url(url);
me.progress_bar = Some(pb);
Arc::new(Mutex::new(me))
}
/// Mark the scan as complete and stop the scan's progress bar
pub fn finish(&mut self) {
self.complete = true;
self.stop_progress_bar();
}
}
// /// Eq implementation
// impl Eq for FeroxScan {}
/// PartialEq implementation; uses FeroxScan.id for comparison
impl PartialEq for FeroxScan {
fn eq(&self, other: &Self) -> bool {
self.id == other.id
}
}
// /// Hash implementation; uses uses FeroxScan.id and uses FeroxScan.url for hashing
// impl Hash for FeroxScan {
// /// Do the hashing with the hasher
// fn hash<H: Hasher>(&self, state: &mut H) {
// self.id.hash(state);
// self.url.hash(state);
// }
// }
/// Container around a locked hashset of `FeroxScan`s, adds wrappers for insertion and searching
#[derive(Debug, Default)]
struct FeroxScans {
scans: Mutex<Vec<Arc<Mutex<FeroxScan>>>>,
}
/// Implementation of `FeroxScans`
impl FeroxScans {
/// Add a `FeroxScan` to the internal container
///
/// If the internal container did NOT contain the scan, true is returned; else false
pub fn insert(&mut self, scan: Arc<Mutex<FeroxScan>>) -> bool {
let sentry = match scan.lock() {
Ok(locked_scan) => {
// If the container did contain the scan, set sentry to false
// If the container did not contain the scan, set sentry to true
!self.contains(&locked_scan.url)
}
Err(e) => {
// poisoned lock
log::error!("FeroxScan's ({:?}) mutex is poisoned: {}", self, e);
false
}
};
if sentry {
// can't update the internal container while the scan itself is locked, so first
// lock the scan and check the container for the scan's presence, then add if
// not found
match self.scans.lock() {
Ok(mut scans) => {
scans.push(scan);
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
return false;
}
}
}
sentry
}
/// Simple check for whether or not a FeroxScan is contained within the inner container based
/// on the given URL
pub fn contains(&self, url: &str) -> bool {
let normalized_url = utils::normalize_url(url);
match self.scans.lock() {
Ok(scans) => {
for scan in scans.iter() {
if let Ok(locked_scan) = scan.lock() {
if locked_scan.url == normalized_url {
return true;
}
}
}
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
}
}
false
}
/// Find and return a `FeroxScan` based on the given URL
pub fn get_scan_by_url(&self, url: &str) -> Option<Arc<Mutex<FeroxScan>>> {
let normalized_url = utils::normalize_url(url);
match self.scans.lock() {
Ok(scans) => {
for scan in scans.iter() {
if let Ok(locked_scan) = scan.lock() {
if locked_scan.url == normalized_url {
return Some(scan.clone());
}
}
}
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
}
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -3,7 +3,8 @@ use feroxbuster::{
banner,
config::{CONFIGURATION, PROGRESS_BAR, PROGRESS_PRINTER},
heuristics, logger, reporter,
scanner::{self, scan_url, PAUSE_SCAN},
scanner::{self, scan_url},
scan_manager::PAUSE_SCAN,
utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer},
FeroxError, FeroxResponse, FeroxResult, SLEEP_DURATION, VERSION,
};

312
src/scan_manager.rs Normal file
View File

@@ -0,0 +1,312 @@
use crate::{config::PROGRESS_PRINTER, progress, SLEEP_DURATION, scanner::NUMBER_OF_REQUESTS};
use indicatif::ProgressBar;
use std::{
cmp::PartialEq,
sync::{Arc, Mutex},
};
use std::{
io::{stderr, Write},
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
};
use tokio::{task::JoinHandle, time};
use uuid::Uuid;
/// Single atomic number that gets incremented once, used to track first thread to interact with
/// when pausing a scan
static INTERACTIVE_BARRIER: AtomicUsize = AtomicUsize::new(0);
/// Atomic boolean flag, used to determine whether or not a scan should pause or resume
pub static PAUSE_SCAN: AtomicBool = AtomicBool::new(false);
/// Struct to hold scan-related state
///
/// The purpose of this container is to open up the pathway to aborting currently running tasks and
/// serialization of all scan state into a state file in order to resume scans that were cut short
#[derive(Debug)]
pub struct FeroxScan {
/// UUID that uniquely ID's the scan
pub id: String,
/// The URL that to be scanned
pub url: String,
/// Whether or not this scan has completed
pub complete: bool,
/// The spawned tokio task performing this scan
pub task: Option<JoinHandle<()>>,
/// The progress bar associated with this scan
pub progress_bar: Option<ProgressBar>,
}
/// Implementation of FeroxScan
impl FeroxScan {
/// Stop a currently running scan
pub fn abort(&self) {
if let Some(_task) = &self.task {
// task.abort(); todo uncomment once upgraded to tokio 0.3
}
self.stop_progress_bar();
}
/// Create a default FeroxScan, populates ID with a new UUID
fn default() -> Self {
let new_id = Uuid::new_v4().to_simple().to_string();
FeroxScan {
id: new_id,
complete: false,
url: String::new(),
task: None,
progress_bar: None,
}
}
/// Simple helper to call .finish on the scan's progress bar
fn stop_progress_bar(&self) {
if let Some(pb) = &self.progress_bar {
pb.finish();
}
}
/// Given a URL and ProgressBar, create a new FeroxScan, wrap it in an Arc and return it
pub fn new(url: &str, pb: ProgressBar) -> Arc<Mutex<Self>> {
let mut me = Self::default();
me.url = url.to_string();
me.progress_bar = Some(pb);
Arc::new(Mutex::new(me))
}
/// Mark the scan as complete and stop the scan's progress bar
pub fn finish(&mut self) {
self.complete = true;
self.stop_progress_bar();
}
}
// /// Eq implementation
// impl Eq for FeroxScan {}
/// PartialEq implementation; uses FeroxScan.id for comparison
impl PartialEq for FeroxScan {
fn eq(&self, other: &Self) -> bool {
self.id == other.id
}
}
/// Container around a locked hashset of `FeroxScan`s, adds wrappers for insertion and searching
#[derive(Debug, Default)]
pub struct FeroxScans {
/// Internal structure: locked hashset of `FeroxScan`s
pub scans: Mutex<Vec<Arc<Mutex<FeroxScan>>>>,
}
/// Implementation of `FeroxScans`
impl FeroxScans {
/// Add a `FeroxScan` to the internal container
///
/// If the internal container did NOT contain the scan, true is returned; else false
pub fn insert(&self, scan: Arc<Mutex<FeroxScan>>) -> bool {
let sentry = match scan.lock() {
Ok(locked_scan) => {
// If the container did contain the scan, set sentry to false
// If the container did not contain the scan, set sentry to true
!self.contains(&locked_scan.url)
}
Err(e) => {
// poisoned lock
log::error!("FeroxScan's ({:?}) mutex is poisoned: {}", self, e);
false
}
};
if sentry {
// can't update the internal container while the scan itself is locked, so first
// lock the scan and check the container for the scan's presence, then add if
// not found
match self.scans.lock() {
Ok(mut scans) => {
scans.push(scan);
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
return false;
}
}
}
sentry
}
/// Simple check for whether or not a FeroxScan is contained within the inner container based
/// on the given URL
pub fn contains(&self, url: &str) -> bool {
match self.scans.lock() {
Ok(scans) => {
for scan in scans.iter() {
if let Ok(locked_scan) = scan.lock() {
if locked_scan.url == url {
return true;
}
}
}
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
}
}
false
}
/// Find and return a `FeroxScan` based on the given URL
pub fn get_scan_by_url(&self, url: &str) -> Option<Arc<Mutex<FeroxScan>>> {
match self.scans.lock() {
Ok(scans) => {
for scan in scans.iter() {
if let Ok(locked_scan) = scan.lock() {
if locked_scan.url == url {
return Some(scan.clone());
}
}
}
}
Err(e) => {
log::error!("FeroxScans' container's mutex is poisoned: {}", e);
}
}
None
}
/// Forced the calling thread into a busy loop
///
/// Every `SLEEP_DURATION` milliseconds, the function examines the result stored in `PAUSE_SCAN`
///
/// When the value stored in `PAUSE_SCAN` becomes `false`, the function returns, exiting the busy
/// loop
pub async fn pause(&self) {
log::trace!("enter: pause_scan");
// function uses tokio::time, not std
// local testing showed a pretty slow increase (less than linear) in CPU usage as # of
// concurrent scans rose when SLEEP_DURATION was set to 500, using that as the default for now
let mut interval = time::interval(time::Duration::from_millis(SLEEP_DURATION));
// ignore any error returned
let _ = stderr().flush();
if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 0 {
INTERACTIVE_BARRIER.fetch_add(1, Ordering::Relaxed);
PROGRESS_PRINTER.println(format!("Here's your shit: {:?}", self.scans));
let mut s = String::new();
std::io::stdin().read_line(&mut s).unwrap();
PROGRESS_PRINTER.println(format!("Here's your shit: {}", s));
}
loop {
// first tick happens immediately, all others wait the specified duration
interval.tick().await;
if !PAUSE_SCAN.load(Ordering::Acquire) {
// PAUSE_SCAN is false, so we can exit the busy loop
let _ = stderr().flush();
if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 1 {
INTERACTIVE_BARRIER.fetch_sub(1, Ordering::Relaxed);
}
log::trace!("exit: pause_scan");
return;
}
}
}
/// Given a url, create a new `FeroxScan` and add it to `FeroxScans`
///
/// If `FeroxScans` did not already contain the scan, return true; otherwise return false
///
/// Also return a reference to the new `FeroxScan`
pub fn add_scan(&self, url: &str) -> (bool, Arc<Mutex<FeroxScan>>) {
let progress_bar =
progress::add_bar(&url, NUMBER_OF_REQUESTS.load(Ordering::Relaxed), false);
progress_bar.reset_elapsed();
let ferox_scan = FeroxScan::new(&url, progress_bar);
// If the set did not contain the scan, true is returned.
// If the set did contain the scan, false is returned.
let response = self.insert(ferox_scan.clone());
(response, ferox_scan)
}
}
#[cfg(test)]
mod tests {
use super::*;
// todo add_url_* and pause_scan tests need to be redone
#[tokio::test(core_threads = 1)]
/// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled
/// the spinner used during the test has had .finish_and_clear called on it, meaning that
/// a new one will be created, taking the if branch within the function
async fn scanner_pause_scan_with_finished_spinner() {
let now = time::Instant::now();
PAUSE_SCAN.store(true, Ordering::Relaxed);
// BARRIER.write().unwrap().finish_and_clear();
let expected = time::Duration::from_secs(2);
tokio::spawn(async move {
time::delay_for(expected).await;
PAUSE_SCAN.store(false, Ordering::Relaxed);
});
pause_scan().await;
assert!(now.elapsed() > expected);
}
#[test]
/// add an unknown url to the hashset, expect true
fn add_url_to_list_of_scanned_urls_with_unknown_url() {
let urls = FeroxScans::default();
let url = "http://unknown_url";
let (result, _scan) = add_url_to_list_of_scanned_urls(url, &urls);
assert_eq!(result, true);
}
#[test]
/// add a known url to the hashset, with a trailing slash, expect false
fn add_url_to_list_of_scanned_urls_with_known_url() {
let urls = FeroxScans::default();
let pb = ProgressBar::new(1);
let url = "http://unknown_url/";
let mut scan = FeroxScan::new(url, pb);
assert_eq!(urls.insert(scan), true);
let (result, _scan) = add_url_to_list_of_scanned_urls(url, &urls);
assert_eq!(result, false);
}
#[test]
/// add a known url to the hashset, without a trailing slash, expect false
fn add_url_to_list_of_scanned_urls_with_known_url_without_slash() {
let urls = FeroxScans::default();
let pb = ProgressBar::new(1);
let url = "http://unknown_url";
let mut scan = FeroxScan::new(url, pb);
assert_eq!(urls.insert(scan), true);
let (result, _scan) = add_url_to_list_of_scanned_urls(url, &urls);
assert_eq!(result, false);
}
}

View File

@@ -1,10 +1,11 @@
use crate::{
config::{CONFIGURATION, PROGRESS_PRINTER},
config::CONFIGURATION,
extractor::get_links,
filters::{FeroxFilter, StatusCodeFilter, WildcardFilter},
heuristics, progress,
heuristics,
scan_manager::{FeroxScans, PAUSE_SCAN},
utils::{format_url, get_current_depth, make_request},
FeroxChannel, FeroxResponse, FeroxScan, FeroxScans, SLEEP_DURATION,
FeroxChannel, FeroxResponse,
};
use futures::{
future::{BoxFuture, FutureExt},
@@ -12,14 +13,12 @@ use futures::{
};
use lazy_static::lazy_static;
use reqwest::Url;
use std::sync::atomic::AtomicU64;
use std::{
collections::HashSet,
convert::TryInto,
io::{stderr, Write},
ops::Deref,
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
sync::{Arc, Mutex, RwLock},
sync::atomic::{AtomicU64, AtomicUsize, Ordering},
sync::{Arc, RwLock},
};
use tokio::{
sync::{
@@ -27,26 +26,19 @@ use tokio::{
Semaphore,
},
task::JoinHandle,
time,
};
/// Single atomic number that gets incremented once, used to track first scan vs. all others
static CALL_COUNT: AtomicUsize = AtomicUsize::new(0);
/// Single atomic number that gets holds the number of requests to be sent per directory scanned
static NUMBER_OF_REQUESTS: AtomicU64 = AtomicU64::new(0);
/// Single atomic number that gets incremented once, used to track first thread to interact with
/// when pausing a scan
static INTERACTIVE_BARRIER: AtomicUsize = AtomicUsize::new(0);
/// Atomic boolean flag, used to determine whether or not a scan should pause or resume
pub static PAUSE_SCAN: AtomicBool = AtomicBool::new(false);
pub static NUMBER_OF_REQUESTS: AtomicU64 = AtomicU64::new(0);
lazy_static! {
/// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication
static ref SCANNED_URLS: FeroxScans = FeroxScans::default();
// todo remove if not needed
// /// A clock spinner protected with a RwLock to allow for a single thread to use at a time
// static ref BARRIER: Arc<RwLock<bool>> = Arc::new(RwLock::new(true));
@@ -57,77 +49,6 @@ lazy_static! {
static ref SCAN_LIMITER: Semaphore = Semaphore::new(CONFIGURATION.scan_limit);
}
/// Forced the calling thread into a busy loop
///
/// Every `SLEEP_DURATION` milliseconds, the function examines the result stored in `PAUSE_SCAN`
///
/// When the value stored in `PAUSE_SCAN` becomes `false`, the function returns, exiting the busy
/// loop
async fn pause_scan() {
log::trace!("enter: pause_scan");
// function uses tokio::time, not std
// local testing showed a pretty slow increase (less than linear) in CPU usage as # of
// concurrent scans rose when SLEEP_DURATION was set to 500, using that as the default for now
let mut interval = time::interval(time::Duration::from_millis(SLEEP_DURATION));
// ignore any error returned
let _ = stderr().flush();
if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 0 {
INTERACTIVE_BARRIER.fetch_add(1, Ordering::Relaxed);
PROGRESS_PRINTER.println(format!("Here's your shit: {:?}", SCANNED_URLS.scans));
}
loop {
// first tick happens immediately, all others wait the specified duration
interval.tick().await;
if !PAUSE_SCAN.load(Ordering::Acquire) {
// PAUSE_SCAN is false, so we can exit the busy loop
let _ = stderr().flush();
if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 1 {
INTERACTIVE_BARRIER.fetch_sub(1, Ordering::Relaxed);
}
log::trace!("exit: pause_scan");
return;
}
}
}
/// Given a url, create a new `FeroxScan` and add it to `FeroxScans`
///
/// If `FeroxScans` did not already contain the scan, return true; otherwise return false
///
/// Also return a reference to the new `FeroxScan`
fn add_url_to_list_of_scanned_urls(
url: &str,
scanned_urls: &mut FeroxScans,
) -> (bool, Arc<Mutex<FeroxScan>>) {
log::trace!(
"enter: add_url_to_list_of_scanned_urls({}, {:?})",
url,
scanned_urls
);
let progress_bar = progress::add_bar(&url, NUMBER_OF_REQUESTS.load(Ordering::Relaxed), false);
progress_bar.reset_elapsed();
let ferox_scan = FeroxScan::new(&url, progress_bar);
// If the set did not contain the scan, true is returned.
// If the set did contain the scan, false is returned.
let response = scanned_urls.insert(ferox_scan.clone());
log::trace!(
"exit: add_url_to_list_of_scanned_urls -> ({}, {:?})",
response,
ferox_scan
);
(response, ferox_scan)
}
/// Adds the given FeroxFilter to the given list of FeroxFilter implementors
///
/// If the given list did not already contain the filter, return true; otherwise return false
@@ -187,7 +108,7 @@ fn spawn_recursion_handler(
let mut scans = vec![];
while let Some(resp) = recursion_channel.recv().await {
let (unknown, _ferox_scan) = add_url_to_list_of_scanned_urls(&resp, &SCANNED_URLS);
let (unknown, _ferox_scan) = SCANNED_URLS.add_scan(&resp);
if !unknown {
// not unknown, i.e. we've seen the url before and don't need to scan again
@@ -472,7 +393,7 @@ async fn make_requests(
let new_links = get_links(&ferox_response).await;
for new_link in new_links {
let (unknown, _) = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS);
let (unknown, _) = SCANNED_URLS.add_scan(&new_link);
if !unknown {
// not unknown, i.e. we've seen the url before and don't need to scan again
@@ -589,7 +510,7 @@ pub async fn scan_url(
// this protection allows us to add the first scanned url to SCANNED_URLS
// from within the scan_url function instead of the recursion handler
add_url_to_list_of_scanned_urls(&target_url, &SCANNED_URLS);
SCANNED_URLS.add_scan(&target_url);
}
let ferox_scan = SCANNED_URLS.get_scan_by_url(&target_url);
@@ -662,7 +583,7 @@ pub async fn scan_url(
// for every word in the wordlist, check to see if PAUSE_SCAN is set to true
// when true; enter a busy loop that only exits by setting PAUSE_SCAN back
// to false
pause_scan().await;
SCANNED_URLS.pause().await;
}
make_requests(&tgt, &word, base_depth, txd, txr).await
}),
@@ -751,6 +672,7 @@ pub fn initialize(
#[cfg(test)]
mod tests {
use super::*;
use indicatif::ProgressBar;
#[test]
/// sending url + word without any extensions should get back one url with the joined word
@@ -845,68 +767,4 @@ mod tests {
let result = reached_max_depth(&url, 0, 2);
assert!(result);
}
#[test]
/// add an unknown url to the hashset, expect true
fn add_url_to_list_of_scanned_urls_with_unknown_url() {
let urls = RwLock::new(HashSet::<String>::new());
let url = "http://unknown_url";
assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), true);
}
#[test]
/// add a known url to the hashset, with a trailing slash, expect false
fn add_url_to_list_of_scanned_urls_with_known_url() {
let urls = RwLock::new(HashSet::<String>::new());
let url = "http://unknown_url/";
assert_eq!(urls.write().unwrap().insert(url.to_string()), true);
assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false);
}
#[test]
/// add a known url to the hashset, without a trailing slash, expect false
fn add_url_to_list_of_scanned_urls_with_known_url_without_slash() {
let urls = RwLock::new(HashSet::<String>::new());
let url = "http://unknown_url";
assert_eq!(
urls.write()
.unwrap()
.insert("http://unknown_url".to_string()),
true
);
assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false);
}
#[test]
/// test that get_single_spinner returns the correct spinner
fn scanner_get_single_spinner_returns_spinner() {
let spinner = get_single_spinner();
assert!(!spinner.is_finished());
}
#[tokio::test(core_threads = 1)]
/// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled
/// the spinner used during the test has had .finish_and_clear called on it, meaning that
/// a new one will be created, taking the if branch within the function
async fn scanner_pause_scan_with_finished_spinner() {
let now = time::Instant::now();
PAUSE_SCAN.store(true, Ordering::Relaxed);
// BARRIER.write().unwrap().finish_and_clear();
let expected = time::Duration::from_secs(2);
tokio::spawn(async move {
time::delay_for(expected).await;
PAUSE_SCAN.store(false, Ordering::Relaxed);
});
pause_scan().await;
assert!(now.elapsed() > expected);
}
}

View File

@@ -21,13 +21,7 @@ use std::convert::TryInto;
pub fn get_current_depth(target: &str) -> usize {
log::trace!("enter: get_current_depth({})", target);
let target = if !target.ends_with('/') {
// target url doesn't end with a /, for the purposes of determining depth, we'll normalize
// all urls to end in a / and then calculate accordingly
format!("{}/", target)
} else {
String::from(target)
};
let target = normalize_url(target);
match Url::parse(&target) {
Ok(url) => {