all todo done; wildcard filter default changed to u64::MAX

This commit is contained in:
epi
2021-01-26 06:55:17 -06:00
parent f6eae256a4
commit d9c99913d3
11 changed files with 270 additions and 339 deletions

View File

@@ -41,14 +41,11 @@ pub enum Command {
/// Send a `FeroxResponse` to the output handler for reporting
Report(Box<FeroxResponse>),
/// Send a url to be scanned (in the context of recursion), use sender to notify main when done
ScanUrl(String, Sender<bool>),
/// Send a group of urls to be scanned (only used for the urls passed in explicitly by the user)
ScanInitialUrls(Vec<String>),
/// Determine whether or not recursion is appropriate, given a FeroxResponse, if so start a scan
TryRecursion(FeroxResponse),
TryRecursion(Box<FeroxResponse>),
/// Send a pointer to the wordlist to the recursion handler
UpdateWordlist(Arc<HashSet<String>>),

View File

@@ -2,7 +2,6 @@ use super::command::Command::UpdateUsizeField;
use super::*;
use crate::utils::get_url_depth;
use crate::{
config::CONFIGURATION,
scan_manager::{FeroxScan, FeroxScans, ScanOrder},
scanner::scan_url,
statistics::StatField::TotalScans,
@@ -55,6 +54,9 @@ pub struct ScanHandler {
/// group of scans that need to be joined
tasks: Vec<Arc<FeroxScan>>,
/// Maximum recursion depth, a depth of 0 is infinite recursion
max_depth: usize,
/// depths associated with the initial targets provided by the user
depths: Vec<(String, usize)>,
}
@@ -62,11 +64,17 @@ pub struct ScanHandler {
/// implementation of event handler for scans
impl ScanHandler {
/// create new event handler
pub fn new(data: Arc<FeroxScans>, handles: Arc<Handles>, receiver: CommandReceiver) -> Self {
pub fn new(
data: Arc<FeroxScans>,
handles: Arc<Handles>,
max_depth: usize,
receiver: CommandReceiver,
) -> Self {
Self {
data,
handles,
receiver,
max_depth,
tasks: Vec::new(),
depths: Vec::new(),
wordlist: std::sync::Mutex::new(None),
@@ -84,13 +92,13 @@ impl ScanHandler {
/// Initialize new `FeroxScans` and the sc side of an mpsc channel that is responsible for
/// updates to the aforementioned object.
pub fn initialize(handles: Arc<Handles>) -> (Joiner, ScanHandle) {
pub fn initialize(handles: Arc<Handles>, max_depth: usize) -> (Joiner, ScanHandle) {
log::trace!("enter: initialize");
let data = Arc::new(FeroxScans::default());
let (tx, rx): FeroxChannel<Command> = mpsc::unbounded_channel();
let mut handler = Self::new(data.clone(), handles, rx);
let mut handler = Self::new(data.clone(), handles, max_depth, rx);
let task = tokio::spawn(async move { handler.start().await });
@@ -109,10 +117,6 @@ impl ScanHandler {
while let Some(command) = self.receiver.recv().await {
match command {
Command::ScanUrl(url, sender) => {
self.ordered_scan_url(vec![url], ScanOrder::Latest).await?;
sender.send(true).expect("oneshot channel failed");
}
Command::ScanInitialUrls(targets) => {
self.ordered_scan_url(targets, ScanOrder::Initial).await?;
}
@@ -134,6 +138,9 @@ impl ScanHandler {
Command::TryRecursion(response) => {
self.try_recursion(response).await?;
}
Command::Sync(sender) => {
sender.send(true).unwrap_or_default();
}
_ => {} // no other commands needed for RecursionHandler
}
}
@@ -155,21 +162,30 @@ impl ScanHandler {
/// wrapper around scanning a url to stay DRY
async fn ordered_scan_url(&mut self, targets: Vec<String>, order: ScanOrder) -> Result<()> {
for target in targets {
let (unknown, scan) = self
.data
.add_directory_scan(&target, self.handles.stats.data.clone());
log::trace!("enter: ordered_scan_url({:?}, {:?})", targets, order);
if !unknown {
// not unknown, i.e. we've seen the url before and don't need to scan again
for target in targets {
if self.data.contains(&target) && matches!(order, ScanOrder::Latest) {
// FeroxScans knows about this url and scan isn't an Initial scan
// initial scans are skipped because when resuming from a .state file, the scans
// will already be populated in FeroxScans, so we need to not skip kicking off
// their scans
continue;
}
let scan = if let Some(ferox_scan) = self.data.get_scan_by_url(&target) {
ferox_scan // scan already known
} else {
self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan
};
let list = self.get_wordlist()?;
log::info!("scan handler received {} - beginning scan", target);
if matches!(order, ScanOrder::Initial) {
// keeps track of the initial targets' scan depths in order to enforce the
// maximum recursion depth on any identified sub-directories
self.depths.push((target.clone(), get_url_depth(&target)));
}
@@ -187,13 +203,14 @@ impl ScanHandler {
self.tasks.push(scan.clone());
}
log::trace!("exit: ordered_scan_url");
Ok(())
}
async fn try_recursion(&mut self, response: FeroxResponse) -> Result<()> {
async fn try_recursion(&mut self, response: Box<FeroxResponse>) -> Result<()> {
log::trace!("enter: try_recursion({:?})", response,);
// todo get depth from self.depths
let mut base_depth = 1_usize;
for (base_url, base_url_depth) in &self.depths {
@@ -202,8 +219,7 @@ impl ScanHandler {
}
}
// todo remove CONFIG dependence, maybe in init
if response.reached_max_depth(base_depth, CONFIGURATION.depth) {
if response.reached_max_depth(base_depth, self.max_depth) {
// at or past recursion depth
return Ok(());
}

View File

@@ -1,5 +1,5 @@
use super::*;
use crate::CommandSender;
use crate::event_handlers::Handles;
use anyhow::{bail, Result};
/// Regular expression used in [LinkFinder](https://github.com/GerbenJavado/LinkFinder)
@@ -31,26 +31,11 @@ pub struct ExtractorBuilder<'a> {
/// Response from which to extract links
url: String,
/// Whether or not to try recursion
/// current configuration
config: Option<&'a Configuration>,
/// transmitter to the mpsc that handles statistics gathering
tx_stats: Option<CommandSender>,
/// transmitter to the mpsc that handles recursive scan calls
tx_recursion: Option<CommandSender>,
/// transmitter to the mpsc that handles reporting information to the user
tx_reporter: Option<CommandSender>,
/// list of urls that will be added to when new urls are extracted
scanned_urls: Option<Arc<FeroxScans>>,
/// depth at which the scan was started
depth: Option<usize>,
/// copy of Stats object
stats: Option<Arc<Stats>>,
/// Handles object to house the underlying mpsc transmitters
handles: Option<Arc<Handles>>,
/// type of extraction to be performed
target: ExtractionTarget,
@@ -66,12 +51,7 @@ impl<'a> ExtractorBuilder<'a> {
response: Some(response),
url: "".to_string(),
config: None,
tx_stats: None,
tx_recursion: None,
tx_reporter: None,
scanned_urls: None,
depth: None,
stats: None,
handles: None,
target: ExtractionTarget::ResponseBody,
}
}
@@ -84,12 +64,7 @@ impl<'a> ExtractorBuilder<'a> {
response: None,
url: url.to_string(),
config: None,
tx_stats: None,
tx_recursion: None,
tx_reporter: None,
scanned_urls: None,
depth: None,
stats: None,
handles: None,
target: ExtractionTarget::RobotsTxt,
}
}
@@ -100,41 +75,9 @@ impl<'a> ExtractorBuilder<'a> {
self
}
/// builder call to set `tx_recursion`
pub fn recursion_transmitter(&mut self, tx_recursion: CommandSender) -> &mut Self {
// todo change to scans_transmitter or w/e same on struct; don't bother, going to make extractor take a Handles object later anyway
self.tx_recursion = Some(tx_recursion);
self
}
/// builder call to set `tx_stats`
pub fn stats_transmitter(&mut self, tx_stats: CommandSender) -> &mut Self {
self.tx_stats = Some(tx_stats);
self
}
/// builder call to set `tx_reporter`
pub fn reporter_transmitter(&mut self, tx_reporter: CommandSender) -> &mut Self {
// todo change to outputs or w/e same on struct; don't bother, going to make extractor take a Handles object later anyway
self.tx_reporter = Some(tx_reporter);
self
}
/// builder call to set `scanned_urls`
pub fn scanned_urls(&mut self, scanned_urls: Arc<FeroxScans>) -> &mut Self {
self.scanned_urls = Some(scanned_urls);
self
}
/// builder call to set `stats`
pub fn stats(&mut self, stats: Arc<Stats>) -> &mut Self {
self.stats = Some(stats);
self
}
/// builder call to set `depth`
pub fn depth(&mut self, depth: usize) -> &mut Self {
self.depth = Some(depth);
/// builder call to set `handles`
pub fn handles(&mut self, handles: Arc<Handles>) -> &mut Self {
self.handles = Some(handles);
self
}
@@ -156,12 +99,7 @@ impl<'a> ExtractorBuilder<'a> {
},
url: self.url.to_owned(),
config: self.config.unwrap(),
tx_stats: self.tx_stats.as_ref().unwrap().clone(),
tx_recursion: self.tx_recursion.as_ref().unwrap().clone(),
tx_reporter: self.tx_reporter.as_ref().unwrap().clone(),
scanned_urls: self.scanned_urls.as_ref().unwrap().clone(),
depth: self.depth.unwrap(),
stats: self.stats.as_ref().unwrap().clone(),
handles: self.handles.as_ref().unwrap().clone(),
target: self.target,
})
}

View File

@@ -1,12 +1,12 @@
use super::*;
use crate::event_handlers::Command;
use crate::scan_manager::ScanOrder;
use crate::{
client,
event_handlers::Command::UpdateUsizeField,
event_handlers::{Command::UpdateUsizeField, Handles},
scanner::send_report,
send_command,
statistics::StatField::{LinksExtracted, TotalExpected},
utils::{format_url, make_request},
CommandSender,
};
use anyhow::{bail, Context, Result};
use reqwest::{StatusCode, Url};
@@ -40,23 +40,8 @@ pub struct Extractor<'a> {
/// current configuration
pub(super) config: &'a Configuration,
/// transmitter to the mpsc that handles statistics gathering
pub(super) tx_stats: CommandSender,
/// transmitter to the mpsc that handles recursive scan calls
pub(super) tx_recursion: CommandSender,
/// transmitter to the mpsc that handles reporting information to the user
pub(super) tx_reporter: CommandSender,
/// list of urls that will be added to when new urls are extracted
pub(super) scanned_urls: Arc<FeroxScans>,
/// depth at which the scan was started
pub(super) depth: usize,
/// copy of Stats object
pub(super) stats: Arc<Stats>,
/// Handles object to house the underlying mpsc transmitters
pub(super) handles: Arc<Handles>,
/// type of extraction to be performed
pub(super) target: ExtractionTarget,
@@ -77,6 +62,8 @@ impl<'a> Extractor<'a> {
RecursionStatus::Recursive
};
let scanned_urls = self.handles.ferox_scans()?;
for link in links {
let mut resp = match self.request_link(&link).await {
Ok(resp) => resp,
@@ -84,19 +71,22 @@ impl<'a> Extractor<'a> {
};
// filter if necessary
// if should_filter_response(&resp, self.tx_stats.clone()) {
// continue;
// }
// todo this needs to be reimplemented
if self
.handles
.filters
.data
.should_filter_response(&resp, self.handles.stats.tx.clone())
{
continue;
}
if resp.is_file() {
// very likely a file, simply request and report
log::debug!("Extracted file: {}", resp);
self.scanned_urls
.add_file_scan(&resp.url().to_string(), self.stats.clone());
scanned_urls.add_file_scan(&resp.url().to_string(), ScanOrder::Latest);
send_report(self.tx_reporter.clone(), resp);
send_report(self.handles.output.tx.clone(), resp);
continue;
}
@@ -118,8 +108,8 @@ impl<'a> Extractor<'a> {
resp.set_url(&format!("{}/", resp.url()));
}
// try_recursion(&resp, self.depth, self.tx_recursion.clone()).await;
// todo needs to be sent across to scans handler
self.handles
.send_scan_command(Command::TryRecursion(Box::new(resp)))?;
}
}
Ok(())
@@ -177,7 +167,7 @@ impl<'a> Extractor<'a> {
}
}
self.update_stats(links.len());
self.update_stats(links.len())?;
log::trace!("exit: get_links -> {:?}", links);
@@ -296,14 +286,11 @@ impl<'a> Extractor<'a> {
self.config.add_slash,
&self.config.queries,
None,
self.tx_stats.clone(),
self.handles.stats.tx.clone(),
)?;
let scanned_urls = self.handles.ferox_scans()?;
if self
.scanned_urls
.get_scan_by_url(&new_url.to_string())
.is_some()
{
if scanned_urls.get_scan_by_url(&new_url.to_string()).is_some() {
//we've seen the url before and don't need to scan again
log::trace!("exit: request_link -> None");
bail!("previously seen url");
@@ -311,7 +298,7 @@ impl<'a> Extractor<'a> {
// make the request and store the response
let new_response =
make_request(&self.config.client, &new_url, self.tx_stats.clone()).await?;
make_request(&self.config.client, &new_url, self.handles.stats.tx.clone()).await?;
let new_ferox_response = FeroxResponse::from(new_response, true).await;
@@ -345,7 +332,7 @@ impl<'a> Extractor<'a> {
}
}
self.update_stats(links.len());
self.update_stats(links.len())?;
log::trace!("exit: extract_robots_txt -> {:?}", links);
Ok(links)
@@ -385,7 +372,7 @@ impl<'a> Extractor<'a> {
let mut url = Url::parse(&self.url)?;
url.set_path("/robots.txt"); // overwrite existing path with /robots.txt
let response = make_request(&client, &url, self.tx_stats.clone()).await?;
let response = make_request(&client, &url, self.handles.stats.tx.clone()).await?;
let ferox_response = FeroxResponse::from(response, true).await;
log::trace!("exit: get_robots_file -> {}", ferox_response);
@@ -393,13 +380,16 @@ impl<'a> Extractor<'a> {
}
/// update total number of links extracted and expected responses
fn update_stats(&self, num_links: usize) {
fn update_stats(&self, num_links: usize) -> Result<()> {
let multiplier = self.config.extensions.len().max(1);
send_command!(self.tx_stats, UpdateUsizeField(LinksExtracted, num_links));
send_command!(
self.tx_stats,
UpdateUsizeField(TotalExpected, num_links * multiplier)
);
self.handles
.stats
.send(UpdateUsizeField(LinksExtracted, num_links))?;
self.handles
.stats
.send(UpdateUsizeField(TotalExpected, num_links * multiplier))?;
Ok(())
}
}

View File

@@ -8,6 +8,6 @@ pub use self::builder::ExtractionTarget;
pub use self::builder::ExtractorBuilder;
pub use self::container::Extractor;
use crate::{config::Configuration, scan_manager::FeroxScans, statistics::Stats, FeroxResponse};
use crate::{config::Configuration, FeroxResponse};
use regex::Regex;
use std::sync::Arc;

View File

@@ -8,7 +8,7 @@ use super::*;
///
/// `size` is size of the response that should be included with filters passed via runtime
/// configuration and any static wildcard lengths.
#[derive(Debug, Default, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq)]
pub struct WildcardFilter {
/// size of the response that will later be combined with the length of the path of the url
/// requested
@@ -18,6 +18,17 @@ pub struct WildcardFilter {
pub size: u64,
}
/// `Default` for `WildcardFilter`: both fields start out as `u64::MAX`
impl Default for WildcardFilter {
    /// build a `WildcardFilter` whose `size` and `dynamic` are each `u64::MAX`
    fn default() -> Self {
        let sentinel = u64::MAX;

        Self {
            dynamic: sentinel,
            size: sentinel,
        }
    }
}
/// implementation of FeroxFilter for WildcardFilter
impl FeroxFilter for WildcardFilter {
/// Examine size, dynamic, and content_len to determine whether or not the response received
@@ -33,7 +44,7 @@ impl FeroxFilter for WildcardFilter {
return false;
}
if self.size > 0 && self.size == response.content_length() {
if self.size != u64::MAX && self.size == response.content_length() {
// static wildcard size found during testing
// size isn't default, size equals response length, and auto-filter is on
log::debug!("static wildcard: filtered out {}", response.url());
@@ -41,7 +52,7 @@ impl FeroxFilter for WildcardFilter {
return true;
}
if self.dynamic > 0 {
if self.dynamic != u64::MAX {
// dynamic wildcard offset found during testing
// I'm about to manually split this url path instead of using reqwest::Url's

View File

@@ -1,12 +1,14 @@
use crate::{
config::{CONFIGURATION, PROGRESS_PRINTER},
event_handlers::Command,
event_handlers::{Command, Handles},
filters::WildcardFilter,
utils::{ferox_print, format_url, get_url_path_length, make_request, status_colorizer},
CommandSender, FeroxResponse,
FeroxResponse,
};
use anyhow::Result;
use console::style;
use indicatif::ProgressBar;
use std::sync::Arc;
use tokio::sync::mpsc::UnboundedSender;
use uuid::Uuid;
@@ -39,31 +41,22 @@ fn unique_string(length: usize) -> String {
pub async fn wildcard_test(
target_url: &str,
bar: ProgressBar,
tx_term: CommandSender,
tx_stats: CommandSender,
) -> Option<WildcardFilter> {
handles: Arc<Handles>,
) -> Result<()> {
log::trace!(
"enter: wildcard_test({:?}, {:?}, {:?}, {:?})",
"enter: wildcard_test({:?}, {:?}, {:?})",
target_url,
bar,
tx_term,
tx_stats
handles,
);
if CONFIGURATION.dont_filter {
// early return, dont_filter scans don't need tested
log::trace!("exit: wildcard_test -> None");
return None;
return Ok(());
}
let tx_term_mwcr1 = tx_term.clone();
let tx_term_mwcr2 = tx_term.clone();
let tx_stats_mwcr1 = tx_stats.clone();
let tx_stats_mwcr2 = tx_stats.clone();
if let Some(ferox_response) =
make_wildcard_request(&target_url, 1, tx_term_mwcr1, tx_stats_mwcr1).await
{
if let Some(ferox_response) = make_wildcard_request(&target_url, 1, handles.clone()).await {
bar.inc(1);
// found a wildcard response
@@ -73,14 +66,15 @@ pub async fn wildcard_test(
if wc_length == 0 {
log::trace!("exit: wildcard_test -> Some({:?})", wildcard);
return Some(wildcard);
handles
.filters
.send(Command::AddFilter(Box::new(wildcard)))?;
return Ok(());
}
// content length of wildcard is non-zero, perform additional tests:
// make a second request, with a known-sized (64) longer request
if let Some(resp_two) =
make_wildcard_request(&target_url, 3, tx_term_mwcr2, tx_stats_mwcr2).await
{
if let Some(resp_two) = make_wildcard_request(&target_url, 3, handles.clone()).await {
bar.inc(1);
let wc2_length = resp_two.content_length();
@@ -129,11 +123,14 @@ pub async fn wildcard_test(
}
log::trace!("exit: wildcard_test -> Some({:?})", wildcard);
return Some(wildcard);
handles
.filters
.send(Command::AddFilter(Box::new(wildcard)))?;
return Ok(());
}
log::trace!("exit: wildcard_test -> None");
None
Ok(())
}
/// Generates a uuid and appends it to the given target url. The reasoning is that the randomly
@@ -145,15 +142,13 @@ pub async fn wildcard_test(
async fn make_wildcard_request(
target_url: &str,
length: usize,
tx_term: CommandSender,
tx_stats: CommandSender,
handles: Arc<Handles>,
) -> Option<FeroxResponse> {
log::trace!(
"enter: make_wildcard_request({}, {}, {:?}, {:?})",
"enter: make_wildcard_request({}, {}, {:?})",
target_url,
length,
tx_term,
tx_stats,
handles
);
let unique_str = unique_string(length);
@@ -164,7 +159,7 @@ async fn make_wildcard_request(
CONFIGURATION.add_slash,
&CONFIGURATION.queries,
None,
tx_stats.clone(),
handles.stats.tx.clone(),
) {
Ok(url) => url,
Err(e) => {
@@ -177,7 +172,7 @@ async fn make_wildcard_request(
match make_request(
&CONFIGURATION.client,
&nonexistent.to_owned(),
tx_stats.clone(),
handles.stats.tx.clone(),
)
.await
{
@@ -191,8 +186,12 @@ async fn make_wildcard_request(
ferox_response.wildcard = true;
if !CONFIGURATION.quiet
// && !should_filter_response(&ferox_response, tx_stats.clone()) // todo this needs to be reimplemented
&& tx_term
&& !handles
.filters
.data
.should_filter_response(&ferox_response, handles.stats.tx.clone())
&& handles
.output
.send(Command::Report(Box::new(ferox_response.clone())))
.is_err()
{

View File

@@ -130,13 +130,12 @@ async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> {
handles.stats.sync().await?;
if CONFIGURATION.resumed {
let from_here = CONFIGURATION.resume_from.clone();
handles.stats.send(LoadStats(from_here))?;
// display what has already been completed
scanned_urls.print_known_responses();
scanned_urls.print_completed_bars(words.len())?;
}
log::debug!("sending {:?} to be scanned as initial targets", targets);
handles.send_scan_command(ScanInitialUrls(targets))?;
log::trace!("exit: scan");
@@ -146,7 +145,7 @@ async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> {
/// Get targets from either commandline or stdin, pass them back to the caller as a Result<Vec>
async fn get_targets(handles: Arc<Handles>) -> Result<Vec<String>> {
log::trace!("enter: get_targets");
log::trace!("enter: get_targets({:?})", handles);
let mut targets = vec![];
@@ -166,7 +165,7 @@ async fn get_targets(handles: Arc<Handles>) -> Result<Vec<String>> {
if let Ok(scans) = ferox_scans.scans.read() {
for scan in scans.iter() {
// SCANNED_URLS gets deserialized scans added to it at program start if --resume-from
// ferox_scans gets deserialized scans added to it at program start if --resume-from
// is used, so scans that aren't marked complete still need to be scanned
if scan.is_complete() {
// this one's already done, ignore it
@@ -210,7 +209,7 @@ async fn wrapped_main() -> Result<()> {
// bundle up all the disparate handles and JoinHandles (tasks)
let handles = Arc::new(Handles::new(stats_handle, filters_handle, out_handle));
let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone());
let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone(), CONFIGURATION.depth);
handles.scan_handle(scan_handle); // set's the ScanHandle after Handles initialization
@@ -220,11 +219,9 @@ async fn wrapped_main() -> Result<()> {
if !CONFIGURATION.time_limit.is_empty() {
// --time-limit value not an empty string, need to kick off the thread that enforces
// the limit
let max_time_stats = handles.stats.data.clone();
let time_handles = handles.clone();
tokio::spawn(async move {
scan_manager::start_max_time_thread(&CONFIGURATION.time_limit, max_time_stats).await
scan_manager::start_max_time_thread(&CONFIGURATION.time_limit, time_handles).await
});
}
@@ -238,11 +235,21 @@ async fn wrapped_main() -> Result<()> {
if CONFIGURATION.save_state {
// start the ctrl+c handler
scan_manager::initialize(handles.stats.data.clone());
scan_manager::initialize(handles.clone());
}
if CONFIGURATION.resumed {
let scanned_urls = handles.ferox_scans()?;
let from_here = CONFIGURATION.resume_from.clone();
// populate FeroxScans object with previously seen scans
scanned_urls.add_serialized_scans(&from_here)?;
// populate Stats object with previously known statistics
handles.stats.send(LoadStats(from_here))?;
}
// get targets from command line or stdin
// todo a bunch of fucking functions needs SCANNED_URLS replaced
let targets = match get_targets(handles.clone()).await {
Ok(t) => t,
Err(e) => {
@@ -259,9 +266,9 @@ async fn wrapped_main() -> Result<()> {
let mut banner = Banner::new(&targets, &CONFIGURATION);
// only interested in the side-effect that sets banner.update_status
let _ = banner
banner
.check_for_updates(&CONFIGURATION.client, UPDATE_URL, handles.stats.tx.clone())
.await;
.await?;
if banner.print_to(std_stderr, &CONFIGURATION).is_err() {
clean_up(handles, tasks).await?;
@@ -288,7 +295,6 @@ async fn wrapped_main() -> Result<()> {
}
// kick off a scan against any targets determined to be responsive
match scan(live_targets, handles.clone()).await {
Ok(_) => {}
Err(e) => {
@@ -297,11 +303,6 @@ async fn wrapped_main() -> Result<()> {
}
}
// todo known things not working: overall bar lags behind other bars (seems ok, keep an eye on it during this branch)
// todo known things not working: confirm multi target from stdin works
// todo known things not working: confirm same # of requests seen in burp as reported
// todo known things not working: scan cancel menu is hard fkn broke
clean_up(handles, tasks).await?;
log::trace!("exit: wrapped_main");

View File

@@ -26,13 +26,14 @@ use tokio::{
};
use uuid::Uuid;
use crate::event_handlers::Handles;
use crate::utils::fmt_err;
use crate::utils::write_to;
use crate::{
config::{Configuration, CONFIGURATION, PROGRESS_BAR, PROGRESS_PRINTER},
parser::TIMESPEC_REGEX,
progress::{add_bar, BarType},
scanner::{RESPONSES, SCANNED_URLS},
scanner::RESPONSES,
statistics::Stats,
utils::open_file,
FeroxResponse, FeroxSerialize, SLEEP_DURATION,
@@ -90,6 +91,9 @@ pub struct FeroxScan {
/// The type of scan
pub scan_type: ScanType,
/// The order in which the scan was received
pub scan_order: ScanOrder,
/// Number of requests to populate the progress bar with
pub num_requests: u64,
@@ -114,6 +118,7 @@ impl Default for FeroxScan {
task: sync::Mutex::new(None), // tokio mutex
status: Mutex::new(ScanStatus::default()),
num_requests: 0,
scan_order: ScanOrder::Latest,
url: String::new(),
progress_bar: Mutex::new(None),
scan_type: ScanType::File,
@@ -190,12 +195,14 @@ impl FeroxScan {
pub fn new(
url: &str,
scan_type: ScanType,
scan_order: ScanOrder,
num_requests: u64,
pb: Option<ProgressBar>,
) -> Arc<Self> {
Arc::new(Self {
url: url.to_string(),
scan_type,
scan_order,
num_requests,
progress_bar: Mutex::new(pb),
..Default::default()
@@ -232,7 +239,7 @@ impl FeroxScan {
/// await a task's completion, similar to a thread's join; perform necessary bookkeeping
pub async fn join(&self) {
log::trace!("enter join({:?})", self);
log::debug!("enter join({:?})", self);
let mut guard = self.task.lock().await;
if guard.is_some() {
@@ -243,7 +250,7 @@ impl FeroxScan {
}
}
log::trace!("exit join({:?})", self);
// log::trace!("exit join({:?})", self);
}
}
@@ -511,6 +518,10 @@ pub struct FeroxScans {
/// menu used for providing a way for users to cancel a scan
menu: Menu,
/// number of requests expected per scan (mirrors the same on Stats); used for initializing
/// progress bars and feroxscans
bar_length: Mutex<u64>,
}
/// Serialize implementation for FeroxScans
@@ -565,6 +576,31 @@ impl FeroxScans {
sentry
}
/// Read a saved state file and insert every serialized `FeroxScan` it holds into this
/// `FeroxScans`
///
/// `filename` is opened and parsed as JSON; each element of the top-level `scans` array is
/// deserialized into a `FeroxScan` and inserted. An element that fails to deserialize is
/// replaced by `FeroxScan::default()`. A missing or non-array `scans` key inserts nothing.
///
/// Returns an error if the file can't be opened or its contents can't be parsed as JSON.
pub fn add_serialized_scans(&self, filename: &str) -> Result<()> {
    log::trace!("enter: add_serialized_scans({})", filename);

    let reader = BufReader::new(File::open(filename)?);
    let state: serde_json::Value = serde_json::from_reader(reader)?;

    // an absent or non-array "scans" key yields an empty iterator
    let serialized = state
        .get("scans")
        .and_then(|scans| scans.as_array())
        .into_iter()
        .flatten();

    for entry in serialized {
        // need to determine if it's complete and based on that create a progress bar
        // populate it accordingly based on completion
        let restored: FeroxScan = serde_json::from_value(entry.clone()).unwrap_or_default();

        log::debug!("added: {}", restored);
        self.insert(Arc::new(restored));
    }

    log::trace!("exit: add_serialized_scans");
    Ok(())
}
/// Simple check for whether or not a FeroxScan is contained within the inner container based
/// on the given URL
pub fn contains(&self, url: &str) -> bool {
@@ -613,13 +649,10 @@ impl FeroxScans {
};
for (i, scan) in scans.iter().enumerate() {
if scan.task.lock().await.is_none() {
// no JoinHandle associated with this FeroxScan, meaning it was an original
// target passed in via either -u or --stdin
// todo check this assumption, as we swap out the task with None once joined
if matches!(scan.scan_order, ScanOrder::Initial) || scan.task.try_lock().is_err() {
// original target passed in via either -u or --stdin
continue;
}
self.menu.println(&format!("fdaf {}", scan));
if matches!(scan.scan_type, ScanType::Directory) {
// we're only interested in displaying directory scans, as those are
@@ -749,6 +782,13 @@ impl FeroxScans {
}
}
/// Store `bar_length` as the bar length of this `FeroxScans`
///
/// If the lock guarding the stored value can't be acquired, the value is left untouched.
pub fn set_bar_length(&self, bar_length: u64) {
    match self.bar_length.lock() {
        Ok(mut current) => *current = bar_length,
        Err(_) => {} // lock unavailable; keep the existing value
    }
}
/// Given a url, create a new `FeroxScan` and add it to `FeroxScans`
///
/// If `FeroxScans` did not already contain the scan, return true; otherwise return false
@@ -758,14 +798,17 @@ impl FeroxScans {
&self,
url: &str,
scan_type: ScanType,
stats: Arc<Stats>,
scan_order: ScanOrder,
) -> (bool, Arc<FeroxScan>) {
// todo eventually this should live on the struct and remove need ofr stats being passed in
let num_requests = stats.expected_per_scan() as u64;
let bar_length = if let Ok(guard) = self.bar_length.lock() {
*guard
} else {
0
};
let bar = match scan_type {
ScanType::Directory => {
let progress_bar = add_bar(&url, num_requests, BarType::Default);
let progress_bar = add_bar(&url, bar_length, BarType::Default);
progress_bar.reset_elapsed();
@@ -774,7 +817,7 @@ impl FeroxScans {
ScanType::File => None,
};
let ferox_scan = FeroxScan::new(&url, scan_type, num_requests, bar);
let ferox_scan = FeroxScan::new(&url, scan_type, scan_order, bar_length, bar);
// If the set did not contain the scan, true is returned.
// If the set did contain the scan, false is returned.
@@ -788,8 +831,8 @@ impl FeroxScans {
/// If `FeroxScans` did not already contain the scan, return true; otherwise return false
///
/// Also return a reference to the new `FeroxScan`
pub fn add_directory_scan(&self, url: &str, stats: Arc<Stats>) -> (bool, Arc<FeroxScan>) {
self.add_scan(&url, ScanType::Directory, stats)
pub fn add_directory_scan(&self, url: &str, scan_order: ScanOrder) -> (bool, Arc<FeroxScan>) {
self.add_scan(&url, ScanType::Directory, scan_order)
}
/// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a File Scan
@@ -797,8 +840,8 @@ impl FeroxScans {
/// If `FeroxScans` did not already contain the scan, return true; otherwise return false
///
/// Also return a reference to the new `FeroxScan`
pub fn add_file_scan(&self, url: &str, stats: Arc<Stats>) -> (bool, Arc<FeroxScan>) {
self.add_scan(&url, ScanType::File, stats)
pub fn add_file_scan(&self, url: &str, scan_order: ScanOrder) -> (bool, Arc<FeroxScan>) {
self.add_scan(&url, ScanType::File, scan_order)
}
pub fn has_active_scans(&self) -> bool {
@@ -893,7 +936,7 @@ impl FeroxResponses {
#[derive(Serialize, Debug)]
pub struct FeroxState {
/// Known scans
scans: &'static FeroxScans,
scans: Arc<FeroxScans>,
/// Current running config
config: &'static Configuration,
@@ -923,7 +966,7 @@ impl FeroxSerialize for FeroxState {
/// that representation to seconds and then wait for those seconds to elapse. Once that period
/// of time has elapsed, kill all currently running scans and dump a state file to disk that can
/// be used to resume any unfinished scan.
pub async fn start_max_time_thread(time_spec: &str, stats: Arc<Stats>) {
pub async fn start_max_time_thread(time_spec: &str, handles: Arc<Handles>) {
log::trace!("enter: start_max_time_thread({})", time_spec);
// as this function has already made it through the parser, which calls is_match on
@@ -955,7 +998,7 @@ pub async fn start_max_time_thread(time_spec: &str, stats: Arc<Stats>) {
#[cfg(test)]
panic!(stats);
#[cfg(not(test))]
let _ = sigint_handler(stats);
let _ = sigint_handler(handles);
}
log::error!(
@@ -965,8 +1008,8 @@ pub async fn start_max_time_thread(time_spec: &str, stats: Arc<Stats>) {
}
/// Writes the current state of the program to disk (if save_state is true) and then exits
fn sigint_handler(stats: Arc<Stats>) -> Result<()> {
log::trace!("enter: sigint_handler({:?})", stats);
fn sigint_handler(handles: Arc<Handles>) -> Result<()> {
log::trace!("enter: sigint_handler({:?})", handles);
let ts = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs();
@@ -993,9 +1036,9 @@ fn sigint_handler(stats: Arc<Stats>) -> Result<()> {
let state = FeroxState {
config: &CONFIGURATION,
scans: &SCANNED_URLS,
scans: handles.ferox_scans()?,
responses: &RESPONSES,
statistics: stats,
statistics: handles.stats.data.clone(),
};
let state_file = open_file(&filename);
@@ -1008,11 +1051,11 @@ fn sigint_handler(stats: Arc<Stats>) -> Result<()> {
}
/// Initialize the ctrl+c handler that saves scan state to disk
pub fn initialize(stats: Arc<Stats>) {
log::trace!("enter: initialize({:?})", stats);
pub fn initialize(handles: Arc<Handles>) {
log::trace!("enter: initialize({:?})", handles);
let result = ctrlc::set_handler(move || {
let _ = sigint_handler(stats.clone());
let _ = sigint_handler(handles.clone());
});
if result.is_err() {
@@ -1058,18 +1101,6 @@ pub fn resume_scan(filename: &str) -> Configuration {
}
}
if let Some(scans) = state.get("scans") {
if let Some(arr_scans) = scans.as_array() {
for scan in arr_scans {
let deser_scan: FeroxScan =
serde_json::from_value(scan.clone()).unwrap_or_default();
// need to determine if it's complete and based on that create a progress bar
// populate it accordingly based on completion
SCANNED_URLS.insert(Arc::new(deser_scan));
}
}
}
log::trace!("exit: resume_scan -> {:?}", config);
config
}

View File

@@ -6,14 +6,12 @@ use crate::{
},
extractor::ExtractorBuilder,
filters::{
LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter, WildcardFilter,
WordsFilter,
LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter, WordsFilter,
},
heuristics,
scan_manager::{FeroxResponses, FeroxScans, ScanOrder, ScanStatus, PAUSE_SCAN},
scan_manager::{FeroxResponses, ScanOrder, ScanStatus, PAUSE_SCAN},
statistics::StatField::{DirScanTimes, ExpectedPerScan},
traits::FeroxFilter,
utils::{fmt_err, format_url, get_url_depth, make_request},
utils::{fmt_err, format_url, make_request},
CommandSender, FeroxResponse, SIMILARITY_THRESHOLD,
};
use anyhow::{bail, Result};
@@ -25,22 +23,12 @@ use reqwest::Url;
#[cfg(not(test))]
use std::process::exit;
use std::{
collections::HashSet,
convert::TryInto,
ops::Deref,
sync::atomic::Ordering,
sync::{Arc, RwLock},
collections::HashSet, convert::TryInto, ops::Deref, sync::atomic::Ordering, sync::Arc,
time::Instant,
};
use tokio::sync::{mpsc::UnboundedSender, Semaphore};
lazy_static! {
/// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication
pub static ref SCANNED_URLS: FeroxScans = FeroxScans::default();
/// Vector of implementors of the FeroxFilter trait
static ref FILTERS: Arc<RwLock<Vec<Box<dyn FeroxFilter>>>> = Arc::new(RwLock::new(Vec::<Box<dyn FeroxFilter>>::new()));
/// Vector of FeroxResponse objects
pub static ref RESPONSES: FeroxResponses = FeroxResponses::default();
@@ -105,12 +93,11 @@ fn create_urls(
/// Makes multiple requests based on the presence of extensions
///
/// Attempts recursion when appropriate and sends Responses to the output handler for processing
async fn make_requests(target_url: &str, word: &str, base_depth: usize, handles: Arc<Handles>) {
async fn make_requests(target_url: &str, word: &str, handles: Arc<Handles>) -> Result<()> {
log::trace!(
"enter: make_requests({}, {}, {}, {:?})",
"enter: make_requests({}, {}, {:?})",
target_url,
word,
base_depth,
handles
);
@@ -121,56 +108,42 @@ async fn make_requests(target_url: &str, word: &str, base_depth: usize, handles:
handles.stats.tx.clone(),
);
let scanned_urls = handles.ferox_scans().expect("Could not get FeroxScans");
// todo abstract away, and by that i mean that extractor and try_recursion should either take
// Handles or be put into a struct somewhere
let tx_scans = handles.scans.read().unwrap().as_ref().unwrap().tx.clone();
for url in urls {
if let Ok(response) =
make_request(&CONFIGURATION.client, &url, handles.stats.tx.clone()).await
{
// response came back without error, convert it to FeroxResponse
let ferox_response = FeroxResponse::from(response, true).await;
let response = make_request(&CONFIGURATION.client, &url, handles.stats.tx.clone()).await?;
// do recursion if appropriate
if !CONFIGURATION.no_recursion {
tx_scans
.send(Command::TryRecursion(ferox_response.clone()))
.unwrap_or_else(|e| log::warn!("Could not send {} for recursion: {}", url, e));
}
// response came back without error, convert it to FeroxResponse
let ferox_response = FeroxResponse::from(response, true).await;
// purposefully doing recursion before filtering. the thought process is that
// even though this particular url is filtered, subsequent urls may not
if handles
.filters
.data
.should_filter_response(&ferox_response, handles.stats.tx.clone())
{
continue;
}
if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() {
// todo extractor should probably just take Handles
let extractor = ExtractorBuilder::with_response(&ferox_response)
.depth(base_depth)
.config(&CONFIGURATION)
.recursion_transmitter(tx_scans.clone())
.stats_transmitter(handles.stats.tx.clone())
.reporter_transmitter(handles.output.tx.clone())
.scanned_urls(scanned_urls.clone())
.stats(handles.stats.data.clone())
.build()
.unwrap(); // todo change once this function returns Result
let _ = extractor.extract().await;
}
// everything else should be reported
send_report(handles.output.tx.clone(), ferox_response);
// do recursion if appropriate
if !CONFIGURATION.no_recursion {
handles.send_scan_command(Command::TryRecursion(Box::new(ferox_response.clone())))?;
}
// purposefully doing recursion before filtering. the thought process is that
// even though this particular url is filtered, subsequent urls may not
if handles
.filters
.data
.should_filter_response(&ferox_response, handles.stats.tx.clone())
{
continue;
}
if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() {
let extractor = ExtractorBuilder::with_response(&ferox_response)
.config(&CONFIGURATION)
.handles(handles.clone())
.build()?;
extractor.extract().await?;
}
// everything else should be reported
send_report(handles.output.tx.clone(), ferox_response);
}
log::trace!("exit: make_requests");
Ok(())
}
/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel`
@@ -204,8 +177,6 @@ pub async fn scan_url(
handles
);
let depth = get_url_depth(&target_url); // todo
log::info!("Starting scan against: {}", target_url);
let scan_timer = Instant::now();
@@ -213,24 +184,17 @@ pub async fn scan_url(
if matches!(order, ScanOrder::Initial) && CONFIGURATION.extract_links {
// only grab robots.txt on the initial scan_url calls. all fresh dirs will be passed
// to try_recursion
// todo Extractor should just take Handles
let extractor = ExtractorBuilder::with_url(target_url)
.depth(depth)
.config(&CONFIGURATION)
// todo abstract the call here, or just leave it til i put handles in extractor instead
.recursion_transmitter(handles.scans.read().unwrap().as_ref().unwrap().tx.clone())
.stats_transmitter(handles.stats.tx.clone())
.reporter_transmitter(handles.output.tx.clone())
.scanned_urls(handles.ferox_scans()?)
.stats(handles.stats.data.clone())
.handles(handles.clone())
.build()?;
let _ = extractor.extract().await;
}
let ferox_scans = handles.ferox_scans()?;
let scanned_urls = handles.ferox_scans()?;
let ferox_scan = match ferox_scans.get_scan_by_url(&target_url) {
let ferox_scan = match scanned_urls.get_scan_by_url(&target_url) {
Some(scan) => {
scan.set_status(ScanStatus::Running)?;
scan
@@ -251,33 +215,12 @@ pub async fn scan_url(
// waits until an outstanding permit is dropped. At this point, the freed permit is assigned
// to the caller.
let permit = SCAN_LIMITER.acquire().await;
// todo can be moved to scan handler, just acquire before calling scan
// Arc clones to be passed around to the various scans
let wildcard_bar = progress_bar.clone();
let looping_words = wordlist.clone();
// add any wildcard filters to `FILTERS`
// todo if you want to remove the 0-based skipping of wildcards, this needs addressed
// todo wildcard_test should take handles probably? idk, could see tradeoff between memsize
// of two clones vs the handles clone
// todo should take handles
let filter = match heuristics::wildcard_test(
&target_url,
wildcard_bar,
handles.output.tx.clone(),
handles.stats.tx.clone(),
)
.await
{
Some(f) => Box::new(f),
None => Box::new(WildcardFilter::default()),
};
handles.filters.send(AddFilter(filter))?;
let scanned_urls = handles.ferox_scans()?;
heuristics::wildcard_test(&target_url, wildcard_bar, handles.clone()).await?;
// producer tasks (mp of mpsc); responsible for making requests
let producers = stream::iter(looping_words.deref().to_owned())
@@ -294,7 +237,7 @@ pub async fn scan_url(
// to false
scanned_urls_clone.pause(true).await;
}
make_requests(&tgt, &word, depth, handles_clone).await
make_requests(&tgt, &word, handles_clone).await
}),
pb,
)
@@ -352,13 +295,19 @@ pub async fn initialize(
total.try_into()?
};
{
// no real reason to keep the arc around beyond this call
let scans = handles.ferox_scans()?;
scans.set_bar_length(num_reqs_expected);
}
// tell Stats object about the number of expected requests
handles.stats.send(UpdateUsizeField(
ExpectedPerScan,
num_reqs_expected as usize,
))?;
// add any status code filters to `FILTERS` (-C|--filter-status)
// add any status code filters to filters handler's FeroxFilters (-C|--filter-status)
for code_filter in &config.filter_status {
let filter = StatusCodeFilter {
filter_code: *code_filter,
@@ -367,7 +316,7 @@ pub async fn initialize(
handles.filters.send(AddFilter(boxed_filter))?;
}
// add any line count filters to `FILTERS` (-N|--filter-lines)
// add any line count filters to filters handler's FeroxFilters (-N|--filter-lines)
for lines_filter in &config.filter_line_count {
let filter = LinesFilter {
line_count: *lines_filter,
@@ -376,7 +325,7 @@ pub async fn initialize(
handles.filters.send(AddFilter(boxed_filter))?;
}
// add any line count filters to `FILTERS` (-W|--filter-words)
// add any word count filters to filters handler's FeroxFilters (-W|--filter-words)
for words_filter in &config.filter_word_count {
let filter = WordsFilter {
word_count: *words_filter,
@@ -385,7 +334,7 @@ pub async fn initialize(
handles.filters.send(AddFilter(boxed_filter))?;
}
// add any line count filters to `FILTERS` (-S|--filter-size)
// add any size filters to filters handler's FeroxFilters (-S|--filter-size)
for size_filter in &config.filter_size {
let filter = SizeFilter {
content_length: *size_filter,
@@ -394,7 +343,7 @@ pub async fn initialize(
handles.filters.send(AddFilter(boxed_filter))?;
}
// add any regex filters to `FILTERS` (-X|--filter-regex)
// add any regex filters to filters handler's FeroxFilters (-X|--filter-regex)
for regex_filter in &config.filter_regex {
let raw = regex_filter;
let compiled = match Regex::new(&raw) {
@@ -416,7 +365,7 @@ pub async fn initialize(
handles.filters.send(AddFilter(boxed_filter))?;
}
// add any similarity filters to `FILTERS` (--filter-similar-to)
// add any similarity filters to filters handler's FeroxFilters (--filter-similar-to)
for similarity_filter in &config.filter_similar {
// url as-is based on input, ignores user-specified url manipulation options (add-slash etc)
if let Ok(url) = format_url(

View File

@@ -49,7 +49,6 @@ pub fn open_file(filename: &str) -> Result<BufWriter<fs::File>> {
///
/// returns 0 on error and relative urls
pub fn get_url_depth(target: &str) -> usize {
// todo move to scanner struct (i.e. once scanner or scan_manager is rewritten)
log::trace!("enter: get_url_depth({})", target);
let target = normalize_url(target);