even more cleanup; extraction looking mostly complete

This commit is contained in:
epi
2020-10-19 19:47:03 -05:00
parent 6fa542ecc5
commit 74aa5e8047
2 changed files with 157 additions and 38 deletions

View File

@@ -120,15 +120,38 @@ impl FeroxResponse {
};
}
/// Make a reasonable guess at whether the response is a file or not
///
/// Examines the last part of a path to determine if it has an obvious extension
/// i.e. http://localhost/some/path/stuff.js where stuff.js indicates a file
///
/// Additionally, inspects query parameters, as they're also often indicative of a file
pub fn is_file(&self) -> bool {
let has_extension = match self.url.path_segments() {
Some(path) => {
if let Some(last) = path.last() {
last.contains('.') // last segment has some sort of extension, probably
} else {
false
}
}
None => false,
};
self.url.query_pairs().count() > 0 || has_extension
}
/// Create a new `FeroxResponse` from the given `Response`
pub async fn from(response: Response) -> Self {
let url = response.url().clone();
let status = response.status().clone();
let status = response.status();
let headers = response.headers().clone();
let content_length = response.content_length().unwrap_or(0);
let text = if CONFIGURATION.extract_links {
// .text() consumes the response, must be called last
// additionally, --extract-links is currently the only place we use the body of the
// response, so we forego the processing if not performing extraction
match response.text().await {
// await the response's body
Ok(text) => text,

View File

@@ -1,11 +1,12 @@
use crate::config::{CONFIGURATION, PROGRESS_BAR};
use crate::extractor::get_links;
use crate::heuristics::WildcardFilter;
use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request};
use crate::{heuristics, progress, FeroxChannel, FeroxResponse};
use futures::future::{BoxFuture, FutureExt};
use futures::{stream, StreamExt};
use lazy_static::lazy_static;
use reqwest::{Response, Url};
use reqwest::Url;
use std::collections::HashSet;
use std::convert::TryInto;
use std::ops::Deref;
@@ -160,7 +161,7 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec<Url>
///
/// handles 2xx and 3xx responses by either checking if the url ends with a / (2xx)
/// or if the Location header is present and matches the base url + / (3xx)
fn response_is_directory(response: &Response) -> bool {
fn response_is_directory(response: &FeroxResponse) -> bool {
log::trace!("enter: is_directory({:?})", response);
if response.status().is_redirection() {
@@ -240,7 +241,7 @@ fn reached_max_depth(url: &Url, base_depth: usize, max_depth: usize) -> bool {
///
/// When a recursion opportunity is found, the new url is sent across the recursion channel
async fn try_recursion(
response: &Response,
response: &FeroxResponse,
base_depth: usize,
transmitter: UnboundedSender<String>,
) {
@@ -290,6 +291,45 @@ async fn try_recursion(
log::trace!("exit: try_recursion");
}
/// Simple helper to stay DRY; determines whether or not a given `FeroxResponse` should be reported
/// to the user or not.
fn should_filter_response(
content_len: &u64,
filter: Arc<WildcardFilter>,
response: &FeroxResponse,
) -> bool {
if CONFIGURATION.sizefilters.contains(content_len) {
// filtered value from --sizefilters, move on to the next url
log::debug!("size filter: filtered out {}", response.url());
return true;
}
if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter {
// static wildcard size found during testing
// size isn't default, size equals response length, and auto-filter is on
log::debug!("static wildcard: filtered out {}", response.url());
return true;
}
if filter.dynamic > 0 && !CONFIGURATION.dontfilter {
// dynamic wildcard offset found during testing
// I'm about to manually split this url path instead of using reqwest::Url's
// builtin parsing. The reason is that they call .split() on the url path
// except that I don't want an empty string taking up the last index in the
// event that the url ends with a forward slash. It's ugly enough to be split
// into its own function for readability.
let url_len = get_url_path_length(&response.url());
if url_len + filter.dynamic == *content_len {
log::debug!("dynamic wildcard: filtered out {}", response.url());
return true;
}
}
false
}
/// Wrapper for [make_request](fn.make_request.html)
///
/// Handles making multiple requests based on the presence of extensions
@@ -316,63 +356,119 @@ async fn make_requests(
for url in urls {
if let Ok(response) = make_request(&CONFIGURATION.client, &url).await {
// response came back without error
// response came back without error, convert it to FeroxResponse
let ferox_response = FeroxResponse::from(response).await;
// do recursion if appropriate
if !CONFIGURATION.norecursion && response_is_directory(&response) {
try_recursion(&response, base_depth, dir_chan.clone()).await;
if !CONFIGURATION.norecursion {
try_recursion(&ferox_response, base_depth, dir_chan.clone()).await;
}
// purposefully doing recursion before filtering. the thought process is that
// even though this particular url is filtered, subsequent urls may not
let content_len = &response.content_length().unwrap_or(0);
let content_len = &ferox_response.content_length();
if CONFIGURATION.sizefilters.contains(content_len) {
// filtered value from --sizefilters, move on to the next url
log::debug!("size filter: filtered out {}", response.url());
if should_filter_response(content_len, filter.clone(), &ferox_response) {
continue;
}
if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter {
// static wildcard size found during testing
// size isn't default, size equals response length, and auto-filter is on
log::debug!("static wildcard: filtered out {}", response.url());
continue;
}
if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() {
let new_links = get_links(&ferox_response).await;
if filter.dynamic > 0 && !CONFIGURATION.dontfilter {
// dynamic wildcard offset found during testing
for new_link in new_links {
let unknown = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS);
// I'm about to manually split this url path instead of using reqwest::Url's
// builtin parsing. The reason is that they call .split() on the url path
// except that I don't want an empty string taking up the last index in the
// event that the url ends with a forward slash. It's ugly enough to be split
// into its own function for readability.
let url_len = get_url_path_length(&response.url());
if !unknown {
// not unknown, i.e. we've seen the url before and don't need to scan again
continue;
}
if url_len + filter.dynamic == *content_len {
log::debug!("dynamic wildcard: filtered out {}", response.url());
continue;
// create a url based on the given command line options, continue on error
let new_url = match format_url(
&new_link,
&"",
CONFIGURATION.addslash,
&CONFIGURATION.queries,
None,
) {
Ok(url) => url,
Err(_) => continue,
};
// make the request and store the response
let new_response = match make_request(&CONFIGURATION.client, &new_url).await {
Ok(resp) => resp,
Err(_) => continue,
};
let mut new_ferox_response = FeroxResponse::from(new_response).await;
// filter if necessary
if should_filter_response(
&new_ferox_response.content_length(),
filter.clone(),
&new_ferox_response,
) {
continue;
}
if new_ferox_response.is_file() {
// very likely a file, simply request and report
log::debug!(
"Singular extraction: {} ({})",
new_ferox_response.url(),
new_ferox_response.status().as_str(),
);
send_report(report_chan.clone(), new_ferox_response);
continue;
}
if !CONFIGURATION.norecursion {
log::debug!(
"Recursive extraction: {} ({})",
new_ferox_response.url(),
new_ferox_response.status().as_str()
);
if new_ferox_response.status().is_success()
&& !new_ferox_response.url().as_str().ends_with('/')
{
// since all of these are 2xx, recursion is only attempted if the
// url ends in a /. I am actually ok with adding the slash and not
// adding it, as both have merit. Leaving it in for now to see how
// things turn out (current as of: v1.1.0)
new_ferox_response.set_url(&format!("{}/", new_ferox_response.url()));
}
try_recursion(&new_ferox_response, base_depth, dir_chan.clone()).await;
}
}
}
let ferox_response = FeroxResponse::new(response).await;
// everything else should be reported
match report_chan.send(ferox_response) {
Ok(_) => {
log::debug!("sent {}/{} over reporting channel", &target_url, &word);
}
Err(e) => {
log::error!("wtf: {}", e);
}
}
send_report(report_chan.clone(), ferox_response);
}
}
log::trace!("exit: make_requests");
}
/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel`
fn send_report(report_sender: UnboundedSender<FeroxResponse>, response: FeroxResponse) {
log::trace!("enter: send_report({:?}, {:?}", report_sender, response);
match report_sender.send(response) {
Ok(_) => {}
Err(e) => {
log::error!("{}", e);
}
}
log::trace!("exit: send_report");
}
/// Scan a given url using a given wordlist
///
/// This is the primary entrypoint for the scanner