mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-06-06 00:41:13 -03:00
even more cleanup; extraction looking mostly complete
This commit is contained in:
25
src/lib.rs
25
src/lib.rs
@@ -120,15 +120,38 @@ impl FeroxResponse {
|
||||
};
|
||||
}
|
||||
|
||||
/// Make a reasonable guess at whether the response is a file or not
|
||||
///
|
||||
/// Examines the last part of a path to determine if it has an obvious extension
|
||||
/// i.e. http://localhost/some/path/stuff.js where stuff.js indicates a file
|
||||
///
|
||||
/// Additionally, inspects query parameters, as they're also often indicative of a file
|
||||
pub fn is_file(&self) -> bool {
|
||||
let has_extension = match self.url.path_segments() {
|
||||
Some(path) => {
|
||||
if let Some(last) = path.last() {
|
||||
last.contains('.') // last segment has some sort of extension, probably
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
None => false,
|
||||
};
|
||||
|
||||
self.url.query_pairs().count() > 0 || has_extension
|
||||
}
|
||||
|
||||
/// Create a new `FeroxResponse` from the given `Response`
|
||||
pub async fn from(response: Response) -> Self {
|
||||
let url = response.url().clone();
|
||||
let status = response.status().clone();
|
||||
let status = response.status();
|
||||
let headers = response.headers().clone();
|
||||
let content_length = response.content_length().unwrap_or(0);
|
||||
|
||||
let text = if CONFIGURATION.extract_links {
|
||||
// .text() consumes the response, must be called last
|
||||
// additionally, --extract-links is currently the only place we use the body of the
|
||||
// response, so we forego the processing if not performing extraction
|
||||
match response.text().await {
|
||||
// await the response's body
|
||||
Ok(text) => text,
|
||||
|
||||
170
src/scanner.rs
170
src/scanner.rs
@@ -1,11 +1,12 @@
|
||||
use crate::config::{CONFIGURATION, PROGRESS_BAR};
|
||||
use crate::extractor::get_links;
|
||||
use crate::heuristics::WildcardFilter;
|
||||
use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request};
|
||||
use crate::{heuristics, progress, FeroxChannel, FeroxResponse};
|
||||
use futures::future::{BoxFuture, FutureExt};
|
||||
use futures::{stream, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use reqwest::{Response, Url};
|
||||
use reqwest::Url;
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::ops::Deref;
|
||||
@@ -160,7 +161,7 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec<Url>
|
||||
///
|
||||
/// handles 2xx and 3xx responses by either checking if the url ends with a / (2xx)
|
||||
/// or if the Location header is present and matches the base url + / (3xx)
|
||||
fn response_is_directory(response: &Response) -> bool {
|
||||
fn response_is_directory(response: &FeroxResponse) -> bool {
|
||||
log::trace!("enter: is_directory({:?})", response);
|
||||
|
||||
if response.status().is_redirection() {
|
||||
@@ -240,7 +241,7 @@ fn reached_max_depth(url: &Url, base_depth: usize, max_depth: usize) -> bool {
|
||||
///
|
||||
/// When a recursion opportunity is found, the new url is sent across the recursion channel
|
||||
async fn try_recursion(
|
||||
response: &Response,
|
||||
response: &FeroxResponse,
|
||||
base_depth: usize,
|
||||
transmitter: UnboundedSender<String>,
|
||||
) {
|
||||
@@ -290,6 +291,45 @@ async fn try_recursion(
|
||||
log::trace!("exit: try_recursion");
|
||||
}
|
||||
|
||||
/// Simple helper to stay DRY; determines whether or not a given `FeroxResponse` should be reported
|
||||
/// to the user or not.
|
||||
fn should_filter_response(
|
||||
content_len: &u64,
|
||||
filter: Arc<WildcardFilter>,
|
||||
response: &FeroxResponse,
|
||||
) -> bool {
|
||||
if CONFIGURATION.sizefilters.contains(content_len) {
|
||||
// filtered value from --sizefilters, move on to the next url
|
||||
log::debug!("size filter: filtered out {}", response.url());
|
||||
return true;
|
||||
}
|
||||
|
||||
if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter {
|
||||
// static wildcard size found during testing
|
||||
// size isn't default, size equals response length, and auto-filter is on
|
||||
log::debug!("static wildcard: filtered out {}", response.url());
|
||||
return true;
|
||||
}
|
||||
|
||||
if filter.dynamic > 0 && !CONFIGURATION.dontfilter {
|
||||
// dynamic wildcard offset found during testing
|
||||
|
||||
// I'm about to manually split this url path instead of using reqwest::Url's
|
||||
// builtin parsing. The reason is that they call .split() on the url path
|
||||
// except that I don't want an empty string taking up the last index in the
|
||||
// event that the url ends with a forward slash. It's ugly enough to be split
|
||||
// into its own function for readability.
|
||||
let url_len = get_url_path_length(&response.url());
|
||||
|
||||
if url_len + filter.dynamic == *content_len {
|
||||
log::debug!("dynamic wildcard: filtered out {}", response.url());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Wrapper for [make_request](fn.make_request.html)
|
||||
///
|
||||
/// Handles making multiple requests based on the presence of extensions
|
||||
@@ -316,63 +356,119 @@ async fn make_requests(
|
||||
|
||||
for url in urls {
|
||||
if let Ok(response) = make_request(&CONFIGURATION.client, &url).await {
|
||||
// response came back without error
|
||||
// response came back without error, convert it to FeroxResponse
|
||||
let ferox_response = FeroxResponse::from(response).await;
|
||||
|
||||
// do recursion if appropriate
|
||||
if !CONFIGURATION.norecursion && response_is_directory(&response) {
|
||||
try_recursion(&response, base_depth, dir_chan.clone()).await;
|
||||
if !CONFIGURATION.norecursion {
|
||||
try_recursion(&ferox_response, base_depth, dir_chan.clone()).await;
|
||||
}
|
||||
|
||||
// purposefully doing recursion before filtering. the thought process is that
|
||||
// even though this particular url is filtered, subsequent urls may not
|
||||
|
||||
let content_len = &response.content_length().unwrap_or(0);
|
||||
let content_len = &ferox_response.content_length();
|
||||
|
||||
if CONFIGURATION.sizefilters.contains(content_len) {
|
||||
// filtered value from --sizefilters, move on to the next url
|
||||
log::debug!("size filter: filtered out {}", response.url());
|
||||
if should_filter_response(content_len, filter.clone(), &ferox_response) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter {
|
||||
// static wildcard size found during testing
|
||||
// size isn't default, size equals response length, and auto-filter is on
|
||||
log::debug!("static wildcard: filtered out {}", response.url());
|
||||
continue;
|
||||
}
|
||||
if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() {
|
||||
let new_links = get_links(&ferox_response).await;
|
||||
|
||||
if filter.dynamic > 0 && !CONFIGURATION.dontfilter {
|
||||
// dynamic wildcard offset found during testing
|
||||
for new_link in new_links {
|
||||
let unknown = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS);
|
||||
|
||||
// I'm about to manually split this url path instead of using reqwest::Url's
|
||||
// builtin parsing. The reason is that they call .split() on the url path
|
||||
// except that I don't want an empty string taking up the last index in the
|
||||
// event that the url ends with a forward slash. It's ugly enough to be split
|
||||
// into its own function for readability.
|
||||
let url_len = get_url_path_length(&response.url());
|
||||
if !unknown {
|
||||
// not unknown, i.e. we've seen the url before and don't need to scan again
|
||||
continue;
|
||||
}
|
||||
|
||||
if url_len + filter.dynamic == *content_len {
|
||||
log::debug!("dynamic wildcard: filtered out {}", response.url());
|
||||
continue;
|
||||
// create a url based on the given command line options, continue on error
|
||||
let new_url = match format_url(
|
||||
&new_link,
|
||||
&"",
|
||||
CONFIGURATION.addslash,
|
||||
&CONFIGURATION.queries,
|
||||
None,
|
||||
) {
|
||||
Ok(url) => url,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
// make the request and store the response
|
||||
let new_response = match make_request(&CONFIGURATION.client, &new_url).await {
|
||||
Ok(resp) => resp,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let mut new_ferox_response = FeroxResponse::from(new_response).await;
|
||||
|
||||
// filter if necessary
|
||||
if should_filter_response(
|
||||
&new_ferox_response.content_length(),
|
||||
filter.clone(),
|
||||
&new_ferox_response,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if new_ferox_response.is_file() {
|
||||
// very likely a file, simply request and report
|
||||
log::debug!(
|
||||
"Singular extraction: {} ({})",
|
||||
new_ferox_response.url(),
|
||||
new_ferox_response.status().as_str(),
|
||||
);
|
||||
|
||||
send_report(report_chan.clone(), new_ferox_response);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if !CONFIGURATION.norecursion {
|
||||
log::debug!(
|
||||
"Recursive extraction: {} ({})",
|
||||
new_ferox_response.url(),
|
||||
new_ferox_response.status().as_str()
|
||||
);
|
||||
|
||||
if new_ferox_response.status().is_success()
|
||||
&& !new_ferox_response.url().as_str().ends_with('/')
|
||||
{
|
||||
// since all of these are 2xx, recursion is only attempted if the
|
||||
// url ends in a /. I am actually ok with adding the slash and not
|
||||
// adding it, as both have merit. Leaving it in for now to see how
|
||||
// things turn out (current as of: v1.1.0)
|
||||
new_ferox_response.set_url(&format!("{}/", new_ferox_response.url()));
|
||||
}
|
||||
|
||||
try_recursion(&new_ferox_response, base_depth, dir_chan.clone()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let ferox_response = FeroxResponse::new(response).await;
|
||||
|
||||
// everything else should be reported
|
||||
match report_chan.send(ferox_response) {
|
||||
Ok(_) => {
|
||||
log::debug!("sent {}/{} over reporting channel", &target_url, &word);
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("wtf: {}", e);
|
||||
}
|
||||
}
|
||||
send_report(report_chan.clone(), ferox_response);
|
||||
}
|
||||
}
|
||||
log::trace!("exit: make_requests");
|
||||
}
|
||||
|
||||
/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel`
|
||||
fn send_report(report_sender: UnboundedSender<FeroxResponse>, response: FeroxResponse) {
|
||||
log::trace!("enter: send_report({:?}, {:?}", report_sender, response);
|
||||
|
||||
match report_sender.send(response) {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
}
|
||||
}
|
||||
|
||||
log::trace!("exit: send_report");
|
||||
}
|
||||
|
||||
/// Scan a given url using a given wordlist
|
||||
///
|
||||
/// This is the primary entrypoint for the scanner
|
||||
|
||||
Reference in New Issue
Block a user