From 6fa542ecc598f3cbf893b042efb291814aaeaa1e Mon Sep 17 00:00:00 2001 From: epi Date: Sun, 18 Oct 2020 21:02:09 -0500 Subject: [PATCH] lots of post-implementation cleanup done --- Cargo.toml | 2 +- src/extractor.rs | 22 ++++++++------------ src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++---------- src/main.rs | 2 +- src/reporter.rs | 2 +- src/utils.rs | 6 +++++- 6 files changed, 57 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 61f76d2..e8c0b79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "1.0.5" +version = "1.1.0" authors = ["Ben 'epi' Risher "] license = "MIT" edition = "2018" diff --git a/src/extractor.rs b/src/extractor.rs index f623632..70111e3 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -1,6 +1,6 @@ +use crate::FeroxResponse; use lazy_static::lazy_static; use regex::Regex; -use reqwest::Response; use reqwest::Url; use std::collections::HashSet; @@ -83,20 +83,12 @@ fn add_link_to_set_of_links(link: &str, url: &Url, links: &mut HashSet) /// - homepage/assets/img/ /// - homepage/assets/ /// - homepage/ -pub async fn get_links(response: Response) -> HashSet { +pub async fn get_links(response: &FeroxResponse) -> HashSet { log::trace!("enter: get_links({})", response.url().as_str()); - let url = response.url().clone(); let mut links = HashSet::::new(); - let body = match response.text().await { - // await the response's body - Ok(text) => text, - Err(e) => { - log::error!("Could not parse body from response: {}", e); - return links; - } - }; + let body = response.text(); for capture in REGEX.captures_iter(&body) { // remove single & double quotes from both ends of the capture @@ -105,7 +97,7 @@ pub async fn get_links(response: Response) -> HashSet { match Url::parse(link) { Ok(absolute) => { - if absolute.domain() != url.domain() { + if absolute.domain() != response.url().domain() { // domains are not the same, don't scan things that aren't part of the original // 
target url continue; @@ -118,7 +110,8 @@ pub async fn get_links(response: Response) -> HashSet { // - homepage/assets/img/ // - homepage/assets/ // - homepage/ - add_link_to_set_of_links(&sub_path, &url, &mut links); + log::debug!("Adding {} to {:?}", sub_path, links); + add_link_to_set_of_links(&sub_path, &response.url(), &mut links); } } Err(e) => { @@ -128,7 +121,8 @@ pub async fn get_links(response: Response) -> HashSet { if e.to_string().contains("relative URL without a base") { for sub_path in get_sub_paths_from_path(link) { // incrementally save all sub-paths that led to the relative url's resource - add_link_to_set_of_links(&sub_path, &url, &mut links); + log::debug!("Adding {} to {:?}", sub_path, links); + add_link_to_set_of_links(&sub_path, &response.url(), &mut links); } } else { // unexpected error has occurred diff --git a/src/lib.rs b/src/lib.rs index 6e191f6..a005708 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,8 @@ pub mod utils; use crate::config::CONFIGURATION; -use reqwest::{Url, StatusCode, Response}; +use reqwest::header::HeaderMap; +use reqwest::{Response, StatusCode, Url}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; /// Generic Result type to ease error handling in async contexts @@ -64,20 +65,23 @@ pub const DEFAULT_CONFIG_NAME: &str = "ferox-config.toml"; /// A `FeroxResponse`, derived from a `Response` to a submitted `Request` #[derive(Debug)] pub struct FeroxResponse { - /// todo doc + /// The final `Url` of this `FeroxResponse` pub url: Url, - /// todo doc + /// The `StatusCode` of this `FeroxResponse` pub status: StatusCode, - /// todo doc + /// The full response text pub text: String, - /// todo doc - pub content_length: u64 + /// The content-length of this response, if known + pub content_length: u64, + + /// The `Headers` of this `FeroxResponse` + pub headers: HeaderMap, } -/// todo doc +/// `FeroxResponse` implementation impl FeroxResponse { /// Get the `StatusCode` of this `FeroxResponse` pub fn 
status(&self) -> &StatusCode { @@ -94,20 +98,45 @@ impl FeroxResponse { &self.text } + /// Get the `Headers` of this `FeroxResponse` + pub fn headers(&self) -> &HeaderMap { + &self.headers + } + /// Get the content-length of this response, if known pub fn content_length(&self) -> u64 { self.content_length } - /// todo doc - pub async fn new(response: Response) -> Self { + /// Set `FeroxResponse`'s `url` attribute, has no effect if an error occurs + pub fn set_url(&mut self, url: &str) { + match Url::parse(&url) { + Ok(url) => { + self.url = url; + } + Err(e) => { + log::error!("Could not parse {} into a Url: {}", url, e); + } + }; + } + + /// Create a new `FeroxResponse` from the given `Response` + pub async fn from(response: Response) -> Self { let url = response.url().clone(); let status = response.status().clone(); + let headers = response.headers().clone(); let content_length = response.content_length().unwrap_or(0); let text = if CONFIGURATION.extract_links { // .text() consumes the response, must be called last - response.text().await.unwrap() + match response.text().await { + // await the response's body + Ok(text) => text, + Err(e) => { + log::error!("Could not parse body from response: {}", e); + String::new() + } + } } else { String::new() }; @@ -116,7 +145,8 @@ impl FeroxResponse { url, status, content_length, - text + text, + headers, } } } diff --git a/src/main.rs b/src/main.rs index 622fad3..7c8369c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ use feroxbuster::config::{CONFIGURATION, PROGRESS_PRINTER}; use feroxbuster::scanner::scan_url; use feroxbuster::utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer}; -use feroxbuster::{banner, heuristics, logger, reporter, FeroxResult, FeroxResponse}; +use feroxbuster::{banner, heuristics, logger, reporter, FeroxResponse, FeroxResult}; use futures::StreamExt; use std::collections::HashSet; use std::fs::File; diff --git a/src/reporter.rs b/src/reporter.rs index 
5cc4fdf..8465ad7 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1,6 +1,6 @@ use crate::config::{CONFIGURATION, PROGRESS_PRINTER}; use crate::utils::{ferox_print, status_colorizer}; -use crate::{FeroxResponse, FeroxChannel}; +use crate::{FeroxChannel, FeroxResponse}; use console::strip_ansi_codes; use std::io::Write; use std::sync::{Arc, Once, RwLock}; diff --git a/src/utils.rs b/src/utils.rs index fe0bff6..048cfbd 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -160,7 +160,11 @@ pub fn format_url( // // the transforms that occur here will need to keep this in mind, i.e. add a slash to preserve // the current directory sent as part of the url - let url = if !url.ends_with('/') { + let url = if word.is_empty() { + // v1.0.6: added during --extract-links feature implementation to support creating urls + // that were extracted from response bodies, i.e. http://localhost/some/path/js/main.js + url.to_string() + } else if !url.ends_with('/') { format!("{}/", url) } else { url.to_string()