Merge pull request #87 from epi052/FEATURE-add-link-extraction--integrate-get-links-into-scanner-v2

Integrate extractor::get_links into scanner v2
2026-05-26 16:01:12 -03:00 · 2020-10-21 20:19:28 -05:00
parent 96ab0381e8 61648394cc
commit ba279079b6
10 changed files with 650 additions and 104 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "feroxbuster"
-version = "1.0.4"
+version = "1.1.0"
 authors = ["Ben 'epi' Risher <epibar052@gmail.com>"]
 license = "MIT"
 edition = "2018"
--- a/README.md
+++ b/README.md
@@ -88,22 +88,25 @@ Releases for multiple architectures can be found in the [Releases](https://githu

 #### Linux x86
 ```
-wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86-linux-feroxbuster.zip
+curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86-linux-feroxbuster.zip
 unzip x86-linux-feroxbuster.zip
+chmod +x ./feroxbuster
 ./feroxbuster -V
 ```
 #### Linux x86_64

 ```
-wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-linux-feroxbuster.zip
+curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-linux-feroxbuster.zip
 unzip x86_64-linux-feroxbuster.zip
+chmod +x ./feroxbuster
 ./feroxbuster -V
 ```

 #### MacOS x86_64
 ```
-wget -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-macos-feroxbuster.zip
+curl -sLO https://github.com/epi052/feroxbuster/releases/latest/download/x86_64-macos-feroxbuster.zip
 unzip x86_64-macos-feroxbuster.zip
+chmod +x ./feroxbuster
 ./feroxbuster -V
 ```

@@ -239,6 +242,11 @@ built-in defaults.
 - The same directory as the `feroxbuster` executable (per-user)
 - The user's current working directory (per-target)

+> `CONFIG_DIR` is defined as follows:
+> - Linux: `$XDG_CONFIG_HOME` or `$HOME/.config`, e.g. `/home/bob/.config`
+> - macOS: `$HOME/Library/Application Support`, e.g. `/Users/bob/Library/Application Support`
+> - Windows: `{FOLDERID_RoamingAppData}`, e.g. `C:\Users\Bob\AppData\Roaming`
+
 If more than one valid configuration file is found, each one overwrites the values found previously.  

 If no configuration file is found, nothing happens at this stage.
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -1,6 +1,6 @@
+use crate::FeroxResponse;
 use lazy_static::lazy_static;
 use regex::Regex;
-use reqwest::Response;
 use reqwest::Url;
 use std::collections::HashSet;

@@ -83,20 +83,12 @@ fn add_link_to_set_of_links(link: &str, url: &Url, links: &mut HashSet<String>)
 ///         - homepage/assets/img/
 ///         - homepage/assets/
 ///         - homepage/
-pub async fn get_links(response: Response) -> HashSet<String> {
+pub async fn get_links(response: &FeroxResponse) -> HashSet<String> {
    log::trace!("enter: get_links({})", response.url().as_str());

-    let url = response.url().clone();
    let mut links = HashSet::<String>::new();

-    let body = match response.text().await {
-        // await the response's body
-        Ok(text) => text,
-        Err(e) => {
-            log::error!("Could not parse body from response: {}", e);
-            return links;
-        }
-    };
+    let body = response.text();

    for capture in REGEX.captures_iter(&body) {
        // remove single & double quotes from both ends of the capture
@@ -105,8 +97,10 @@ pub async fn get_links(response: Response) -> HashSet<String> {

        match Url::parse(link) {
            Ok(absolute) => {
-                if absolute.domain() != url.domain() {
-                    // domains are not the same, don't scan things that aren't part of the original
+                if absolute.domain() != response.url().domain()
+                    || absolute.host() != response.url().host()
+                {
+                    // domains/ips are not the same, don't scan things that aren't part of the original
                    // target url
                    continue;
                }
@@ -118,7 +112,8 @@ pub async fn get_links(response: Response) -> HashSet<String> {
                    //     - homepage/assets/img/
                    //     - homepage/assets/
                    //     - homepage/
-                    add_link_to_set_of_links(&sub_path, &url, &mut links);
+                    log::debug!("Adding {} to {:?}", sub_path, links);
+                    add_link_to_set_of_links(&sub_path, &response.url(), &mut links);
                }
            }
            Err(e) => {
@@ -128,7 +123,8 @@ pub async fn get_links(response: Response) -> HashSet<String> {
                if e.to_string().contains("relative URL without a base") {
                    for sub_path in get_sub_paths_from_path(link) {
                        // incrementally save all sub-paths that led to the relative url's resource
-                        add_link_to_set_of_links(&sub_path, &url, &mut links);
+                        log::debug!("Adding {} to {:?}", sub_path, links);
+                        add_link_to_set_of_links(&sub_path, &response.url(), &mut links);
                    }
                } else {
                    // unexpected error has occurred
@@ -145,6 +141,10 @@ pub async fn get_links(response: Response) -> HashSet<String> {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::utils::make_request;
+    use httpmock::Method::GET;
+    use httpmock::{Mock, MockServer};
+    use reqwest::Client;

    #[test]
    /// extract sub paths from the given url fragment; expect 4 sub paths and that all are
@@ -236,4 +236,34 @@ mod tests {
        assert_eq!(links.len(), 0);
        assert!(links.is_empty());
    }
+
+    #[tokio::test(core_threads = 1)]
+    /// use make_request to generate a Response, and use the Response to test get_links;
+    /// the response will contain an absolute url that points to a domain other than the scanned
+    /// domain; expect an empty set returned
+    async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        let srv = MockServer::start();
+
+        let mock = Mock::new()
+            .expect_method(GET)
+            .expect_path("/some-path")
+            .return_status(200)
+            .return_body("\"http://defintely.not.a.thing.probably.com/homepage/assets/img/icons/handshake.svg\"")
+            .create_on(&srv);
+
+        let client = Client::new();
+        let url = Url::parse(&srv.url("/some-path")).unwrap();
+
+        let response = make_request(&client, &url).await.unwrap();
+
+        let ferox_response = FeroxResponse::from(response, true).await;
+
+        let links = get_links(&ferox_response).await;
+
+        assert!(links.is_empty());
+
+        assert_eq!(mock.times_called(), 1);
+        Ok(())
+    }
 }
--- a/src/heuristics.rs
+++ b/src/heuristics.rs
@@ -1,4 +1,5 @@
 use crate::config::{CONFIGURATION, PROGRESS_PRINTER};
+use crate::scanner::should_filter_response;
 use crate::utils::{
    ferox_print, format_url, get_url_path_length, make_request, module_colorizer, status_colorizer,
 };
@@ -20,7 +21,7 @@ const UUID_LENGTH: u64 = 32;
 ///
 /// `size` is size of the response that should be included with filters passed via runtime
 /// configuration and any static wildcard lengths.
-#[derive(Default, Debug)]
+#[derive(Default, Debug, PartialEq, Copy, Clone)]
 pub struct WildcardFilter {
    /// size of the response that will later be combined with the length of the path of the url
    /// requested
@@ -99,11 +100,15 @@ pub async fn wildcard_test(
                // reflected in the response along with some static content; aka custom 404
                let url_len = get_url_path_length(&resp_one.url());

-                if !CONFIGURATION.quiet {
+                wildcard.dynamic = wc_length - url_len;
+
+                if !CONFIGURATION.quiet
+                    && !should_filter_response(&wildcard.dynamic, &resp_one.url())
+                {
                    let msg = format!(
                            "{} {:>10} Wildcard response is dynamic; {} ({} + url length) responses; toggle this behavior by using {}\n",
                            status_colorizer("WLD"),
-                            wc_length - url_len,
+                            wildcard.dynamic,
                            style("auto-filtering").yellow(),
                            style(wc_length - url_len).cyan(),
                            style("--dontfilter").yellow()
@@ -117,10 +122,11 @@ pub async fn wildcard_test(
                        !CONFIGURATION.output.is_empty(),
                    );
                }
-
-                wildcard.dynamic = wc_length - url_len;
            } else if wc_length == wc2_length {
-                if !CONFIGURATION.quiet {
+                wildcard.size = wc_length;
+
+                if !CONFIGURATION.quiet && !should_filter_response(&wildcard.size, &resp_one.url())
+                {
                    let msg = format!(
                        "{} {:>10} Wildcard response is static; {} {} responses; toggle this behavior by using {}\n",
                        status_colorizer("WLD"),
@@ -138,7 +144,6 @@ pub async fn wildcard_test(
                        !CONFIGURATION.output.is_empty(),
                    );
                }
-                wildcard.size = wc_length;
            }
        } else {
            bar.inc(2);
@@ -199,7 +204,7 @@ async fn make_wildcard_request(
                let url_len = get_url_path_length(&response.url());
                let content_len = response.content_length().unwrap_or(0);

-                if !CONFIGURATION.quiet {
+                if !CONFIGURATION.quiet && !should_filter_response(&content_len, &response.url()) {
                    let msg = format!(
                        "{} {:>10} Got {} for {} (url length: {})\n",
                        wildcard,
@@ -221,31 +226,16 @@ async fn make_wildcard_request(
                if response.status().is_redirection() {
                    // show where it goes, if possible
                    if let Some(next_loc) = response.headers().get("Location") {
-                        if let Ok(next_loc_str) = next_loc.to_str() {
-                            if !CONFIGURATION.quiet {
-                                let msg = format!(
-                                    "{} {:>10} {} redirects to => {}\n",
-                                    wildcard,
-                                    content_len,
-                                    response.url(),
-                                    next_loc_str
-                                );
-
-                                ferox_print(&msg, &PROGRESS_PRINTER);
-
-                                try_send_message_to_file(
-                                    &msg,
-                                    tx_file.clone(),
-                                    !CONFIGURATION.output.is_empty(),
-                                );
-                            }
-                        } else if !CONFIGURATION.quiet {
+                        let next_loc_str = next_loc.to_str().unwrap_or("Unknown");
+                        if !CONFIGURATION.quiet
+                            && !should_filter_response(&content_len, &response.url())
+                        {
                            let msg = format!(
-                                "{} {:>10} {} redirects to => {:?}\n",
+                                "{} {:>10} {} redirects to => {}\n",
                                wildcard,
                                content_len,
                                response.url(),
-                                next_loc
+                                next_loc_str
                            );

                            ferox_print(&msg, &PROGRESS_PRINTER);
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,7 +10,8 @@ pub mod reporter;
 pub mod scanner;
 pub mod utils;

-use reqwest::StatusCode;
+use reqwest::header::HeaderMap;
+use reqwest::{Response, StatusCode, Url};
 use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};

 /// Generic Result type to ease error handling in async contexts
@@ -59,6 +60,118 @@ pub const DEFAULT_STATUS_CODES: [StatusCode; 9] = [
 /// Expected location is in the same directory as the feroxbuster binary.
 pub const DEFAULT_CONFIG_NAME: &str = "ferox-config.toml";

+/// A `FeroxResponse`, derived from a `Response` to a submitted `Request`
+#[derive(Debug)]
+pub struct FeroxResponse {
+    /// The final `Url` of this `FeroxResponse`
+    url: Url,
+
+    /// The `StatusCode` of this `FeroxResponse`
+    status: StatusCode,
+
+    /// The full response text
+    text: String,
+
+    /// The content-length of this response; 0 if unknown
+    content_length: u64,
+
+    /// The `Headers` of this `FeroxResponse`
+    headers: HeaderMap,
+}
+
+/// `FeroxResponse` implementation
+impl FeroxResponse {
+    /// Get the `StatusCode` of this `FeroxResponse`
+    pub fn status(&self) -> &StatusCode {
+        &self.status
+    }
+
+    /// Get the final `Url` of this `FeroxResponse`.
+    pub fn url(&self) -> &Url {
+        &self.url
+    }
+
+    /// Get the full response text
+    pub fn text(&self) -> &str {
+        &self.text
+    }
+
+    /// Get the `Headers` of this `FeroxResponse`
+    pub fn headers(&self) -> &HeaderMap {
+        &self.headers
+    }
+
+    /// Get the content-length of this response; 0 if unknown
+    pub fn content_length(&self) -> u64 {
+        self.content_length
+    }
+
+    /// Set `FeroxResponse`'s `url` attribute; has no effect if `url` cannot be parsed into a `Url`
+    pub fn set_url(&mut self, url: &str) {
+        match Url::parse(&url) {
+            Ok(url) => {
+                self.url = url;
+            }
+            Err(e) => {
+                log::error!("Could not parse {} into a Url: {}", url, e);
+            }
+        };
+    }
+
+    /// Make a reasonable guess at whether the response is a file or not
+    ///
+    /// Examines the last part of a path to determine if it has an obvious extension
+    /// i.e. http://localhost/some/path/stuff.js where stuff.js indicates a file
+    ///
+    /// Additionally, inspects query parameters, as they're also often indicative of a file
+    pub fn is_file(&self) -> bool {
+        let has_extension = match self.url.path_segments() {
+            Some(path) => {
+                if let Some(last) = path.last() {
+                    last.contains('.') // last segment has some sort of extension, probably
+                } else {
+                    false
+                }
+            }
+            None => false,
+        };
+
+        self.url.query_pairs().count() > 0 || has_extension
+    }
+
+    /// Create a new `FeroxResponse` from the given `Response`
+    pub async fn from(response: Response, read_body: bool) -> Self {
+        let url = response.url().clone();
+        let status = response.status();
+        let headers = response.headers().clone();
+        let content_length = response.content_length().unwrap_or(0);
+
+        let text = if read_body {
+            // .text() consumes the response, must be called last
+            // additionally, --extract-links is currently the only place we use the body of the
+            // response, so we forego the processing if not performing extraction
+            match response.text().await {
+                // await the response's body
+                Ok(text) => text,
+                Err(e) => {
+                    log::error!("Could not parse body from response: {}", e);
+                    String::new()
+                }
+            }
+        } else {
+            String::new()
+        };
+
+        FeroxResponse {
+            url,
+            status,
+            content_length,
+            text,
+            headers,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,8 @@
 use feroxbuster::config::{CONFIGURATION, PROGRESS_PRINTER};
 use feroxbuster::scanner::scan_url;
 use feroxbuster::utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer};
-use feroxbuster::{banner, heuristics, logger, reporter, FeroxResult};
+use feroxbuster::{banner, heuristics, logger, reporter, FeroxResponse, FeroxResult};
 use futures::StreamExt;
-use reqwest::Response;
 use std::collections::HashSet;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
@@ -38,7 +37,13 @@ fn get_unique_words_from_wordlist(path: &str) -> FeroxResult<Arc<HashSet<String>
    let mut words = HashSet::new();

    for line in reader.lines() {
-        words.insert(line?);
+        let result = line?;
+
+        if result.starts_with('#') || result.is_empty() {
+            continue;
+        }
+
+        words.insert(result);
    }

    log::trace!(
@@ -52,7 +57,7 @@ fn get_unique_words_from_wordlist(path: &str) -> FeroxResult<Arc<HashSet<String>
 /// Determine whether it's a single url scan or urls are coming from stdin, then scan as needed
 async fn scan(
    targets: Vec<String>,
-    tx_term: UnboundedSender<Response>,
+    tx_term: UnboundedSender<FeroxResponse>,
    tx_file: UnboundedSender<String>,
 ) -> FeroxResult<()> {
    log::trace!("enter: scan({:?}, {:?}, {:?})", targets, tx_term, tx_file);
--- a/src/reporter.rs
+++ b/src/reporter.rs
@@ -1,8 +1,7 @@
 use crate::config::{CONFIGURATION, PROGRESS_PRINTER};
 use crate::utils::{ferox_print, status_colorizer};
-use crate::FeroxChannel;
+use crate::{FeroxChannel, FeroxResponse};
 use console::strip_ansi_codes;
-use reqwest::Response;
 use std::io::Write;
 use std::sync::{Arc, Once, RwLock};
 use std::{fs, io};
@@ -41,14 +40,14 @@ pub fn initialize(
    output_file: &str,
    save_output: bool,
 ) -> (
-    UnboundedSender<Response>,
+    UnboundedSender<FeroxResponse>,
    UnboundedSender<String>,
    JoinHandle<()>,
    Option<JoinHandle<()>>,
 ) {
    log::trace!("enter: initialize({}, {})", output_file, save_output);

-    let (tx_rpt, rx_rpt): FeroxChannel<Response> = mpsc::unbounded_channel();
+    let (tx_rpt, rx_rpt): FeroxChannel<FeroxResponse> = mpsc::unbounded_channel();
    let (tx_file, rx_file): FeroxChannel<String> = mpsc::unbounded_channel();

    let file_clone = tx_file.clone();
@@ -81,7 +80,7 @@ pub fn initialize(
 /// The consumer simply receives responses and prints them if they meet the given
 /// reporting criteria
 async fn spawn_terminal_reporter(
-    mut resp_chan: UnboundedReceiver<Response>,
+    mut resp_chan: UnboundedReceiver<FeroxResponse>,
    file_chan: UnboundedSender<String>,
    save_output: bool,
 ) {
@@ -107,7 +106,7 @@ async fn spawn_terminal_reporter(
                    // 200       3280 https://localhost.com/FAQ
                    "{} {:>10} {}\n",
                    status,
-                    resp.content_length().unwrap_or(0),
+                    resp.content_length(),
                    resp.url()
                )
            };
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,11 +1,12 @@
 use crate::config::{CONFIGURATION, PROGRESS_BAR};
+use crate::extractor::get_links;
 use crate::heuristics::WildcardFilter;
 use crate::utils::{format_url, get_current_depth, get_url_path_length, make_request};
-use crate::{heuristics, progress, FeroxChannel};
+use crate::{heuristics, progress, FeroxChannel, FeroxResponse};
 use futures::future::{BoxFuture, FutureExt};
 use futures::{stream, StreamExt};
 use lazy_static::lazy_static;
-use reqwest::{Response, Url};
+use reqwest::Url;
 use std::collections::HashSet;
 use std::convert::TryInto;
 use std::ops::Deref;
@@ -20,6 +21,9 @@ static CALL_COUNT: AtomicUsize = AtomicUsize::new(0);
 lazy_static! {
    /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication
    static ref SCANNED_URLS: RwLock<HashSet<String>> = RwLock::new(HashSet::new());
+
+    /// Vector of WildcardFilters that have been ID'd through heuristics
+    static ref WILDCARD_FILTERS: Arc<RwLock<Vec<Arc<WildcardFilter>>>> = Arc::new(RwLock::new(Vec::<Arc<WildcardFilter>>::new()));
 }

 /// Adds the given url to `SCANNED_URLS`
@@ -59,6 +63,42 @@ fn add_url_to_list_of_scanned_urls(resp: &str, scanned_urls: &RwLock<HashSet<Str
    }
 }

+/// Adds the given WildcardFilter to `WILDCARD_FILTERS`
+///
+/// If `WILDCARD_FILTERS` did not already contain the filter, return true; otherwise return false
+fn add_filter_to_list_of_wildcard_filters(
+    filter: Arc<WildcardFilter>,
+    wildcard_filters: Arc<RwLock<Vec<Arc<WildcardFilter>>>>,
+) -> bool {
+    log::trace!(
+        "enter: add_filter_to_list_of_wildcard_filters({:?}, {:?})",
+        filter,
+        wildcard_filters
+    );
+
+    match wildcard_filters.write() {
+        Ok(mut filters) => {
+            // If the vector did not contain the given filter, it is pushed and true is returned.
+            // If the vector already contained the given filter, false is returned.
+            if filters.contains(&filter) {
+                log::trace!("exit: add_filter_to_list_of_wildcard_filters -> false");
+                return false;
+            }
+
+            filters.push(filter);
+
+            log::trace!("exit: add_filter_to_list_of_wildcard_filters -> true");
+            true
+        }
+        Err(e) => {
+            // poisoned lock
+            log::error!("Set of wildcard filters poisoned: {}", e);
+            log::trace!("exit: add_filter_to_list_of_wildcard_filters -> false");
+            false
+        }
+    }
+}
+
 /// Spawn a single consumer task (sc side of mpsc)
 ///
 /// The consumer simply receives Urls and scans them
@@ -66,7 +106,7 @@ fn spawn_recursion_handler(
    mut recursion_channel: UnboundedReceiver<String>,
    wordlist: Arc<HashSet<String>>,
    base_depth: usize,
-    tx_term: UnboundedSender<Response>,
+    tx_term: UnboundedSender<FeroxResponse>,
    tx_file: UnboundedSender<String>,
 ) -> BoxFuture<'static, Vec<JoinHandle<()>>> {
    log::trace!(
@@ -160,7 +200,7 @@ fn create_urls(target_url: &str, word: &str, extensions: &[String]) -> Vec<Url>
 ///
 /// handles 2xx and 3xx responses by either checking if the url ends with a / (2xx)
 /// or if the Location header is present and matches the base url + / (3xx)
-fn response_is_directory(response: &Response) -> bool {
+fn response_is_directory(response: &FeroxResponse) -> bool {
    log::trace!("enter: is_directory({:?})", response);

    if response.status().is_redirection() {
@@ -240,7 +280,7 @@ fn reached_max_depth(url: &Url, base_depth: usize, max_depth: usize) -> bool {
 ///
 /// When a recursion opportunity is found, the new url is sent across the recursion channel
 async fn try_recursion(
-    response: &Response,
+    response: &FeroxResponse,
    base_depth: usize,
    transmitter: UnboundedSender<String>,
 ) {
@@ -290,6 +330,54 @@ async fn try_recursion(
    log::trace!("exit: try_recursion");
 }

+/// Simple helper to stay DRY; determines whether a response with the given content length and
+/// url should be filtered out, i.e. returns true when it should NOT be reported to the user.
+pub fn should_filter_response(content_len: &u64, url: &Url) -> bool {
+    if CONFIGURATION.sizefilters.contains(content_len) {
+        // filtered value from --sizefilters, move on to the next url
+        log::debug!("size filter: filtered out {}", url);
+        return true;
+    }
+
+    match WILDCARD_FILTERS.read() {
+        Ok(filters) => {
+            for filter in filters.iter() {
+                if CONFIGURATION.dontfilter {
+                    // quick return if dontfilter is set
+                    return false;
+                }
+
+                if filter.size > 0 && filter.size == *content_len {
+                    // static wildcard size found during testing
+                    // size isn't default, size equals response length, and auto-filter is on
+                    log::debug!("static wildcard: filtered out {}", url);
+                    return true;
+                }
+
+                if filter.dynamic > 0 {
+                    // dynamic wildcard offset found during testing
+
+                    // I'm about to manually split this url path instead of using reqwest::Url's
+                    // builtin parsing. The reason is that they call .split() on the url path
+                    // except that I don't want an empty string taking up the last index in the
+                    // event that the url ends with a forward slash.  It's ugly enough to be split
+                    // into its own function for readability.
+                    let url_len = get_url_path_length(&url);
+
+                    if url_len + filter.dynamic == *content_len {
+                        log::debug!("dynamic wildcard: filtered out {}", url);
+                        return true;
+                    }
+                }
+            }
+        }
+        Err(e) => {
+            log::error!("{}", e);
+        }
+    }
+    false
+}
+
 /// Wrapper for [make_request](fn.make_request.html)
 ///
 /// Handles making multiple requests based on the presence of extensions
@@ -299,9 +387,8 @@ async fn make_requests(
    target_url: &str,
    word: &str,
    base_depth: usize,
-    filter: Arc<WildcardFilter>,
    dir_chan: UnboundedSender<String>,
-    report_chan: UnboundedSender<Response>,
+    report_chan: UnboundedSender<FeroxResponse>,
 ) {
    log::trace!(
        "enter: make_requests({}, {}, {}, {:?}, {:?})",
@@ -316,61 +403,117 @@ async fn make_requests(

    for url in urls {
        if let Ok(response) = make_request(&CONFIGURATION.client, &url).await {
-            // response came back without error
+            // response came back without error, convert it to FeroxResponse
+            let ferox_response = FeroxResponse::from(response, CONFIGURATION.extract_links).await;

            // do recursion if appropriate
-            if !CONFIGURATION.norecursion && response_is_directory(&response) {
-                try_recursion(&response, base_depth, dir_chan.clone()).await;
+            if !CONFIGURATION.norecursion {
+                try_recursion(&ferox_response, base_depth, dir_chan.clone()).await;
            }

            // purposefully doing recursion before filtering. the thought process is that
            // even though this particular url is filtered, subsequent urls may not

-            let content_len = &response.content_length().unwrap_or(0);
+            let content_len = &ferox_response.content_length();

-            if CONFIGURATION.sizefilters.contains(content_len) {
-                // filtered value from --sizefilters, move on to the next url
-                log::debug!("size filter: filtered out {}", response.url());
+            if should_filter_response(content_len, &ferox_response.url()) {
                continue;
            }

-            if filter.size > 0 && filter.size == *content_len && !CONFIGURATION.dontfilter {
-                // static wildcard size found during testing
-                // size isn't default, size equals response length, and auto-filter is on
-                log::debug!("static wildcard: filtered out {}", response.url());
-                continue;
-            }
+            if CONFIGURATION.extract_links && !ferox_response.status().is_redirection() {
+                let new_links = get_links(&ferox_response).await;

-            if filter.dynamic > 0 && !CONFIGURATION.dontfilter {
-                // dynamic wildcard offset found during testing
+                for new_link in new_links {
+                    let unknown = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS);

-                // I'm about to manually split this url path instead of using reqwest::Url's
-                // builtin parsing. The reason is that they call .split() on the url path
-                // except that I don't want an empty string taking up the last index in the
-                // event that the url ends with a forward slash.  It's ugly enough to be split
-                // into its own function for readability.
-                let url_len = get_url_path_length(&response.url());
+                    if !unknown {
+                        // not unknown, i.e. we've seen the url before and don't need to scan again
+                        continue;
+                    }

-                if url_len + filter.dynamic == *content_len {
-                    log::debug!("dynamic wildcard: filtered out {}", response.url());
-                    continue;
+                    // create a url based on the given command line options, continue on error
+                    let new_url = match format_url(
+                        &new_link,
+                        &"",
+                        CONFIGURATION.addslash,
+                        &CONFIGURATION.queries,
+                        None,
+                    ) {
+                        Ok(url) => url,
+                        Err(_) => continue,
+                    };
+
+                    // make the request and store the response
+                    let new_response = match make_request(&CONFIGURATION.client, &new_url).await {
+                        Ok(resp) => resp,
+                        Err(_) => continue,
+                    };
+
+                    let mut new_ferox_response =
+                        FeroxResponse::from(new_response, CONFIGURATION.extract_links).await;
+
+                    // filter if necessary
+                    let new_content_len = &new_ferox_response.content_length();
+                    if should_filter_response(new_content_len, &new_ferox_response.url()) {
+                        continue;
+                    }
+
+                    if new_ferox_response.is_file() {
+                        // very likely a file, simply request and report
+                        log::debug!(
+                            "Singular extraction: {} ({})",
+                            new_ferox_response.url(),
+                            new_ferox_response.status().as_str(),
+                        );
+
+                        send_report(report_chan.clone(), new_ferox_response);
+
+                        continue;
+                    }
+
+                    if !CONFIGURATION.norecursion {
+                        log::debug!(
+                            "Recursive extraction: {} ({})",
+                            new_ferox_response.url(),
+                            new_ferox_response.status().as_str()
+                        );
+
+                        if new_ferox_response.status().is_success()
+                            && !new_ferox_response.url().as_str().ends_with('/')
+                        {
+                            // since all of these are 2xx, recursion is only attempted if the
+                            // url ends in a /. I am actually ok with adding the slash and not
+                            // adding it, as both have merit.  Leaving it in for now to see how
+                            // things turn out (current as of: v1.1.0)
+                            new_ferox_response.set_url(&format!("{}/", new_ferox_response.url()));
+                        }
+
+                        try_recursion(&new_ferox_response, base_depth, dir_chan.clone()).await;
+                    }
                }
            }

            // everything else should be reported
-            match report_chan.send(response) {
-                Ok(_) => {
-                    log::debug!("sent {}/{} over reporting channel", &target_url, &word);
-                }
-                Err(e) => {
-                    log::error!("wtf: {}", e);
-                }
-            }
+            send_report(report_chan.clone(), ferox_response);
        }
    }
    log::trace!("exit: make_requests");
 }

+/// Simple helper to send a `FeroxResponse` over the tx side of an `mpsc::unbounded_channel`
+fn send_report(report_sender: UnboundedSender<FeroxResponse>, response: FeroxResponse) {
+    log::trace!("enter: send_report({:?}, {:?}", report_sender, response);
+
+    match report_sender.send(response) {
+        Ok(_) => {}
+        Err(e) => {
+            log::error!("{}", e);
+        }
+    }
+
+    log::trace!("exit: send_report");
+}
+
 /// Scan a given url using a given wordlist
 ///
 /// This is the primary entrypoint for the scanner
@@ -378,7 +521,7 @@ pub async fn scan_url(
    target_url: &str,
    wordlist: Arc<HashSet<String>>,
    base_depth: usize,
-    tx_term: UnboundedSender<Response>,
+    tx_term: UnboundedSender<FeroxResponse>,
    tx_file: UnboundedSender<String>,
 ) {
    log::trace!(
@@ -439,18 +582,17 @@ pub async fn scan_url(
            None => Arc::new(WildcardFilter::default()),
        };

+    add_filter_to_list_of_wildcard_filters(filter.clone(), WILDCARD_FILTERS.clone());
+
    // producer tasks (mp of mpsc); responsible for making requests
    let producers = stream::iter(looping_words.deref().to_owned())
        .map(|word| {
-            let wc_filter = filter.clone();
            let txd = tx_dir.clone();
            let txr = tx_term.clone();
            let pb = progress_bar.clone(); // progress bar is an Arc around internal state
            let tgt = target_url.to_string(); // done to satisfy 'static lifetime below
            (
-                tokio::spawn(async move {
-                    make_requests(&tgt, &word, base_depth, wc_filter, txd, txr).await
-                }),
+                tokio::spawn(async move { make_requests(&tgt, &word, base_depth, txd, txr).await }),
                pb,
            )
        })
@@ -616,4 +758,30 @@ mod tests {

        assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false);
    }
+
+    #[test]
+    /// register a wildcard filter whose `size` is 18 in WILDCARD_FILTERS and ensure that
+    /// should_filter_response returns true when it is handed that same value
+    fn should_filter_response_filters_wildcard_size() {
+        let mut filter = WildcardFilter::default();
+        let url = Url::parse("http://localhost").unwrap();
+        filter.size = 18; // same value that is passed to should_filter_response below
+        let filter = Arc::new(filter);
+        add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone());
+        let result = should_filter_response(&18, &url);
+        assert!(result);
+    }
+
+    #[test]
+    /// register a wildcard filter whose `dynamic` is 9 in WILDCARD_FILTERS and ensure that
+    /// should_filter_response returns true when given 18 and a url ending in /some-path
+    fn should_filter_response_filters_wildcard_dynamic() {
+        let mut filter = WildcardFilter::default();
+        let url = Url::parse("http://localhost/some-path").unwrap();
+        filter.dynamic = 9; // NOTE(review): presumably 18 less the 9 chars of `some-path`; confirm
+        let filter = Arc::new(filter);
+        add_filter_to_list_of_wildcard_filters(filter, WILDCARD_FILTERS.clone());
+        let result = should_filter_response(&18, &url);
+        assert!(result);
+    }
 }
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -160,7 +160,11 @@ pub fn format_url(
    //
    // the transforms that occur here will need to keep this in mind, i.e. add a slash to preserve
    // the current directory sent as part of the url
-    let url = if !url.ends_with('/') {
+    let url = if word.is_empty() {
+        // v1.0.6: added during --extract-links feature implementation to support creating urls
+        // that were extracted from response bodies, i.e. http://localhost/some/path/js/main.js
+        url.to_string()
+    } else if !url.ends_with('/') {
        format!("{}/", url)
    } else {
        url.to_string()
--- a/tests/test_extractor.rs
+++ b/tests/test_extractor.rs
@@ -0,0 +1,229 @@
+mod utils;
+use assert_cmd::prelude::*;
+use httpmock::Method::GET;
+use httpmock::{Mock, MockServer};
+use predicates::prelude::*;
+use std::process::Command;
+use utils::{setup_tmp_directory, teardown_tmp_directory};
+
+#[test]
+/// send a request to a page that contains an absolute link (built from the mock server's own
+/// url), --extract-links should find the link and make a request to the new link
+fn extractor_finds_absolute_url() -> Result<(), Box<dyn std::error::Error>> {
+    let srv = MockServer::start();
+    let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?;
+
+    let mock = Mock::new()
+        .expect_method(GET)
+        .expect_path("/LICENSE")
+        .return_status(200)
+        .return_body(&srv.url("'/homepage/assets/img/icons/handshake.svg'"))
+        .create_on(&srv);
+
+    let mock_two = Mock::new()
+        .expect_method(GET)
+        .expect_path("/homepage/assets/img/icons/handshake.svg")
+        .return_status(200)
+        .create_on(&srv);
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .unwrap();
+
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE")
+            .and(predicate::str::contains("200"))
+            .and(predicate::str::contains(
+                "/homepage/assets/img/icons/handshake.svg",
+            )),
+    );
+
+    assert_eq!(mock.times_called(), 1); // the wordlist entry was requested once
+    assert_eq!(mock_two.times_called(), 1); // the extracted link was requested once as well
+    teardown_tmp_directory(tmp_dir);
+    Ok(())
+}
+
+#[test]
+/// send a request to a page that contains an absolute link to another domain, scanner should not
+/// follow
+fn extractor_finds_absolute_url_to_different_domain() -> Result<(), Box<dyn std::error::Error>> {
+    let srv = MockServer::start();
+    let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?;
+
+    let mock = Mock::new()
+        .expect_method(GET)
+        .expect_path("/LICENSE")
+        .return_status(200)
+        .return_body("\"http://localhost/homepage/assets/img/icons/handshake.svg\"")
+        .create_on(&srv);
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .unwrap();
+
+    // the initial response must be reported while the off-domain link must not; `.not()` wraps
+    // only the handshake predicate, since negating the whole chain would pass whenever any one
+    // condition was unmet (e.g. if /LICENSE itself were never reported)
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE")
+            .and(predicate::str::contains("200"))
+            .and(predicate::str::contains("/homepage/assets/img/icons/handshake.svg").not()),
+    );
+
+    assert_eq!(mock.times_called(), 1);
+    teardown_tmp_directory(tmp_dir);
+    Ok(())
+}
+
+#[test]
+/// send a request to a page that contains a relative link (path only, no host), should follow
+fn extractor_finds_relative_url() -> Result<(), Box<dyn std::error::Error>> {
+    let srv = MockServer::start();
+    let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist")?;
+
+    let mock = Mock::new()
+        .expect_method(GET)
+        .expect_path("/LICENSE")
+        .return_status(200)
+        .return_body("\"/homepage/assets/img/icons/handshake.svg\"")
+        .create_on(&srv);
+
+    let mock_two = Mock::new()
+        .expect_method(GET)
+        .expect_path("/homepage/assets/img/icons/handshake.svg")
+        .return_status(200)
+        .create_on(&srv);
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .unwrap();
+
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE")
+            .and(predicate::str::contains("200"))
+            .and(predicate::str::contains(
+                "/homepage/assets/img/icons/handshake.svg",
+            )),
+    );
+
+    assert_eq!(mock.times_called(), 1); // the wordlist entry was requested once
+    assert_eq!(mock_two.times_called(), 1); // the extracted link was requested once as well
+    teardown_tmp_directory(tmp_dir);
+    Ok(())
+}
+
+#[test]
+/// send a request to a page that contains a relative link, follow it, and find the same link again
+/// on a second page; should follow the first time, then filter the duplicate
+fn extractor_finds_same_relative_url_twice() -> Result<(), Box<dyn std::error::Error>> {
+    let srv = MockServer::start();
+    let (tmp_dir, file) =
+        setup_tmp_directory(&["LICENSE".to_string(), "README".to_string()], "wordlist")?;
+
+    let mock = Mock::new()
+        .expect_method(GET)
+        .expect_path("/LICENSE")
+        .return_status(200)
+        .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\""))
+        .create_on(&srv);
+
+    let mock_two = Mock::new()
+        .expect_method(GET)
+        .expect_path("/README")
+        .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\""))
+        .return_status(200)
+        .create_on(&srv);
+
+    let mock_three = Mock::new()
+        .expect_method(GET)
+        .expect_path("/homepage/assets/img/icons/handshake.svg")
+        .return_status(200)
+        .create_on(&srv);
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .unwrap();
+
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE")
+            .and(predicate::str::contains("200"))
+            .and(predicate::str::contains(
+                "/homepage/assets/img/icons/handshake.svg",
+            )),
+    );
+
+    assert_eq!(mock.times_called(), 1);
+    assert_eq!(mock_two.times_called(), 1);
+    assert_eq!(mock_three.times_called(), 1); // link appears in two bodies, requested only once
+    teardown_tmp_directory(tmp_dir);
+    Ok(())
+}
+
+#[test]
+/// send a request to a page that contains an absolute link to a page whose body size matches
+/// --sizefilter; expect not to see the second response reported
+fn extractor_finds_filtered_content() -> Result<(), Box<dyn std::error::Error>> {
+    let srv = MockServer::start();
+    let (tmp_dir, file) =
+        setup_tmp_directory(&["LICENSE".to_string(), "README".to_string()], "wordlist")?;
+
+    let mock = Mock::new()
+        .expect_method(GET)
+        .expect_path("/LICENSE")
+        .return_status(200)
+        .return_body(&srv.url("\"/homepage/assets/img/icons/handshake.svg\""))
+        .create_on(&srv);
+
+    let mock_two = Mock::new()
+        .expect_method(GET)
+        .expect_path("/homepage/assets/img/icons/handshake.svg")
+        .return_body("im a little teapot") // 18 bytes, the size handed to --sizefilter below
+        .return_status(200)
+        .create_on(&srv);
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .arg("--sizefilter")
+        .arg("18")
+        .unwrap();
+
+    // the initial response must be reported while the filtered link must not; `.not()` wraps
+    // only the handshake predicate, since negating the whole chain would pass whenever any one
+    // condition was unmet (e.g. if /LICENSE itself were never reported)
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE")
+            .and(predicate::str::contains("200"))
+            .and(predicate::str::contains("/homepage/assets/img/icons/handshake.svg").not()),
+    );
+
+    assert_eq!(mock.times_called(), 1);
+    assert_eq!(mock_two.times_called(), 1); // the link is still requested, just not reported
+    teardown_tmp_directory(tmp_dir);
+    Ok(())
+}