diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7f40842..0c5a3ff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -90,6 +90,26 @@ jobs: name: x86_64-linux-debug-feroxbuster path: target/x86_64-unknown-linux-musl/debug/feroxbuster + build-debug-windows: + env: + IN_PIPELINE: true + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + target: x86_64-pc-windows-msvc + + - name: Build the project + run: cargo build --target=x86_64-pc-windows-msvc + - uses: actions/upload-artifact@v4 + with: + name: x86_64-windows-debug-feroxbuster.exe + path: target\x86_64-pc-windows-msvc\debug\feroxbuster.exe + build-deb: needs: [build-nix] runs-on: ubuntu-latest diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index fd6568c..d238ddd 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -34,6 +34,8 @@ jobs: - name: Cache cargo & target directories uses: Swatinem/rust-cache@v2 - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt - run: cargo fmt --all -- --check clippy: @@ -44,4 +46,6 @@ jobs: - name: Cache cargo & target directories uses: Swatinem/rust-cache@v2 - uses: dtolnay/rust-toolchain@stable + with: + components: clippy - run: cargo clippy --all-targets --all-features -- -D warnings diff --git a/Cargo.lock b/Cargo.lock index a685da3..4b5e09e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -946,7 +946,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "feroxbuster" -version = "2.12.0" +version = "2.13.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 0eac926..cd8fa57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "2.12.0" +version = "2.13.0" authors = ["Ben 'epi' Risher (@epi052)"] license = "MIT" edition = "2021" diff --git a/ferox-config.toml.example b/ferox-config.toml.example index 95a619d..5ac0fe0 100644 --- a/ferox-config.toml.example +++ b/ferox-config.toml.example @@ -38,6 +38,10 @@ # methods = ["GET", "POST"] # data = [11, 12, 13, 14, 15] # url_denylist = ["http://dont-scan.me", "https://also-not.me"] +# any subdomain of a domain provided to scope is implicitly allowed also. +# so things like "api.other.com" and "sub.third.com" would also be considered +# in-scope given the example config below. +# scope = ["example.com", "other.com", "third.com"] # regex_denylist = ["/deny.*"] # no_recursion = true # add_slash = true diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index 723d157..892f48d 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -27,8 +27,8 @@ _feroxbuster() { '--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \ '*-R+[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE:_default' \ '*--replay-codes=[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE:_default' \ -'-a+[Sets the User-Agent (default\: feroxbuster/2.12.0)]:USER_AGENT:_default' \ -'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.12.0)]:USER_AGENT:_default' \ +'-a+[Sets the User-Agent (default\: feroxbuster/2.13.0)]:USER_AGENT:_default' \ +'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.13.0)]:USER_AGENT:_default' \ '*-x+[File extension(s) to search for (ex\: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex\: @ext.txt)]:FILE_EXTENSION:_default' \ '*--extensions=[File extension(s) to search for (ex\: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex\: @ext.txt)]:FILE_EXTENSION:_default' \ '*-m+[Which HTTP request method(s) should be sent (default\: GET)]:HTTP_METHODS:_default' \ @@ -42,6 +42,7 @@ _feroxbuster() { '*--query=[Request'\''s URL query parameters (ex\: -Q token=stuff -Q secret=key)]:QUERY:_default' \ '--protocol=[Specify the protocol to use when targeting via --request-file or --url with domain only (default\: https)]:PROTOCOL:_default' \ '*--dont-scan=[URL(s) or Regex Pattern(s) to exclude from recursion/scans]:URL:_default' \ +'*--scope=[Additional domains/URLs to consider in-scope for scanning (in addition to current domain)]:URL:_default' \ '*-S+[Filter out messages of a particular size (ex\: -S 5120 -S 4927,1970)]:SIZE:_default' \ '*--filter-size=[Filter out messages of a particular size (ex\: -S 5120 -S 4927,1970)]:SIZE:_default' \ '*-X+[Filter out messages via regular expression matching on the response'\''s body/headers (ex\: -X '\''^ignore me\$'\'')]:REGEX:_default' \ diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index b7f8961..8893193 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -33,8 +33,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--replay-proxy', '--replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests') [CompletionResult]::new('-R', '-R ', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') [CompletionResult]::new('--replay-codes', '--replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') - [CompletionResult]::new('-a', '-a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.12.0)') - [CompletionResult]::new('--user-agent', '--user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.12.0)') + [CompletionResult]::new('-a', '-a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.0)') + [CompletionResult]::new('--user-agent', '--user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.0)') [CompletionResult]::new('-x', '-x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)') [CompletionResult]::new('--extensions', '--extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)') [CompletionResult]::new('-m', '-m', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)') @@ -48,6 +48,7 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--query', '--query', [CompletionResultType]::ParameterName, 'Request''s URL query parameters (ex: -Q token=stuff -Q secret=key)') [CompletionResult]::new('--protocol', '--protocol', [CompletionResultType]::ParameterName, 'Specify the protocol to use when targeting via --request-file or --url with domain only (default: https)') [CompletionResult]::new('--dont-scan', '--dont-scan', [CompletionResultType]::ParameterName, 'URL(s) or Regex Pattern(s) to exclude from recursion/scans') + [CompletionResult]::new('--scope', '--scope', [CompletionResultType]::ParameterName, 'Additional domains/URLs to consider in-scope for scanning (in addition to current domain)') [CompletionResult]::new('-S', '-S ', [CompletionResultType]::ParameterName, 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)') [CompletionResult]::new('--filter-size', '--filter-size', [CompletionResultType]::ParameterName, 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)') [CompletionResult]::new('-X', '-X ', [CompletionResultType]::ParameterName, 'Filter out messages via regular expression matching on the response''s body/headers (ex: -X ''^ignore me$'')') diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index 6b9935e..457aabc 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -23,7 +23,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o -U -h -V --url --stdin --resume-from --request-file --burp --burp-replay --data-urlencoded --data-json --smart --thorough --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --protocol --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --unique --timeout --redirects --insecure --server-certs --client-cert --client-key --threads --no-recursion --depth --force-recursion --extract-links --dont-extract-links --scan-limit --parallel --rate-limit --response-size-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --scan-dir-listings --verbosity --silent --quiet --json --output --debug-log --no-state --limit-bars --update --help --version" + opts="-u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o -U -h -V --url --stdin --resume-from --request-file --burp --burp-replay --data-urlencoded --data-json --smart --thorough --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --protocol --dont-scan --scope --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --unique --timeout --redirects --insecure --server-certs --client-cert --client-key --threads --no-recursion --depth --force-recursion --extract-links --dont-extract-links --scan-limit --parallel --rate-limit --response-size-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --scan-dir-listings --verbosity --silent --quiet --json --output --debug-log --no-state --limit-bars --update --help --version" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 @@ -159,6 +159,10 @@ _feroxbuster() { COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + --scope) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --filter-size) COMPREPLY=($(compgen -f "${cur}")) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index c43a07c..d12cebe 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -30,8 +30,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests' cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' - cand -a 'Sets the User-Agent (default: feroxbuster/2.12.0)' - cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.12.0)' + cand -a 'Sets the User-Agent (default: feroxbuster/2.13.0)' + cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.13.0)' cand -x 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)' cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)' cand -m 'Which HTTP request method(s) should be sent (default: GET)' @@ -45,6 +45,7 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --query 'Request''s URL query parameters (ex: -Q token=stuff -Q secret=key)' cand --protocol 'Specify the protocol to use when targeting via --request-file or --url with domain only (default: https)' cand --dont-scan 'URL(s) or Regex Pattern(s) to exclude from recursion/scans' + cand --scope 'Additional domains/URLs to consider in-scope for scanning (in addition to current domain)' cand -S 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)' cand --filter-size 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)' cand -X 'Filter out messages via regular expression matching on the response''s body/headers (ex: -X ''^ignore me$'')' diff --git a/shell_completions/feroxbuster.fish b/shell_completions/feroxbuster.fish index bba06dc..d0f481e 100644 --- a/shell_completions/feroxbuster.fish +++ b/shell_completions/feroxbuster.fish @@ -6,7 +6,7 @@ complete -c feroxbuster -l data-json -d 'Set -H \'Content-Type: application/json complete -c feroxbuster -s p -l proxy -d 'Proxy to use for requests (ex: http(s)://host:port, socks5(h)://host:port)' -r -f complete -c feroxbuster -s P -l replay-proxy -d 'Send only unfiltered requests through a Replay Proxy, instead of all requests' -r -f complete -c feroxbuster -s R -l replay-codes -d 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' -r -complete -c feroxbuster -s a -l user-agent -d 'Sets the User-Agent (default: feroxbuster/2.12.0)' -r +complete -c feroxbuster -s a -l user-agent -d 'Sets the User-Agent (default: feroxbuster/2.13.0)' -r complete -c feroxbuster -s x -l extensions -d 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)' -r complete -c feroxbuster -s m -l methods -d 'Which HTTP request method(s) should be sent (default: GET)' -r complete -c feroxbuster -l data -d 'Request\'s Body; can read data from a file if input starts with an @ (ex: @post.bin)' -r @@ -15,6 +15,7 @@ complete -c feroxbuster -s b -l cookies -d 'Specify HTTP cookies to be used in e complete -c feroxbuster -s Q -l query -d 'Request\'s URL query parameters (ex: -Q token=stuff -Q secret=key)' -r complete -c feroxbuster -l protocol -d 'Specify the protocol to use when targeting via --request-file or --url with domain only (default: https)' -r complete -c feroxbuster -l dont-scan -d 'URL(s) or Regex Pattern(s) to exclude from recursion/scans' -r +complete -c feroxbuster -l scope -d 'Additional domains/URLs to consider in-scope for scanning (in addition to current domain)' -r complete -c feroxbuster -s S -l filter-size -d 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)' -r complete -c feroxbuster -s X -l filter-regex -d 'Filter out messages via regular expression matching on the response\'s body/headers (ex: -X \'^ignore me$\')' -r complete -c feroxbuster -s W -l filter-words -d 'Filter out messages of a particular word count (ex: -W 312 -W 91,82)' -r diff --git a/src/banner/container.rs b/src/banner/container.rs index dc13ba1..16bf5ff 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -156,6 +156,9 @@ pub struct Banner { /// represents Configuration.url_denylist url_denylist: Vec, + /// represents Configuration.scope + scope: Vec, + /// current version of feroxbuster pub(super) version: String, @@ -199,6 +202,7 @@ impl Banner { pub fn new(tgts: &[String], config: &Configuration) -> Self { let mut targets = Vec::new(); let mut url_denylist = Vec::new(); + let mut scope = Vec::new(); let mut code_filters = Vec::new(); let mut replay_codes = Vec::new(); let mut headers = Vec::new(); @@ -229,6 +233,15 @@ impl Banner { )); } + for scope_url in &config.scope { + let value = match scope_url.host() { + Some(host) => host.to_string(), + None => scope_url.as_str().to_string(), + }; + + scope.push(BannerEntry::new("🚩", "In-Scope Url", &value)); + } + // the +2 is for the 2 experimental status codes we add to the default list manually let status_codes = if config.status_codes.len() == DEFAULT_STATUS_CODES.len() + 2 { let all_str = format!( @@ -486,6 +499,7 @@ impl Banner { force_recursion, time_limit, url_denylist, + scope, collect_extensions, collect_backups, collect_words, @@ -544,17 +558,35 @@ by Ben "epi" Risher {} ver: {}"#, // we don't want to leak sensitive header info / include auth headers // with the github api request, so we'll build a client specifically // for this task. thanks to @stuhlmann for the suggestion! - let client = client::initialize( - handles.config.timeout, - "feroxbuster-update-check", - handles.config.redirects, - handles.config.insecure, - &HashMap::new(), - Some(&handles.config.proxy), - &handles.config.server_certs, - Some(&handles.config.client_cert), - Some(&handles.config.client_key), - )?; + let headers = HashMap::new(); + let client_cert = if handles.config.client_cert.is_empty() { + None + } else { + Some(handles.config.client_cert.as_str()) + }; + let client_key = if handles.config.client_key.is_empty() { + None + } else { + Some(handles.config.client_key.as_str()) + }; + let proxy = if handles.config.proxy.is_empty() { + None + } else { + Some(handles.config.proxy.as_str()) + }; + let client_config = client::ClientConfig { + timeout: handles.config.timeout, + user_agent: "feroxbuster-update-check", + redirects: handles.config.redirects, + insecure: handles.config.insecure, + headers: &headers, + proxy, + server_certs: Some(&handles.config.server_certs), + client_cert, + client_key, + scope: &handles.config.scope, + }; + let client = client::initialize(client_config)?; let level = handles.config.output_level; let tx_stats = handles.stats.tx.clone(); @@ -616,6 +648,10 @@ by Ben "epi" Risher {} ver: {}"#, writeln!(&mut writer, "{denied_url}")?; } + for scoped_url in &self.scope { + writeln!(&mut writer, "{scoped_url}")?; + } + writeln!(&mut writer, "{}", self.threads)?; writeln!(&mut writer, "{}", self.wordlist)?; @@ -636,7 +672,7 @@ by Ben "epi" Risher {} ver: {}"#, } // followed by the maybe printed or variably displayed values - if !config.request_file.is_empty() || !config.target_url.starts_with("http") { + if !config.request_file.is_empty() { writeln!(&mut writer, "{}", self.protocol)?; } diff --git a/src/client.rs b/src/client.rs index 1f80a3d..d6c2086 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,3 +1,4 @@ +use crate::url::UrlExt; use anyhow::{Context, Result}; use reqwest::header::HeaderMap; use reqwest::{redirect::Policy, Client, Proxy}; @@ -5,42 +6,87 @@ use std::collections::HashMap; use std::convert::TryInto; use std::path::Path; use std::time::Duration; +use url::Url; -/// Create and return an instance of [reqwest::Client](https://docs.rs/reqwest/latest/reqwest/struct.Client.html) -/// For now, silence clippy for this one -#[allow(clippy::too_many_arguments)] -pub fn initialize( - timeout: u64, - user_agent: &str, - redirects: bool, - insecure: bool, - headers: &HashMap, - proxy: Option<&str>, - server_certs: I, - client_cert: Option<&str>, - client_key: Option<&str>, -) -> Result +/// Configuration struct for initializing a reqwest client +pub struct ClientConfig<'a, I> where I: IntoIterator, I::Item: AsRef + std::fmt::Debug, { - let policy = if redirects { + /// The timeout for requests in seconds + pub timeout: u64, + /// The User-Agent string to use for requests + pub user_agent: &'a str, + /// Whether to follow redirects + pub redirects: bool, + /// Whether to allow insecure connections + pub insecure: bool, + /// Headers to include in requests + pub headers: &'a HashMap, + /// Proxy server to use for requests + pub proxy: Option<&'a str>, + /// Server certificates to use for requests + pub server_certs: Option, + /// Client certificate to use for requests + pub client_cert: Option<&'a str>, + /// Client key to use for requests + pub client_key: Option<&'a str>, + /// scope for redirect handling + pub scope: &'a [Url], +} + +/// Create a redirect policy based on the provided config +fn create_redirect_policy(config: &ClientConfig<'_, I>) -> Policy +where + I: IntoIterator, + I::Item: AsRef + std::fmt::Debug, +{ + // old behavior set Policy::limited(10) if redirects were enabled + // and Policy::none() if they were not. New policy behavior is + // scope-aware when redirects are enabled and scope is provided. + + if config.redirects && config.scope.is_empty() { + // scope should never be empty, so this should never be hit, just a fallback Policy::limited(10) + } else if config.redirects { + // create a custom policy that checks scope for each redirect + let scoped_urls = config.scope.to_vec(); + + Policy::custom(move |attempt| { + let redirect_url = attempt.url(); + + if redirect_url.is_in_scope(&scoped_urls) { + attempt.follow() + } else { + attempt.stop() + } + }) } else { Policy::none() - }; + } +} - let header_map: HeaderMap = headers.try_into()?; +/// Create and return an instance of [reqwest::Client](https://docs.rs/reqwest/latest/reqwest/struct.Client.html) +/// with optional scope-aware redirect handling +pub fn initialize(config: ClientConfig<'_, I>) -> Result +where + I: IntoIterator, + I::Item: AsRef + std::fmt::Debug, +{ + let policy = create_redirect_policy(&config); + + let header_map: HeaderMap = config.headers.try_into()?; let mut client = Client::builder() - .timeout(Duration::new(timeout, 0)) - .user_agent(user_agent) - .danger_accept_invalid_certs(insecure) + .timeout(Duration::new(config.timeout, 0)) + .user_agent(config.user_agent) + .danger_accept_invalid_certs(config.insecure) .default_headers(header_map) .redirect(policy) .http1_title_case_headers(); - if let Some(some_proxy) = proxy { + if let Some(some_proxy) = config.proxy { if !some_proxy.is_empty() { // it's not an empty string; set the proxy let proxy_obj = Proxy::all(some_proxy)?; @@ -50,7 +96,7 @@ where } } - for cert_path in server_certs { + for cert_path in config.server_certs.into_iter().flatten() { let buf = std::fs::read(&cert_path)?; let cert = match reqwest::Certificate::from_pem(&buf) { @@ -66,7 +112,7 @@ where client = client.add_root_certificate(cert); } - if let (Some(cert_path), Some(key_path)) = (client_cert, client_key) { + if let (Some(cert_path), Some(key_path)) = (config.client_cert, config.client_key) { if !cert_path.is_empty() && !key_path.is_empty() { let cert = std::fs::read(cert_path)?; let key = std::fs::read(key_path)?; @@ -92,18 +138,19 @@ mod tests { /// create client with a bad proxy, expect panic fn client_with_bad_proxy() { let headers = HashMap::new(); - initialize( - 0, - "stuff", - true, - false, - &headers, - Some("not a valid proxy"), - Vec::::new(), - None, - None, - ) - .unwrap(); + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: false, + headers: &headers, + proxy: Some("not a valid proxy"), + server_certs: Option::>::None, + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); } #[test] @@ -111,80 +158,85 @@ mod tests { fn client_with_good_proxy() { let headers = HashMap::new(); let proxy = "http://127.0.0.1:8080"; - initialize( - 0, - "stuff", - true, - true, - &headers, - Some(proxy), - Vec::::new(), - None, - None, - ) - .unwrap(); + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: true, + headers: &headers, + proxy: Some(proxy), + server_certs: Option::>::None, + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); } #[test] /// create client with a server cert in pem format, expect no error fn client_with_valid_server_pem() { let headers = HashMap::new(); - - initialize( - 0, - "stuff", - true, - true, - &headers, - None, - vec!["tests/mutual-auth/certs/server/server.crt.1".to_string()], - None, - None, - ) - .unwrap(); + let server_certs = vec!["tests/mutual-auth/certs/server/server.crt.1".to_string()]; + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: true, + headers: &headers, + proxy: None, + server_certs: Some(server_certs), + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); } #[test] /// create client with a server cert in der format, expect no error fn client_with_valid_server_der() { let headers = HashMap::new(); - - initialize( - 0, - "stuff", - true, - true, - &headers, - None, - vec!["tests/mutual-auth/certs/server/server.der".to_string()], - None, - None, - ) - .unwrap(); + let server_certs = vec!["tests/mutual-auth/certs/server/server.der".to_string()]; + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: true, + headers: &headers, + proxy: None, + server_certs: Some(server_certs), + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); } #[test] /// create client with two server certs (pem and der), expect no error fn client_with_valid_server_pem_and_der() { let headers = HashMap::new(); + let server_certs = vec![ + "tests/mutual-auth/certs/server/server.crt.1".to_string(), + "tests/mutual-auth/certs/server/server.der".to_string(), + ]; println!("{}", std::env::current_dir().unwrap().display()); - initialize( - 0, - "stuff", - true, - true, - &headers, - None, - vec![ - "tests/mutual-auth/certs/server/server.crt.1".to_string(), - "tests/mutual-auth/certs/server/server.der".to_string(), - ], - None, - None, - ) - .unwrap(); + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: true, + headers: &headers, + proxy: None, + server_certs: Some(server_certs), + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); } /// create client with invalid certificate, expect panic @@ -192,18 +244,68 @@ mod tests { #[should_panic] fn client_with_invalid_server_cert() { let headers = HashMap::new(); + let server_certs = vec!["tests/mutual-auth/certs/client/client.key".to_string()]; + let client_config = ClientConfig { + timeout: 0, + user_agent: "stuff", + redirects: true, + insecure: true, + headers: &headers, + proxy: None, + server_certs: Some(server_certs), + client_cert: None, + client_key: None, + scope: &Vec::new(), + }; + initialize(client_config).unwrap(); + } - initialize( - 0, - "stuff", - true, - true, - &headers, - None, - vec!["tests/mutual-auth/certs/client/client.key".to_string()], - None, - None, - ) - .unwrap(); + #[test] + /// test that scope-aware client can be created with valid parameters + fn initialize_with_scope_creates_client() { + let headers = HashMap::new(); + let scope = vec![ + Url::parse("https://api.example.com").unwrap(), + Url::parse("https://cdn.example.com").unwrap(), + ]; + + let client_config = ClientConfig { + timeout: 5, + user_agent: "test-agent", + redirects: true, + insecure: false, + headers: &headers, + proxy: None, + server_certs: Option::>::None, + client_cert: None, + client_key: None, + scope: &scope, + }; + let client = initialize(client_config); + + assert!(client.is_ok()); + } + + #[test] + /// test that scope-aware client works without scope (should use default behavior) + fn initialize_with_scope_empty_scope() { + let headers = HashMap::new(); + let scope = vec![]; + + let client_config = ClientConfig { + timeout: 5, + user_agent: "test-agent", + redirects: true, + insecure: false, + headers: &headers, + proxy: None, + server_certs: Option::>::None, + client_cert: None, + client_key: None, + scope: &scope, + }; + let client = initialize(client_config); + + assert!(client.is_ok()); } } diff --git a/src/config/container.rs b/src/config/container.rs index 376153c..9181ef0 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -24,6 +24,7 @@ use std::{ collections::HashMap, env::{current_dir, current_exe}, fs::read_to_string, + io::BufRead, path::{Path, PathBuf}, }; use url::form_urlencoded; @@ -245,6 +246,10 @@ pub struct Configuration { #[serde(default)] pub stdin: bool, + /// Cached stdin contents to facilitate populating scope from stdin targets + #[serde(skip)] + pub cached_stdin: Vec, + /// Maximum recursion depth, a depth of 0 is infinite recursion #[serde(default = "depth")] pub depth: usize, @@ -310,6 +315,10 @@ pub struct Configuration { #[serde(with = "serde_regex", default)] pub regex_denylist: Vec, + /// Allowed domains/URLs for redirects and link extraction + #[serde(default)] + pub scope: Vec, + /// Automatically discover extensions and add them to --extensions (unless they're in --dont-collect) #[serde(default)] pub collect_extensions: bool, @@ -367,18 +376,20 @@ impl Default for Configuration { fn default() -> Self { let timeout = timeout(); let user_agent = user_agent(); - let client = client::initialize( + let headers = HashMap::new(); + let client_config = client::ClientConfig { timeout, - &user_agent, - false, - false, - &HashMap::new(), - None, - Vec::::new(), - None, - None, - ) - .expect("Could not build client"); + user_agent: &user_agent, + redirects: false, + insecure: false, + headers: &headers, + proxy: None, + server_certs: Option::>::None, + client_cert: None, + client_key: None, + scope: &Vec::new(), // no scope by default + }; + let client = client::initialize(client_config).expect("Could not build client"); let replay_client = None; let status_codes = status_codes(); let replay_codes = status_codes.clone(); @@ -444,6 +455,8 @@ impl Default for Configuration { filter_regex: Vec::new(), url_denylist: Vec::new(), regex_denylist: Vec::new(), + scope: Vec::new(), + cached_stdin: Vec::new(), filter_line_count: Vec::new(), filter_word_count: Vec::new(), filter_status: Vec::new(), @@ -495,6 +508,7 @@ impl Configuration { /// - **data**: `None` /// - **url_denylist**: `None` /// - **regex_denylist**: `None` + /// - **scope**: `None` /// - **filter_size**: `None` /// - **filter_similar**: `None` /// - **filter_regex**: `None` @@ -677,7 +691,17 @@ impl Configuration { update_config_if_present!(&mut config.debug_log, args, "debug_log", String); update_config_if_present!(&mut config.resume_from, args, "resume_from", String); update_config_if_present!(&mut config.request_file, args, "request_file", String); - update_config_if_present!(&mut config.protocol, args, "protocol", String); + + // both target-url and scope rely on this value to help parse relative urls + // so this logic must stay above target/scope parsing in this fn + if let Some(proto) = args.get_one::("protocol") { + if proto != "http" && proto != "https" { + report_and_exit(&format!( + "Invalid value for --protocol: {proto}, must be 'http' or 'https'" + )); + } + config.protocol = proto.to_owned(); + } if let Ok(Some(inner)) = args.try_get_one::("time_limit") { inner.clone_into(&mut config.time_limit); @@ -770,10 +794,72 @@ impl Configuration { } } + /// internal helper to parse both scope urls and target urls + fn parse_url_with_no_base_correction( + config: &Configuration, + url: &str, + ) -> Result { + // Url::parse fails if the url is relative (ex: example.com) instead of absolute + // (ex: https://example.com). In the case of a relative url, we can prepend + // "https://" (or whatever the user provided to --protocol) and try again + match parse_url_with_raw_path(url.trim_end_matches('/')) { + Ok(absolute) => Ok(absolute), + Err(err) => { + log::debug!("Initial url parse failed: {err}"); + + // user provided a relative url, which we can massage into an absolute + // url by prepending the config.protocol (which is parsed earlier in the outer + // function, meaning we'll get the actual protocol if the user specified + // one, otherwise it'll be the default "https") + let url_with_scheme = + format!("{}://{}", config.protocol, url.trim_end_matches('/')); + + match parse_url_with_raw_path(&url_with_scheme) { + Ok(url) => { + // successfully parsed the relative url after prepending the + // scheme, add it to the scope + Ok(url) + } + Err(err) => { + report_and_exit(&format!("Could not parse '{url}' as a url: {err}")); + } + } + } + } + } + if came_from_cli!(args, "stdin") { config.stdin = true; + + // read from stdin and cache it for later use, which allows us to still + // call get_targets in main without worrying about stdin being consumed + let cached_stdin = std::io::stdin() + .lock() + .lines() + .filter(|line| { + if let Ok(l) = line { + !l.trim().is_empty() + } else { + false + } + }) + .filter_map(|line| line.ok()) + .collect::>(); + + // if stdin is being used, we need to populate scope with the urls read from stdin + for line in &cached_stdin { + if let Ok(url) = parse_url_with_no_base_correction(&config, line) { + config.cached_stdin.push(url.as_str().to_string()); + config.scope.push(url); + } + } } else if let Some(url) = args.get_one::("url") { - config.target_url = url.into(); + if let Ok(parsed) = parse_url_with_no_base_correction(&config, url) { + config.target_url = parsed.as_str().to_string(); + config.scope.push(parsed); + } else { + config.target_url = url.into(); + } } if let Some(arg) = args.get_many::("url_denylist") { @@ -820,6 +906,16 @@ impl Configuration { } } + if let Some(arg) = args.get_many::("scope") { + // using a similar approach as above, we need to handle both absolute and relative URLs + // e.g. https://example.com or example.com + for scoped_url in arg { + if let Ok(url) = parse_url_with_no_base_correction(&config, scoped_url) { + config.scope.push(url); + } + } + } + if let Some(arg) = args.get_many::("filter_regex") { config.filter_regex = arg.map(|val| val.to_string()).collect(); } @@ -1160,36 +1256,38 @@ impl Configuration { || client_cert.is_some() || client_key.is_some() { - configuration.client = client::initialize( - configuration.timeout, - &configuration.user_agent, - configuration.redirects, - configuration.insecure, - &configuration.headers, + let client_config = client::ClientConfig { + timeout: configuration.timeout, + user_agent: &configuration.user_agent, + redirects: configuration.redirects, + insecure: configuration.insecure, + headers: &configuration.headers, proxy, - server_certs, + server_certs: Some(server_certs), client_cert, client_key, - ) - .expect("Could not rebuild client"); + scope: &configuration.scope, + }; + configuration.client = + client::initialize(client_config).expect("Could not rebuild client"); } if !configuration.replay_proxy.is_empty() { // only set replay_client when replay_proxy is set - configuration.replay_client = Some( - client::initialize( - configuration.timeout, - &configuration.user_agent, - configuration.redirects, - configuration.insecure, - &configuration.headers, - Some(&configuration.replay_proxy), - server_certs, - client_cert, - client_key, - ) - .expect("Could not rebuild client"), - ); + let client_config = client::ClientConfig { + timeout: configuration.timeout, + user_agent: &configuration.user_agent, + redirects: configuration.redirects, + insecure: configuration.insecure, + headers: &configuration.headers, + proxy: Some(&configuration.replay_proxy), + server_certs: Some(server_certs), + client_cert, + client_key, + scope: &configuration.scope, + }; + configuration.replay_client = + Some(client::initialize(client_config).expect("Could not rebuild client")); } } @@ -1250,6 +1348,7 @@ impl Configuration { update_if_not_default!(&mut conf.methods, new.methods, methods()); update_if_not_default!(&mut conf.data, new.data, Vec::::new()); update_if_not_default!(&mut conf.url_denylist, new.url_denylist, Vec::::new()); + update_if_not_default!(&mut conf.scope, new.scope, Vec::::new()); update_if_not_default!(&mut conf.update_app, new.update_app, false); if !new.regex_denylist.is_empty() { // cant use the update_if_not_default macro due to the following error @@ -1327,6 +1426,11 @@ impl Configuration { new.dont_collect, ignored_extensions() ); + update_if_not_default!( + &mut conf.cached_stdin, + new.cached_stdin, + Vec::::new() + ); } /// If present, read in `DEFAULT_CONFIG_NAME` and deserialize the specified values @@ -1334,7 +1438,8 @@ impl Configuration { /// uses serde to deserialize the toml into a `Configuration` struct pub(super) fn parse_config(config_file: PathBuf) -> Result { let content = read_to_string(config_file)?; - let mut config: Self = toml::from_str(content.as_str())?; + let mut config: Self = toml::from_str(content.as_str()) + .with_context(|| fmt_err("Could not parse config file"))?; if !config.extensions.is_empty() { // remove leading periods, if any are found diff --git a/src/config/tests.rs b/src/config/tests.rs index 56b1840..a99a45a 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -38,6 +38,7 @@ fn setup_config_test() -> Configuration { methods = ["GET", "PUT", "DELETE"] data = [31, 32, 33, 34] url_denylist = ["http://dont-scan.me", "https://also-not.me"] + scope = ["http://example.com", "https://other.com"] regex_denylist = ["/deny.*"] headers = {stuff = "things", mostuff = "mothings"} queries = [["name","value"], ["rick", "astley"]] @@ -122,6 +123,7 @@ fn default_configuration() { assert_eq!(config.methods, vec!["GET"]); assert_eq!(config.data, Vec::::new()); assert_eq!(config.url_denylist, Vec::::new()); + assert_eq!(config.scope, Vec::::new()); assert_eq!(config.dont_collect, ignored_extensions()); assert_eq!(config.filter_regex, Vec::::new()); assert_eq!(config.filter_similar, Vec::::new()); @@ -407,6 +409,19 @@ fn config_reads_url_denylist() { ); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_scope() { + let config = setup_config_test(); + assert_eq!( + config.scope, + vec![ + Url::parse("http://example.com").unwrap(), + Url::parse("https://other.com").unwrap(), + ] + ); +} + #[test] /// parse the test config and see that the value parsed is correct fn config_reads_filter_regex() { diff --git a/src/config/utils.rs b/src/config/utils.rs index 8b88821..a0f1bcd 100644 --- a/src/config/utils.rs +++ b/src/config/utils.rs @@ -509,12 +509,13 @@ pub fn parse_request_file(config: &mut Configuration) -> Result<()> { url.set_fragment(None); config.target_url = url.to_string(); + config.scope.push(url); } else { // uri in request line is not a valid URL, so it's most likely a path/relative url // we need to combine it with the host header for (key, value) in &config.headers { if key.to_lowercase() == "host" { - config.target_url = format!("{value}{uri}"); + config.target_url = format!("{}://{value}{uri}", config.protocol); break; } } @@ -523,6 +524,15 @@ pub fn parse_request_file(config: &mut Configuration) -> Result<()> { bail!("Invalid request: Missing Host header and request line URI isn't a full URL"); } + if let Ok(url) = parse_url_with_raw_path(&config.target_url) { + config.scope.push(url); + } else { + bail!( + "Invalid request: Could not parse target URL {}", + config.target_url + ); + } + // need to parse queries from the uri, if any are present let mut uri_parts = uri.splitn(2, '?'); @@ -1153,7 +1163,7 @@ mod tests { let result = parse_request_file(&mut tmp.config); assert!(result.is_ok()); - assert_eq!(tmp.config.target_url, "example.com/srv"); + assert_eq!(tmp.config.target_url, "https://example.com/srv"); tmp.cleanup(); Ok(()) diff --git a/src/event_handlers/inputs.rs b/src/event_handlers/inputs.rs index 2e394fd..caff0b9 100644 --- a/src/event_handlers/inputs.rs +++ b/src/event_handlers/inputs.rs @@ -78,7 +78,10 @@ impl TermInputHandler { pub fn sigint_handler(handles: Arc) -> Result<()> { log::trace!("enter: sigint_handler({handles:?})"); - let filename = if !handles.config.target_url.is_empty() { + // check for STATE_FILENAME env var first; credit to Tobias Rauch for the idea + let filename = if let Ok(path) = std::env::var("STATE_FILENAME") { + path + } else if !handles.config.target_url.is_empty() { // target url populated slugify_filename(&handles.config.target_url, "ferox", "state") } else { diff --git a/src/extractor/container.rs b/src/extractor/container.rs index 148350e..2ad850e 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -11,7 +11,7 @@ use crate::{ StatError::Other, StatField::{LinksExtracted, TotalExpected}, }, - url::FeroxUrl, + url::{FeroxUrl, UrlExt}, utils::{ logged_request, make_request, parse_url_with_raw_path, send_try_recursion_command, should_deny_url, @@ -116,24 +116,20 @@ impl<'a> Extractor<'a> { /// wrapper around logic that performs the following: /// - parses `url_to_parse` - /// - bails if the parsed url doesn't belong to the original host/domain + /// - bails if the parsed url doesn't belong to the list of in-scope urls /// - otherwise, calls `add_all_sub_paths` with the parsed result fn parse_url_and_add_subpaths( &self, url_to_parse: &str, - original_url: &Url, links: &mut HashSet, ) -> Result<()> { log::trace!("enter: parse_url_and_add_subpaths({links:?})"); match parse_url_with_raw_path(url_to_parse) { Ok(absolute) => { - if absolute.domain() != original_url.domain() - || absolute.host() != original_url.host() - { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - bail!("parsed url does not belong to original domain/host"); + if !absolute.is_in_scope(&self.handles.config.scope) { + // URL is not in scope based on domain/scope configuration + bail!("parsed url is not in scope"); } if self.add_all_sub_paths(absolute.path(), links).is_err() { @@ -145,6 +141,9 @@ impl<'a> Extractor<'a> { // ex: Url::parse("/login") -> Err("relative URL without a base") // while this is technically an error, these are good results for us if e.to_string().contains("relative URL without a base") { + // scope for these should be enforced in add_all_sub_paths since + // we join the fragment with the base url there and can check + // the full Url against scope if self.add_all_sub_paths(url_to_parse, links).is_err() { log::warn!("could not add sub-paths from {url_to_parse} to {links:?}"); } @@ -359,10 +358,7 @@ impl<'a> Extractor<'a> { // capture[0] is the entire match, additional capture groups start at [1] let link = capture[0].trim_matches(|c| c == '\'' || c == '"'); - if self - .parse_url_and_add_subpaths(link, response_url, links) - .is_err() - { + if self.parse_url_and_add_subpaths(link, links).is_err() { // purposely not logging the error here, due to the frequency with which it gets hit } } @@ -503,10 +499,9 @@ impl<'a> Extractor<'a> { .join(link) .with_context(|| format!("Could not join {old_url} with {link}"))?; - if old_url.domain() != new_url.domain() || old_url.host() != new_url.host() { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - log::debug!("Skipping {new_url} because it's not part of the original target",); + if !new_url.is_in_scope(&self.handles.config.scope) { + // URL is not in scope based on domain/scope configuration + log::debug!("Skipping {new_url} because it's not in scope"); log::trace!("exit: add_link_to_set_of_links"); return Ok(()); } @@ -615,10 +610,7 @@ impl<'a> Extractor<'a> { if let Some(link) = tag.value().attr(html_attr) { log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str()); - if self - .parse_url_and_add_subpaths(link, resp_url, links) - .is_err() - { + if self.parse_url_and_add_subpaths(link, links).is_err() { log::debug!("link didn't belong to the target domain/host: {link}"); } } @@ -665,17 +657,19 @@ impl<'a> Extractor<'a> { Some(self.handles.config.client_key.as_str()) }; - client = client::initialize( - self.handles.config.timeout, - &self.handles.config.user_agent, - follow_redirects, - self.handles.config.insecure, - &self.handles.config.headers, + let client_config = client::ClientConfig { + timeout: self.handles.config.timeout, + user_agent: &self.handles.config.user_agent, + redirects: follow_redirects, + insecure: self.handles.config.insecure, + headers: &self.handles.config.headers, proxy, - server_certs, + server_certs: Some(server_certs), client_cert, client_key, - )?; + scope: &self.handles.config.scope, + }; + client = client::initialize(client_config)?; } let client = if location != "/robots.txt" { diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 3173750..664d615 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -51,7 +51,12 @@ fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc) -> E .target(ExtractionTarget::DirectoryListing), }; - let config = Arc::new(Configuration::new().unwrap()); + // need to add scope to the config to allow extracted links to make it through the + // full pipeline + let mut config = Configuration::new().unwrap(); + config.scope.push(Url::parse("http://localhost").unwrap()); + + let config = Arc::new(config); let handles = Arc::new(Handles::for_testing(Some(scanned_urls), Some(config)).0); builder.handles(handles).build().unwrap() diff --git a/src/main.rs b/src/main.rs index 8620d05..45b99ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ -use std::io::stdin; use std::{ env::{ args, @@ -146,7 +145,7 @@ async fn get_targets(handles: Arc) -> Result> { let mut targets = vec![]; - if handles.config.stdin { + if handles.config.stdin && handles.config.cached_stdin.is_empty() { // got targets from stdin, i.e. cat sites | ./feroxbuster ... // just need to read the targets from stdin and spawn a future for each target found let stdin = io::stdin(); // tokio's stdin, not std @@ -155,6 +154,10 @@ async fn get_targets(handles: Arc) -> Result> { while let Some(line) = reader.next().await { targets.push(line?); } + } else if !handles.config.cached_stdin.is_empty() { + // cached_stdin populated from config::container if --stdin was used + // keeping the if block above as a failsafe, but i dont think we'll hit it anymore + targets = handles.config.cached_stdin.clone(); } else if handles.config.resumed { // resume-from can't be used with --url, and --stdin is marked false for every resumed // scan, making it mutually exclusive from either of the other two options @@ -199,6 +202,9 @@ async fn get_targets(handles: Arc) -> Result> { if !target.starts_with("http") { // --url hackerone.com + // as of the 2.13.0 update, config::container handles both --url hackerone.com + // and urls coming in from --stdin. I think this is dead code now, but leaving + // it in just in case *target = format!("{}://{target}", handles.config.protocol); } } @@ -666,10 +672,10 @@ fn main() -> Result<()> { .contains("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") { // support the handful of tests that use `--stdin` - let targets: Vec<_> = if config.stdin { - stdin().lock().lines().map(|tgt| tgt.unwrap()).collect() - } else { + let targets: Vec<_> = if config.cached_stdin.is_empty() { vec!["http://localhost".to_string()] + } else { + config.cached_stdin.clone() }; // print the banner to stderr diff --git a/src/parser.rs b/src/parser.rs index 27a5541..8b4a707 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -296,6 +296,15 @@ pub fn initialize() -> Command { .use_value_delimiter(true) .help_heading("Request filters") .help("URL(s) or Regex Pattern(s) to exclude from recursion/scans"), + ).arg( + Arg::new("scope") + .long("scope") + .value_name("URL") + .num_args(1..) + .action(ArgAction::Append) + .use_value_delimiter(true) + .help_heading("Request filters") + .help("Additional domains/URLs to consider in-scope for scanning (in addition to current domain)"), ); ///////////////////////////////////////////////////////////////////// diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index 374cf96..b0c92ab 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -529,6 +529,7 @@ fn feroxstates_feroxserialize_implementation() { r#""time_limit":"""#, r#""filter_similar":[]"#, r#""url_denylist":[]"#, + r#""scope":[]"#, r#""responses""#, r#""type":"response""#, r#""client_cert":"""#, diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index da8f350..bb45d4e 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -255,17 +255,17 @@ impl Requester { async fn set_rate_limiter(&self, new_limit: Option) -> Result<()> { let mut guard = self.rate_limiter.write().await; - let new_bucket = if new_limit.is_none() { + let new_bucket = if let Some(limit) = new_limit { + if guard.is_some() && guard.as_ref().unwrap().max() == limit { + // this function is called more often than i'd prefer due to Send requirements of + // mutex/rwlock primitives and awaits, this will minimize the cost of the extra calls + return Ok(()); + } else { + Some(Self::build_a_bucket(limit)?) + } + } else { // got None, need to remove the rate_limiter None - } else if guard.is_some() && guard.as_ref().unwrap().max() == new_limit.unwrap() { - // new_limit is checked for None in first branch, should be fine to unwrap - - // this function is called more often than i'd prefer due to Send requirements of - // mutex/rwlock primitives and awaits, this will minimize the cost of the extra calls - return Ok(()); - } else { - Some(Self::build_a_bucket(new_limit.unwrap())?) }; let _ = std::mem::replace(&mut *guard, new_bucket); diff --git a/src/url.rs b/src/url.rs index e1d13bd..d4c9f66 100644 --- a/src/url.rs +++ b/src/url.rs @@ -5,6 +5,82 @@ use reqwest::Url; use std::collections::HashSet; use std::{fmt, sync::Arc}; +/// Trait extension for reqwest::Url to add scope checking functionality +pub trait UrlExt { + /// Check if this URL is allowed based on scope configuration + /// + /// A URL is considered in-scope if: + /// 1. It belongs to the same domain as an in-scope url, OR + /// 2. It belongs to a subdomain of an in-scope url + /// + /// note: the scope list passed in is populated from either --url or --stdin + /// as well as --scope. This means we don't have to worry about checking + /// against the original target url, as that is already in the scope list + fn is_in_scope(&self, scope: &[Url]) -> bool; + + /// Check if this URL is a subdomain of the given parent domain + fn is_subdomain_of(&self, parent_url: &Url) -> bool; +} + +impl UrlExt for Url { + fn is_in_scope(&self, scope: &[Url]) -> bool { + log::trace!("enter: is_in_scope({}, scope: {:?})", self.as_str(), scope); + + if scope.is_empty() { + log::error!("is_in_scope check failed (scope is empty, this should not happen)"); + log::trace!("exit: is_in_scope -> false"); + return false; + } + + for url in scope { + if self.host() == url.host() { + log::trace!("exit: is_in_scope -> true (same domain/host)"); + return true; + } + + if self.is_subdomain_of(url) { + log::trace!("exit: is_in_scope -> true (subdomain)"); + return true; + } + } + + log::trace!("exit: is_in_scope -> false"); + false + } + + fn is_subdomain_of(&self, parent_url: &Url) -> bool { + if let (Some(url_domain), Some(parent_domain)) = (self.domain(), parent_url.domain()) { + let candidate = url_domain.to_lowercase(); + let candidate = candidate.trim_end_matches('.'); + + let parent = parent_domain.to_lowercase(); + let parent = parent.trim_end_matches('.'); + + if candidate == parent { + // same domain is not a subdomain + return false; + } + + let candidate_parts: Vec<&str> = candidate.split('.').collect(); + let parent_parts: Vec<&str> = parent.split('.').collect(); + + if candidate_parts.len() <= parent_parts.len() { + // candidate has fewer or equal parts than parent, so it can't be a subdomain + return false; + } + + // check if parent parts match the rightmost parts of candidate + candidate_parts + .iter() + .rev() + .zip(parent_parts.iter().rev()) + .all(|(c, p)| c == p) + } else { + false + } + } +} + /// abstraction around target urls; collects all Url related shenanigans in one place #[derive(Debug)] pub struct FeroxUrl { @@ -489,4 +565,186 @@ mod tests { Err(err) => panic!("{}", err.to_string()), } } + + #[test] + /// test is_in_scope function to ensure that it checks for presence within scope list + fn test_is_in_scope() { + let url = Url::parse("http://localhost").unwrap(); + let scope = vec![ + Url::parse("http://localhost").unwrap(), + Url::parse("http://example.com").unwrap(), + ]; + + assert!(url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope function to ensure that it checks that a subdomain of a domain within + /// the scope list returns true + fn test_is_in_scope_subdomain() { + let url = Url::parse("http://sub.localhost").unwrap(); + let scope = vec![ + Url::parse("http://localhost").unwrap(), + Url::parse("http://example.com").unwrap(), + ]; + + assert!(url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope returns false when url is not in scope + fn test_is_in_scope_not_in_scope() { + let url = Url::parse("http://notinscope.com").unwrap(); + let scope = vec![ + Url::parse("http://localhost").unwrap(), + Url::parse("http://example.com").unwrap(), + ]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with empty scope returns false + fn test_is_in_scope_empty_scope() { + let url = Url::parse("http://localhost").unwrap(); + let scope: Vec = vec![]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with domain-only scope entry (not a URL) + fn test_is_in_scope_domain_only_scope() { + let url = Url::parse("http://example.com").unwrap(); + let scope = vec![Url::parse("http://example.com").unwrap()]; + + assert!(url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with subdomain and domain-only scope entry + fn test_is_in_scope_subdomain_domain_only_scope() { + let url = Url::parse("http://sub.example.com").unwrap(); + let scope = vec![Url::parse("http://example.com").unwrap()]; + + assert!(url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with URL that has no domain + fn test_is_in_scope_no_domain() { + // This creates a URL that may not have a domain (like a file:// URL) + let url = Url::parse("file:///path/to/file").unwrap(); + let scope = vec![Url::parse("http://example.com").unwrap()]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_subdomain_of basic functionality + fn test_is_subdomain_of_true() { + let subdomain_url = Url::parse("http://sub.example.com").unwrap(); + let parent_url = Url::parse("http://example.com").unwrap(); + + assert!(subdomain_url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_subdomain_of returns false for same domain + fn test_is_subdomain_of_same_domain() { + let url = Url::parse("http://example.com").unwrap(); + let parent_url = Url::parse("http://example.com").unwrap(); + + assert!(!url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_subdomain_of returns false for different domain + fn test_is_subdomain_of_different_domain() { + let url = Url::parse("http://other.com").unwrap(); + let parent_url = Url::parse("http://example.com").unwrap(); + + assert!(!url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_subdomain_of with multi-level subdomain + fn test_is_subdomain_of_multi_level() { + let subdomain_url = Url::parse("http://deep.sub.example.com").unwrap(); + let parent_url = Url::parse("http://example.com").unwrap(); + + assert!(subdomain_url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_subdomain_of with URLs that have no domain + fn test_is_subdomain_of_no_domain() { + let url = Url::parse("file:///path/to/file").unwrap(); + let parent_url = Url::parse("http://example.com").unwrap(); + + assert!(!url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_subdomain_of where parent has no domain + fn test_is_subdomain_of_parent_no_domain() { + let url = Url::parse("http://example.com").unwrap(); + let parent_url = Url::parse("file:///path/to/file").unwrap(); + + assert!(!url.is_subdomain_of(&parent_url)); + } + + #[test] + /// test is_in_scope with same domain/host + fn test_is_not_in_empty_scope() { + let url = Url::parse("http://example.com/path").unwrap(); + let scope: Vec = Vec::new(); + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with subdomain + fn test_is_in_scope_subdomain_with_empty_scope() { + let url = Url::parse("http://sub.example.com").unwrap(); + let scope: Vec = vec![]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with scope match + fn test_is_in_scope_scope_match() { + let url = Url::parse("http://other.com").unwrap(); + let scope = vec![Url::parse("http://other.com").unwrap()]; + + assert!(url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope returns false when not in scope + fn test_is_in_scope_not_allowed() { + let url = Url::parse("http://notallowed.com").unwrap(); + let scope = vec![Url::parse("http://other.com").unwrap()]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with empty scope and different domain + fn test_is_in_scope_empty_scope_different_domain() { + let url = Url::parse("http://other.com").unwrap(); + let scope: Vec = vec![]; + + assert!(!url.is_in_scope(&scope)); + } + + #[test] + /// test is_in_scope with subdomain in scope + fn test_is_in_scope_subdomain_in_scope() { + let url = Url::parse("http://sub.allowed.com").unwrap(); + let scope = vec![Url::parse("http://allowed.com").unwrap()]; + + assert!(url.is_in_scope(&scope)); + } } diff --git a/src/utils.rs b/src/utils.rs index 4360ceb..bd7843f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -607,7 +607,7 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String { String::new() }; - let slug = url.replace("://", "_").replace(['/', '.'], "_"); + let slug = url.replace("://", "_").replace(['/', '.', ':'], "_"); let filename = format!("{altered_prefix}{slug}-{ts}.{suffix}"); diff --git a/tests/test_banner.rs b/tests/test_banner.rs index 690812b..d5720d2 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -151,6 +151,39 @@ fn banner_prints_denied_urls() { ); } +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + multiple scope url entries +fn banner_prints_scope_urls() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--scope") + .arg("example.com") + .arg("api.example.com") + .arg("sub.example.com") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("In-Scope Url")) + .and(predicate::str::contains("example.com")) + .and(predicate::str::contains("api.example.com")) + .and(predicate::str::contains("sub.example.com")) + .and(predicate::str::contains("─┴─")), + ); +} + #[test] /// test allows non-existent wordlist to trigger the banner printing to stderr /// expect to see all mandatory prints + multiple headers @@ -1667,34 +1700,6 @@ fn banner_prints_scan_dir_listings() { ); } -#[test] -/// test allows non-existent wordlist to trigger the banner printing to stderr -/// expect to see all mandatory prints + protocol -fn banner_prints_protocol() { - Command::cargo_bin("feroxbuster") - .unwrap() - .arg("--url") - .arg("localhost") - .arg("--protocol") - .arg("http") - .arg("--wordlist") - .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") - .assert() - .success() - .stderr( - predicate::str::contains("─┬─") - .and(predicate::str::contains("Target Url")) - .and(predicate::str::contains("http://localhost")) - .and(predicate::str::contains("Threads")) - .and(predicate::str::contains("Wordlist")) - .and(predicate::str::contains("Status Codes")) - .and(predicate::str::contains("Timeout (secs)")) - .and(predicate::str::contains("User-Agent")) - .and(predicate::str::contains("Default Protocol")) - .and(predicate::str::contains("─┴─")), - ); -} - #[test] /// test allows non-existent wordlist to trigger the banner printing to stderr /// expect to see all mandatory prints + protocol diff --git a/tests/test_heuristics.rs b/tests/test_heuristics.rs index bbc2af3..45bb7a7 100644 --- a/tests/test_heuristics.rs +++ b/tests/test_heuristics.rs @@ -21,7 +21,7 @@ fn test_single_target_cannot_connect() -> Result<(), Box> .assert() .success() .stdout( - predicate::str::contains("Could not connect to http://fjdksafjkdsajfkdsajkfdsajkfsdjkdsfdsafdsafdsajkr3l2ajfdskafdsjk, skipping...", ) + predicate::str::contains("Could not connect to http://fjdksafjkdsajfkdsajkfdsajkfsdjkdsfdsafdsafdsajkr3l2ajfdskafdsjk/, skipping...", ) ); teardown_tmp_directory(tmp_dir); @@ -47,7 +47,7 @@ fn test_two_targets_cannot_connect() -> Result<(), Box> { .assert() .success() .stdout( - predicate::str::contains("Could not connect to http://fjdksafjkdsajfkdsajkfdsajkfsdjkdsfdsafdsafdsajkr3l2ajfdskafdsjk, skipping...", ) + predicate::str::contains("Could not connect to http://fjdksafjkdsajfkdsajkfdsajkfsdjkdsfdsafdsafdsajkr3l2ajfdskafdsjk/, skipping...", ) ); teardown_tmp_directory(tmp_dir);