Compare commits

...

30 Commits

Author SHA1 Message Date
epi
1cf37e38a2 Merge pull request #884 from epi052/878-support-raw-urls
878 support raw urls
2023-04-26 06:59:04 -05:00
epi
9876759606 nitpickery 2023-04-26 06:45:13 -05:00
epi
4150b61a42 fixed windows logic 2023-04-26 06:33:43 -05:00
epi
16d34bbee0 bumped version to 2.9.5 2023-04-25 07:10:48 -05:00
epi
f1fd2fc379 updated Url::parse callsites to use the new utility function 2023-04-25 07:09:56 -05:00
epi
3dd070a0db fmt 2023-04-24 06:20:14 -05:00
epi
a3dc6c97a0 added workaround to add partial support for raw urls 2023-04-24 06:19:21 -05:00
epi
ec78ec3049 added ability to specify install directory for install-nix.sh 2023-04-19 17:15:50 -05:00
epi
960536e918 Merge pull request #879 from epi052/all-contributors/add-DrorDvash
docs: add DrorDvash as a contributor for bug
2023-04-19 08:05:15 -05:00
allcontributors[bot]
fdae9aa9d6 docs: update .all-contributorsrc [skip ci] 2023-04-19 13:03:50 +00:00
allcontributors[bot]
5c73c3fb23 docs: update README.md [skip ci] 2023-04-19 13:03:49 +00:00
epi
02ef6d7e3f Merge pull request #877 from epi052/update-indicatif-finally
Random improvements
2023-04-19 07:59:47 -05:00
epi
3378246820 updated arm release names for --update fix 2023-04-19 07:46:43 -05:00
epi
692db93048 clippy/tests and added logic to wait for link extraction if done 2023-04-19 06:57:36 -05:00
epi
233cf99907 made link extraction req/resp async 2023-04-19 06:56:52 -05:00
epi
8cd9918b76 upgraded deps 2023-04-19 06:55:23 -05:00
epi
66bcbfc2f2 bumped version to 2.9.4 2023-04-19 06:51:35 -05:00
epi
8b127c0093 made 404-like req/resp async 2023-04-17 06:37:28 -05:00
epi
94de58d855 removed response body from mpsc traversal 2023-04-17 06:36:47 -05:00
epi
2b95b7be69 updated indicatif to 0.17.3 2023-04-17 06:26:59 -05:00
epi
e77c1314b1 Merge pull request #869 from epi052/auto-filtering-account-for-extensions
added extensions and status codes into auto filtering decision calculus
2023-04-11 19:07:53 -05:00
epi
1ced3b5d77 modified msg when dir listing is found with dont-extract 2023-04-11 18:48:18 -05:00
epi
b5472f5341 updated deps 2023-04-11 18:39:28 -05:00
epi
ea81600850 clippy 2023-04-11 18:36:37 -05:00
epi
4f679592b8 bumped version to 2.9.3 2023-04-11 18:34:02 -05:00
epi
b375893461 nitpickery 2023-04-11 18:32:56 -05:00
epi
e110f86f39 added extensions and status codes into auto filtering decision calculus 2023-04-11 18:29:12 -05:00
epi
c7498a7695 Merge pull request #839 from epi052/all-contributors/add-acut3
docs: add acut3 as a contributor for bug
2023-03-18 12:23:34 -05:00
allcontributors[bot]
f973baaba8 docs: update .all-contributorsrc [skip ci] 2023-03-18 17:23:25 +00:00
allcontributors[bot]
148982cdc4 docs: update README.md [skip ci] 2023-03-18 17:23:24 +00:00
30 changed files with 1431 additions and 714 deletions

View File

@@ -562,6 +562,24 @@
"contributions": [
"bug"
]
},
{
"login": "acut3",
"name": "Nicolas Christin",
"avatar_url": "https://avatars.githubusercontent.com/u/17295243?v=4",
"profile": "https://acut3.github.io/",
"contributions": [
"bug"
]
},
{
"login": "DrorDvash",
"name": "DrDv",
"avatar_url": "https://avatars.githubusercontent.com/u/8413651?v=4",
"profile": "https://github.com/DrorDvash",
"contributions": [
"bug"
]
}
],
"contributorsPerLine": 7,

View File

@@ -27,13 +27,13 @@ jobs:
- type: armv7
os: ubuntu-latest
target: armv7-unknown-linux-gnueabihf
name: armv7-feroxbuster
name: armv7-linux-feroxbuster
path: target/armv7-unknown-linux-gnueabihf/release/feroxbuster
pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
- type: aarch64
os: ubuntu-latest
target: aarch64-unknown-linux-gnu
name: aarch64-feroxbuster
name: aarch64-linux-feroxbuster
path: target/aarch64-unknown-linux-gnu/release/feroxbuster
pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
steps:

590
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "feroxbuster"
version = "2.9.2"
version = "2.9.5"
authors = ["Ben 'epi' Risher (@epi052)"]
license = "MIT"
edition = "2021"
@@ -22,47 +22,52 @@ build = "build.rs"
maintenance = { status = "actively-developed" }
[build-dependencies]
clap = { version = "4.1.8", features = ["wrap_help", "cargo"] }
clap_complete = "4.1.4"
regex = "1.5.5"
lazy_static = "1.4.0"
dirs = "4.0.0"
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
clap_complete = "4.1"
regex = "1.5"
lazy_static = "1.4"
dirs = "5.0"
[dependencies]
scraper = "0.15.0"
futures = "0.3.26"
tokio = { version = "1.26.0", features = ["full"] }
tokio-util = { version = "0.7.7", features = ["codec"] }
log = "0.4.17"
env_logger = "0.10.0"
reqwest = { version = "0.11.10", features = ["socks"] }
scraper = "0.16"
futures = "0.3"
tokio = { version = "1.26", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec"] }
log = "0.4"
env_logger = "0.10"
reqwest = { version = "0.11", features = ["socks"] }
# uses feature unification to add 'serde' to reqwest::Url
url = { version = "2.2.2", features = ["serde"] }
serde_regex = "1.1.0"
clap = { version = "4.1.8", features = ["wrap_help", "cargo"] }
lazy_static = "1.4.0"
toml = "0.7.2"
serde = { version = "1.0.137", features = ["derive", "rc"] }
serde_json = "1.0.94"
uuid = { version = "1.3.0", features = ["v4"] }
indicatif = "0.15"
console = "0.15.2"
url = { version = "2.2", features = ["serde"] }
serde_regex = "1.1"
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
lazy_static = "1.4"
toml = "0.7"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
uuid = { version = "1.3", features = ["v4"] }
indicatif = "0.17"
console = "0.15"
openssl = { version = "0.10", features = ["vendored"] }
dirs = "4.0.0"
regex = "1.5.5"
crossterm = "0.26.0"
rlimit = "0.9.1"
ctrlc = "3.2.2"
anyhow = "1.0.69"
leaky-bucket = "0.12.1"
gaoya = "0.1.2"
self_update = {version = "0.36.0", features = ["archive-tar", "compression-flate2", "archive-zip", "compression-zip-deflate"]}
dirs = "5.0"
regex = "1.5"
crossterm = "0.26"
rlimit = "0.9"
ctrlc = "3.2"
anyhow = "1.0"
leaky-bucket = "0.12"
gaoya = "0.1"
self_update = { version = "0.36", features = [
"archive-tar",
"compression-flate2",
"archive-zip",
"compression-zip-deflate",
] }
[dev-dependencies]
tempfile = "3.3.0"
httpmock = "0.6.6"
assert_cmd = "2.0.4"
predicates = "3.0.1"
tempfile = "3.3"
httpmock = "0.6"
assert_cmd = "2.0"
predicates = "3.0"
[profile.release]
lto = true

View File

@@ -11,7 +11,7 @@ rm ferox-*.state
# dependency management
[tasks.upgrade-deps]
command = "cargo"
args = ["upgrade", "--exclude", "indicatif"]
args = ["upgrade"]
[tasks.update]
command = "cargo"

View File

@@ -97,8 +97,14 @@ sudo apt update && sudo apt install -y feroxbuster
#### Linux (32 and 64-bit) & MacOS
Install to a particular directory
```
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/master/install-nix.sh | bash
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/main/install-nix.sh | bash -s $HOME/.local/bin
```
Install to current working directory
```
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/main/install-nix.sh | bash
```
#### MacOS via Homebrew
@@ -278,6 +284,8 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Luoooio"><img src="https://avatars.githubusercontent.com/u/26653157?v=4?s=100" width="100px;" alt="Luoooio"/><br /><sub><b>Luoooio</b></sub></a><br /><a href="#ideas-Luoooio" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://petruknisme.com"><img src="https://avatars.githubusercontent.com/u/6284204?v=4?s=100" width="100px;" alt="Aan"/><br /><sub><b>Aan</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/commits?author=aancw" title="Code">💻</a> <a href="#infra-aancw" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="#ideas-aancw" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/imBigo"><img src="https://avatars.githubusercontent.com/u/54672433?v=4?s=100" width="100px;" alt="Simon"/><br /><sub><b>Simon</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3AimBigo" title="Bug reports">🐛</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://acut3.github.io/"><img src="https://avatars.githubusercontent.com/u/17295243?v=4?s=100" width="100px;" alt="Nicolas Christin"/><br /><sub><b>Nicolas Christin</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3Aacut3" title="Bug reports">🐛</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/DrorDvash"><img src="https://avatars.githubusercontent.com/u/8413651?v=4?s=100" width="100px;" alt="DrDv"/><br /><sub><b>DrDv</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3ADrorDvash" title="Bug reports">🐛</a></td>
</tr>
</tbody>
</table>

View File

@@ -13,13 +13,13 @@ LIN64_URL="$BASE_URL/$LIN64_ZIP"
EMOJI_URL=https://gist.github.com/epi052/8196b550ea51d0907ad4b93751b1b57d/raw/6112c9f32ae07922983fdc549c54fd3fb9a38e4c/NotoColorEmoji.ttf
echo "[+] Installing feroxbuster!"
INSTALL_DIR="${1:-$(pwd)}"
echo "[+] Installing feroxbuster to ${INSTALL_DIR}!"
which unzip &>/dev/null
if [ "$?" = "0" ]; then
echo "[+] unzip found"
else
echo "[ ] unzip not found, exiting. "
if [ "$?" != "0" ]; then
echo "[!] unzip not found, exiting. "
exit -1
fi
@@ -27,20 +27,20 @@ if [[ "$(uname)" == "Darwin" ]]; then
echo "[=] Found MacOS, downloading from $MAC_URL"
curl -sLO "$MAC_URL"
unzip -o "$MAC_ZIP" >/dev/null
unzip -o "$MAC_ZIP" -d "${INSTALL_DIR}" >/dev/null
rm "$MAC_ZIP"
elif [[ "$(expr substr $(uname -s) 1 5)" == "Linux" ]]; then
if [[ $(getconf LONG_BIT) == 32 ]]; then
echo "[=] Found 32-bit Linux, downloading from $LIN32_URL"
curl -sLO "$LIN32_URL"
unzip -o "$LIN32_ZIP" >/dev/null
unzip -o "$LIN32_ZIP" -d "${INSTALL_DIR}" >/dev/null
rm "$LIN32_ZIP"
else
echo "[=] Found 64-bit Linux, downloading from $LIN64_URL"
curl -sLO "$LIN64_URL"
unzip -o "$LIN64_ZIP" >/dev/null
unzip -o "$LIN64_ZIP" -d "${INSTALL_DIR}" >/dev/null
rm "$LIN64_ZIP"
fi
@@ -60,6 +60,8 @@ elif [[ "$(expr substr $(uname -s) 1 5)" == "Linux" ]]; then
fi
fi
chmod +x ./feroxbuster
chmod +x "${INSTALL_DIR}/feroxbuster"
echo "[+] Installed feroxbuster version $(./feroxbuster -V)"
echo "[+] Installed feroxbuster"
echo " [-] path: ${INSTALL_DIR}/feroxbuster"
echo " [-] version: $(${INSTALL_DIR}/feroxbuster -V | awk '{print $2}')"

View File

@@ -24,8 +24,8 @@ _feroxbuster() {
'--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
'*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
'*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
'-a+[Sets the User-Agent (default: feroxbuster/2.9.2)]:USER_AGENT: ' \
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.2)]:USER_AGENT: ' \
'-a+[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
'*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
'*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
'*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \

View File

@@ -30,8 +30,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
[CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
[CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
[CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.2)')
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.2)')
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
[CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
[CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')

View File

@@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.2)'
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.2)'
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.5)'
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.5)'
cand -x 'File extension(s) to search for (ex: -x php -x pdf js)'
cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)'
cand -m 'Which HTTP request method(s) should be sent (default: GET)'

View File

@@ -2,12 +2,11 @@ use super::entry::BannerEntry;
use crate::{
config::Configuration,
event_handlers::Handles,
utils::{logged_request, status_colorizer},
utils::{logged_request, parse_url_with_raw_path, status_colorizer},
DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, VERSION,
};
use anyhow::{bail, Result};
use console::{style, Emoji};
use reqwest::Url;
use serde_json::Value;
use std::{io::Write, sync::Arc};
@@ -478,7 +477,7 @@ by Ben "epi" Risher {} ver: {}"#,
pub async fn check_for_updates(&mut self, url: &str, handles: Arc<Handles>) -> Result<()> {
log::trace!("enter: needs_update({}, {:?})", url, handles);
let api_url = Url::parse(url)?;
let api_url = parse_url_with_raw_path(url)?;
let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?;
let body = result.text().await?;

View File

@@ -6,7 +6,10 @@ use super::utils::{
use crate::config::determine_output_level;
use crate::config::utils::determine_requester_policy;
use crate::{
client, parser, scan_manager::resume_scan, traits::FeroxSerialize, utils::fmt_err,
client, parser,
scan_manager::resume_scan,
traits::FeroxSerialize,
utils::{fmt_err, parse_url_with_raw_path},
DEFAULT_CONFIG_NAME,
};
use anyhow::{anyhow, Context, Result};
@@ -673,7 +676,7 @@ impl Configuration {
for denier in arg {
// could be an absolute url or a regex, need to determine which and populate the
// appropriate vector
match Url::parse(denier.trim_end_matches('/')) {
match parse_url_with_raw_path(denier.trim_end_matches('/')) {
Ok(absolute) => {
// denier is an absolute url and can be parsed as such
config.url_denylist.push(absolute);

View File

@@ -242,14 +242,6 @@ impl TermOutHandler {
log::trace!("enter: process_response({:?}, {:?})", resp, call_type);
async move {
let should_filter = self
.handles
.as_ref()
.unwrap()
.filters
.data
.should_filter_response(&resp, tx_stats.clone());
let contains_sentry = if !self.config.filter_status.is_empty() {
// -C was used, meaning -s was not and we should ignore the defaults
// https://github.com/epi052/feroxbuster/issues/535
@@ -261,7 +253,7 @@ impl TermOutHandler {
};
let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown
let should_process_response = contains_sentry && unknown_sentry && !should_filter;
let should_process_response = contains_sentry && unknown_sentry;
if should_process_response {
// print to stdout

View File

@@ -16,7 +16,7 @@ use crate::{
use super::command::Command::AddToUsizeField;
use super::*;
use crate::statistics::StatField;
use reqwest::Url;
use crate::utils::parse_url_with_raw_path;
use tokio::time::Duration;
#[derive(Debug)]
@@ -266,7 +266,7 @@ impl ScanHandler {
let bar = scan.progress_bar();
// (4000 - 3000) / 2 => 500 words left to send
let length = bar.length();
let length = bar.length().unwrap_or(1);
let num_words_left = (length - bar.position()) / divisor;
// accumulate each bar's increment value for incrementing the total bar
@@ -325,7 +325,9 @@ impl ScanHandler {
self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan
};
if should_test_deny && should_deny_url(&Url::parse(&target)?, self.handles.clone())? {
if should_test_deny
&& should_deny_url(&parse_url_with_raw_path(&target)?, self.handles.clone())?
{
// response was caught by a user-provided deny list
// checking this last, since it's most susceptible to longer runtimes due to what
// input is received

View File

@@ -147,7 +147,7 @@ impl StatsHandler {
self.stats.errors(),
);
self.bar.set_message(&msg);
self.bar.set_message(msg);
if self.bar.position() < self.stats.total_expected() as u64 {
// don't run off the end when we're a few requests over the expected total

View File

@@ -11,16 +11,60 @@ use crate::{
StatField::{LinksExtracted, TotalExpected},
},
url::FeroxUrl,
utils::{logged_request, make_request, send_try_recursion_command, should_deny_url},
utils::{
logged_request, make_request, parse_url_with_raw_path, send_try_recursion_command,
should_deny_url,
},
ExtractionResult, DEFAULT_METHOD,
};
use anyhow::{bail, Context, Result};
use reqwest::{Client, StatusCode, Url};
use futures::StreamExt;
use reqwest::{Client, Response, StatusCode, Url};
use scraper::{Html, Selector};
use std::{borrow::Cow, collections::HashSet};
/// Request a single extracted link on behalf of the link extraction logic.
///
/// Steps performed, in order:
/// - build a new Url from `url` based on the given command line options
/// - return an error if that Url has already been seen/scanned
/// - return an error if a url/regex denylist is configured and `should_deny_url`
///   reports that the Url must not be requested
/// - otherwise send a `DEFAULT_METHOD` request through `logged_request`
///
/// On success the raw `Response` is handed back to the caller. Every rejection listed
/// above, as well as any failure while formatting the Url or sending the request, is
/// reported through the `Err` variant.
pub(super) async fn request_link(url: &str, handles: Arc<Handles>) -> Result<Response> {
    log::trace!("enter: request_link({})", url);

    // the given command line options decide the final shape of the url
    let target = FeroxUrl::from_string(url, handles.clone()).format("", None)?;

    let known_scans = handles.ferox_scans()?;
    let previously_seen = known_scans.get_scan_by_url(target.as_ref()).is_some();

    if previously_seen {
        // a url that's already known doesn't need to be scanned again
        log::trace!("exit: request_link -> None");
        bail!("previously seen url");
    }

    // the deny check only runs when at least one of the two denylists has entries
    let denylist_in_use =
        !(handles.config.url_denylist.is_empty() && handles.config.regex_denylist.is_empty());

    if denylist_in_use && should_deny_url(&target, handles.clone())? {
        // a denied url must never be requested
        bail!(
            "prevented request to {} due to {:?} || {:?}",
            url,
            handles.config.url_denylist,
            handles.config.regex_denylist,
        );
    }

    // all checks passed; send the request and hand the response to the caller
    let response = logged_request(&target, DEFAULT_METHOD, None, handles.clone()).await?;

    log::trace!("exit: request_link -> {:?}", response);
    Ok(response)
}
/// Whether an active scan is recursive or not
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum RecursionStatus {
/// Scan is recursive
Recursive,
@@ -81,7 +125,7 @@ impl<'a> Extractor<'a> {
) -> Result<()> {
log::trace!("enter: parse_url_and_add_subpaths({:?})", links);
match Url::parse(url_to_parse) {
match parse_url_with_raw_path(url_to_parse) {
Ok(absolute) => {
if absolute.domain() != original_url.domain()
|| absolute.host() != original_url.host()
@@ -121,91 +165,140 @@ impl<'a> Extractor<'a> {
/// given a set of links from a normal http body response, task the request handler to make
/// the requests
pub async fn request_links(&mut self, links: HashSet<String>) -> Result<()> {
pub async fn request_links(
&mut self,
links: HashSet<String>,
) -> Result<Option<tokio::task::JoinHandle<()>>> {
log::trace!("enter: request_links({:?})", links);
if links.is_empty() {
return Ok(());
return Ok(None);
}
self.update_stats(links.len())?;
// create clones/remove use of self of/from everything the async move block will need to function
let cloned_scanned_urls = self.handles.ferox_scans()?;
let cloned_handles = self.handles.clone();
let cloned_url = self.url.clone();
let threads = self.handles.config.threads;
let recursive = if self.handles.config.no_recursion {
RecursionStatus::NotRecursive
} else {
RecursionStatus::Recursive
};
let scanned_urls = self.handles.ferox_scans()?;
self.update_stats(links.len())?;
let link_request_task = tokio::spawn(async move {
let producers = futures::stream::iter(links.into_iter())
.map(|link| {
// another clone to satisfy the async move block
let inner_clone = cloned_handles.clone();
for link in links {
let mut resp = match self.request_link(&link).await {
Ok(resp) => resp,
Err(_) => continue,
};
(
tokio::spawn(async move { request_link(&link, inner_clone).await }),
cloned_handles.clone(),
cloned_scanned_urls.clone(),
recursive,
cloned_url.clone(),
)
})
.for_each_concurrent(
threads,
|(join_handle, c_handles, c_scanned_urls, c_recursive, og_url)| async move {
match join_handle.await {
Ok(Ok(reqwest_response)) => {
let mut resp = FeroxResponse::from(
reqwest_response,
&og_url,
DEFAULT_METHOD,
c_handles.config.output_level,
)
.await;
// filter if necessary
if self
.handles
.filters
.data
.should_filter_response(&resp, self.handles.stats.tx.clone())
{
continue;
}
// filter if necessary
if c_handles
.filters
.data
.should_filter_response(&resp, c_handles.stats.tx.clone())
{
return;
}
// request and report assumed file
if resp.is_file() || !resp.is_directory() {
log::debug!("Extracted File: {}", resp);
// request and report assumed file
if resp.is_file() || !resp.is_directory() {
log::debug!("Extracted File: {}", resp);
scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest);
c_scanned_urls
.add_file_scan(resp.url().as_str(), ScanOrder::Latest);
if self.handles.config.collect_extensions {
resp.parse_extension(self.handles.clone())?;
}
if c_handles.config.collect_extensions {
// no real reason this should fail
resp.parse_extension(c_handles.clone()).unwrap();
}
if let Err(e) = resp.send_report(self.handles.output.tx.clone()) {
log::warn!("Could not send FeroxResponse to output handler: {}", e);
}
if let Err(e) = resp.send_report(c_handles.output.tx.clone()) {
log::warn!(
"Could not send FeroxResponse to output handler: {}",
e
);
}
continue;
}
return;
}
if matches!(recursive, RecursionStatus::Recursive) {
log::debug!("Extracted Directory: {}", resp);
if matches!(c_recursive, RecursionStatus::Recursive) {
log::debug!("Extracted Directory: {}", resp);
if !resp.url().as_str().ends_with('/')
&& (resp.status().is_success()
|| matches!(resp.status(), &StatusCode::FORBIDDEN))
{
// if the url doesn't end with a /
// and the response code is either a 2xx or 403
if !resp.url().as_str().ends_with('/')
&& (resp.status().is_success()
|| matches!(resp.status(), &StatusCode::FORBIDDEN))
{
// if the url doesn't end with a /
// and the response code is either a 2xx or 403
// since all of these are 2xx or 403, recursion is only attempted if the
// url ends in a /. I am actually ok with adding the slash and not
// adding it, as both have merit. Leaving it in for now to see how
// things turn out (current as of: v1.1.0)
resp.set_url(&format!("{}/", resp.url()));
}
// since all of these are 2xx or 403, recursion is only attempted if the
// url ends in a /. I am actually ok with adding the slash and not
// adding it, as both have merit. Leaving it in for now to see how
// things turn out (current as of: v1.1.0)
resp.set_url(&format!("{}/", resp.url()));
}
if c_handles.config.filter_status.is_empty() {
// -C wasn't used, so -s is the only 'filter' left to account for
if c_handles
.config
.status_codes
.contains(&resp.status().as_u16())
{
send_try_recursion_command(c_handles.clone(), resp)
.await
.unwrap_or_default();
}
} else {
// -C was used, that means the filters above would have removed
// those responses, and anything else should be let through
send_try_recursion_command(c_handles.clone(), resp)
.await
.unwrap_or_default();
}
}
}
Ok(Err(err)) => {
log::warn!("Error during link extraction: {}", err);
}
Err(err) => {
log::warn!("JoinError during link extraction: {}", err);
}
}
},
);
// wait for the requests to finish
producers.await;
});
if self.handles.config.filter_status.is_empty() {
// -C wasn't used, so -s is the only 'filter' left to account for
if self
.handles
.config
.status_codes
.contains(&resp.status().as_u16())
{
send_try_recursion_command(self.handles.clone(), resp).await?;
}
} else {
// -C was used, that means the filters above would have removed
// those responses, and anything else should be let through
send_try_recursion_command(self.handles.clone(), resp).await?;
}
}
}
log::trace!("exit: request_links");
Ok(())
Ok(Some(link_request_task))
}
/// wrapper around link extraction via html attributes
@@ -385,7 +478,7 @@ impl<'a> Extractor<'a> {
ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => {
self.response.unwrap().url().clone()
}
ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
ExtractionTarget::RobotsTxt => match parse_url_with_raw_path(&self.url) {
Ok(u) => u,
Err(e) => {
bail!("Could not parse {}: {}", self.url, e);
@@ -415,56 +508,6 @@ impl<'a> Extractor<'a> {
Ok(())
}
/// Request a single extracted link and wrap the answer in a `FeroxResponse`.
///
/// Steps performed, in order:
/// - build a new Url from `url` based on the given command line options
/// - return an error if that Url has already been seen/scanned
/// - return an error if a url/regex denylist is configured and `should_deny_url`
///   reports that the Url must not be requested
/// - otherwise send a `DEFAULT_METHOD` request through `logged_request` and convert
///   the result with `FeroxResponse::from`
///
/// Every rejection listed above, as well as any failure while formatting the Url or
/// sending the request, is reported through the `Err` variant.
pub(super) async fn request_link(&self, url: &str) -> Result<FeroxResponse> {
    log::trace!("enter: request_link({})", url);

    // the given command line options decide the final shape of the url
    let target = FeroxUrl::from_string(url, self.handles.clone()).format("", None)?;

    let known_scans = self.handles.ferox_scans()?;
    let previously_seen = known_scans.get_scan_by_url(target.as_ref()).is_some();

    if previously_seen {
        // a url that's already known doesn't need to be scanned again
        log::trace!("exit: request_link -> None");
        bail!("previously seen url");
    }

    // the deny check only runs when at least one of the two denylists has entries
    let denylist_in_use = !(self.handles.config.url_denylist.is_empty()
        && self.handles.config.regex_denylist.is_empty());

    if denylist_in_use && should_deny_url(&target, self.handles.clone())? {
        // a denied url must never be requested
        bail!(
            "prevented request to {} due to {:?} || {:?}",
            url,
            self.handles.config.url_denylist,
            self.handles.config.regex_denylist,
        );
    }

    // all checks passed; send the request
    let raw_response =
        logged_request(&target, DEFAULT_METHOD, None, self.handles.clone()).await?;

    // convert the raw response, passing along the configured output level
    let ferox_response = FeroxResponse::from(
        raw_response,
        url,
        DEFAULT_METHOD,
        self.handles.config.output_level,
    )
    .await;

    log::trace!("exit: request_link -> {:?}", ferox_response);
    Ok(ferox_response)
}
/// Entry point to perform link extraction from robots.txt
///
/// `base_url` can have paths and subpaths, however robots.txt will be requested from the
@@ -484,7 +527,7 @@ impl<'a> Extractor<'a> {
for capture in self.robots_regex.captures_iter(body) {
if let Some(new_path) = capture.name("url_path") {
let mut new_url = Url::parse(&self.url)?;
let mut new_url = parse_url_with_raw_path(&self.url)?;
new_url.set_path(new_path.as_str());
@@ -614,7 +657,7 @@ impl<'a> Extractor<'a> {
&client
};
let mut url = Url::parse(&self.url)?;
let mut url = parse_url_with_raw_path(&self.url)?;
url.set_path(location); // overwrite existing path
// purposefully not using logged_request here due to using the special client

View File

@@ -1,4 +1,5 @@
use super::builder::{LINKFINDER_REGEX, ROBOTS_TXT_REGEX, URL_CHARS_REGEX};
use super::container::request_link;
use super::*;
use crate::config::{Configuration, OutputLevel};
use crate::scan_manager::ScanOrder;
@@ -360,13 +361,13 @@ async fn request_link_happy_path() -> Result<()> {
then.status(200).body("this is a test");
});
let r_resp = ROBOTS_EXT.request_link(&srv.url("/login.php")).await?;
let b_resp = BODY_EXT.request_link(&srv.url("/login.php")).await?;
let r_resp = request_link(&srv.url("/login.php"), ROBOTS_EXT.handles.clone()).await?;
let b_resp = request_link(&srv.url("/login.php"), BODY_EXT.handles.clone()).await?;
assert!(matches!(r_resp.status(), &StatusCode::OK));
assert!(matches!(b_resp.status(), &StatusCode::OK));
assert_eq!(r_resp.content_length(), 14);
assert_eq!(b_resp.content_length(), 14);
assert!(matches!(r_resp.status(), StatusCode::OK));
assert!(matches!(b_resp.status(), StatusCode::OK));
assert_eq!(r_resp.content_length().unwrap(), 14);
assert_eq!(b_resp.content_length().unwrap(), 14);
assert_eq!(mock.hits(), 2);
Ok(())
}
@@ -390,8 +391,8 @@ async fn request_link_bails_on_seen_url() -> Result<()> {
let robots = setup_extractor(ExtractionTarget::RobotsTxt, scans.clone());
let body = setup_extractor(ExtractionTarget::ResponseBody, scans);
let r_resp = robots.request_link(&served).await;
let b_resp = body.request_link(&served).await;
let r_resp = request_link(&served, robots.handles.clone()).await;
let b_resp = request_link(&served, body.handles.clone()).await;
assert!(r_resp.is_err());
assert!(b_resp.is_err());

View File

@@ -4,11 +4,10 @@ use crate::event_handlers::Handles;
use crate::filters::similarity::SIM_HASHER;
use crate::nlp::preprocess;
use crate::response::FeroxResponse;
use crate::utils::logged_request;
use crate::utils::{logged_request, parse_url_with_raw_path};
use crate::DEFAULT_METHOD;
use anyhow::Result;
use regex::Regex;
use reqwest::Url;
use std::sync::Arc;
/// wrapper around logic necessary to create a SimilarityFilter
@@ -23,7 +22,7 @@ pub(crate) async fn create_similarity_filter(
handles: Arc<Handles>,
) -> Result<SimilarityFilter> {
// url as-is based on input, ignores user-specified url manipulation options (add-slash etc)
let url = Url::parse(similarity_filter)?;
let url = parse_url_with_raw_path(similarity_filter)?;
// attempt to request the given url
let resp = logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await?;

View File

@@ -1,6 +1,8 @@
use std::collections::HashMap;
use std::sync::Arc;
use anyhow::{bail, Result};
use futures::future;
use scraper::{Html, Selector};
use uuid::Uuid;
@@ -276,138 +278,185 @@ impl HeuristicTests {
None
};
// 4 is due to the array in the nested for loop below
let mut responses = Vec::with_capacity(4);
// no matter what, we want an empty extension for the base case
let mut extensions = vec!["".to_string()];
// and then we want to add any extensions that were specified
// or have since been added to the running config
for ext in &self.handles.config.extensions {
extensions.push(format!(".{}", ext));
}
// for every method, attempt to id its 404 response
//
// a good example of one where the GET/POST differ is on hackthebox:
// - http://prd.m.rendering-api.interface.htb/api
//
// a good example of one where the heuristics return a 403 and a 404 (apache)
// as well as return two different types of 404s based on the file extension
// - http://10.10.11.198 (Encoding box in normal labs)
//
// both methods and extensions can elicit different responses from a given
// server, so both are considered when building auto-filter rules
for method in self.handles.config.methods.iter() {
for (prefix, length) in [("", 1), ("", 3), (".htaccess", 1), ("admin", 1)] {
let path = format!("{prefix}{}", self.unique_string(length));
for extension in extensions.iter() {
// build out the 6 paths we'll use
let paths = [
("", 1),
("", 3),
(".htaccess", 1),
(".htaccess", 3),
("admin", 1),
("admin", 3),
]
.map(|(prefix, length)| {
format!("{prefix}{}{extension}", self.unique_string(length))
});
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
// allow all 6 requests to fly asynchronously
let responses = future::join_all(paths.into_iter().map(|path| async move {
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
let nonexistent_url = ferox_url.format(&path, slash)?;
let Ok(nonexistent_url) = ferox_url.format(&path, slash) else {
return None;
};
// example requests:
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
let response =
logged_request(&nonexistent_url, method, data, self.handles.clone()).await;
// example requests:
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
// - http://localhost/.htaccess92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
// - http://localhost/admin92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
let Ok(response) =
logged_request(&nonexistent_url, method, data, self.handles.clone())
.await else {
return None;
};
req_counter += 1;
if !self
.handles
.config
.status_codes
.contains(&response.status().as_u16())
{
// if the response code isn't one that's accepted via -s values, then skip to the next
//
// the default value for -s is all status codes, so unless the user says otherwise
// this won't fire
return None;
}
// continue to next on error
let response = skip_fail!(response);
Some(
FeroxResponse::from(
response,
&ferox_url.target,
method,
self.handles.config.output_level,
)
.await,
)
}))
.await // await gives vector of options containing feroxresponses
.into_iter()
.flatten() // strip out the none values
.collect::<Vec<_>>();
if !self
.handles
.config
.status_codes
.contains(&response.status().as_u16())
{
// if the response code isn't one that's accepted via -s values, then skip to the next
//
// the default value for -s is all status codes, so unless the user says otherwise
// this won't fire
if responses.len() < 2 {
// don't have enough responses to make a determination, continue to next method
log::debug!("not enough responses to make a determination");
continue;
}
let ferox_response = FeroxResponse::from(
response,
&ferox_url.target,
method,
// check the responses for similarities on which we can filter, multiple may be returned
let Some((wildcard_filters, wildcard_responses)) = self.examine_404_like_responses(&responses) else {
// no match was found during analysis of responses
log::warn!("no match found for 404 responses");
continue;
};
// report to the user, if appropriate
if matches!(
self.handles.config.output_level,
)
.await;
OutputLevel::Default | OutputLevel::Quiet
) {
// sentry value to control whether or not to print the filter
// used because we only want to print the same filter once
let mut print_sentry;
responses.push(ferox_response);
}
if let Ok(filters) = self.handles.filters.data.filters.read() {
for new_wildcard in &wildcard_filters {
// reset the sentry for every new wildcard produced by examine_404_like_responses
print_sentry = true;
if responses.len() < 2 {
// don't have enough responses to make a determination, continue to next method
responses.clear();
continue;
}
for other in filters.iter() {
if let Some(other_wildcard) =
other.as_any().downcast_ref::<WildcardFilter>()
{
// check the new wildcard against all existing wildcards, if it was added
// on the cli or by a previous directory, don't print it
if new_wildcard.as_ref() == other_wildcard {
print_sentry = false;
break;
}
}
}
// Command::AddFilter, &str (bytes/words/lines), usize (i.e. length associated with the type)
let Some(filter) = self.examine_404_like_responses(&responses) else {
// no match was found during analysis of responses
responses.clear();
continue;
};
// report to the user, if appropriate
if matches!(
self.handles.config.output_level,
OutputLevel::Default | OutputLevel::Quiet
) {
// sentry value to control whether or not to print the filter
// used because we only want to print the same filter once
let mut print_sentry = true;
if let Ok(filters) = self.handles.filters.data.filters.read() {
for other in filters.iter() {
if let Some(other_wildcard) =
other.as_any().downcast_ref::<WildcardFilter>()
{
if &*filter == other_wildcard {
print_sentry = false;
break;
// if we're here, we've found a new wildcard that we didn't previously display, print it
if print_sentry {
ferox_print(&format!("{}", new_wildcard), &PROGRESS_PRINTER);
}
}
}
}
if print_sentry {
ferox_print(&format!("{}", filter), &PROGRESS_PRINTER);
// create the new filter
for wildcard in wildcard_filters {
self.handles.filters.send(Command::AddFilter(wildcard))?;
}
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
//
// in addition, we'll create a similarity filter as a fallback
for resp in wildcard_responses {
let hash = SIM_HASHER.create_signature(preprocess(resp.text()).iter());
let sim_filter = SimilarityFilter {
hash,
original_url: resp.url().to_string(),
};
self.handles
.filters
.send(Command::AddFilter(Box::new(sim_filter)))?;
if resp.is_directory() {
// response is either a 3XX with a Location header that matches url + '/'
// or it's a 2XX that ends with a '/'
// or it's a 403 that ends with a '/'
// set the wildcard flag to true, so we can check it when preventing
// recursion in event_handlers/scans.rs
// we'd need to clone the response to give ownership to the global list anyway
// so we'll also use that clone to set the wildcard flag
let mut cloned_resp = resp.clone();
cloned_resp.set_wildcard(true);
// add the response to the global list of responses
RESPONSES.insert(cloned_resp);
// function-internal magic number, indicates that we've detected a wildcard directory
req_counter += 100;
}
}
}
// create the new filter
self.handles.filters.send(Command::AddFilter(filter))?;
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
//
// in addition, we'll create a similarity filter as a fallback
let hash = SIM_HASHER.create_signature(preprocess(responses[0].text()).iter());
let sim_filter = SimilarityFilter {
hash,
original_url: responses[0].url().to_string(),
};
self.handles
.filters
.send(Command::AddFilter(Box::new(sim_filter)))?;
if responses[0].is_directory() {
// response is either a 3XX with a Location header that matches url + '/'
// or it's a 2XX that ends with a '/'
// or it's a 403 that ends with a '/'
// set the wildcard flag to true, so we can check it when preventing
// recursion in event_handlers/scans.rs
responses[0].set_wildcard(true);
// add the response to the global list of responses
RESPONSES.insert(responses[0].clone());
// function-internal magic number, indicates that we've detected a wildcard directory
req_counter += 100;
}
// reset the responses for the next method, if it exists
responses.clear();
}
log::trace!("exit: detect_404_like_responses");
let retval = if req_counter > 100 {
let retval = if req_counter >= 100 {
WildcardResult::WildcardDirectory(req_counter)
} else {
WildcardResult::FourOhFourLike(req_counter)
@@ -416,96 +465,138 @@ impl HeuristicTests {
Ok(Some(retval))
}
/// for all responses, examine chars/words/lines
/// if all responses respective lengths match each other, we can assume
/// that will remain true for subsequent non-existent urls
/// for all responses, group them by status code, then examine chars/words/lines.
/// if all responses' respective lengths within a status code grouping match
/// each other, we can assume that will remain true for subsequent non-existent urls
///
/// values are examined from most to least specific (content length, word count, line count)
fn examine_404_like_responses(
/// within a status code grouping, values are examined from most to
/// least specific (content length, word count, line count)
#[allow(clippy::vec_box)] // the box is needed in the caller and i dont feel like changing it
fn examine_404_like_responses<'a>(
&self,
responses: &[FeroxResponse],
) -> Option<Box<WildcardFilter>> {
responses: &'a [FeroxResponse],
) -> Option<(Vec<Box<WildcardFilter>>, Vec<&'a FeroxResponse>)> {
// aside from word/line/byte counts, additional discriminators are status code
// extension, and request method. The request method and extension are handled by
// the caller, since they're part of the request and make up the nested for loops
// in detect_404_like_responses.
//
// The status code is handled here, since it's part of the response to catch cases
// where we have something like a 403 and a 404
let mut size_sentry = true;
let mut word_sentry = true;
let mut line_sentry = true;
let method = responses[0].method();
let status_code = responses[0].status();
let content_length = responses[0].content_length();
let word_count = responses[0].word_count();
let line_count = responses[0].line_count();
// returned vec of boxed wildcard filters
let mut wildcards = Vec::new();
for response in &responses[1..] {
// if any of the responses differ in length, that particular
// response length type is no longer a candidate for filtering
if response.content_length() != content_length {
size_sentry = false;
}
// returned vec of ferox responses that are needed for additional
// analysis
let mut wild_responses = Vec::new();
if response.word_count() != word_count {
word_sentry = false;
}
// mapping of grouped responses to status code
let mut grouped_responses = HashMap::new();
if response.line_count() != line_count {
line_sentry = false;
}
// iterate over all responses and add each response to its
// corresponding status code group
for response in responses {
grouped_responses
.entry(response.status())
.or_insert_with(Vec::new)
.push(response);
}
if !size_sentry && !word_sentry && !line_sentry {
// none of the response lengths match, so we can't filter on any of them
return None;
// iterate over each grouped response and determine the most specific
// filter that can be applied to all responses in the group, i.e.
// start from byte count and work 'out' to line count
for response_group in grouped_responses.values() {
if response_group.len() < 2 {
// not enough responses to make a determination
continue;
}
let method = response_group[0].method();
let status_code = response_group[0].status();
let content_length = response_group[0].content_length();
let word_count = response_group[0].word_count();
let line_count = response_group[0].line_count();
for response in &response_group[1..] {
// if any of the responses differ in length, that particular
// response length type is no longer a candidate for filtering
if response.content_length() != content_length {
size_sentry = false;
}
if response.word_count() != word_count {
word_sentry = false;
}
if response.line_count() != line_count {
line_sentry = false;
}
}
if !size_sentry && !word_sentry && !line_sentry {
// none of the response lengths match, so we can't filter on any of them
continue;
}
let mut wildcard = WildcardFilter {
content_length: None,
line_count: None,
word_count: None,
method: method.to_string(),
status_code: status_code.as_u16(),
dont_filter: self.handles.config.dont_filter,
};
match (size_sentry, word_sentry, line_sentry) {
(true, true, true) => {
// all three types of length match, so we can filter on all of them
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, true, false) => {
// content length and word count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
}
(true, false, true) => {
// content length and line count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.line_count = Some(line_count);
}
(false, true, true) => {
// word count and line count match, so we can filter on either
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, false, false) => {
// content length matches, so we can filter on that
wildcard.content_length = Some(content_length);
}
(false, true, false) => {
// word count matches, so we can filter on that
wildcard.word_count = Some(word_count);
}
(false, false, true) => {
// line count matches, so we can filter on that
wildcard.line_count = Some(line_count);
}
(false, false, false) => {
// none of the length types match, so we can't filter on any of them
unreachable!("no wildcard size matches; handled by the if statement above");
}
};
wild_responses.push(response_group[0]);
wildcards.push(Box::new(wildcard));
}
let mut wildcard = WildcardFilter {
content_length: None,
line_count: None,
word_count: None,
method: method.to_string(),
status_code: status_code.as_u16(),
dont_filter: self.handles.config.dont_filter,
};
match (size_sentry, word_sentry, line_sentry) {
(true, true, true) => {
// all three types of length match, so we can filter on all of them
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, true, false) => {
// content length and word count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.word_count = Some(word_count);
}
(true, false, true) => {
// content length and line count match, so we can filter on either
wildcard.content_length = Some(content_length);
wildcard.line_count = Some(line_count);
}
(false, true, true) => {
// word count and line count match, so we can filter on either
wildcard.word_count = Some(word_count);
wildcard.line_count = Some(line_count);
}
(true, false, false) => {
// content length matches, so we can filter on that
wildcard.content_length = Some(content_length);
}
(false, true, false) => {
// word count matches, so we can filter on that
wildcard.word_count = Some(word_count);
}
(false, false, true) => {
// line count matches, so we can filter on that
wildcard.line_count = Some(line_count);
}
(false, false, false) => {
// none of the length types match, so we can't filter on any of them
unreachable!("no wildcard size matches; handled by the if statement above");
}
};
Some(Box::new(wildcard))
Some((wildcards, wild_responses))
}
}

View File

@@ -31,7 +31,7 @@ use feroxbuster::{
TermOutHandler, SCAN_COMPLETE,
},
filters, heuristics, logger,
progress::{PROGRESS_BAR, PROGRESS_PRINTER},
progress::PROGRESS_PRINTER,
scan_manager::{self, ScanType},
scanner,
utils::{fmt_err, slugify_filename},
@@ -220,7 +220,6 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> {
// PROGRESS_PRINTER and PROGRESS_BAR have been used at least once. This call satisfies
// that constraint
PROGRESS_PRINTER.println("");
PROGRESS_BAR.join().unwrap();
});
// check if update_app is true

View File

@@ -1,4 +1,6 @@
use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle};
use std::time::Duration;
use indicatif::{HumanDuration, MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle};
use lazy_static::lazy_static;
lazy_static! {
@@ -31,30 +33,68 @@ pub enum BarType {
/// Add an [indicatif::ProgressBar](https://docs.rs/indicatif/latest/indicatif/struct.ProgressBar.html)
/// to the global [PROGRESS_BAR](../config/struct.PROGRESS_BAR.html)
pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar {
let mut style = ProgressStyle::default_bar().progress_chars("#>-");
let mut style = ProgressStyle::default_bar()
.progress_chars("#>-")
.with_key(
"smoothed_per_sec",
|state: &indicatif::ProgressState, w: &mut dyn std::fmt::Write| match (
state.pos(),
state.elapsed().as_millis(),
) {
// https://github.com/console-rs/indicatif/issues/394#issuecomment-1309971049
//
// indicatif released a change to how they reported eta/per_sec
// and the results looked really weird based on how we use the progress
// bars. this fixes that
(pos, elapsed_ms) if elapsed_ms > 0 => {
write!(w, "{:.0}/s", pos as f64 * 1000_f64 / elapsed_ms as f64).unwrap()
}
_ => write!(w, "-").unwrap(),
},
)
.with_key(
"smoothed_eta",
|state: &indicatif::ProgressState, w: &mut dyn std::fmt::Write| match (
state.pos(),
state.len(),
) {
(pos, Some(len)) => write!(
w,
"{:#}",
HumanDuration(Duration::from_millis(
(state.elapsed().as_millis()
* ((len as u128).checked_sub(pos as u128).unwrap_or(1))
.checked_div(pos as u128)
.unwrap_or(1)) as u64
))
)
.unwrap(),
_ => write!(w, "-").unwrap(),
},
);
style = match bar_type {
BarType::Hidden => style.template(""),
BarType::Default => style.template(
"[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {per_sec:7} {prefix} {msg}",
),
BarType::Message => style.template(&format!(
BarType::Hidden => style.template("").unwrap(),
BarType::Default => style
.template("[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {smoothed_per_sec:7} {prefix} {msg}")
.unwrap(),
BarType::Message => style
.template(&format!(
"[{{bar:.cyan/blue}}] - {{elapsed:<4}} {{pos:>7}}/{{len:7}} {:7} {{prefix}} {{msg}}",
"-"
)),
BarType::Total => {
style.template("[{bar:.yellow/blue}] - {elapsed:<4} {pos:>7}/{len:7} {eta:7} {msg}")
}
BarType::Quiet => style.template("Scanning: {prefix}"),
))
.unwrap(),
BarType::Total => style
.template("[{bar:.yellow/blue}] - {elapsed:<4} {pos:>7}/{len:7} {smoothed_eta:7} {msg}")
.unwrap(),
BarType::Quiet => style.template("Scanning: {prefix}").unwrap(),
};
let progress_bar = PROGRESS_BAR.add(ProgressBar::new(length));
progress_bar.set_style(style);
progress_bar.set_prefix(prefix);
progress_bar
PROGRESS_BAR.add(
ProgressBar::new(length)
.with_style(style)
.with_prefix(prefix.to_string()),
)
}
#[cfg(test)]

View File

@@ -21,7 +21,7 @@ use crate::{
event_handlers::{Command, Handles},
traits::FeroxSerialize,
url::FeroxUrl,
utils::{self, fmt_err, status_colorizer},
utils::{self, fmt_err, parse_url_with_raw_path, status_colorizer},
CommandSender,
};
@@ -140,7 +140,7 @@ impl FeroxResponse {
/// Set `FeroxResponse`'s `url` attribute; has no effect if an error occurs
pub fn set_url(&mut self, url: &str) {
match Url::parse(url) {
match parse_url_with_raw_path(url) {
Ok(url) => {
self.url = url;
}
@@ -170,7 +170,8 @@ impl FeroxResponse {
/// Free the `text` data, reducing memory usage.
///
/// After this call `self.text` is an empty string whose backing
/// allocation has been released.
pub fn drop_text(&mut self) {
    // a single clear + shrink is sufficient; assigning a fresh `String`
    // beforehand would already drop the buffer and make these calls no-ops
    self.text.clear(); // length is set to 0
    self.text.shrink_to_fit(); // allocated capacity shrinks to reflect the new size
}
/// Make a reasonable guess at whether the response is a file or not
@@ -394,7 +395,14 @@ impl FeroxResponse {
pub fn send_report(self, report_sender: CommandSender) -> Result<()> {
log::trace!("enter: send_report({:?}", report_sender);
report_sender.send(Command::Report(Box::new(self)))?;
// there's no reason to send the response body across the mpsc
//
// the only possible reason is for filtering on the body, but both `send_report`
// calls are gated behind checks for `should_filter_response`
let mut me = self;
me.drop_text();
report_sender.send(Command::Report(Box::new(me)))?;
log::trace!("exit: send_report");
Ok(())
@@ -591,7 +599,7 @@ impl<'de> Deserialize<'de> for FeroxResponse {
match key.as_str() {
"url" => {
if let Some(url) = value.as_str() {
if let Ok(parsed) = Url::parse(url) {
if let Ok(parsed) = parse_url_with_raw_path(url) {
response.url = parsed;
}
}

View File

@@ -159,7 +159,7 @@ impl FeroxScan {
if pb.position() > self.num_requests {
pb.finish()
} else {
pb.finish_at_current_pos()
pb.abandon()
}
}
}

View File

@@ -379,7 +379,7 @@ impl FeroxScans {
.unwrap_or_else(|e| log::warn!("Could not cancel task: {}", e));
let pb = selected.progress_bar();
num_cancelled += pb.length() as usize - pb.position() as usize;
num_cancelled += pb.length().unwrap_or(0) as usize - pb.position() as usize;
} else {
self.menu.println("Ok, doing nothing...");
}

View File

@@ -72,7 +72,7 @@ fn add_url_to_list_of_scanned_urls_with_known_url() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -94,7 +94,7 @@ fn stop_progress_bar_stops_bar() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -152,7 +152,7 @@ async fn call_display_scans() {
url,
ScanType::Directory,
ScanOrder::Latest,
pb.length(),
pb.length().unwrap(),
OutputLevel::Default,
Some(pb),
);
@@ -160,7 +160,7 @@ async fn call_display_scans() {
url_two,
ScanType::Directory,
ScanOrder::Latest,
pb_two.length(),
pb_two.length().unwrap(),
OutputLevel::Default,
Some(pb_two),
);

View File

@@ -203,6 +203,9 @@ impl FeroxScanner {
log::info!("Starting scan against: {}", self.target_url);
let mut scan_timer = Instant::now();
// every time we extract links we'll need to await the task to make sure
// it completes before the scan ends
let mut extraction_tasks = Vec::new();
if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) {
// check for robots.txt (cannot be in sub-directories, so limited to Initial)
@@ -213,7 +216,7 @@ impl FeroxScanner {
.build()?;
let result = extractor.extract().await?;
extractor.request_links(result).await?;
extraction_tasks.push(extractor.request_links(result).await?)
}
let scanned_urls = self.handles.ferox_scans()?;
@@ -265,7 +268,7 @@ impl FeroxScanner {
let result = extractor.extract_from_dir_listing().await?;
extractor.request_links(result).await?;
extraction_tasks.push(extractor.request_links(result).await?);
log::trace!("exit: scan_url -> Directory listing heuristic");
@@ -276,19 +279,27 @@ impl FeroxScanner {
self.handles.stats.send(SubtractFromUsizeField(
TotalExpected,
progress_bar.length() as usize,
progress_bar.length().unwrap_or(0) as usize,
))?;
}
let mut message = format!("=> {}", style("Directory listing").blue().bright());
if !self.handles.config.extract_links {
write!(message, " (add {} to scan)", style("-e").bright().yellow())?;
write!(
message,
" (remove {} to scan)",
style("--dont-extract-links").bright().yellow()
)?;
}
if !self.handles.config.force_recursion {
for handle in extraction_tasks.into_iter().flatten() {
_ = handle.await;
}
progress_bar.reset_eta();
progress_bar.finish_with_message(&message);
progress_bar.finish_with_message(message);
ferox_scan.finish()?;
@@ -313,7 +324,7 @@ impl FeroxScanner {
style("Wildcard").blue().bright(),
style("stopped").red()
);
progress_bar.set_message(&message);
progress_bar.set_message(message);
progress_bar.inc(num_reqs as u64);
}
Some(WildcardResult::FourOhFourLike(num_reqs)) => {
@@ -340,7 +351,7 @@ impl FeroxScanner {
let new_words = TF_IDF.read().unwrap().all_words();
let new_words_len = new_words.len();
let cur_length = progress_bar.length();
let cur_length = progress_bar.length().unwrap_or(0);
let new_length = cur_length + new_words_len as u64;
progress_bar.set_length(new_length);
@@ -370,6 +381,10 @@ impl FeroxScanner {
scan_timer.elapsed().as_secs_f64(),
))?;
for handle in extraction_tasks.into_iter().flatten() {
_ = handle.await;
}
ferox_scan.finish()?;
log::trace!("exit: scan_url");

View File

@@ -217,7 +217,7 @@ impl Requester {
self.ferox_scan
.progress_bar()
.set_message(&format!("=> 🚦 {styled_direction} scan speed",));
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
}
self.policy_data.set_errors(scan_errors);
} else {
@@ -230,7 +230,7 @@ impl Requester {
self.ferox_scan
.progress_bar()
.set_message(&format!("=> 🚦 {styled_direction} scan speed",));
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
}
}
@@ -286,7 +286,7 @@ impl Requester {
self.set_rate_limiter(Some(new_limit)).await?;
self.ferox_scan
.progress_bar()
.set_message(&format!("=> 🚦 set rate limit ({new_limit}/s)"));
.set_message(format!("=> 🚦 set rate limit ({new_limit}/s)"));
}
self.adjust_limit(trigger, true).await?;
@@ -321,11 +321,11 @@ impl Requester {
// figure out how many requests are skipped as a result
let pb = self.ferox_scan.progress_bar();
let num_skipped = pb.length().saturating_sub(pb.position()) as usize;
let num_skipped = pb.length().unwrap_or(0).saturating_sub(pb.position()) as usize;
let styled_trigger = style(format!("{trigger:?}")).red();
pb.set_message(&format!(
pb.set_message(format!(
"=> 💀 too many {} ({}) 💀 bailing",
styled_trigger,
self.ferox_scan.num_errors(trigger),
@@ -490,6 +490,7 @@ impl Requester {
.target(ExtractionTarget::ResponseBody)
.response(&ferox_response)
.handles(self.handles.clone())
.url(self.ferox_scan.url())
.build()?;
let new_links: HashSet<_>;
@@ -513,7 +514,11 @@ impl Requester {
}
if !new_links.is_empty() {
extractor.request_links(new_links).await?;
let extraction_task = extractor.request_links(new_links).await?;
if let Some(task) = extraction_task {
_ = task.await;
}
}
}

View File

@@ -1,3 +1,4 @@
use crate::utils::parse_url_with_raw_path;
use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
use anyhow::{anyhow, bail, Result};
use reqwest::Url;
@@ -142,19 +143,19 @@ impl FeroxUrl {
word = word.trim_start_matches('/').to_string();
};
let base_url = Url::parse(&url)?;
let joined = base_url.join(&word)?;
let base_url = parse_url_with_raw_path(&url)?;
let mut joined = base_url.join(&word)?;
if self.handles.config.queries.is_empty() {
// no query params to process
log::trace!("exit: format -> {}", joined);
Ok(joined)
} else {
let with_params =
Url::parse_with_params(joined.as_str(), &self.handles.config.queries)?;
log::trace!("exit: format_url -> {}", with_params);
Ok(with_params) // request with params attached
if !self.handles.config.queries.is_empty() {
// if called, this adds a '?' to the url, whether or not there are queries to be added
// so we need to check if there are queries to be added before blindly adding the '?'
joined
.query_pairs_mut()
.extend_pairs(self.handles.config.queries.iter());
}
log::trace!("exit: format_url -> {}", joined);
Ok(joined)
}
/// Simple helper to abstract away adding a forward-slash to a url if not present
@@ -189,7 +190,7 @@ impl FeroxUrl {
let target = self.normalize();
let parsed = Url::parse(&target)?;
let parsed = parse_url_with_raw_path(&target)?;
let parts = parsed
.path_segments()
.ok_or_else(|| anyhow!("No path segments found"))?;

View File

@@ -75,7 +75,12 @@ pub(crate) async fn send_try_recursion_command(
handles: Arc<Handles>,
response: FeroxResponse,
) -> Result<()> {
handles.send_scan_command(Command::TryRecursion(Box::new(response.clone())))?;
// make the response mutable so we can drop the body before
// sending it over the mpsc
let mut response = response;
response.drop_text();
handles.send_scan_command(Command::TryRecursion(Box::new(response)))?;
let (tx, rx) = oneshot::channel::<bool>();
handles.send_scan_command(Command::Sync(tx))?;
rx.await?;
@@ -420,9 +425,14 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
// current deny-url, now we just need to check to see if this deny-url is a parent
// to a scanned url that is also a parent of the given url
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
let scanner = parse_url_with_raw_path(ferox_scan.url().trim_end_matches('/'))
.with_context(|| format!("Could not parse {ferox_scan} as a url"))?;
// by calling the new parse_url_with_raw_path, and reaching this point without an
// error, we know we have an authority and therefore a host. leaving the code
// below, but we should never hit the else condition. leaving it in so if we find
// a case where i'm mistaken, we'll know about it and can address it
if let Some(scan_host) = scanner.host() {
// same domain/ip check we perform on the denier above
if tested_host != scan_host {
@@ -431,7 +441,7 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
}
} else {
// couldn't process .host from scanner
continue;
unreachable!("should_deny_absolute: scanner.host() returned None, which shouldn't be possible");
};
let scan_path = scanner.path();
@@ -482,7 +492,7 @@ pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
// normalization for comparison is to remove the trailing / if one exists, this is done for
// the given url and any url to which it's compared
let normed_url = Url::parse(url.to_string().trim_end_matches('/'))?;
let normed_url = parse_url_with_raw_path(url.to_string().trim_end_matches('/'))?;
for denier in &handles.config.url_denylist {
// note to self: it may seem as though we can use regex only for --dont-scan, however, in
@@ -532,6 +542,187 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String {
filename
}
/// This function takes a url string and returns a `url::Url`
///
/// It is primarily used to detect url paths that `url::Url::parse` will
/// silently transform, such as /path/../file.html -> /file.html
///
/// # Warning
///
/// In the instance of a url with encoded path traversal strings, such as
/// /path/%2e%2e/file.html, the underlying `url::Url::parse` will
/// further encode the %-signs and return /path/%252e%252e/file.html
pub fn parse_url_with_raw_path(url: &str) -> Result<Url> {
log::trace!("enter: parse_url_with_raw_path({})", url);
let parsed = Url::parse(url)?;
if !parsed.has_authority() {
// parsed correctly, but no authority, meaning mailto: or tel: or
// some other url that we don't care about
bail!("url to parse has no authority and is therefore invalid");
}
// we have a valid url, the next step is to check the path and see if it's
// something that url::Url::parse would silently transform
//
// i.e. if the path is /path/../file.html, url::Url::parse will transform it
// to /file.html, which is not what we want
let farthest_right_authority_part;
// we want to find the farthest right authority component, which is the
// component that is the furthest right in the url that is part of the
// authority
//
// per RFC 3986, the authority is defined as:
// - authority = [ userinfo "@" ] host [ ":" port ]
//
// so the farthest right authority component is either the port or the host
//
// i.e. in http://example.com:80/path/file.html, the farthest right authority
// component is :80
//
// in http://example.com/path/file.html, the farthest right authority component
// is example.com
//
// the farthest right authority component is used to split the url into two
// parts: the part before the authority and the part after the authority
if let Some(port) = parsed.port() {
// if the url has a port, then the farthest right authority component is
// the port
farthest_right_authority_part = format!(":{}", port);
} else if parsed.has_host() {
// if the url has a host, then the farthest right authority component is
// the host
farthest_right_authority_part = parsed.host_str().unwrap().to_owned();
} else {
// if the url has neither a port nor a host, then the url is invalid
// and we can't do anything with it, but i don't think this is possible
unreachable!("url has an authority, but has neither a port nor a host");
}
// split the original url string into two parts: the part before the authority and the part
// after the authority (i.e. the path + query + fragment)
let Some((_, after_authority)) = url.split_once(&farthest_right_authority_part) else {
// if we can't split the url string into two parts, then the url doesn't conform to our
// expectations, and we can't continue processing it, so we'll return the parsed url
return Ok(parsed);
};
// when there is a port, but it matches the default port for the scheme,
// url::Url::parse will mark the port as None, giving us a
// `after_authority` that looks something like this:
// - :80/path/file.html
let after_authority = after_authority
.replacen(":80", "", 1)
.replacen(":443", "", 1);
// snippets from rfc-3986:
//
// foo://example.com:8042/over/there?name=ferret#nose
// \_/ \______________/\_________/ \_________/ \__/
// | | | | |
// scheme authority path query fragment
//
// The path component is terminated
// by the first question mark ("?") or number sign ("#") character, or
// by the end of the URI.
//
// The query component is indicated by the first question
// mark ("?") character and terminated by a number sign ("#") character
// or by the end of the URI.
let (path, _discarded) = after_authority
.split_once('?')
// if there isn't a '?', try to remove a fragment
.unwrap_or_else(|| {
// if there isn't a '#', return (original, empty)
after_authority
.split_once('#')
.unwrap_or((&after_authority, ""))
});
// at this point, we have the path, all by itself
// each of the following is a string that we can expect url::Url::parse to
// transform. The variety is to ensure we cover most common path traversal
// encodings
let transformation_detectors = vec![
// ascii
"..",
// single url encoded
"%2e%2e",
// double url encoded
"%25%32%65%25%32%65",
// utf-8 encoded
"%c0%ae%c0%ae",
"%e0%40%ae%e0%40%ae",
"%c0ae%c0ae",
// 16 bit shenanigans
"%uff0e%uff0e",
"%u002e%u002e",
];
let parsing_will_transform_path = transformation_detectors
.iter()
.any(|detector| path.to_lowercase().contains(detector));
if !parsing_will_transform_path {
// there's no string in the path of the url that will trigger a transformation
// so, we can return it as-is
return Ok(parsed);
}
// if we reach this point, the path contains a string that will trigger a transformation
// so we need to manually create a Url that doesn't have the transformation
// and return that
//
// special thanks to github user @lavafroth for this workaround
let mut hacked_url = if path.ends_with('/') {
// from_file_path silently strips trailing slashes, and
// from_directory_path adds them, so we'll choose the appropriate
// constructor based on the presence of a path's trailing slash
// according to from_file_path docs:
// from_file_path returns `Err` if the given path is not absolute or,
// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
//
// since we parsed out a valid url path, we know it is absolute, so on non-windows
// platforms, we can safely unwrap. On windows, we need to fix up the path
#[cfg(target_os = "windows")]
{
let path = format!("\\/IGNOREME{path}");
Url::from_directory_path(path).unwrap()
}
#[cfg(not(target_os = "windows"))]
Url::from_directory_path(path).unwrap()
} else {
#[cfg(target_os = "windows")]
{
let path = format!("\\/IGNOREME{path}");
Url::from_file_path(path).unwrap()
}
#[cfg(not(target_os = "windows"))]
Url::from_file_path(path).unwrap()
};
// host must be set first, otherwise multiple components may return Err
hacked_url.set_host(parsed.host_str())?;
// scheme/port/username/password can fail, but in this instance, we know they won't
hacked_url.set_scheme(parsed.scheme()).unwrap();
hacked_url.set_port(parsed.port()).unwrap();
hacked_url.set_username(parsed.username()).unwrap();
hacked_url.set_password(parsed.password()).unwrap();
// query/fragment can't fail
hacked_url.set_query(parsed.query());
hacked_url.set_fragment(parsed.fragment());
log::trace!("exit: parse_url_with_raw_path -> {}", hacked_url);
Ok(hacked_url)
}
#[cfg(test)]
mod tests {
use super::*;
@@ -539,31 +730,159 @@ mod tests {
use crate::scan_manager::{FeroxScans, ScanOrder};
#[test]
/// set_open_file_limit with a low requested limit succeeds
fn utils_set_open_file_limit_with_low_requested_limit() {
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
let lower_limit = hard - 1;
assert!(set_open_file_limit(lower_limit));
/// exercises parse_url_with_raw_path against urls that must round-trip untouched, inputs
/// that must be rejected, default/non-default ports, and encoded traversal sequences
fn utils_parse_url_with_raw_path() {
    // these urls must come back exactly as given: `../..` is preserved, and so is the
    // presence (or absence) of a trailing slash
    for unchanged in [
        "https://www.google.com/../../stuff",
        "https://www.google.com/../../stuff/",
        "https://www.google.com/stuff",
        "https://www.google.com/stuff/",
    ] {
        let result: Url = parse_url_with_raw_path(unchanged).unwrap();
        assert_eq!(result.as_str(), unchanged);
    }

    // each of these is an error: mailto, a relative url, and an absolute path without a host
    for rejected in ["mailto:user@example.com", "../../stuff", "/../../stuff"] {
        assert!(parse_url_with_raw_path(rejected).is_err());
    }

    // a port that is the default for its scheme is reported as no port at all
    for with_default_port in [
        "http://example.com:80/path/file.html",
        "https://example.com:443/path/file.html",
    ] {
        let result = parse_url_with_raw_path(with_default_port).unwrap();
        assert_eq!(result.port(), None);

        let host = result.host().unwrap().to_string();
        assert_eq!(host, "example.com");
    }

    // a non-default port is kept, and the url round-trips unchanged
    for with_custom_port in [
        "http://example.com:8080/path/file.html",
        "https://example.com:4433/path/file.html",
    ] {
        let result = parse_url_with_raw_path(with_custom_port).unwrap();
        assert!(result.port().is_some());
        assert_eq!(result.as_str(), with_custom_port);
    }

    // encoded traversal sequences are respected when they show up in doubles
    //
    // in every expected value below, the original % sign has been encoded as %25
    let single_encoded =
        parse_url_with_raw_path("http://user:pass@example.com/%2e%2e/stuff.php").unwrap();
    assert_eq!(
        single_encoded.as_str(),
        "http://user:pass@example.com/%252e%252e/stuff.php"
    );

    // (raw input, expected serialization); the credentials must also survive each of these
    let encoded_cases = [
        (
            "http://user:pass@example.com/%25%32%65%25%32%65/stuff.php",
            "http://user:pass@example.com/%2525%2532%2565%2525%2532%2565/stuff.php",
        ),
        (
            "http://user:pass@example.com/%c0%ae%c0%ae/stuff.php",
            "http://user:pass@example.com/%25c0%25ae%25c0%25ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%e0%40%ae%e0%40%ae/stuff.php",
            "http://user:pass@example.com/%25e0%2540%25ae%25e0%2540%25ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%c0ae%c0ae/stuff.php",
            "http://user:pass@example.com/%25c0ae%25c0ae/stuff.php",
        ),
        (
            "http://user:pass@example.com/%uff0e%uff0e/stuff.php",
            "http://user:pass@example.com/%25uff0e%25uff0e/stuff.php",
        ),
        (
            "http://user:pass@example.com/%u002e%u002e/stuff.php",
            "http://user:pass@example.com/%25u002e%25u002e/stuff.php",
        ),
    ];

    for (raw, expected) in encoded_cases {
        let result = parse_url_with_raw_path(raw).unwrap();
        assert_eq!(result.username(), "user");
        assert_eq!(result.password().unwrap(), "pass");
        assert_eq!(result.as_str(), expected);
    }
}
#[test]
/// asking set_open_file_limit for one more than the hard NOFILE limit is expected to succeed
fn utils_set_open_file_limit_with_high_requested_limit() {
    let (_, hard_limit) = getrlimit(Resource::NOFILE).unwrap();
    let requested = hard_limit + 1;

    // drop the soft limit to one below the hard limit so that soft != hard, which is
    // the logic branch this test is meant to hit
    setrlimit(Resource::NOFILE, hard_limit - 1, hard_limit).unwrap();

    assert!(set_open_file_limit(requested));
}
#[cfg(not(target_os = "windows"))]
mod nix_only_tests {
use super::*;
#[test]
/// set_open_file_limit should fail when hard == soft
fn utils_set_open_file_limit_with_fails_when_both_limits_are_equal() {
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
// calculate a new soft to ensure soft == hard and hit the failure logic branch
setrlimit(Resource::NOFILE, hard, hard).unwrap();
assert!(!set_open_file_limit(hard)); // returns false
#[test]
/// asking set_open_file_limit for one less than the hard NOFILE limit is expected to succeed
fn utils_set_open_file_limit_with_low_requested_limit() {
    let (_, hard_limit) = getrlimit(Resource::NOFILE).unwrap();

    // request a value just under the current hard limit
    assert!(set_open_file_limit(hard_limit - 1));
}
#[test]
/// asking set_open_file_limit for one more than the hard NOFILE limit is expected to succeed
fn utils_set_open_file_limit_with_high_requested_limit() {
    let (_, hard_limit) = getrlimit(Resource::NOFILE).unwrap();
    let requested = hard_limit + 1;

    // drop the soft limit to one below the hard limit so that soft != hard, which is
    // the logic branch this test is meant to hit
    setrlimit(Resource::NOFILE, hard_limit - 1, hard_limit).unwrap();

    assert!(set_open_file_limit(requested));
}
#[test]
/// set_open_file_limit is expected to return false once the soft limit equals the hard limit
fn utils_set_open_file_limit_with_fails_when_both_limits_are_equal() {
    let (_, hard_limit) = getrlimit(Resource::NOFILE).unwrap();

    // pin the soft limit to the hard limit so that soft == hard, the failure branch
    setrlimit(Resource::NOFILE, hard_limit, hard_limit).unwrap();

    let outcome = set_open_file_limit(hard_limit);
    assert!(!outcome); // false is the expected result here
}
}
#[test]
@@ -697,6 +1016,13 @@ mod tests {
/// provide a denier from which we can't check a host, which results in no comparison, expect false
/// because the denier is a parent to the tested, even tho the scanned doesn't compare, it
/// still returns true
///
/// note: adding parse_url_with_raw_path changed the behavior of this test, it used to return
/// true, now it returns false. see my note in should_deny_absolute and the unreachable!
/// call block to see why
///
/// leaving this test here to document the behavior change and to catch regressions in the
/// new expected behavior
fn should_deny_url_doesnt_compare_non_domains_in_scanned() {
let deny_url = "https://testdomain.com/";
let scan_url = "unix:/run/foo.socket";
@@ -710,8 +1036,7 @@ mod tests {
let config = Arc::new(config);
let handles = Arc::new(Handles::for_testing(Some(scans), Some(config)).0);
assert!(should_deny_url(&tested_url, handles).unwrap());
assert!(!should_deny_url(&tested_url, handles).unwrap());
}
#[test]

View File

@@ -164,7 +164,7 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
let mock = srv.mock(|when, then| {
when.method(GET)
.path_matches(Regex::new("/[a-zA-Z0-9]{32}/").unwrap());
.path_matches(Regex::new("/[.a-zA-Z0-9]{32,}/").unwrap());
then.status(200).body("this is a test");
});
@@ -188,7 +188,8 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
.and(predicate::str::contains("1l")),
);
assert_eq!(mock.hits(), 1);
assert_eq!(mock.hits(), 6);
Ok(())
}
@@ -305,11 +306,67 @@ fn heuristics_wildcard_test_with_two_static_wildcards_with_silent_enabled(
.success()
.stdout(predicate::str::contains(srv.url("/")));
assert_eq!(mock.hits(), 4);
assert_eq!(mock.hits(), 6);
assert_eq!(mock2.hits(), 1);
Ok(())
}
#[test]
/// when auto-filtering finds a 404-like response that is served with a 403, a directory that
/// also answers with a 403 should still be allowed to be tested for recursion
fn heuristics_wildcard_test_that_auto_filtering_403s_still_allows_for_recursion_into_403_directories(
) -> Result<(), Box<dyn std::error::Error>> {
    // response body shared by both of the 403 mocks below
    const FORBIDDEN_BODY: &str = "this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";

    let srv = MockServer::start();

    let long_word = "92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f09d471004154b83bcc1c6ec49f6f".to_string();
    let words = ["LICENSE".to_string(), long_word.clone()];
    let (tmp_dir, file) = setup_tmp_directory(&words, "wordlist")?;

    // 403 for paths matching: a slash, one optional character, then 32 to 103 alphanumerics
    srv.mock(|when, then| {
        let long_word_path = Regex::new("/.?[a-zA-Z0-9]{32,103}").unwrap();
        when.method(GET).path_matches(long_word_path);
        then.status(403).body(FORBIDDEN_BODY);
    });

    // the directory itself also answers 403, with the same body
    srv.mock(|when, then| {
        when.method(GET).path("/LICENSE/");
        then.status(403).body(FORBIDDEN_BODY);
    });

    // the long word requested beneath /LICENSE/ answers 200
    srv.mock(|when, then| {
        let nested_path = format!("/LICENSE/{}", long_word);
        when.method(GET).path(nested_path);
        then.status(200);
    });

    let target = srv.url("/");

    let output = Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(target)
        .arg("--wordlist")
        .arg(file.as_os_str())
        .arg("--add-slash")
        .unwrap();

    teardown_tmp_directory(tmp_dir);

    // stdout must report the new auto-filter along with the 403's line/word/char counts, and
    // must also mention the url nested under the 403 directory
    let expected_stdout = predicate::str::contains("GET")
        .and(predicate::str::contains(
            "Auto-filtering found 404-like response and created new filter",
        ))
        .and(predicate::str::contains("403"))
        .and(predicate::str::contains("1l"))
        .and(predicate::str::contains("4w"))
        .and(predicate::str::contains("46c"))
        .and(predicate::str::contains(srv.url("/LICENSE/LICENSE/")));

    output.assert().success().stdout(expected_stdout);

    Ok(())
}
// #[test]
// /// test finds a static wildcard and reports as much to stdout and a file
// fn heuristics_wildcard_test_with_two_static_wildcards_and_output_to_file() {