mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-05-27 00:11:11 -03:00
Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1cf37e38a2 | ||
|
|
9876759606 | ||
|
|
4150b61a42 | ||
|
|
16d34bbee0 | ||
|
|
f1fd2fc379 | ||
|
|
3dd070a0db | ||
|
|
a3dc6c97a0 | ||
|
|
ec78ec3049 | ||
|
|
960536e918 | ||
|
|
fdae9aa9d6 | ||
|
|
5c73c3fb23 | ||
|
|
02ef6d7e3f | ||
|
|
3378246820 | ||
|
|
692db93048 | ||
|
|
233cf99907 | ||
|
|
8cd9918b76 | ||
|
|
66bcbfc2f2 | ||
|
|
8b127c0093 | ||
|
|
94de58d855 | ||
|
|
2b95b7be69 | ||
|
|
e77c1314b1 | ||
|
|
1ced3b5d77 | ||
|
|
b5472f5341 | ||
|
|
ea81600850 | ||
|
|
4f679592b8 | ||
|
|
b375893461 | ||
|
|
e110f86f39 | ||
|
|
c7498a7695 | ||
|
|
f973baaba8 | ||
|
|
148982cdc4 |
@@ -562,6 +562,24 @@
|
||||
"contributions": [
|
||||
"bug"
|
||||
]
|
||||
},
|
||||
{
|
||||
"login": "acut3",
|
||||
"name": "Nicolas Christin",
|
||||
"avatar_url": "https://avatars.githubusercontent.com/u/17295243?v=4",
|
||||
"profile": "https://acut3.github.io/",
|
||||
"contributions": [
|
||||
"bug"
|
||||
]
|
||||
},
|
||||
{
|
||||
"login": "DrorDvash",
|
||||
"name": "DrDv",
|
||||
"avatar_url": "https://avatars.githubusercontent.com/u/8413651?v=4",
|
||||
"profile": "https://github.com/DrorDvash",
|
||||
"contributions": [
|
||||
"bug"
|
||||
]
|
||||
}
|
||||
],
|
||||
"contributorsPerLine": 7,
|
||||
|
||||
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
@@ -27,13 +27,13 @@ jobs:
|
||||
- type: armv7
|
||||
os: ubuntu-latest
|
||||
target: armv7-unknown-linux-gnueabihf
|
||||
name: armv7-feroxbuster
|
||||
name: armv7-linux-feroxbuster
|
||||
path: target/armv7-unknown-linux-gnueabihf/release/feroxbuster
|
||||
pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
|
||||
- type: aarch64
|
||||
os: ubuntu-latest
|
||||
target: aarch64-unknown-linux-gnu
|
||||
name: aarch64-feroxbuster
|
||||
name: aarch64-linux-feroxbuster
|
||||
path: target/aarch64-unknown-linux-gnu/release/feroxbuster
|
||||
pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
|
||||
steps:
|
||||
|
||||
590
Cargo.lock
generated
590
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
77
Cargo.toml
77
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "feroxbuster"
|
||||
version = "2.9.2"
|
||||
version = "2.9.5"
|
||||
authors = ["Ben 'epi' Risher (@epi052)"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
@@ -22,47 +22,52 @@ build = "build.rs"
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
[build-dependencies]
|
||||
clap = { version = "4.1.8", features = ["wrap_help", "cargo"] }
|
||||
clap_complete = "4.1.4"
|
||||
regex = "1.5.5"
|
||||
lazy_static = "1.4.0"
|
||||
dirs = "4.0.0"
|
||||
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
|
||||
clap_complete = "4.1"
|
||||
regex = "1.5"
|
||||
lazy_static = "1.4"
|
||||
dirs = "5.0"
|
||||
|
||||
[dependencies]
|
||||
scraper = "0.15.0"
|
||||
futures = "0.3.26"
|
||||
tokio = { version = "1.26.0", features = ["full"] }
|
||||
tokio-util = { version = "0.7.7", features = ["codec"] }
|
||||
log = "0.4.17"
|
||||
env_logger = "0.10.0"
|
||||
reqwest = { version = "0.11.10", features = ["socks"] }
|
||||
scraper = "0.16"
|
||||
futures = "0.3"
|
||||
tokio = { version = "1.26", features = ["full"] }
|
||||
tokio-util = { version = "0.7", features = ["codec"] }
|
||||
log = "0.4"
|
||||
env_logger = "0.10"
|
||||
reqwest = { version = "0.11", features = ["socks"] }
|
||||
# uses feature unification to add 'serde' to reqwest::Url
|
||||
url = { version = "2.2.2", features = ["serde"] }
|
||||
serde_regex = "1.1.0"
|
||||
clap = { version = "4.1.8", features = ["wrap_help", "cargo"] }
|
||||
lazy_static = "1.4.0"
|
||||
toml = "0.7.2"
|
||||
serde = { version = "1.0.137", features = ["derive", "rc"] }
|
||||
serde_json = "1.0.94"
|
||||
uuid = { version = "1.3.0", features = ["v4"] }
|
||||
indicatif = "0.15"
|
||||
console = "0.15.2"
|
||||
url = { version = "2.2", features = ["serde"] }
|
||||
serde_regex = "1.1"
|
||||
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
|
||||
lazy_static = "1.4"
|
||||
toml = "0.7"
|
||||
serde = { version = "1.0", features = ["derive", "rc"] }
|
||||
serde_json = "1.0"
|
||||
uuid = { version = "1.3", features = ["v4"] }
|
||||
indicatif = "0.17"
|
||||
console = "0.15"
|
||||
openssl = { version = "0.10", features = ["vendored"] }
|
||||
dirs = "4.0.0"
|
||||
regex = "1.5.5"
|
||||
crossterm = "0.26.0"
|
||||
rlimit = "0.9.1"
|
||||
ctrlc = "3.2.2"
|
||||
anyhow = "1.0.69"
|
||||
leaky-bucket = "0.12.1"
|
||||
gaoya = "0.1.2"
|
||||
self_update = {version = "0.36.0", features = ["archive-tar", "compression-flate2", "archive-zip", "compression-zip-deflate"]}
|
||||
dirs = "5.0"
|
||||
regex = "1.5"
|
||||
crossterm = "0.26"
|
||||
rlimit = "0.9"
|
||||
ctrlc = "3.2"
|
||||
anyhow = "1.0"
|
||||
leaky-bucket = "0.12"
|
||||
gaoya = "0.1"
|
||||
self_update = { version = "0.36", features = [
|
||||
"archive-tar",
|
||||
"compression-flate2",
|
||||
"archive-zip",
|
||||
"compression-zip-deflate",
|
||||
] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.3.0"
|
||||
httpmock = "0.6.6"
|
||||
assert_cmd = "2.0.4"
|
||||
predicates = "3.0.1"
|
||||
tempfile = "3.3"
|
||||
httpmock = "0.6"
|
||||
assert_cmd = "2.0"
|
||||
predicates = "3.0"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
||||
@@ -11,7 +11,7 @@ rm ferox-*.state
|
||||
# dependency management
|
||||
[tasks.upgrade-deps]
|
||||
command = "cargo"
|
||||
args = ["upgrade", "--exclude", "indicatif"]
|
||||
args = ["upgrade"]
|
||||
|
||||
[tasks.update]
|
||||
command = "cargo"
|
||||
|
||||
10
README.md
10
README.md
@@ -97,8 +97,14 @@ sudo apt update && sudo apt install -y feroxbuster
|
||||
|
||||
#### Linux (32 and 64-bit) & MacOS
|
||||
|
||||
Install to a particular directory
|
||||
```
|
||||
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/master/install-nix.sh | bash
|
||||
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/main/install-nix.sh | bash -s $HOME/.local/bin
|
||||
```
|
||||
|
||||
Install to current working directory
|
||||
```
|
||||
curl -sL https://raw.githubusercontent.com/epi052/feroxbuster/main/install-nix.sh | bash
|
||||
```
|
||||
|
||||
#### MacOS via Homebrew
|
||||
@@ -278,6 +284,8 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Luoooio"><img src="https://avatars.githubusercontent.com/u/26653157?v=4?s=100" width="100px;" alt="Luoooio"/><br /><sub><b>Luoooio</b></sub></a><br /><a href="#ideas-Luoooio" title="Ideas, Planning, & Feedback">🤔</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://petruknisme.com"><img src="https://avatars.githubusercontent.com/u/6284204?v=4?s=100" width="100px;" alt="Aan"/><br /><sub><b>Aan</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/commits?author=aancw" title="Code">💻</a> <a href="#infra-aancw" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="#ideas-aancw" title="Ideas, Planning, & Feedback">🤔</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/imBigo"><img src="https://avatars.githubusercontent.com/u/54672433?v=4?s=100" width="100px;" alt="Simon"/><br /><sub><b>Simon</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3AimBigo" title="Bug reports">🐛</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://acut3.github.io/"><img src="https://avatars.githubusercontent.com/u/17295243?v=4?s=100" width="100px;" alt="Nicolas Christin"/><br /><sub><b>Nicolas Christin</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3Aacut3" title="Bug reports">🐛</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/DrorDvash"><img src="https://avatars.githubusercontent.com/u/8413651?v=4?s=100" width="100px;" alt="DrDv"/><br /><sub><b>DrDv</b></sub></a><br /><a href="https://github.com/epi052/feroxbuster/issues?q=author%3ADrorDvash" title="Bug reports">🐛</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -13,13 +13,13 @@ LIN64_URL="$BASE_URL/$LIN64_ZIP"
|
||||
|
||||
EMOJI_URL=https://gist.github.com/epi052/8196b550ea51d0907ad4b93751b1b57d/raw/6112c9f32ae07922983fdc549c54fd3fb9a38e4c/NotoColorEmoji.ttf
|
||||
|
||||
echo "[+] Installing feroxbuster!"
|
||||
INSTALL_DIR="${1:-$(pwd)}"
|
||||
|
||||
echo "[+] Installing feroxbuster to ${INSTALL_DIR}!"
|
||||
|
||||
which unzip &>/dev/null
|
||||
if [ "$?" = "0" ]; then
|
||||
echo "[+] unzip found"
|
||||
else
|
||||
echo "[ ] unzip not found, exiting. "
|
||||
if [ "$?" != "0" ]; then
|
||||
echo "[!] unzip not found, exiting. "
|
||||
exit -1
|
||||
fi
|
||||
|
||||
@@ -27,20 +27,20 @@ if [[ "$(uname)" == "Darwin" ]]; then
|
||||
echo "[=] Found MacOS, downloading from $MAC_URL"
|
||||
|
||||
curl -sLO "$MAC_URL"
|
||||
unzip -o "$MAC_ZIP" >/dev/null
|
||||
unzip -o "$MAC_ZIP" -d "${INSTALL_DIR}" >/dev/null
|
||||
rm "$MAC_ZIP"
|
||||
elif [[ "$(expr substr $(uname -s) 1 5)" == "Linux" ]]; then
|
||||
if [[ $(getconf LONG_BIT) == 32 ]]; then
|
||||
echo "[=] Found 32-bit Linux, downloading from $LIN32_URL"
|
||||
|
||||
curl -sLO "$LIN32_URL"
|
||||
unzip -o "$LIN32_ZIP" >/dev/null
|
||||
unzip -o "$LIN32_ZIP" -d "${INSTALL_DIR}" >/dev/null
|
||||
rm "$LIN32_ZIP"
|
||||
else
|
||||
echo "[=] Found 64-bit Linux, downloading from $LIN64_URL"
|
||||
|
||||
curl -sLO "$LIN64_URL"
|
||||
unzip -o "$LIN64_ZIP" >/dev/null
|
||||
unzip -o "$LIN64_ZIP" -d "${INSTALL_DIR}" >/dev/null
|
||||
rm "$LIN64_ZIP"
|
||||
fi
|
||||
|
||||
@@ -60,6 +60,8 @@ elif [[ "$(expr substr $(uname -s) 1 5)" == "Linux" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
chmod +x ./feroxbuster
|
||||
chmod +x "${INSTALL_DIR}/feroxbuster"
|
||||
|
||||
echo "[+] Installed feroxbuster version $(./feroxbuster -V)"
|
||||
echo "[+] Installed feroxbuster"
|
||||
echo " [-] path: ${INSTALL_DIR}/feroxbuster"
|
||||
echo " [-] version: $(${INSTALL_DIR}/feroxbuster -V | awk '{print $2}')"
|
||||
|
||||
@@ -24,8 +24,8 @@ _feroxbuster() {
|
||||
'--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
|
||||
'*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
|
||||
'*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
|
||||
'-a+[Sets the User-Agent (default: feroxbuster/2.9.2)]:USER_AGENT: ' \
|
||||
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.2)]:USER_AGENT: ' \
|
||||
'-a+[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
|
||||
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
|
||||
'*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
|
||||
'*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
|
||||
'*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \
|
||||
|
||||
@@ -30,8 +30,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
|
||||
[CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.2)')
|
||||
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.2)')
|
||||
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
|
||||
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
|
||||
[CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
|
||||
[CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
|
||||
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
|
||||
|
||||
@@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
|
||||
cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
|
||||
cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.2)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.2)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.5)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.5)'
|
||||
cand -x 'File extension(s) to search for (ex: -x php -x pdf js)'
|
||||
cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)'
|
||||
cand -m 'Which HTTP request method(s) should be sent (default: GET)'
|
||||
|
||||
@@ -2,12 +2,11 @@ use super::entry::BannerEntry;
|
||||
use crate::{
|
||||
config::Configuration,
|
||||
event_handlers::Handles,
|
||||
utils::{logged_request, status_colorizer},
|
||||
utils::{logged_request, parse_url_with_raw_path, status_colorizer},
|
||||
DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, VERSION,
|
||||
};
|
||||
use anyhow::{bail, Result};
|
||||
use console::{style, Emoji};
|
||||
use reqwest::Url;
|
||||
use serde_json::Value;
|
||||
use std::{io::Write, sync::Arc};
|
||||
|
||||
@@ -478,7 +477,7 @@ by Ben "epi" Risher {} ver: {}"#,
|
||||
pub async fn check_for_updates(&mut self, url: &str, handles: Arc<Handles>) -> Result<()> {
|
||||
log::trace!("enter: needs_update({}, {:?})", url, handles);
|
||||
|
||||
let api_url = Url::parse(url)?;
|
||||
let api_url = parse_url_with_raw_path(url)?;
|
||||
|
||||
let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?;
|
||||
let body = result.text().await?;
|
||||
|
||||
@@ -6,7 +6,10 @@ use super::utils::{
|
||||
use crate::config::determine_output_level;
|
||||
use crate::config::utils::determine_requester_policy;
|
||||
use crate::{
|
||||
client, parser, scan_manager::resume_scan, traits::FeroxSerialize, utils::fmt_err,
|
||||
client, parser,
|
||||
scan_manager::resume_scan,
|
||||
traits::FeroxSerialize,
|
||||
utils::{fmt_err, parse_url_with_raw_path},
|
||||
DEFAULT_CONFIG_NAME,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
@@ -673,7 +676,7 @@ impl Configuration {
|
||||
for denier in arg {
|
||||
// could be an absolute url or a regex, need to determine which and populate the
|
||||
// appropriate vector
|
||||
match Url::parse(denier.trim_end_matches('/')) {
|
||||
match parse_url_with_raw_path(denier.trim_end_matches('/')) {
|
||||
Ok(absolute) => {
|
||||
// denier is an absolute url and can be parsed as such
|
||||
config.url_denylist.push(absolute);
|
||||
|
||||
@@ -242,14 +242,6 @@ impl TermOutHandler {
|
||||
log::trace!("enter: process_response({:?}, {:?})", resp, call_type);
|
||||
|
||||
async move {
|
||||
let should_filter = self
|
||||
.handles
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.filters
|
||||
.data
|
||||
.should_filter_response(&resp, tx_stats.clone());
|
||||
|
||||
let contains_sentry = if !self.config.filter_status.is_empty() {
|
||||
// -C was used, meaning -s was not and we should ignore the defaults
|
||||
// https://github.com/epi052/feroxbuster/issues/535
|
||||
@@ -261,7 +253,7 @@ impl TermOutHandler {
|
||||
};
|
||||
|
||||
let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown
|
||||
let should_process_response = contains_sentry && unknown_sentry && !should_filter;
|
||||
let should_process_response = contains_sentry && unknown_sentry;
|
||||
|
||||
if should_process_response {
|
||||
// print to stdout
|
||||
|
||||
@@ -16,7 +16,7 @@ use crate::{
|
||||
use super::command::Command::AddToUsizeField;
|
||||
use super::*;
|
||||
use crate::statistics::StatField;
|
||||
use reqwest::Url;
|
||||
use crate::utils::parse_url_with_raw_path;
|
||||
use tokio::time::Duration;
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -266,7 +266,7 @@ impl ScanHandler {
|
||||
let bar = scan.progress_bar();
|
||||
|
||||
// (4000 - 3000) / 2 => 500 words left to send
|
||||
let length = bar.length();
|
||||
let length = bar.length().unwrap_or(1);
|
||||
let num_words_left = (length - bar.position()) / divisor;
|
||||
|
||||
// accumulate each bar's increment value for incrementing the total bar
|
||||
@@ -325,7 +325,9 @@ impl ScanHandler {
|
||||
self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan
|
||||
};
|
||||
|
||||
if should_test_deny && should_deny_url(&Url::parse(&target)?, self.handles.clone())? {
|
||||
if should_test_deny
|
||||
&& should_deny_url(&parse_url_with_raw_path(&target)?, self.handles.clone())?
|
||||
{
|
||||
// response was caught by a user-provided deny list
|
||||
// checking this last, since it's most susceptible to longer runtimes due to what
|
||||
// input is received
|
||||
|
||||
@@ -147,7 +147,7 @@ impl StatsHandler {
|
||||
self.stats.errors(),
|
||||
);
|
||||
|
||||
self.bar.set_message(&msg);
|
||||
self.bar.set_message(msg);
|
||||
|
||||
if self.bar.position() < self.stats.total_expected() as u64 {
|
||||
// don't run off the end when we're a few requests over the expected total
|
||||
|
||||
@@ -11,16 +11,60 @@ use crate::{
|
||||
StatField::{LinksExtracted, TotalExpected},
|
||||
},
|
||||
url::FeroxUrl,
|
||||
utils::{logged_request, make_request, send_try_recursion_command, should_deny_url},
|
||||
utils::{
|
||||
logged_request, make_request, parse_url_with_raw_path, send_try_recursion_command,
|
||||
should_deny_url,
|
||||
},
|
||||
ExtractionResult, DEFAULT_METHOD,
|
||||
};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use reqwest::{Client, StatusCode, Url};
|
||||
use futures::StreamExt;
|
||||
use reqwest::{Client, Response, StatusCode, Url};
|
||||
use scraper::{Html, Selector};
|
||||
use std::{borrow::Cow, collections::HashSet};
|
||||
|
||||
/// Wrapper around link extraction logic
|
||||
/// - create a new Url object based on cli options/args
|
||||
/// - check if the new Url has already been seen/scanned -> None
|
||||
/// - make a request to the new Url ? -> Some(response) : None
|
||||
pub(super) async fn request_link(url: &str, handles: Arc<Handles>) -> Result<Response> {
|
||||
log::trace!("enter: request_link({})", url);
|
||||
|
||||
let ferox_url = FeroxUrl::from_string(url, handles.clone());
|
||||
|
||||
// create a url based on the given command line options
|
||||
let new_url = ferox_url.format("", None)?;
|
||||
|
||||
let scanned_urls = handles.ferox_scans()?;
|
||||
|
||||
if scanned_urls.get_scan_by_url(new_url.as_ref()).is_some() {
|
||||
//we've seen the url before and don't need to scan again
|
||||
log::trace!("exit: request_link -> None");
|
||||
bail!("previously seen url");
|
||||
}
|
||||
|
||||
if (!handles.config.url_denylist.is_empty() || !handles.config.regex_denylist.is_empty())
|
||||
&& should_deny_url(&new_url, handles.clone())?
|
||||
{
|
||||
// can't allow a denied url to be requested
|
||||
bail!(
|
||||
"prevented request to {} due to {:?} || {:?}",
|
||||
url,
|
||||
handles.config.url_denylist,
|
||||
handles.config.regex_denylist,
|
||||
);
|
||||
}
|
||||
|
||||
// make the request and store the response
|
||||
let new_response = logged_request(&new_url, DEFAULT_METHOD, None, handles.clone()).await?;
|
||||
|
||||
log::trace!("exit: request_link -> {:?}", new_response);
|
||||
|
||||
Ok(new_response)
|
||||
}
|
||||
|
||||
/// Whether an active scan is recursive or not
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
enum RecursionStatus {
|
||||
/// Scan is recursive
|
||||
Recursive,
|
||||
@@ -81,7 +125,7 @@ impl<'a> Extractor<'a> {
|
||||
) -> Result<()> {
|
||||
log::trace!("enter: parse_url_and_add_subpaths({:?})", links);
|
||||
|
||||
match Url::parse(url_to_parse) {
|
||||
match parse_url_with_raw_path(url_to_parse) {
|
||||
Ok(absolute) => {
|
||||
if absolute.domain() != original_url.domain()
|
||||
|| absolute.host() != original_url.host()
|
||||
@@ -121,91 +165,140 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
/// given a set of links from a normal http body response, task the request handler to make
|
||||
/// the requests
|
||||
pub async fn request_links(&mut self, links: HashSet<String>) -> Result<()> {
|
||||
pub async fn request_links(
|
||||
&mut self,
|
||||
links: HashSet<String>,
|
||||
) -> Result<Option<tokio::task::JoinHandle<()>>> {
|
||||
log::trace!("enter: request_links({:?})", links);
|
||||
|
||||
if links.is_empty() {
|
||||
return Ok(());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
self.update_stats(links.len())?;
|
||||
|
||||
// create clones/remove use of self of/from everything the async move block will need to function
|
||||
let cloned_scanned_urls = self.handles.ferox_scans()?;
|
||||
let cloned_handles = self.handles.clone();
|
||||
let cloned_url = self.url.clone();
|
||||
let threads = self.handles.config.threads;
|
||||
let recursive = if self.handles.config.no_recursion {
|
||||
RecursionStatus::NotRecursive
|
||||
} else {
|
||||
RecursionStatus::Recursive
|
||||
};
|
||||
|
||||
let scanned_urls = self.handles.ferox_scans()?;
|
||||
self.update_stats(links.len())?;
|
||||
let link_request_task = tokio::spawn(async move {
|
||||
let producers = futures::stream::iter(links.into_iter())
|
||||
.map(|link| {
|
||||
// another clone to satisfy the async move block
|
||||
let inner_clone = cloned_handles.clone();
|
||||
|
||||
for link in links {
|
||||
let mut resp = match self.request_link(&link).await {
|
||||
Ok(resp) => resp,
|
||||
Err(_) => continue,
|
||||
};
|
||||
(
|
||||
tokio::spawn(async move { request_link(&link, inner_clone).await }),
|
||||
cloned_handles.clone(),
|
||||
cloned_scanned_urls.clone(),
|
||||
recursive,
|
||||
cloned_url.clone(),
|
||||
)
|
||||
})
|
||||
.for_each_concurrent(
|
||||
threads,
|
||||
|(join_handle, c_handles, c_scanned_urls, c_recursive, og_url)| async move {
|
||||
match join_handle.await {
|
||||
Ok(Ok(reqwest_response)) => {
|
||||
let mut resp = FeroxResponse::from(
|
||||
reqwest_response,
|
||||
&og_url,
|
||||
DEFAULT_METHOD,
|
||||
c_handles.config.output_level,
|
||||
)
|
||||
.await;
|
||||
|
||||
// filter if necessary
|
||||
if self
|
||||
.handles
|
||||
.filters
|
||||
.data
|
||||
.should_filter_response(&resp, self.handles.stats.tx.clone())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// filter if necessary
|
||||
if c_handles
|
||||
.filters
|
||||
.data
|
||||
.should_filter_response(&resp, c_handles.stats.tx.clone())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// request and report assumed file
|
||||
if resp.is_file() || !resp.is_directory() {
|
||||
log::debug!("Extracted File: {}", resp);
|
||||
// request and report assumed file
|
||||
if resp.is_file() || !resp.is_directory() {
|
||||
log::debug!("Extracted File: {}", resp);
|
||||
|
||||
scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest);
|
||||
c_scanned_urls
|
||||
.add_file_scan(resp.url().as_str(), ScanOrder::Latest);
|
||||
|
||||
if self.handles.config.collect_extensions {
|
||||
resp.parse_extension(self.handles.clone())?;
|
||||
}
|
||||
if c_handles.config.collect_extensions {
|
||||
// no real reason this should fail
|
||||
resp.parse_extension(c_handles.clone()).unwrap();
|
||||
}
|
||||
|
||||
if let Err(e) = resp.send_report(self.handles.output.tx.clone()) {
|
||||
log::warn!("Could not send FeroxResponse to output handler: {}", e);
|
||||
}
|
||||
if let Err(e) = resp.send_report(c_handles.output.tx.clone()) {
|
||||
log::warn!(
|
||||
"Could not send FeroxResponse to output handler: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if matches!(recursive, RecursionStatus::Recursive) {
|
||||
log::debug!("Extracted Directory: {}", resp);
|
||||
if matches!(c_recursive, RecursionStatus::Recursive) {
|
||||
log::debug!("Extracted Directory: {}", resp);
|
||||
|
||||
if !resp.url().as_str().ends_with('/')
|
||||
&& (resp.status().is_success()
|
||||
|| matches!(resp.status(), &StatusCode::FORBIDDEN))
|
||||
{
|
||||
// if the url doesn't end with a /
|
||||
// and the response code is either a 2xx or 403
|
||||
if !resp.url().as_str().ends_with('/')
|
||||
&& (resp.status().is_success()
|
||||
|| matches!(resp.status(), &StatusCode::FORBIDDEN))
|
||||
{
|
||||
// if the url doesn't end with a /
|
||||
// and the response code is either a 2xx or 403
|
||||
|
||||
// since all of these are 2xx or 403, recursion is only attempted if the
|
||||
// url ends in a /. I am actually ok with adding the slash and not
|
||||
// adding it, as both have merit. Leaving it in for now to see how
|
||||
// things turn out (current as of: v1.1.0)
|
||||
resp.set_url(&format!("{}/", resp.url()));
|
||||
}
|
||||
// since all of these are 2xx or 403, recursion is only attempted if the
|
||||
// url ends in a /. I am actually ok with adding the slash and not
|
||||
// adding it, as both have merit. Leaving it in for now to see how
|
||||
// things turn out (current as of: v1.1.0)
|
||||
resp.set_url(&format!("{}/", resp.url()));
|
||||
}
|
||||
|
||||
if c_handles.config.filter_status.is_empty() {
|
||||
// -C wasn't used, so -s is the only 'filter' left to account for
|
||||
if c_handles
|
||||
.config
|
||||
.status_codes
|
||||
.contains(&resp.status().as_u16())
|
||||
{
|
||||
send_try_recursion_command(c_handles.clone(), resp)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
}
|
||||
} else {
|
||||
// -C was used, that means the filters above would have removed
|
||||
// those responses, and anything else should be let through
|
||||
send_try_recursion_command(c_handles.clone(), resp)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
log::warn!("Error during link extraction: {}", err);
|
||||
}
|
||||
Err(err) => {
|
||||
log::warn!("JoinError during link extraction: {}", err);
|
||||
}
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// wait for the requests to finish
|
||||
producers.await;
|
||||
});
|
||||
|
||||
if self.handles.config.filter_status.is_empty() {
|
||||
// -C wasn't used, so -s is the only 'filter' left to account for
|
||||
if self
|
||||
.handles
|
||||
.config
|
||||
.status_codes
|
||||
.contains(&resp.status().as_u16())
|
||||
{
|
||||
send_try_recursion_command(self.handles.clone(), resp).await?;
|
||||
}
|
||||
} else {
|
||||
// -C was used, that means the filters above would have removed
|
||||
// those responses, and anything else should be let through
|
||||
send_try_recursion_command(self.handles.clone(), resp).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
log::trace!("exit: request_links");
|
||||
Ok(())
|
||||
Ok(Some(link_request_task))
|
||||
}
|
||||
|
||||
/// wrapper around link extraction via html attributes
|
||||
@@ -385,7 +478,7 @@ impl<'a> Extractor<'a> {
|
||||
ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => {
|
||||
self.response.unwrap().url().clone()
|
||||
}
|
||||
ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
|
||||
ExtractionTarget::RobotsTxt => match parse_url_with_raw_path(&self.url) {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
@@ -415,56 +508,6 @@ impl<'a> Extractor<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Wrapper around link extraction logic
|
||||
/// - create a new Url object based on cli options/args
|
||||
/// - check if the new Url has already been seen/scanned -> None
|
||||
/// - make a request to the new Url ? -> Some(response) : None
|
||||
pub(super) async fn request_link(&self, url: &str) -> Result<FeroxResponse> {
|
||||
log::trace!("enter: request_link({})", url);
|
||||
|
||||
let ferox_url = FeroxUrl::from_string(url, self.handles.clone());
|
||||
|
||||
// create a url based on the given command line options
|
||||
let new_url = ferox_url.format("", None)?;
|
||||
|
||||
let scanned_urls = self.handles.ferox_scans()?;
|
||||
|
||||
if scanned_urls.get_scan_by_url(new_url.as_ref()).is_some() {
|
||||
//we've seen the url before and don't need to scan again
|
||||
log::trace!("exit: request_link -> None");
|
||||
bail!("previously seen url");
|
||||
}
|
||||
|
||||
if (!self.handles.config.url_denylist.is_empty()
|
||||
|| !self.handles.config.regex_denylist.is_empty())
|
||||
&& should_deny_url(&new_url, self.handles.clone())?
|
||||
{
|
||||
// can't allow a denied url to be requested
|
||||
bail!(
|
||||
"prevented request to {} due to {:?} || {:?}",
|
||||
url,
|
||||
self.handles.config.url_denylist,
|
||||
self.handles.config.regex_denylist,
|
||||
);
|
||||
}
|
||||
|
||||
// make the request and store the response
|
||||
let new_response =
|
||||
logged_request(&new_url, DEFAULT_METHOD, None, self.handles.clone()).await?;
|
||||
|
||||
let new_ferox_response = FeroxResponse::from(
|
||||
new_response,
|
||||
url,
|
||||
DEFAULT_METHOD,
|
||||
self.handles.config.output_level,
|
||||
)
|
||||
.await;
|
||||
|
||||
log::trace!("exit: request_link -> {:?}", new_ferox_response);
|
||||
|
||||
Ok(new_ferox_response)
|
||||
}
|
||||
|
||||
/// Entry point to perform link extraction from robots.txt
|
||||
///
|
||||
/// `base_url` can have paths and subpaths, however robots.txt will be requested from the
|
||||
@@ -484,7 +527,7 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
for capture in self.robots_regex.captures_iter(body) {
|
||||
if let Some(new_path) = capture.name("url_path") {
|
||||
let mut new_url = Url::parse(&self.url)?;
|
||||
let mut new_url = parse_url_with_raw_path(&self.url)?;
|
||||
|
||||
new_url.set_path(new_path.as_str());
|
||||
|
||||
@@ -614,7 +657,7 @@ impl<'a> Extractor<'a> {
|
||||
&client
|
||||
};
|
||||
|
||||
let mut url = Url::parse(&self.url)?;
|
||||
let mut url = parse_url_with_raw_path(&self.url)?;
|
||||
url.set_path(location); // overwrite existing path
|
||||
|
||||
// purposefully not using logged_request here due to using the special client
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::builder::{LINKFINDER_REGEX, ROBOTS_TXT_REGEX, URL_CHARS_REGEX};
|
||||
use super::container::request_link;
|
||||
use super::*;
|
||||
use crate::config::{Configuration, OutputLevel};
|
||||
use crate::scan_manager::ScanOrder;
|
||||
@@ -360,13 +361,13 @@ async fn request_link_happy_path() -> Result<()> {
|
||||
then.status(200).body("this is a test");
|
||||
});
|
||||
|
||||
let r_resp = ROBOTS_EXT.request_link(&srv.url("/login.php")).await?;
|
||||
let b_resp = BODY_EXT.request_link(&srv.url("/login.php")).await?;
|
||||
let r_resp = request_link(&srv.url("/login.php"), ROBOTS_EXT.handles.clone()).await?;
|
||||
let b_resp = request_link(&srv.url("/login.php"), BODY_EXT.handles.clone()).await?;
|
||||
|
||||
assert!(matches!(r_resp.status(), &StatusCode::OK));
|
||||
assert!(matches!(b_resp.status(), &StatusCode::OK));
|
||||
assert_eq!(r_resp.content_length(), 14);
|
||||
assert_eq!(b_resp.content_length(), 14);
|
||||
assert!(matches!(r_resp.status(), StatusCode::OK));
|
||||
assert!(matches!(b_resp.status(), StatusCode::OK));
|
||||
assert_eq!(r_resp.content_length().unwrap(), 14);
|
||||
assert_eq!(b_resp.content_length().unwrap(), 14);
|
||||
assert_eq!(mock.hits(), 2);
|
||||
Ok(())
|
||||
}
|
||||
@@ -390,8 +391,8 @@ async fn request_link_bails_on_seen_url() -> Result<()> {
|
||||
let robots = setup_extractor(ExtractionTarget::RobotsTxt, scans.clone());
|
||||
let body = setup_extractor(ExtractionTarget::ResponseBody, scans);
|
||||
|
||||
let r_resp = robots.request_link(&served).await;
|
||||
let b_resp = body.request_link(&served).await;
|
||||
let r_resp = request_link(&served, robots.handles.clone()).await;
|
||||
let b_resp = request_link(&served, body.handles.clone()).await;
|
||||
|
||||
assert!(r_resp.is_err());
|
||||
assert!(b_resp.is_err());
|
||||
|
||||
@@ -4,11 +4,10 @@ use crate::event_handlers::Handles;
|
||||
use crate::filters::similarity::SIM_HASHER;
|
||||
use crate::nlp::preprocess;
|
||||
use crate::response::FeroxResponse;
|
||||
use crate::utils::logged_request;
|
||||
use crate::utils::{logged_request, parse_url_with_raw_path};
|
||||
use crate::DEFAULT_METHOD;
|
||||
use anyhow::Result;
|
||||
use regex::Regex;
|
||||
use reqwest::Url;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// wrapper around logic necessary to create a SimilarityFilter
|
||||
@@ -23,7 +22,7 @@ pub(crate) async fn create_similarity_filter(
|
||||
handles: Arc<Handles>,
|
||||
) -> Result<SimilarityFilter> {
|
||||
// url as-is based on input, ignores user-specified url manipulation options (add-slash etc)
|
||||
let url = Url::parse(similarity_filter)?;
|
||||
let url = parse_url_with_raw_path(similarity_filter)?;
|
||||
|
||||
// attempt to request the given url
|
||||
let resp = logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await?;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use futures::future;
|
||||
use scraper::{Html, Selector};
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -276,138 +278,185 @@ impl HeuristicTests {
|
||||
None
|
||||
};
|
||||
|
||||
// 4 is due to the array in the nested for loop below
|
||||
let mut responses = Vec::with_capacity(4);
|
||||
// no matter what, we want an empty extension for the base case
|
||||
let mut extensions = vec!["".to_string()];
|
||||
|
||||
// and then we want to add any extensions that was specified
|
||||
// or has since been added to the running config
|
||||
for ext in &self.handles.config.extensions {
|
||||
extensions.push(format!(".{}", ext));
|
||||
}
|
||||
|
||||
// for every method, attempt to id its 404 response
|
||||
//
|
||||
// a good example of one where the GET/POST differ is on hackthebox:
|
||||
// - http://prd.m.rendering-api.interface.htb/api
|
||||
//
|
||||
// a good example of one where the heuristics return a 403 and a 404 (apache)
|
||||
// as well as return two different types of 404s based on the file extension
|
||||
// - http://10.10.11.198 (Encoding box in normal labs)
|
||||
//
|
||||
// both methods and extensions can elicit different responses from a given
|
||||
// server, so both are considered when building auto-filter rules
|
||||
for method in self.handles.config.methods.iter() {
|
||||
for (prefix, length) in [("", 1), ("", 3), (".htaccess", 1), ("admin", 1)] {
|
||||
let path = format!("{prefix}{}", self.unique_string(length));
|
||||
for extension in extensions.iter() {
|
||||
// build out the 6 paths we'll use
|
||||
let paths = [
|
||||
("", 1),
|
||||
("", 3),
|
||||
(".htaccess", 1),
|
||||
(".htaccess", 3),
|
||||
("admin", 1),
|
||||
("admin", 3),
|
||||
]
|
||||
.map(|(prefix, length)| {
|
||||
format!("{prefix}{}{extension}", self.unique_string(length))
|
||||
});
|
||||
|
||||
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
|
||||
// allow all 6 requests to fly asynchronously
|
||||
let responses = future::join_all(paths.into_iter().map(|path| async move {
|
||||
let ferox_url = FeroxUrl::from_string(target_url, self.handles.clone());
|
||||
|
||||
let nonexistent_url = ferox_url.format(&path, slash)?;
|
||||
let Ok(nonexistent_url) = ferox_url.format(&path, slash) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
// example requests:
|
||||
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
|
||||
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
|
||||
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
|
||||
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
|
||||
let response =
|
||||
logged_request(&nonexistent_url, method, data, self.handles.clone()).await;
|
||||
// example requests:
|
||||
// - http://localhost/2fc1077836ad43ab98b7a31c2ca28fea
|
||||
// - http://localhost/92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
|
||||
// - http://localhost/.htaccessa005a2131e68449aa26e99029c914c09
|
||||
// - http://localhost/.htaccess92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
|
||||
// - http://localhost/adminf1d2541e73c44dcb9d1fb7d93334b280
|
||||
// - http://localhost/admin92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f
|
||||
let Ok(response) =
|
||||
logged_request(&nonexistent_url, method, data, self.handles.clone())
|
||||
.await else {
|
||||
return None;
|
||||
};
|
||||
|
||||
req_counter += 1;
|
||||
if !self
|
||||
.handles
|
||||
.config
|
||||
.status_codes
|
||||
.contains(&response.status().as_u16())
|
||||
{
|
||||
// if the response code isn't one that's accepted via -s values, then skip to the next
|
||||
//
|
||||
// the default value for -s is all status codes, so unless the user says otherwise
|
||||
// this won't fire
|
||||
return None;
|
||||
}
|
||||
|
||||
// continue to next on error
|
||||
let response = skip_fail!(response);
|
||||
Some(
|
||||
FeroxResponse::from(
|
||||
response,
|
||||
&ferox_url.target,
|
||||
method,
|
||||
self.handles.config.output_level,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
}))
|
||||
.await // await gives vector of options containing feroxresponses
|
||||
.into_iter()
|
||||
.flatten() // strip out the none values
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !self
|
||||
.handles
|
||||
.config
|
||||
.status_codes
|
||||
.contains(&response.status().as_u16())
|
||||
{
|
||||
// if the response code isn't one that's accepted via -s values, then skip to the next
|
||||
//
|
||||
// the default value for -s is all status codes, so unless the user says otherwise
|
||||
// this won't fire
|
||||
if responses.len() < 2 {
|
||||
// don't have enough responses to make a determination, continue to next method
|
||||
log::debug!("not enough responses to make a determination");
|
||||
continue;
|
||||
}
|
||||
|
||||
let ferox_response = FeroxResponse::from(
|
||||
response,
|
||||
&ferox_url.target,
|
||||
method,
|
||||
// check the responses for similarities on which we can filter, multiple may be returned
|
||||
let Some((wildcard_filters, wildcard_responses)) = self.examine_404_like_responses(&responses) else {
|
||||
// no match was found during analysis of responses
|
||||
log::warn!("no match found for 404 responses");
|
||||
continue;
|
||||
};
|
||||
|
||||
// report to the user, if appropriate
|
||||
if matches!(
|
||||
self.handles.config.output_level,
|
||||
)
|
||||
.await;
|
||||
OutputLevel::Default | OutputLevel::Quiet
|
||||
) {
|
||||
// sentry value to control whether or not to print the filter
|
||||
// used because we only want to print the same filter once
|
||||
let mut print_sentry;
|
||||
|
||||
responses.push(ferox_response);
|
||||
}
|
||||
if let Ok(filters) = self.handles.filters.data.filters.read() {
|
||||
for new_wildcard in &wildcard_filters {
|
||||
// reset the sentry for every new wildcard produced by examine_404_like_responses
|
||||
print_sentry = true;
|
||||
|
||||
if responses.len() < 2 {
|
||||
// don't have enough responses to make a determination, continue to next method
|
||||
responses.clear();
|
||||
continue;
|
||||
}
|
||||
for other in filters.iter() {
|
||||
if let Some(other_wildcard) =
|
||||
other.as_any().downcast_ref::<WildcardFilter>()
|
||||
{
|
||||
// check the new wildcard against all existing wildcards, if it was added
|
||||
// on the cli or by a previous directory, don't print it
|
||||
if new_wildcard.as_ref() == other_wildcard {
|
||||
print_sentry = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Command::AddFilter, &str (bytes/words/lines), usize (i.e. length associated with the type)
|
||||
let Some(filter) = self.examine_404_like_responses(&responses) else {
|
||||
// no match was found during analysis of responses
|
||||
responses.clear();
|
||||
continue;
|
||||
};
|
||||
|
||||
// report to the user, if appropriate
|
||||
if matches!(
|
||||
self.handles.config.output_level,
|
||||
OutputLevel::Default | OutputLevel::Quiet
|
||||
) {
|
||||
// sentry value to control whether or not to print the filter
|
||||
// used because we only want to print the same filter once
|
||||
let mut print_sentry = true;
|
||||
|
||||
if let Ok(filters) = self.handles.filters.data.filters.read() {
|
||||
for other in filters.iter() {
|
||||
if let Some(other_wildcard) =
|
||||
other.as_any().downcast_ref::<WildcardFilter>()
|
||||
{
|
||||
if &*filter == other_wildcard {
|
||||
print_sentry = false;
|
||||
break;
|
||||
// if we're here, we've found a new wildcard that we didn't previously display, print it
|
||||
if print_sentry {
|
||||
ferox_print(&format!("{}", new_wildcard), &PROGRESS_PRINTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if print_sentry {
|
||||
ferox_print(&format!("{}", filter), &PROGRESS_PRINTER);
|
||||
// create the new filter
|
||||
for wildcard in wildcard_filters {
|
||||
self.handles.filters.send(Command::AddFilter(wildcard))?;
|
||||
}
|
||||
|
||||
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
|
||||
//
|
||||
// in addition, we'll create a similarity filter as a fallback
|
||||
for resp in wildcard_responses {
|
||||
let hash = SIM_HASHER.create_signature(preprocess(resp.text()).iter());
|
||||
|
||||
let sim_filter = SimilarityFilter {
|
||||
hash,
|
||||
original_url: resp.url().to_string(),
|
||||
};
|
||||
|
||||
self.handles
|
||||
.filters
|
||||
.send(Command::AddFilter(Box::new(sim_filter)))?;
|
||||
|
||||
if resp.is_directory() {
|
||||
// response is either a 3XX with a Location header that matches url + '/'
|
||||
// or it's a 2XX that ends with a '/'
|
||||
// or it's a 403 that ends with a '/'
|
||||
|
||||
// set the wildcard flag to true, so we can check it when preventing
|
||||
// recursion in event_handlers/scans.rs
|
||||
|
||||
// we'd need to clone the response to give ownership to the global list anyway
|
||||
// so we'll also use that clone to set the wildcard flag
|
||||
let mut cloned_resp = resp.clone();
|
||||
|
||||
cloned_resp.set_wildcard(true);
|
||||
|
||||
// add the response to the global list of responses
|
||||
RESPONSES.insert(cloned_resp);
|
||||
|
||||
// function-internal magic number, indicates that we've detected a wildcard directory
|
||||
req_counter += 100;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create the new filter
|
||||
self.handles.filters.send(Command::AddFilter(filter))?;
|
||||
|
||||
// if we're here, we've detected a 404-like response pattern, and we're already filtering for size/word/line
|
||||
//
|
||||
// in addition, we'll create a similarity filter as a fallback
|
||||
let hash = SIM_HASHER.create_signature(preprocess(responses[0].text()).iter());
|
||||
|
||||
let sim_filter = SimilarityFilter {
|
||||
hash,
|
||||
original_url: responses[0].url().to_string(),
|
||||
};
|
||||
|
||||
self.handles
|
||||
.filters
|
||||
.send(Command::AddFilter(Box::new(sim_filter)))?;
|
||||
|
||||
if responses[0].is_directory() {
|
||||
// response is either a 3XX with a Location header that matches url + '/'
|
||||
// or it's a 2XX that ends with a '/'
|
||||
// or it's a 403 that ends with a '/'
|
||||
|
||||
// set the wildcard flag to true, so we can check it when preventing
|
||||
// recursion in event_handlers/scans.rs
|
||||
responses[0].set_wildcard(true);
|
||||
|
||||
// add the response to the global list of responses
|
||||
RESPONSES.insert(responses[0].clone());
|
||||
|
||||
// function-internal magic number, indicates that we've detected a wildcard directory
|
||||
req_counter += 100;
|
||||
}
|
||||
|
||||
// reset the responses for the next method, if it exists
|
||||
responses.clear();
|
||||
}
|
||||
|
||||
log::trace!("exit: detect_404_like_responses");
|
||||
|
||||
let retval = if req_counter > 100 {
|
||||
let retval = if req_counter >= 100 {
|
||||
WildcardResult::WildcardDirectory(req_counter)
|
||||
} else {
|
||||
WildcardResult::FourOhFourLike(req_counter)
|
||||
@@ -416,96 +465,138 @@ impl HeuristicTests {
|
||||
Ok(Some(retval))
|
||||
}
|
||||
|
||||
/// for all responses, examine chars/words/lines
|
||||
/// if all responses respective lengths match each other, we can assume
|
||||
/// that will remain true for subsequent non-existent urls
|
||||
/// for all responses, group them by status code, then examine chars/words/lines.
|
||||
/// if all responses' respective lengths within a status code grouping match
|
||||
/// each other, we can assume that will remain true for subsequent non-existent urls
|
||||
///
|
||||
/// values are examined from most to least specific (content length, word count, line count)
|
||||
fn examine_404_like_responses(
|
||||
/// within a status code grouping, values are examined from most to
|
||||
/// least specific (content length, word count, line count)
|
||||
#[allow(clippy::vec_box)] // the box is needed in the caller and i dont feel like changing it
|
||||
fn examine_404_like_responses<'a>(
|
||||
&self,
|
||||
responses: &[FeroxResponse],
|
||||
) -> Option<Box<WildcardFilter>> {
|
||||
responses: &'a [FeroxResponse],
|
||||
) -> Option<(Vec<Box<WildcardFilter>>, Vec<&'a FeroxResponse>)> {
|
||||
// aside from word/line/byte counts, additional discriminators are status code
|
||||
// extension, and request method. The request method and extension are handled by
|
||||
// the caller, since they're part of the request and make up the nested for loops
|
||||
// in detect_404_like_responses.
|
||||
//
|
||||
// The status code is handled here, since it's part of the response to catch cases
|
||||
// where we have something like a 403 and a 404
|
||||
|
||||
let mut size_sentry = true;
|
||||
let mut word_sentry = true;
|
||||
let mut line_sentry = true;
|
||||
|
||||
let method = responses[0].method();
|
||||
let status_code = responses[0].status();
|
||||
let content_length = responses[0].content_length();
|
||||
let word_count = responses[0].word_count();
|
||||
let line_count = responses[0].line_count();
|
||||
// returned vec of boxed wildcard filters
|
||||
let mut wildcards = Vec::new();
|
||||
|
||||
for response in &responses[1..] {
|
||||
// if any of the responses differ in length, that particular
|
||||
// response length type is no longer a candidate for filtering
|
||||
if response.content_length() != content_length {
|
||||
size_sentry = false;
|
||||
}
|
||||
// returned vec of ferox responses that are needed for additional
|
||||
// analysis
|
||||
let mut wild_responses = Vec::new();
|
||||
|
||||
if response.word_count() != word_count {
|
||||
word_sentry = false;
|
||||
}
|
||||
// mapping of grouped responses to status code
|
||||
let mut grouped_responses = HashMap::new();
|
||||
|
||||
if response.line_count() != line_count {
|
||||
line_sentry = false;
|
||||
}
|
||||
// iterate over all responses and add each response to its
|
||||
// corresponding status code group
|
||||
for response in responses {
|
||||
grouped_responses
|
||||
.entry(response.status())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(response);
|
||||
}
|
||||
|
||||
if !size_sentry && !word_sentry && !line_sentry {
|
||||
// none of the response lengths match, so we can't filter on any of them
|
||||
return None;
|
||||
// iterate over each grouped response and determine the most specific
|
||||
// filter that can be applied to all responses in the group, i.e.
|
||||
// start from byte count and work 'out' to line count
|
||||
for response_group in grouped_responses.values() {
|
||||
if response_group.len() < 2 {
|
||||
// not enough responses to make a determination
|
||||
continue;
|
||||
}
|
||||
|
||||
let method = response_group[0].method();
|
||||
let status_code = response_group[0].status();
|
||||
let content_length = response_group[0].content_length();
|
||||
let word_count = response_group[0].word_count();
|
||||
let line_count = response_group[0].line_count();
|
||||
|
||||
for response in &response_group[1..] {
|
||||
// if any of the responses differ in length, that particular
|
||||
// response length type is no longer a candidate for filtering
|
||||
if response.content_length() != content_length {
|
||||
size_sentry = false;
|
||||
}
|
||||
|
||||
if response.word_count() != word_count {
|
||||
word_sentry = false;
|
||||
}
|
||||
|
||||
if response.line_count() != line_count {
|
||||
line_sentry = false;
|
||||
}
|
||||
}
|
||||
|
||||
if !size_sentry && !word_sentry && !line_sentry {
|
||||
// none of the response lengths match, so we can't filter on any of them
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut wildcard = WildcardFilter {
|
||||
content_length: None,
|
||||
line_count: None,
|
||||
word_count: None,
|
||||
method: method.to_string(),
|
||||
status_code: status_code.as_u16(),
|
||||
dont_filter: self.handles.config.dont_filter,
|
||||
};
|
||||
|
||||
match (size_sentry, word_sentry, line_sentry) {
|
||||
(true, true, true) => {
|
||||
// all three types of length match, so we can't filter on any of them
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.word_count = Some(word_count);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(true, true, false) => {
|
||||
// content length and word count match, so we can filter on either
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.word_count = Some(word_count);
|
||||
}
|
||||
(true, false, true) => {
|
||||
// content length and line count match, so we can filter on either
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(false, true, true) => {
|
||||
// word count and line count match, so we can filter on either
|
||||
wildcard.word_count = Some(word_count);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(true, false, false) => {
|
||||
// content length matches, so we can filter on that
|
||||
wildcard.content_length = Some(content_length);
|
||||
}
|
||||
(false, true, false) => {
|
||||
// word count matches, so we can filter on that
|
||||
wildcard.word_count = Some(word_count);
|
||||
}
|
||||
(false, false, true) => {
|
||||
// line count matches, so we can filter on that
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(false, false, false) => {
|
||||
// none of the length types match, so we can't filter on any of them
|
||||
unreachable!("no wildcard size matches; handled by the if statement above");
|
||||
}
|
||||
};
|
||||
|
||||
wild_responses.push(response_group[0]);
|
||||
wildcards.push(Box::new(wildcard));
|
||||
}
|
||||
|
||||
let mut wildcard = WildcardFilter {
|
||||
content_length: None,
|
||||
line_count: None,
|
||||
word_count: None,
|
||||
method: method.to_string(),
|
||||
status_code: status_code.as_u16(),
|
||||
dont_filter: self.handles.config.dont_filter,
|
||||
};
|
||||
|
||||
match (size_sentry, word_sentry, line_sentry) {
|
||||
(true, true, true) => {
|
||||
// all three types of length match, so we can't filter on any of them
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.word_count = Some(word_count);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(true, true, false) => {
|
||||
// content length and word count match, so we can filter on either
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.word_count = Some(word_count);
|
||||
}
|
||||
(true, false, true) => {
|
||||
// content length and line count match, so we can filter on either
|
||||
wildcard.content_length = Some(content_length);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(false, true, true) => {
|
||||
// word count and line count match, so we can filter on either
|
||||
wildcard.word_count = Some(word_count);
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(true, false, false) => {
|
||||
// content length matches, so we can filter on that
|
||||
wildcard.content_length = Some(content_length);
|
||||
}
|
||||
(false, true, false) => {
|
||||
// word count matches, so we can filter on that
|
||||
wildcard.word_count = Some(word_count);
|
||||
}
|
||||
(false, false, true) => {
|
||||
// line count matches, so we can filter on that
|
||||
wildcard.line_count = Some(line_count);
|
||||
}
|
||||
(false, false, false) => {
|
||||
// none of the length types match, so we can't filter on any of them
|
||||
unreachable!("no wildcard size matches; handled by the if statement above");
|
||||
}
|
||||
};
|
||||
|
||||
Some(Box::new(wildcard))
|
||||
Some((wildcards, wild_responses))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ use feroxbuster::{
|
||||
TermOutHandler, SCAN_COMPLETE,
|
||||
},
|
||||
filters, heuristics, logger,
|
||||
progress::{PROGRESS_BAR, PROGRESS_PRINTER},
|
||||
progress::PROGRESS_PRINTER,
|
||||
scan_manager::{self, ScanType},
|
||||
scanner,
|
||||
utils::{fmt_err, slugify_filename},
|
||||
@@ -220,7 +220,6 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> {
|
||||
// PROGRESS_PRINTER and PROGRESS_BAR have been used at least once. This call satisfies
|
||||
// that constraint
|
||||
PROGRESS_PRINTER.println("");
|
||||
PROGRESS_BAR.join().unwrap();
|
||||
});
|
||||
|
||||
// check if update_app is true
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||
use std::time::Duration;
|
||||
|
||||
use indicatif::{HumanDuration, MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
lazy_static! {
|
||||
@@ -31,30 +33,68 @@ pub enum BarType {
|
||||
/// Add an [indicatif::ProgressBar](https://docs.rs/indicatif/latest/indicatif/struct.ProgressBar.html)
|
||||
/// to the global [PROGRESS_BAR](../config/struct.PROGRESS_BAR.html)
|
||||
pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar {
|
||||
let mut style = ProgressStyle::default_bar().progress_chars("#>-");
|
||||
let mut style = ProgressStyle::default_bar()
|
||||
.progress_chars("#>-")
|
||||
.with_key(
|
||||
"smoothed_per_sec",
|
||||
|state: &indicatif::ProgressState, w: &mut dyn std::fmt::Write| match (
|
||||
state.pos(),
|
||||
state.elapsed().as_millis(),
|
||||
) {
|
||||
// https://github.com/console-rs/indicatif/issues/394#issuecomment-1309971049
|
||||
//
|
||||
// indicatif released a change to how they reported eta/per_sec
|
||||
// and the results looked really weird based on how we use the progress
|
||||
// bars. this fixes that
|
||||
(pos, elapsed_ms) if elapsed_ms > 0 => {
|
||||
write!(w, "{:.0}/s", pos as f64 * 1000_f64 / elapsed_ms as f64).unwrap()
|
||||
}
|
||||
_ => write!(w, "-").unwrap(),
|
||||
},
|
||||
)
|
||||
.with_key(
|
||||
"smoothed_eta",
|
||||
|state: &indicatif::ProgressState, w: &mut dyn std::fmt::Write| match (
|
||||
state.pos(),
|
||||
state.len(),
|
||||
) {
|
||||
(pos, Some(len)) => write!(
|
||||
w,
|
||||
"{:#}",
|
||||
HumanDuration(Duration::from_millis(
|
||||
(state.elapsed().as_millis()
|
||||
* ((len as u128).checked_sub(pos as u128).unwrap_or(1))
|
||||
.checked_div(pos as u128)
|
||||
.unwrap_or(1)) as u64
|
||||
))
|
||||
)
|
||||
.unwrap(),
|
||||
_ => write!(w, "-").unwrap(),
|
||||
},
|
||||
);
|
||||
|
||||
style = match bar_type {
|
||||
BarType::Hidden => style.template(""),
|
||||
BarType::Default => style.template(
|
||||
"[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {per_sec:7} {prefix} {msg}",
|
||||
),
|
||||
BarType::Message => style.template(&format!(
|
||||
BarType::Hidden => style.template("").unwrap(),
|
||||
BarType::Default => style
|
||||
.template("[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {smoothed_per_sec:7} {prefix} {msg}")
|
||||
.unwrap(),
|
||||
BarType::Message => style
|
||||
.template(&format!(
|
||||
"[{{bar:.cyan/blue}}] - {{elapsed:<4}} {{pos:>7}}/{{len:7}} {:7} {{prefix}} {{msg}}",
|
||||
"-"
|
||||
)),
|
||||
BarType::Total => {
|
||||
style.template("[{bar:.yellow/blue}] - {elapsed:<4} {pos:>7}/{len:7} {eta:7} {msg}")
|
||||
}
|
||||
BarType::Quiet => style.template("Scanning: {prefix}"),
|
||||
))
|
||||
.unwrap(),
|
||||
BarType::Total => style
|
||||
.template("[{bar:.yellow/blue}] - {elapsed:<4} {pos:>7}/{len:7} {smoothed_eta:7} {msg}")
|
||||
.unwrap(),
|
||||
BarType::Quiet => style.template("Scanning: {prefix}").unwrap(),
|
||||
};
|
||||
|
||||
let progress_bar = PROGRESS_BAR.add(ProgressBar::new(length));
|
||||
|
||||
progress_bar.set_style(style);
|
||||
|
||||
progress_bar.set_prefix(prefix);
|
||||
|
||||
progress_bar
|
||||
PROGRESS_BAR.add(
|
||||
ProgressBar::new(length)
|
||||
.with_style(style)
|
||||
.with_prefix(prefix.to_string()),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::{
|
||||
event_handlers::{Command, Handles},
|
||||
traits::FeroxSerialize,
|
||||
url::FeroxUrl,
|
||||
utils::{self, fmt_err, status_colorizer},
|
||||
utils::{self, fmt_err, parse_url_with_raw_path, status_colorizer},
|
||||
CommandSender,
|
||||
};
|
||||
|
||||
@@ -140,7 +140,7 @@ impl FeroxResponse {
|
||||
|
||||
/// Set `FeroxResponse`'s `url` attribute, has no affect if an error occurs
|
||||
pub fn set_url(&mut self, url: &str) {
|
||||
match Url::parse(url) {
|
||||
match parse_url_with_raw_path(url) {
|
||||
Ok(url) => {
|
||||
self.url = url;
|
||||
}
|
||||
@@ -170,7 +170,8 @@ impl FeroxResponse {
|
||||
|
||||
/// free the `text` data, reducing memory usage
|
||||
pub fn drop_text(&mut self) {
|
||||
self.text = String::new();
|
||||
self.text.clear(); // length is set to 0
|
||||
self.text.shrink_to_fit(); // allocated capacity shrinks to reflect the new size
|
||||
}
|
||||
|
||||
/// Make a reasonable guess at whether the response is a file or not
|
||||
@@ -394,7 +395,14 @@ impl FeroxResponse {
|
||||
pub fn send_report(self, report_sender: CommandSender) -> Result<()> {
|
||||
log::trace!("enter: send_report({:?}", report_sender);
|
||||
|
||||
report_sender.send(Command::Report(Box::new(self)))?;
|
||||
// there's no reason to send the response body across the mpsc
|
||||
//
|
||||
// the only possible reason is for filtering on the body, but both `send_report`
|
||||
// calls are gated behind checks for `should_filter_response`
|
||||
let mut me = self;
|
||||
me.drop_text();
|
||||
|
||||
report_sender.send(Command::Report(Box::new(me)))?;
|
||||
|
||||
log::trace!("exit: send_report");
|
||||
Ok(())
|
||||
@@ -591,7 +599,7 @@ impl<'de> Deserialize<'de> for FeroxResponse {
|
||||
match key.as_str() {
|
||||
"url" => {
|
||||
if let Some(url) = value.as_str() {
|
||||
if let Ok(parsed) = Url::parse(url) {
|
||||
if let Ok(parsed) = parse_url_with_raw_path(url) {
|
||||
response.url = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,7 +159,7 @@ impl FeroxScan {
|
||||
if pb.position() > self.num_requests {
|
||||
pb.finish()
|
||||
} else {
|
||||
pb.finish_at_current_pos()
|
||||
pb.abandon()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -379,7 +379,7 @@ impl FeroxScans {
|
||||
.unwrap_or_else(|e| log::warn!("Could not cancel task: {}", e));
|
||||
|
||||
let pb = selected.progress_bar();
|
||||
num_cancelled += pb.length() as usize - pb.position() as usize;
|
||||
num_cancelled += pb.length().unwrap_or(0) as usize - pb.position() as usize;
|
||||
} else {
|
||||
self.menu.println("Ok, doing nothing...");
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ fn add_url_to_list_of_scanned_urls_with_known_url() {
|
||||
url,
|
||||
ScanType::Directory,
|
||||
ScanOrder::Latest,
|
||||
pb.length(),
|
||||
pb.length().unwrap(),
|
||||
OutputLevel::Default,
|
||||
Some(pb),
|
||||
);
|
||||
@@ -94,7 +94,7 @@ fn stop_progress_bar_stops_bar() {
|
||||
url,
|
||||
ScanType::Directory,
|
||||
ScanOrder::Latest,
|
||||
pb.length(),
|
||||
pb.length().unwrap(),
|
||||
OutputLevel::Default,
|
||||
Some(pb),
|
||||
);
|
||||
@@ -152,7 +152,7 @@ async fn call_display_scans() {
|
||||
url,
|
||||
ScanType::Directory,
|
||||
ScanOrder::Latest,
|
||||
pb.length(),
|
||||
pb.length().unwrap(),
|
||||
OutputLevel::Default,
|
||||
Some(pb),
|
||||
);
|
||||
@@ -160,7 +160,7 @@ async fn call_display_scans() {
|
||||
url_two,
|
||||
ScanType::Directory,
|
||||
ScanOrder::Latest,
|
||||
pb_two.length(),
|
||||
pb_two.length().unwrap(),
|
||||
OutputLevel::Default,
|
||||
Some(pb_two),
|
||||
);
|
||||
|
||||
@@ -203,6 +203,9 @@ impl FeroxScanner {
|
||||
log::info!("Starting scan against: {}", self.target_url);
|
||||
|
||||
let mut scan_timer = Instant::now();
|
||||
// every time we extract links we'll need to await the task to make sure
|
||||
// it completes before the scan ends
|
||||
let mut extraction_tasks = Vec::new();
|
||||
|
||||
if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) {
|
||||
// check for robots.txt (cannot be in sub-directories, so limited to Initial)
|
||||
@@ -213,7 +216,7 @@ impl FeroxScanner {
|
||||
.build()?;
|
||||
|
||||
let result = extractor.extract().await?;
|
||||
extractor.request_links(result).await?;
|
||||
extraction_tasks.push(extractor.request_links(result).await?)
|
||||
}
|
||||
|
||||
let scanned_urls = self.handles.ferox_scans()?;
|
||||
@@ -265,7 +268,7 @@ impl FeroxScanner {
|
||||
|
||||
let result = extractor.extract_from_dir_listing().await?;
|
||||
|
||||
extractor.request_links(result).await?;
|
||||
extraction_tasks.push(extractor.request_links(result).await?);
|
||||
|
||||
log::trace!("exit: scan_url -> Directory listing heuristic");
|
||||
|
||||
@@ -276,19 +279,27 @@ impl FeroxScanner {
|
||||
|
||||
self.handles.stats.send(SubtractFromUsizeField(
|
||||
TotalExpected,
|
||||
progress_bar.length() as usize,
|
||||
progress_bar.length().unwrap_or(0) as usize,
|
||||
))?;
|
||||
}
|
||||
|
||||
let mut message = format!("=> {}", style("Directory listing").blue().bright());
|
||||
|
||||
if !self.handles.config.extract_links {
|
||||
write!(message, " (add {} to scan)", style("-e").bright().yellow())?;
|
||||
write!(
|
||||
message,
|
||||
" (remove {} to scan)",
|
||||
style("--dont-extract-links").bright().yellow()
|
||||
)?;
|
||||
}
|
||||
|
||||
if !self.handles.config.force_recursion {
|
||||
for handle in extraction_tasks.into_iter().flatten() {
|
||||
_ = handle.await;
|
||||
}
|
||||
|
||||
progress_bar.reset_eta();
|
||||
progress_bar.finish_with_message(&message);
|
||||
progress_bar.finish_with_message(message);
|
||||
|
||||
ferox_scan.finish()?;
|
||||
|
||||
@@ -313,7 +324,7 @@ impl FeroxScanner {
|
||||
style("Wildcard").blue().bright(),
|
||||
style("stopped").red()
|
||||
);
|
||||
progress_bar.set_message(&message);
|
||||
progress_bar.set_message(message);
|
||||
progress_bar.inc(num_reqs as u64);
|
||||
}
|
||||
Some(WildcardResult::FourOhFourLike(num_reqs)) => {
|
||||
@@ -340,7 +351,7 @@ impl FeroxScanner {
|
||||
let new_words = TF_IDF.read().unwrap().all_words();
|
||||
let new_words_len = new_words.len();
|
||||
|
||||
let cur_length = progress_bar.length();
|
||||
let cur_length = progress_bar.length().unwrap_or(0);
|
||||
let new_length = cur_length + new_words_len as u64;
|
||||
|
||||
progress_bar.set_length(new_length);
|
||||
@@ -370,6 +381,10 @@ impl FeroxScanner {
|
||||
scan_timer.elapsed().as_secs_f64(),
|
||||
))?;
|
||||
|
||||
for handle in extraction_tasks.into_iter().flatten() {
|
||||
_ = handle.await;
|
||||
}
|
||||
|
||||
ferox_scan.finish()?;
|
||||
|
||||
log::trace!("exit: scan_url");
|
||||
|
||||
@@ -217,7 +217,7 @@ impl Requester {
|
||||
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message(&format!("=> 🚦 {styled_direction} scan speed",));
|
||||
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
|
||||
}
|
||||
self.policy_data.set_errors(scan_errors);
|
||||
} else {
|
||||
@@ -230,7 +230,7 @@ impl Requester {
|
||||
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message(&format!("=> 🚦 {styled_direction} scan speed",));
|
||||
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,7 +286,7 @@ impl Requester {
|
||||
self.set_rate_limiter(Some(new_limit)).await?;
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message(&format!("=> 🚦 set rate limit ({new_limit}/s)"));
|
||||
.set_message(format!("=> 🚦 set rate limit ({new_limit}/s)"));
|
||||
}
|
||||
|
||||
self.adjust_limit(trigger, true).await?;
|
||||
@@ -321,11 +321,11 @@ impl Requester {
|
||||
|
||||
// figure out how many requests are skipped as a result
|
||||
let pb = self.ferox_scan.progress_bar();
|
||||
let num_skipped = pb.length().saturating_sub(pb.position()) as usize;
|
||||
let num_skipped = pb.length().unwrap_or(0).saturating_sub(pb.position()) as usize;
|
||||
|
||||
let styled_trigger = style(format!("{trigger:?}")).red();
|
||||
|
||||
pb.set_message(&format!(
|
||||
pb.set_message(format!(
|
||||
"=> 💀 too many {} ({}) 💀 bailing",
|
||||
styled_trigger,
|
||||
self.ferox_scan.num_errors(trigger),
|
||||
@@ -490,6 +490,7 @@ impl Requester {
|
||||
.target(ExtractionTarget::ResponseBody)
|
||||
.response(&ferox_response)
|
||||
.handles(self.handles.clone())
|
||||
.url(self.ferox_scan.url())
|
||||
.build()?;
|
||||
|
||||
let new_links: HashSet<_>;
|
||||
@@ -513,7 +514,11 @@ impl Requester {
|
||||
}
|
||||
|
||||
if !new_links.is_empty() {
|
||||
extractor.request_links(new_links).await?;
|
||||
let extraction_task = extractor.request_links(new_links).await?;
|
||||
|
||||
if let Some(task) = extraction_task {
|
||||
_ = task.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
25
src/url.rs
25
src/url.rs
@@ -1,3 +1,4 @@
|
||||
use crate::utils::parse_url_with_raw_path;
|
||||
use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use reqwest::Url;
|
||||
@@ -142,19 +143,19 @@ impl FeroxUrl {
|
||||
word = word.trim_start_matches('/').to_string();
|
||||
};
|
||||
|
||||
let base_url = Url::parse(&url)?;
|
||||
let joined = base_url.join(&word)?;
|
||||
let base_url = parse_url_with_raw_path(&url)?;
|
||||
let mut joined = base_url.join(&word)?;
|
||||
|
||||
if self.handles.config.queries.is_empty() {
|
||||
// no query params to process
|
||||
log::trace!("exit: format -> {}", joined);
|
||||
Ok(joined)
|
||||
} else {
|
||||
let with_params =
|
||||
Url::parse_with_params(joined.as_str(), &self.handles.config.queries)?;
|
||||
log::trace!("exit: format_url -> {}", with_params);
|
||||
Ok(with_params) // request with params attached
|
||||
if !self.handles.config.queries.is_empty() {
|
||||
// if called, this adds a '?' to the url, whether or not there are queries to be added
|
||||
// so we need to check if there are queries to be added before blindly adding the '?'
|
||||
joined
|
||||
.query_pairs_mut()
|
||||
.extend_pairs(self.handles.config.queries.iter());
|
||||
}
|
||||
|
||||
log::trace!("exit: format_url -> {}", joined);
|
||||
Ok(joined)
|
||||
}
|
||||
|
||||
/// Simple helper to abstract away adding a forward-slash to a url if not present
|
||||
@@ -189,7 +190,7 @@ impl FeroxUrl {
|
||||
|
||||
let target = self.normalize();
|
||||
|
||||
let parsed = Url::parse(&target)?;
|
||||
let parsed = parse_url_with_raw_path(&target)?;
|
||||
let parts = parsed
|
||||
.path_segments()
|
||||
.ok_or_else(|| anyhow!("No path segments found"))?;
|
||||
|
||||
381
src/utils.rs
381
src/utils.rs
@@ -75,7 +75,12 @@ pub(crate) async fn send_try_recursion_command(
|
||||
handles: Arc<Handles>,
|
||||
response: FeroxResponse,
|
||||
) -> Result<()> {
|
||||
handles.send_scan_command(Command::TryRecursion(Box::new(response.clone())))?;
|
||||
// make the response mutable so we can drop the body before
|
||||
// sending it over the mpsc
|
||||
let mut response = response;
|
||||
response.drop_text();
|
||||
|
||||
handles.send_scan_command(Command::TryRecursion(Box::new(response)))?;
|
||||
let (tx, rx) = oneshot::channel::<bool>();
|
||||
handles.send_scan_command(Command::Sync(tx))?;
|
||||
rx.await?;
|
||||
@@ -420,9 +425,14 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
|
||||
// current deny-url, now we just need to check to see if this deny-url is a parent
|
||||
// to a scanned url that is also a parent of the given url
|
||||
for ferox_scan in handles.ferox_scans()?.get_active_scans() {
|
||||
let scanner = Url::parse(ferox_scan.url().trim_end_matches('/'))
|
||||
let scanner = parse_url_with_raw_path(ferox_scan.url().trim_end_matches('/'))
|
||||
.with_context(|| format!("Could not parse {ferox_scan} as a url"))?;
|
||||
|
||||
// by calling the new parse_url_with_raw_path, and reaching this point without an
|
||||
// error, we know we have an authority and therefore a host. leaving the code
|
||||
// below, but we should never hit the else condition. leaving it in so if we find
|
||||
// a case where i'm mistaken, we'll know about it and can address it
|
||||
|
||||
if let Some(scan_host) = scanner.host() {
|
||||
// same domain/ip check we perform on the denier above
|
||||
if tested_host != scan_host {
|
||||
@@ -431,7 +441,7 @@ fn should_deny_absolute(url_to_test: &Url, denier: &Url, handles: Arc<Handles>)
|
||||
}
|
||||
} else {
|
||||
// couldn't process .host from scanner
|
||||
continue;
|
||||
unreachable!("should_deny_absolute: scanner.host() returned None, which shouldn't be possible");
|
||||
};
|
||||
|
||||
let scan_path = scanner.path();
|
||||
@@ -482,7 +492,7 @@ pub fn should_deny_url(url: &Url, handles: Arc<Handles>) -> Result<bool> {
|
||||
|
||||
// normalization for comparison is to remove the trailing / if one exists, this is done for
|
||||
// the given url and any url to which it's compared
|
||||
let normed_url = Url::parse(url.to_string().trim_end_matches('/'))?;
|
||||
let normed_url = parse_url_with_raw_path(url.to_string().trim_end_matches('/'))?;
|
||||
|
||||
for denier in &handles.config.url_denylist {
|
||||
// note to self: it may seem as though we can use regex only for --dont-scan, however, in
|
||||
@@ -532,6 +542,187 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String {
|
||||
filename
|
||||
}
|
||||
|
||||
/// This function takes a url string and returns a `url::Url`
|
||||
///
|
||||
/// It is primarily used to detect url paths that `url::Url::parse` will
|
||||
/// silently transform, such as /path/../file.html -> /file.html
|
||||
///
|
||||
/// # Warning
|
||||
///
|
||||
/// In the instance of a url with encoded path traversal strings, such as
|
||||
/// /path/%2e%2e/file.html, the underlying `url::Url::parse` will
|
||||
/// further encode the %-signs and return /path/%252e%252e/file.html
|
||||
pub fn parse_url_with_raw_path(url: &str) -> Result<Url> {
|
||||
log::trace!("enter: parse_url_with_raw_path({})", url);
|
||||
|
||||
let parsed = Url::parse(url)?;
|
||||
|
||||
if !parsed.has_authority() {
|
||||
// parsed correctly, but no authority, meaning mailto: or tel: or
|
||||
// some other url that we don't care about
|
||||
bail!("url to parse has no authority and is therefore invalid");
|
||||
}
|
||||
|
||||
// we have a valid url, the next step is to check the path and see if it's
|
||||
// something that url::Url::parse would silently transform
|
||||
//
|
||||
// i.e. if the path is /path/../file.html, url::Url::parse will transform it
|
||||
// to /file.html, which is not what we want
|
||||
|
||||
let farthest_right_authority_part;
|
||||
|
||||
// we want to find the farthest right authority component, which is the
|
||||
// component that is the furthest right in the url that is part of the
|
||||
// authority
|
||||
//
|
||||
// per RFC 3986, the authority is defined as:
|
||||
// - authority = [ userinfo "@" ] host [ ":" port ]
|
||||
//
|
||||
// so the farthest right authority component is either the port or the host
|
||||
//
|
||||
// i.e. in http://example.com:80/path/file.html, the farthest right authority
|
||||
// component is :80
|
||||
//
|
||||
// in http://example.com/path/file.html, the farthest right authority component
|
||||
// is example.com
|
||||
//
|
||||
// the farthest right authority component is used to split the url into two
|
||||
// parts: the part before the authority and the part after the authority
|
||||
if let Some(port) = parsed.port() {
|
||||
// if the url has a port, then the farthest right authority component is
|
||||
// the port
|
||||
farthest_right_authority_part = format!(":{}", port);
|
||||
} else if parsed.has_host() {
|
||||
// if the url has a host, then the farthest right authority component is
|
||||
// the host
|
||||
farthest_right_authority_part = parsed.host_str().unwrap().to_owned();
|
||||
} else {
|
||||
// if the url has neither a port nor a host, then the url is invalid
|
||||
// and we can't do anything with it, but i don't think this is possible
|
||||
unreachable!("url has an authority, but has neither a port nor a host");
|
||||
}
|
||||
|
||||
// split the original url string into two parts: the part before the authority and the part
|
||||
// after the authority (i.e. the path + query + fragment)
|
||||
|
||||
let Some((_, after_authority)) = url.split_once(&farthest_right_authority_part) else {
|
||||
// if we can't split the url string into two parts, then the url doesn't conform to our
|
||||
// expectations, and we can't continue processing it, so we'll return the parsed url
|
||||
return Ok(parsed);
|
||||
};
|
||||
|
||||
// when there is a port, but it matches the default port for the scheme,
|
||||
// url::Url::parse will mark the port as None, giving us a
|
||||
// `after_authority` that looks something like this:
|
||||
// - :80/path/file.html
|
||||
let after_authority = after_authority
|
||||
.replacen(":80", "", 1)
|
||||
.replacen(":443", "", 1);
|
||||
|
||||
// snippets from rfc-3986:
|
||||
//
|
||||
// foo://example.com:8042/over/there?name=ferret#nose
|
||||
// \_/ \______________/\_________/ \_________/ \__/
|
||||
// | | | | |
|
||||
// scheme authority path query fragment
|
||||
//
|
||||
// The path component is terminated
|
||||
// by the first question mark ("?") or number sign ("#") character, or
|
||||
// by the end of the URI.
|
||||
//
|
||||
// The query component is indicated by the first question
|
||||
// mark ("?") character and terminated by a number sign ("#") character
|
||||
// or by the end of the URI.
|
||||
let (path, _discarded) = after_authority
|
||||
.split_once('?')
|
||||
// if there isn't a '?', try to remove a fragment
|
||||
.unwrap_or_else(|| {
|
||||
// if there isn't a '#', return (original, empty)
|
||||
after_authority
|
||||
.split_once('#')
|
||||
.unwrap_or((&after_authority, ""))
|
||||
});
|
||||
|
||||
// at this point, we have the path, all by itself
|
||||
|
||||
// each of the following is a string that we can expect url::Url::parse to
|
||||
// transform. The variety is to ensure we cover most common path traversal
|
||||
// encodings
|
||||
let transformation_detectors = vec![
|
||||
// ascii
|
||||
"..",
|
||||
// single url encoded
|
||||
"%2e%2e",
|
||||
// double url encoded
|
||||
"%25%32%65%25%32%65",
|
||||
// utf-8 encoded
|
||||
"%c0%ae%c0%ae",
|
||||
"%e0%40%ae%e0%40%ae",
|
||||
"%c0ae%c0ae",
|
||||
// 16 bit shenanigans
|
||||
"%uff0e%uff0e",
|
||||
"%u002e%u002e",
|
||||
];
|
||||
|
||||
let parsing_will_transform_path = transformation_detectors
|
||||
.iter()
|
||||
.any(|detector| path.to_lowercase().contains(detector));
|
||||
|
||||
if !parsing_will_transform_path {
|
||||
// there's no string in the path of the url that will trigger a transformation
|
||||
// so, we can return it as-is
|
||||
return Ok(parsed);
|
||||
}
|
||||
|
||||
// if we reach this point, the path contains a string that will trigger a transformation
|
||||
// so we need to manually create a Url that doesn't have the transformation
|
||||
// and return that
|
||||
//
|
||||
// special thanks to github user @lavafroth for this workaround
|
||||
|
||||
let mut hacked_url = if path.ends_with('/') {
|
||||
// from_file_path silently strips trailing slashes, and
|
||||
// from_directory_path adds them, so we'll choose the appropriate
|
||||
// constructor based on the presence of a path's trailing slash
|
||||
|
||||
// according to from_file_path docs:
|
||||
// from_file_path returns `Err` if the given path is not absolute or,
|
||||
// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
|
||||
//
|
||||
// since we parsed out a valid url path, we know it is absolute, so on non-windows
|
||||
// platforms, we can safely unwrap. On windows, we need to fix up the path
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
let path = format!("\\/IGNOREME{path}");
|
||||
Url::from_directory_path(path).unwrap()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
Url::from_directory_path(path).unwrap()
|
||||
} else {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
let path = format!("\\/IGNOREME{path}");
|
||||
Url::from_file_path(path).unwrap()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
Url::from_file_path(path).unwrap()
|
||||
};
|
||||
|
||||
// host must be set first, otherwise multiple components may return Err
|
||||
hacked_url.set_host(parsed.host_str())?;
|
||||
// scheme/port/username/password can fail, but in this instance, we know they won't
|
||||
hacked_url.set_scheme(parsed.scheme()).unwrap();
|
||||
hacked_url.set_port(parsed.port()).unwrap();
|
||||
hacked_url.set_username(parsed.username()).unwrap();
|
||||
hacked_url.set_password(parsed.password()).unwrap();
|
||||
// query/fragment can't fail
|
||||
hacked_url.set_query(parsed.query());
|
||||
hacked_url.set_fragment(parsed.fragment());
|
||||
|
||||
log::trace!("exit: parse_url_with_raw_path -> {}", hacked_url);
|
||||
Ok(hacked_url)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -539,31 +730,159 @@ mod tests {
|
||||
use crate::scan_manager::{FeroxScans, ScanOrder};
|
||||
|
||||
#[test]
|
||||
/// set_open_file_limit with a low requested limit succeeds
|
||||
fn utils_set_open_file_limit_with_low_requested_limit() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
let lower_limit = hard - 1;
|
||||
assert!(set_open_file_limit(lower_limit));
|
||||
/// multiple tests for parse_url_with_raw_path
|
||||
fn utils_parse_url_with_raw_path() {
|
||||
// ../.. is preserved
|
||||
let url = "https://www.google.com/../../stuff";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.as_str(), url);
|
||||
|
||||
// ../.. is preserved as well as the trailing slash
|
||||
let url = "https://www.google.com/../../stuff/";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.as_str(), url);
|
||||
|
||||
// no trailing slash is preserved
|
||||
let url = "https://www.google.com/stuff";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.as_str(), url);
|
||||
|
||||
// trailing slash is preserved
|
||||
let url = "https://www.google.com/stuff/";
|
||||
let parsed: Url = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.as_str(), url);
|
||||
|
||||
// mailto is an error
|
||||
let url = "mailto:user@example.com";
|
||||
let parsed = parse_url_with_raw_path(url);
|
||||
assert!(parsed.is_err());
|
||||
|
||||
// relative url is an error
|
||||
let url = "../../stuff";
|
||||
let parsed = parse_url_with_raw_path(url);
|
||||
assert!(parsed.is_err());
|
||||
|
||||
// absolute without host is an error
|
||||
let url = "/../../stuff";
|
||||
let parsed = parse_url_with_raw_path(url);
|
||||
assert!(parsed.is_err());
|
||||
|
||||
// default ports are parsed correctly
|
||||
for url in [
|
||||
"http://example.com:80/path/file.html",
|
||||
"https://example.com:443/path/file.html",
|
||||
] {
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert!(parsed.port().is_none());
|
||||
assert_eq!(parsed.host().unwrap().to_string().as_str(), "example.com");
|
||||
}
|
||||
|
||||
// non-default ports are parsed correctly
|
||||
for url in [
|
||||
"http://example.com:8080/path/file.html",
|
||||
"https://example.com:4433/path/file.html",
|
||||
] {
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert!(parsed.port().is_some());
|
||||
assert_eq!(parsed.as_str(), url);
|
||||
}
|
||||
|
||||
// different encodings are respected if found in doubles
|
||||
//
|
||||
// note that the % sign is encoded as %25...
|
||||
let url = "http://user:pass@example.com/%2e%2e/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%252e%252e/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%25%32%65%25%32%65/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%2525%2532%2565%2525%2532%2565/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%c0%ae%c0%ae/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%25c0%25ae%25c0%25ae/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%e0%40%ae%e0%40%ae/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%25e0%2540%25ae%25e0%2540%25ae/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%c0ae%c0ae/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%25c0ae%25c0ae/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%uff0e%uff0e/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%25uff0e%25uff0e/stuff.php"
|
||||
);
|
||||
|
||||
let url = "http://user:pass@example.com/%u002e%u002e/stuff.php";
|
||||
let parsed = parse_url_with_raw_path(url).unwrap();
|
||||
assert_eq!(parsed.username(), "user");
|
||||
assert_eq!(parsed.password().unwrap(), "pass");
|
||||
assert_eq!(
|
||||
parsed.as_str(),
|
||||
"http://user:pass@example.com/%25u002e%25u002e/stuff.php"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// set_open_file_limit with a high requested limit succeeds
|
||||
fn utils_set_open_file_limit_with_high_requested_limit() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
let higher_limit = hard + 1;
|
||||
// calculate a new soft to ensure soft != hard and hit that logic branch
|
||||
let new_soft = hard - 1;
|
||||
setrlimit(Resource::NOFILE, new_soft, hard).unwrap();
|
||||
assert!(set_open_file_limit(higher_limit));
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod nix_only_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
/// set_open_file_limit should fail when hard == soft
|
||||
fn utils_set_open_file_limit_with_fails_when_both_limits_are_equal() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
// calculate a new soft to ensure soft == hard and hit the failure logic branch
|
||||
setrlimit(Resource::NOFILE, hard, hard).unwrap();
|
||||
assert!(!set_open_file_limit(hard)); // returns false
|
||||
#[test]
|
||||
/// set_open_file_limit with a low requested limit succeeds
|
||||
fn utils_set_open_file_limit_with_low_requested_limit() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
let lower_limit = hard - 1;
|
||||
assert!(set_open_file_limit(lower_limit));
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// set_open_file_limit with a high requested limit succeeds
|
||||
fn utils_set_open_file_limit_with_high_requested_limit() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
let higher_limit = hard + 1;
|
||||
// calculate a new soft to ensure soft != hard and hit that logic branch
|
||||
let new_soft = hard - 1;
|
||||
setrlimit(Resource::NOFILE, new_soft, hard).unwrap();
|
||||
assert!(set_open_file_limit(higher_limit));
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// set_open_file_limit should fail when hard == soft
|
||||
fn utils_set_open_file_limit_with_fails_when_both_limits_are_equal() {
|
||||
let (_, hard) = getrlimit(Resource::NOFILE).unwrap();
|
||||
// calculate a new soft to ensure soft == hard and hit the failure logic branch
|
||||
setrlimit(Resource::NOFILE, hard, hard).unwrap();
|
||||
assert!(!set_open_file_limit(hard)); // returns false
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -697,6 +1016,13 @@ mod tests {
|
||||
/// provide a denier from which we can't check a host, which results in no comparison, expect false
|
||||
/// because the denier is a parent to the tested, even tho the scanned doesn't compare, it
|
||||
/// still returns true
|
||||
///
|
||||
/// note: adding parse_url_with_raw_path changed the behavior of this test, it used to return
|
||||
/// true, now it returns false. see my note in should_deny_absolute and the unreachable!
|
||||
/// call block to see why
|
||||
///
|
||||
/// leaving this test here to document the behavior change and to catch regressions in the
|
||||
/// new expected behavior
|
||||
fn should_deny_url_doesnt_compare_non_domains_in_scanned() {
|
||||
let deny_url = "https://testdomain.com/";
|
||||
let scan_url = "unix:/run/foo.socket";
|
||||
@@ -710,8 +1036,7 @@ mod tests {
|
||||
let config = Arc::new(config);
|
||||
|
||||
let handles = Arc::new(Handles::for_testing(Some(scans), Some(config)).0);
|
||||
|
||||
assert!(should_deny_url(&tested_url, handles).unwrap());
|
||||
assert!(!should_deny_url(&tested_url, handles).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -164,7 +164,7 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
|
||||
|
||||
let mock = srv.mock(|when, then| {
|
||||
when.method(GET)
|
||||
.path_matches(Regex::new("/[a-zA-Z0-9]{32}/").unwrap());
|
||||
.path_matches(Regex::new("/[.a-zA-Z0-9]{32,}/").unwrap());
|
||||
then.status(200).body("this is a test");
|
||||
});
|
||||
|
||||
@@ -188,7 +188,8 @@ fn test_static_wildcard_request_found() -> Result<(), Box<dyn std::error::Error>
|
||||
.and(predicate::str::contains("1l")),
|
||||
);
|
||||
|
||||
assert_eq!(mock.hits(), 1);
|
||||
assert_eq!(mock.hits(), 6);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -305,11 +306,67 @@ fn heuristics_wildcard_test_with_two_static_wildcards_with_silent_enabled(
|
||||
.success()
|
||||
.stdout(predicate::str::contains(srv.url("/")));
|
||||
|
||||
assert_eq!(mock.hits(), 4);
|
||||
assert_eq!(mock.hits(), 6);
|
||||
assert_eq!(mock2.hits(), 1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// test finds a 404-like response that returns a 403 and a 403 directory should still be allowed
|
||||
/// to be tested for recrusion
|
||||
fn heuristics_wildcard_test_that_auto_filtering_403s_still_allows_for_recursion_into_403_directories(
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let srv = MockServer::start();
|
||||
|
||||
let super_long = String::from("92969beae6bf4beb855d1622406d87e395c87387a9ad432e8a11245002b709b03cf609d471004154b83bcc1c6ec49f6f09d471004154b83bcc1c6ec49f6f");
|
||||
|
||||
let (tmp_dir, file) =
|
||||
setup_tmp_directory(&["LICENSE".to_string(), super_long.clone()], "wordlist")?;
|
||||
|
||||
srv.mock(|when, then| {
|
||||
when.method(GET)
|
||||
.path_matches(Regex::new("/.?[a-zA-Z0-9]{32,103}").unwrap());
|
||||
then.status(403)
|
||||
.body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
|
||||
});
|
||||
|
||||
srv.mock(|when, then| {
|
||||
when.method(GET).path("/LICENSE/");
|
||||
then.status(403)
|
||||
.body("this is a testAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
|
||||
});
|
||||
|
||||
srv.mock(|when, then| {
|
||||
when.method(GET).path(format!("/LICENSE/{}", super_long));
|
||||
then.status(200);
|
||||
});
|
||||
|
||||
let cmd = Command::cargo_bin("feroxbuster")
|
||||
.unwrap()
|
||||
.arg("--url")
|
||||
.arg(srv.url("/"))
|
||||
.arg("--wordlist")
|
||||
.arg(file.as_os_str())
|
||||
.arg("--add-slash")
|
||||
.unwrap();
|
||||
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
|
||||
cmd.assert().success().stdout(
|
||||
predicate::str::contains("GET")
|
||||
.and(predicate::str::contains(
|
||||
"Auto-filtering found 404-like response and created new filter",
|
||||
))
|
||||
.and(predicate::str::contains("403"))
|
||||
.and(predicate::str::contains("1l"))
|
||||
.and(predicate::str::contains("4w"))
|
||||
.and(predicate::str::contains("46c"))
|
||||
.and(predicate::str::contains(srv.url("/LICENSE/LICENSE/"))),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// /// test finds a static wildcard and reports as much to stdout and a file
|
||||
// fn heuristics_wildcard_test_with_two_static_wildcards_and_output_to_file() {
|
||||
|
||||
Reference in New Issue
Block a user