mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-05-27 16:51:13 -03:00
fixed rate limiting
* fixed requests/sec for small values * ensured limit var is never 0 in build_a_bucket, not just refill * removed unnecessary cooldown flag manipulation in cool_down func * removed minor toctou in should_enforce_policy * added new flag releases before returns from should_enforce_policy * cleaned up how limitheap is initialized from tune func * added (more) safety/bounds checks to limitheap * capped timeout to 30sec; added lock error logging * added per-trigger error tracking to policy data * updated requester to use new policy data per-trigger errors * fixed race condition in progress bar message display; fixed tests * touched up a few minor issues in nlp * fixed req/sec test * fixed more tests * added new test suite for tuning; fixed more tests * clippy/fmt * fixed possible deadlock in error path for tune/bail * fixed a handful of minor correctness issues * removed unnecessary array allocation for error tracking * --rate-limit now serves as a hard cap, in general and on --auto-tune if both are provided together * renamed test file * bumped version to 2.13.1 * added new dirlisting detection heuristics * clippy * nitpickery
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -946,7 +946,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "feroxbuster"
|
||||
version = "2.13.0"
|
||||
version = "2.13.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "feroxbuster"
|
||||
version = "2.13.0"
|
||||
version = "2.13.1"
|
||||
authors = ["Ben 'epi' Risher (@epi052)"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
|
||||
@@ -27,8 +27,8 @@ _feroxbuster() {
|
||||
'--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
|
||||
'*-R+[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE:_default' \
|
||||
'*--replay-codes=[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE:_default' \
|
||||
'-a+[Sets the User-Agent (default\: feroxbuster/2.13.0)]:USER_AGENT:_default' \
|
||||
'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.13.0)]:USER_AGENT:_default' \
|
||||
'-a+[Sets the User-Agent (default\: feroxbuster/2.13.1)]:USER_AGENT:_default' \
|
||||
'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.13.1)]:USER_AGENT:_default' \
|
||||
'*-x+[File extension(s) to search for (ex\: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex\: @ext.txt)]:FILE_EXTENSION:_default' \
|
||||
'*--extensions=[File extension(s) to search for (ex\: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex\: @ext.txt)]:FILE_EXTENSION:_default' \
|
||||
'*-m+[Which HTTP request method(s) should be sent (default\: GET)]:HTTP_METHODS:_default' \
|
||||
@@ -68,7 +68,7 @@ _feroxbuster() {
|
||||
'-L+[Limit total number of concurrent scans (default\: 0, i.e. no limit)]:SCAN_LIMIT:_default' \
|
||||
'--scan-limit=[Limit total number of concurrent scans (default\: 0, i.e. no limit)]:SCAN_LIMIT:_default' \
|
||||
'(-v --verbosity -u --url)--parallel=[Run parallel feroxbuster instances (one child process per url passed via stdin)]:PARALLEL_SCANS:_default' \
|
||||
'(--auto-tune)--rate-limit=[Limit number of requests per second (per directory) (default\: 0, i.e. no limit)]:RATE_LIMIT:_default' \
|
||||
'--rate-limit=[Limit number of requests per second (per directory) (default\: 0, i.e. no limit)]:RATE_LIMIT:_default' \
|
||||
'--response-size-limit=[Limit size of response body to read in bytes (default\: 4MB)]:BYTES:_default' \
|
||||
'--time-limit=[Limit total run time of all scans (ex\: --time-limit 10m)]:TIME_SPEC:_default' \
|
||||
'-w+[Path or URL of the wordlist]:FILE:_files' \
|
||||
|
||||
@@ -33,8 +33,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--replay-proxy', '--replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
|
||||
[CompletionResult]::new('-R', '-R ', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('--replay-codes', '--replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('-a', '-a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.0)')
|
||||
[CompletionResult]::new('--user-agent', '--user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.0)')
|
||||
[CompletionResult]::new('-a', '-a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.1)')
|
||||
[CompletionResult]::new('--user-agent', '--user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.13.1)')
|
||||
[CompletionResult]::new('-x', '-x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)')
|
||||
[CompletionResult]::new('--extensions', '--extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)')
|
||||
[CompletionResult]::new('-m', '-m', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
|
||||
|
||||
@@ -30,8 +30,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
|
||||
cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
|
||||
cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.13.0)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.13.0)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.13.1)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.13.1)'
|
||||
cand -x 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)'
|
||||
cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)'
|
||||
cand -m 'Which HTTP request method(s) should be sent (default: GET)'
|
||||
|
||||
@@ -6,7 +6,7 @@ complete -c feroxbuster -l data-json -d 'Set -H \'Content-Type: application/json
|
||||
complete -c feroxbuster -s p -l proxy -d 'Proxy to use for requests (ex: http(s)://host:port, socks5(h)://host:port)' -r -f
|
||||
complete -c feroxbuster -s P -l replay-proxy -d 'Send only unfiltered requests through a Replay Proxy, instead of all requests' -r -f
|
||||
complete -c feroxbuster -s R -l replay-codes -d 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' -r
|
||||
complete -c feroxbuster -s a -l user-agent -d 'Sets the User-Agent (default: feroxbuster/2.13.0)' -r
|
||||
complete -c feroxbuster -s a -l user-agent -d 'Sets the User-Agent (default: feroxbuster/2.13.1)' -r
|
||||
complete -c feroxbuster -s x -l extensions -d 'File extension(s) to search for (ex: -x php -x pdf js); reads values (newline-separated) from file if input starts with an @ (ex: @ext.txt)' -r
|
||||
complete -c feroxbuster -s m -l methods -d 'Which HTTP request method(s) should be sent (default: GET)' -r
|
||||
complete -c feroxbuster -l data -d 'Request\'s Body; can read data from a file if input starts with an @ (ex: @post.bin)' -r
|
||||
|
||||
@@ -157,20 +157,17 @@ impl Handles {
|
||||
multiplier * num_words
|
||||
}
|
||||
|
||||
/// number of extensions plus the number of request method types plus any dynamically collected
|
||||
/// extensions
|
||||
/// estimate of HTTP requests per word = (base + static extensions + collected extensions)
|
||||
/// multiplied by the number of request methods
|
||||
pub fn expected_num_requests_multiplier(&self) -> usize {
|
||||
let mut multiplier = self.config.extensions.len().max(1);
|
||||
let methods = self.config.methods.len().max(1);
|
||||
let base_requests = 1; // the bare word (with optional slash)
|
||||
let static_extensions = self.config.extensions.len();
|
||||
let dynamic_extensions = self.num_collected_extensions();
|
||||
|
||||
if multiplier > 1 {
|
||||
// when we have more than one extension, we need to account for the fact that we'll
|
||||
// be making a request for each extension and the base word (e.g. /foo.html and /foo)
|
||||
multiplier += 1;
|
||||
}
|
||||
let total_paths = base_requests + static_extensions + dynamic_extensions;
|
||||
|
||||
multiplier *= self.config.methods.len().max(1) * self.num_collected_extensions().max(1);
|
||||
|
||||
multiplier
|
||||
total_paths * methods
|
||||
}
|
||||
|
||||
/// Helper to easily get the (locked) underlying FeroxScans object
|
||||
|
||||
@@ -600,7 +600,10 @@ impl<'a> Extractor<'a> {
|
||||
) {
|
||||
log::trace!("enter: extract_links_by_attr");
|
||||
|
||||
let selector = Selector::parse(html_tag).unwrap();
|
||||
let Some(selector) = Selector::parse(html_tag).ok() else {
|
||||
log::warn!("Failed to parse selector for tag: {html_tag}");
|
||||
return;
|
||||
};
|
||||
|
||||
let tags = html
|
||||
.select(&selector)
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use console::style;
|
||||
use futures::future;
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -18,9 +19,26 @@ use crate::{
|
||||
skip_fail,
|
||||
url::FeroxUrl,
|
||||
utils::{ferox_print, fmt_err, logged_request},
|
||||
DEFAULT_METHOD,
|
||||
COMMON_FILE_EXTENSIONS, DEFAULT_BACKUP_EXTENSIONS, DEFAULT_METHOD,
|
||||
};
|
||||
|
||||
lazy_static! {
    /// Lookup set of file extensions consulted by the directory listing link-density check
    ///
    /// Holds every entry of COMMON_FILE_EXTENSIONS followed by every entry of
    /// DEFAULT_BACKUP_EXTENSIONS, so membership tests are a single hash lookup
    static ref FILE_EXTENSION_SET: HashSet<&'static str> = COMMON_FILE_EXTENSIONS
        .iter()
        .chain(DEFAULT_BACKUP_EXTENSIONS.iter())
        .copied()
        .collect();
}
|
||||
|
||||
/// enum representing the different servers that `parse_html` can detect when directory listing is
|
||||
/// enabled
|
||||
#[derive(Copy, Debug, Clone)]
|
||||
@@ -34,6 +52,9 @@ pub enum DirListingType {
|
||||
/// ASP.NET server, detected by `Directory Listing -- /`
|
||||
AspDotNet,
|
||||
|
||||
/// custom/non-standard directory listing, detected by high-signal heuristics
|
||||
Custom,
|
||||
|
||||
// /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used)
|
||||
// IIS_AZURE,
|
||||
/// variant that represents the absence of directory listing
|
||||
@@ -176,16 +197,14 @@ impl HeuristicTests {
|
||||
let body = ferox_response.text();
|
||||
let html = Html::parse_document(body);
|
||||
|
||||
let dirlist_type = self.detect_directory_listing(&html);
|
||||
|
||||
if dirlist_type.is_some() {
|
||||
if let Some(dir_type) = self.detect_directory_listing(&html) {
|
||||
// folks that run things and step away/rely on logs need to be notified of directory
|
||||
// listing, since they won't see the message on the bar; bastardizing FeroxMessage
|
||||
// for ease of implementation. This could use a bit of polish at some point.
|
||||
|
||||
let msg = format!(
|
||||
"detected directory listing: {} ({:?})",
|
||||
target_url,
|
||||
dirlist_type.unwrap()
|
||||
target_url, dir_type
|
||||
);
|
||||
let ferox_msg = FeroxMessage {
|
||||
kind: "log".to_string(),
|
||||
@@ -203,7 +222,7 @@ impl HeuristicTests {
|
||||
log::info!("{msg}");
|
||||
|
||||
let result = DirListingResult {
|
||||
dir_list_type: dirlist_type,
|
||||
dir_list_type: Some(dir_type),
|
||||
response: ferox_response,
|
||||
};
|
||||
|
||||
@@ -221,10 +240,11 @@ impl HeuristicTests {
|
||||
/// - tomcat/python: `Directory Listing for /`
|
||||
/// - ASP.NET: `Directory Listing -- /`
|
||||
/// - <host> - /: iis, azure, skipping due to loose heuristic
|
||||
/// - custom: detected by combining multiple high-signal heuristics
|
||||
fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> {
|
||||
log::trace!("enter: detect_directory_listing(html body...)");
|
||||
|
||||
let title_selector = Selector::parse("title").expect("couldn't parse title selector");
|
||||
let title_selector = Selector::parse("title").ok()?;
|
||||
|
||||
for t in html.select(&title_selector) {
|
||||
let title = t.inner_html().to_lowercase();
|
||||
@@ -246,10 +266,228 @@ impl HeuristicTests {
|
||||
}
|
||||
}
|
||||
|
||||
// If no standard title-based detection, try high-signal custom heuristics
|
||||
let has_parent_link = self.has_parent_directory_link(html);
|
||||
let has_table_headers = self.has_directory_table_headers(html);
|
||||
let has_sorting_params = self.has_sorting_query_params(html);
|
||||
let has_link_density = self.has_high_link_density(html);
|
||||
|
||||
let signal_count = [
|
||||
has_parent_link,
|
||||
has_table_headers,
|
||||
has_sorting_params,
|
||||
has_link_density,
|
||||
]
|
||||
.iter()
|
||||
.filter(|&&x| x)
|
||||
.count();
|
||||
|
||||
if signal_count >= 2 {
|
||||
let mut signals = Vec::new();
|
||||
if has_parent_link {
|
||||
signals.push("parent-link");
|
||||
}
|
||||
if has_table_headers {
|
||||
signals.push("table-headers");
|
||||
}
|
||||
if has_sorting_params {
|
||||
signals.push("sorting-params");
|
||||
}
|
||||
if has_link_density {
|
||||
signals.push("link-density");
|
||||
}
|
||||
log::debug!("custom directory listing signals: [{}]", signals.join(", "));
|
||||
log::trace!("exit: detect_directory_listing -> Some(Custom)");
|
||||
return Some(DirListingType::Custom);
|
||||
}
|
||||
|
||||
log::trace!("exit: detect_directory_listing -> None");
|
||||
None
|
||||
}
|
||||
|
||||
/// check if the HTML contains a link to the parent directory
|
||||
///
|
||||
/// returns true if any anchor element has:
|
||||
/// - href equals "../" or ".."
|
||||
/// - visible text contains "parent directory", "to parent", or "up to parent"
|
||||
fn has_parent_directory_link(&self, html: &Html) -> bool {
|
||||
log::trace!("enter: has_parent_directory_link");
|
||||
|
||||
let Some(anchor_selector) = Selector::parse("a").ok() else {
|
||||
log::warn!("failed to parse anchor selector in has_parent_directory_link");
|
||||
return false;
|
||||
};
|
||||
|
||||
for anchor in html.select(&anchor_selector) {
|
||||
if let Some(href) = anchor.value().attr("href") {
|
||||
let href_lower = href.trim().to_lowercase();
|
||||
if href_lower == "../" || href_lower == ".." {
|
||||
log::trace!("exit: has_parent_directory_link -> true (href match)");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
let text = anchor.text().collect::<String>().to_lowercase();
|
||||
let text_trimmed = text.trim();
|
||||
if text_trimmed.contains("parent directory")
|
||||
|| text_trimmed.contains("to parent")
|
||||
|| text_trimmed.contains("up to parent")
|
||||
{
|
||||
log::trace!("exit: has_parent_directory_link -> true (text match)");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
log::trace!("exit: has_parent_directory_link -> false");
|
||||
false
|
||||
}
|
||||
|
||||
/// check if the HTML contains table headers typical of directory listings
|
||||
///
|
||||
/// returns true if at least two of the following header categories are present:
|
||||
/// - name headers: "file name", "filename", "name"
|
||||
/// - size headers: "size", "file size"
|
||||
/// - time headers: "date", "last modified", "modified", "last mod"
|
||||
fn has_directory_table_headers(&self, html: &Html) -> bool {
|
||||
log::trace!("enter: has_directory_table_headers");
|
||||
|
||||
let Some(th_selector) = Selector::parse("th").ok() else {
|
||||
log::warn!("failed to parse th selector in has_directory_table_headers");
|
||||
return false;
|
||||
};
|
||||
let Some(td_selector) = Selector::parse("td").ok() else {
|
||||
log::warn!("failed to parse td selector in has_directory_table_headers");
|
||||
return false;
|
||||
};
|
||||
|
||||
let mut headers = Vec::new();
|
||||
|
||||
// try <th> elements first
|
||||
for th in html.select(&th_selector) {
|
||||
let text = th.text().collect::<String>().to_lowercase();
|
||||
headers.push(text.trim().to_string());
|
||||
}
|
||||
|
||||
// fallback: if no <th> elements, try first row of <td> elements
|
||||
if headers.is_empty() {
|
||||
if let Ok(tr_selector) = Selector::parse("tr") {
|
||||
if let Some(first_row) = html.select(&tr_selector).next() {
|
||||
for td in first_row.select(&td_selector) {
|
||||
let text = td.text().collect::<String>().to_lowercase();
|
||||
headers.push(text.trim().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut has_name = false;
|
||||
let mut has_size = false;
|
||||
let mut has_time = false;
|
||||
|
||||
for header in headers {
|
||||
if header == "name" || header.contains("file name") || header.contains("filename") {
|
||||
has_name = true;
|
||||
}
|
||||
if header.contains("size") || header.contains("file size") {
|
||||
has_size = true;
|
||||
}
|
||||
if header.contains("date")
|
||||
|| header.contains("last modified")
|
||||
|| header.contains("modified")
|
||||
|| header.contains("last mod")
|
||||
{
|
||||
has_time = true;
|
||||
}
|
||||
}
|
||||
|
||||
let category_count = [has_name, has_size, has_time]
|
||||
.iter()
|
||||
.filter(|&&x| x)
|
||||
.count();
|
||||
let result = category_count >= 2;
|
||||
|
||||
log::trace!("exit: has_directory_table_headers -> {result}");
|
||||
result
|
||||
}
|
||||
|
||||
/// check if the HTML contains sorting query parameters typical of auto-index pages
|
||||
///
|
||||
/// returns true if any anchor href contains sorting parameters like:
|
||||
/// - ?C=N (name), ?C=S (size), ?C=M (modified), ?C=D (date)
|
||||
/// - optionally combined with &O=A or &O=D (ascending/descending)
|
||||
fn has_sorting_query_params(&self, html: &Html) -> bool {
|
||||
log::trace!("enter: has_sorting_query_params");
|
||||
|
||||
let Some(anchor_selector) = Selector::parse("a").ok() else {
|
||||
log::warn!("failed to parse anchor selector in has_sorting_query_params");
|
||||
return false;
|
||||
};
|
||||
|
||||
for anchor in html.select(&anchor_selector) {
|
||||
if let Some(href) = anchor.value().attr("href") {
|
||||
let href_lower = href.to_lowercase();
|
||||
if href_lower.contains("?c=n")
|
||||
|| href_lower.contains("?c=s")
|
||||
|| href_lower.contains("?c=m")
|
||||
|| href_lower.contains("?c=d")
|
||||
{
|
||||
log::trace!("exit: has_sorting_query_params -> true");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::trace!("exit: has_sorting_query_params -> false");
|
||||
false
|
||||
}
|
||||
|
||||
/// check if the HTML has a high density of file/directory links
|
||||
///
|
||||
/// returns true if there are at least 3 links that look like files or directories:
|
||||
/// - href ends with '/' (likely subdirectory)
|
||||
/// - href looks like a file (common extensions)
|
||||
fn has_high_link_density(&self, html: &Html) -> bool {
|
||||
log::trace!("enter: has_high_link_density");
|
||||
|
||||
const MIN_LINKS: usize = 3;
|
||||
|
||||
let Some(anchor_selector) = Selector::parse("a").ok() else {
|
||||
log::warn!("failed to parse anchor selector in has_high_link_density");
|
||||
return false;
|
||||
};
|
||||
let mut count = 0;
|
||||
|
||||
for anchor in html.select(&anchor_selector) {
|
||||
if let Some(href) = anchor.value().attr("href") {
|
||||
let href_trimmed = href.trim();
|
||||
|
||||
// skip parent directory links and fragments
|
||||
if href_trimmed == "../" || href_trimmed == ".." || href_trimmed.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if it's a directory (ends with /)
|
||||
if href_trimmed.ends_with('/') {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if it looks like a file - extract extension and O(1) lookup
|
||||
let href_lower = href_trimmed.to_lowercase();
|
||||
if let Some(dot_pos) = href_lower.rfind('.') {
|
||||
let extension = &href_lower[dot_pos..];
|
||||
if FILE_EXTENSION_SET.contains(extension) {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let result = count >= MIN_LINKS;
|
||||
log::trace!("exit: has_high_link_density -> {result} (count: {count})");
|
||||
result
|
||||
}
|
||||
|
||||
/// given a target's base url, attempt to automatically detect its 404 response
|
||||
/// pattern(s), and then set filters that will exclude those patterns from future
|
||||
/// responses
|
||||
@@ -660,4 +898,210 @@ mod tests {
|
||||
let dirlist_type = heuristics.detect_directory_listing(&parsed);
|
||||
assert!(dirlist_type.is_none());
|
||||
}
|
||||
|
||||
/// both parent hrefs (`../` and `..`) are recognized by `has_parent_directory_link`
#[test]
fn has_parent_directory_link_detects_by_href() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    for markup in [r#"<a href="../">Go up</a>"#, r#"<a href="..">Go up</a>"#] {
        let document = Html::parse_document(markup);
        assert!(heuristics.has_parent_directory_link(&document));
    }
}
|
||||
|
||||
/// anchor text alone ("Parent Directory", "To Parent") satisfies
/// `has_parent_directory_link`, even when the href is not a parent path
#[test]
fn has_parent_directory_link_detects_by_text() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let samples = [
        r#"<a href="/parent">Parent Directory</a>"#,
        r#"<a href="/up">To Parent</a>"#,
    ];

    for markup in samples {
        let document = Html::parse_document(markup);
        assert!(heuristics.has_parent_directory_link(&document));
    }
}
|
||||
|
||||
/// an ordinary link is not mistaken for a parent directory link by
/// `has_parent_directory_link`
#[test]
fn has_parent_directory_link_returns_false_when_absent() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document = Html::parse_document(r#"<a href="/about">About</a>"#);

    assert!(!heuristics.has_parent_directory_link(&document));
}
|
||||
|
||||
/// a name header plus a size header is enough for `has_directory_table_headers`
#[test]
fn has_directory_table_headers_detects_name_and_size() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document = Html::parse_document(
        r#"<table><thead><tr><th>File Name</th><th>Size</th></tr></thead></table>"#,
    );

    assert!(heuristics.has_directory_table_headers(&document));
}
|
||||
|
||||
/// a name header plus a time header is enough for `has_directory_table_headers`
#[test]
fn has_directory_table_headers_detects_name_and_date() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document = Html::parse_document(
        r#"<table><thead><tr><th>Name</th><th>Last Modified</th></tr></thead></table>"#,
    );

    assert!(heuristics.has_directory_table_headers(&document));
}
|
||||
|
||||
/// a single matching category (name only) is rejected by `has_directory_table_headers`
#[test]
fn has_directory_table_headers_requires_two_categories() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document = Html::parse_document(
        r#"<table><thead><tr><th>Name</th><th>Description</th></tr></thead></table>"#,
    );

    assert!(!heuristics.has_directory_table_headers(&document));
}
|
||||
|
||||
/// Apache-style `?C=..&O=..` sort links are picked up by `has_sorting_query_params`
#[test]
fn has_sorting_query_params_detects_apache_style() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document =
        Html::parse_document(r#"<a href="?C=N&O=A">Name</a><a href="?C=S&O=D">Size</a>"#);

    assert!(heuristics.has_sorting_query_params(&document));
}
|
||||
|
||||
/// an unrelated query string does not trip `has_sorting_query_params`
#[test]
fn has_sorting_query_params_returns_false_when_absent() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let document = Html::parse_document(r#"<a href="/page?q=search">Search</a>"#);

    assert!(!heuristics.has_sorting_query_params(&document));
}
|
||||
|
||||
/// one directory link plus two file links reaches the threshold of `has_high_link_density`
#[test]
fn has_high_link_density_detects_files_and_dirs() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let markup = r#"
        <a href="backup/">backup/</a>
        <a href="file1.html">file1.html</a>
        <a href="file2.txt">file2.txt</a>
    "#;
    let document = Html::parse_document(markup);

    assert!(heuristics.has_high_link_density(&document));
}
|
||||
|
||||
/// two qualifying links fall short of the 3-link minimum of `has_high_link_density`
#[test]
fn has_high_link_density_requires_minimum_links() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let markup = r#"
        <a href="backup/">backup/</a>
        <a href="file.html">file.html</a>
    "#;
    let document = Html::parse_document(markup);

    assert!(!heuristics.has_high_link_density(&document));
}
|
||||
|
||||
/// the `../` link is not counted by `has_high_link_density`, leaving only two links
#[test]
fn has_high_link_density_ignores_parent_links() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let markup = r#"
        <a href="../">Parent</a>
        <a href="backup/">backup/</a>
        <a href="file.html">file.html</a>
    "#;
    let document = Html::parse_document(markup);

    assert!(!heuristics.has_high_link_density(&document));
}
|
||||
|
||||
/// a page carrying 2+ heuristic signals is reported as `Custom` by
/// `detect_directory_listing`
#[test]
fn detect_directory_listing_detects_custom_with_multiple_signals() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    // This HTML has parent link, table headers, sorting params, and link density
    let markup = r#"
        <table><thead><tr>
        <th><a href="?C=N&O=A">File Name</a></th>
        <th><a href="?C=S&O=A">Size</a></th>
        </tr></thead>
        <tbody>
        <tr><td><a href="../">Parent directory/</a></td></tr>
        <tr><td><a href="backup/">backup/</a></td></tr>
        <tr><td><a href="pass.html">pass.html</a></td></tr>
        </tbody></table>
    "#;
    let document = Html::parse_document(markup);

    let detected = heuristics.detect_directory_listing(&document);

    assert!(matches!(detected, Some(DirListingType::Custom)));
}
|
||||
|
||||
/// a single signal is not enough for custom detection in `detect_directory_listing`
#[test]
fn detect_directory_listing_requires_two_signals() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    // This HTML has only parent link (1 signal)
    let document = Html::parse_document(r#"<a href="../">Parent directory/</a>"#);

    assert!(heuristics.detect_directory_listing(&document).is_none());
}
|
||||
|
||||
/// the Root-Me sample page is classified as `Custom` by `detect_directory_listing`
#[test]
fn detect_directory_listing_detects_rootme_sample() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    // Simplified version of response.html from Root-Me
    let markup = r#"
        <table id="list">
        <thead><tr>
        <th><a href="?C=N&O=A">File Name</a></th>
        <th><a href="?C=S&O=A">File Size</a></th>
        <th><a href="?C=M&O=A">Date</a></th>
        </tr></thead>
        <tbody>
        <tr><td><a href="../">Parent directory/</a></td><td>-</td><td>-</td></tr>
        <tr><td><a href="backup/">backup/</a></td><td>-</td><td>2021-Dec-10</td></tr>
        <tr><td><a href="pass.html">pass.html</a></td><td>346 B</td><td>2021-Dec-10</td></tr>
        </tbody>
        </table>
    "#;
    let document = Html::parse_document(markup);

    let detected = heuristics.detect_directory_listing(&document);

    assert!(matches!(detected, Some(DirListingType::Custom)));
}
|
||||
|
||||
/// a plain navigation block full of links is not reported by `detect_directory_listing`
#[test]
fn detect_directory_listing_ignores_generic_pages() {
    let handles = Handles::for_testing(None, None);
    let heuristics = HeuristicTests::new(Arc::new(handles.0));

    let markup = r#"
        <nav>
        <a href="/about">About</a>
        <a href="/contact">Contact</a>
        <a href="/services">Services</a>
        <a href="/products">Products</a>
        </nav>
    "#;
    let document = Html::parse_document(markup);

    assert!(heuristics.detect_directory_listing(&document).is_none());
}
|
||||
}
|
||||
|
||||
169
src/lib.rs
169
src/lib.rs
@@ -63,6 +63,175 @@ pub(crate) const DEFAULT_IGNORED_EXTENSIONS: [&str; 38] = [
|
||||
/// Default set of extensions to search for when auto-collecting backups during scans
|
||||
pub(crate) const DEFAULT_BACKUP_EXTENSIONS: [&str; 5] = ["~", ".bak", ".bak2", ".old", ".1"];
|
||||
|
||||
/// Common file extensions (leading dot included), grouped by category.
///
/// Used for link density detection in directory listings; the list is based on
/// https://www.computerhope.com/issues/ch001789.htm
#[rustfmt::skip]
pub(crate) const COMMON_FILE_EXTENSIONS: [&str; 154] = [
    // Web & Documents
    ".html", ".htm", ".php", ".asp", ".aspx", ".jsp", ".jspx", ".cgi", ".pl", ".py", ".rb", ".lua",
    ".txt", ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods", ".odp", ".rtf",
    ".tex", ".md", ".csv",
    // Programming & Scripts
    ".js", ".mjs", ".ts", ".jsx", ".tsx", ".css", ".scss", ".sass", ".less", ".java", ".class", ".jar",
    ".c", ".cpp", ".h", ".hpp", ".cs", ".vb", ".go", ".rs", ".swift", ".kt", ".scala", ".r", ".m", ".mm",
    ".f", ".f90", ".pas", ".asm", ".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1", ".psm1",
    // Data & Config
    ".xml", ".json", ".yaml", ".yml", ".toml", ".ini", ".conf", ".config", ".cfg", ".properties",
    ".env", ".sql", ".db", ".sqlite", ".mdb", ".accdb",
    // Archives & Compressed
    ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz", ".tgz", ".tbz2", ".cab", ".dmg", ".iso", ".img",
    // Executables & Libraries
    ".exe", ".dll", ".so", ".dylib", ".app", ".deb", ".rpm", ".apk", ".msi",
    // Images
    ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".webp", ".ico", ".tif", ".tiff", ".psd", ".ai",
    ".eps", ".raw", ".cr2", ".nef",
    // Audio
    ".mp3", ".wav", ".flac", ".aac", ".ogg", ".wma", ".m4a", ".opus", ".aiff",
    // Video
    ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".m4v", ".mpg", ".mpeg", ".3gp", ".ogv",
    // Fonts
    ".ttf", ".otf", ".woff", ".woff2", ".eot",
    // Backups & Logs
    ".log", ".bak", ".tmp", ".temp", ".swp", ".swo", ".old", ".orig", ".backup",
];
|
||||
|
||||
/// Default wordlist to use when `-w|--wordlist` isn't specified and `wordlist` isn't set
|
||||
/// in a [ferox-config.toml](constant.DEFAULT_CONFIG_NAME.html) config file.
|
||||
///
|
||||
|
||||
@@ -20,11 +20,10 @@ impl Document {
|
||||
|
||||
let processed = preprocess(text);
|
||||
|
||||
document.number_of_terms += processed.len();
|
||||
|
||||
for normalized in processed {
|
||||
if normalized.len() >= 2 {
|
||||
document.add_term(&normalized)
|
||||
document.add_term(&normalized);
|
||||
document.number_of_terms += 1;
|
||||
}
|
||||
}
|
||||
document
|
||||
|
||||
@@ -73,7 +73,11 @@ impl TfIdf {
|
||||
to_add.push(score);
|
||||
}
|
||||
|
||||
let average: f32 = to_add.iter().sum::<f32>() / to_add.len() as f32;
|
||||
let average = if to_add.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
to_add.iter().sum::<f32>() / to_add.len() as f32
|
||||
};
|
||||
|
||||
*metadata.tf_idf_score_mut() = average;
|
||||
}
|
||||
|
||||
@@ -22,6 +22,15 @@ impl Term {
|
||||
}
|
||||
|
||||
/// metadata to be associated with a `Term`
|
||||
///
|
||||
/// # Design Note
|
||||
///
|
||||
/// The `count` field represents the number of times a term appeared in a **single document**
|
||||
/// and is only meaningful in the per-document context (i.e., within a `Document`).
|
||||
///
|
||||
/// When `TermMetaData` is stored in the global `TfIdf` model, the `count` field becomes stale
|
||||
/// and is not used. Instead, the model relies on `term_frequencies` (which tracks the term
|
||||
/// frequency for each document the term appears in) and calculates TF-IDF scores from those.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(super) struct TermMetaData {
|
||||
/// number of times the associated `Term` was seen in a single document
|
||||
|
||||
@@ -547,7 +547,6 @@ pub fn initialize() -> Command {
|
||||
.long("rate-limit")
|
||||
.value_name("RATE_LIMIT")
|
||||
.num_args(1)
|
||||
.conflicts_with("auto_tune")
|
||||
.help_heading("Scan settings")
|
||||
.help("Limit number of requests per second (per directory) (default: 0, i.e. no limit)")
|
||||
)
|
||||
|
||||
@@ -86,7 +86,7 @@ pub struct FeroxScan {
|
||||
pub(super) errors: AtomicUsize,
|
||||
|
||||
/// tracker for the time at which this scan was started
|
||||
pub(super) start_time: Instant,
|
||||
pub(super) start_time: Mutex<Instant>,
|
||||
|
||||
/// whether the progress bar is currently visible or hidden
|
||||
pub(super) visible: AtomicBool,
|
||||
@@ -117,7 +117,7 @@ impl Default for FeroxScan {
|
||||
errors: Default::default(),
|
||||
status_429s: Default::default(),
|
||||
status_403s: Default::default(),
|
||||
start_time: Instant::now(),
|
||||
start_time: Mutex::new(Instant::now()),
|
||||
visible: AtomicBool::new(true),
|
||||
}
|
||||
}
|
||||
@@ -210,6 +210,14 @@ impl FeroxScan {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// small wrapper to set `start_time`
|
||||
pub fn set_start_time(&self, start_time: Instant) -> Result<()> {
|
||||
if let Ok(mut guard) = self.start_time.lock() {
|
||||
let _ = std::mem::replace(&mut *guard, start_time);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Simple helper to call .finish on the scan's progress bar
|
||||
pub(super) fn stop_progress_bar(&self, active_bars: usize) {
|
||||
if let Ok(guard) = self.progress_bar.lock() {
|
||||
@@ -428,9 +436,24 @@ impl FeroxScan {
|
||||
}
|
||||
|
||||
let reqs = self.requests();
|
||||
let seconds = self.start_time.elapsed().as_secs();
|
||||
let seconds = if let Ok(guard) = self.start_time.lock() {
|
||||
guard.elapsed().as_secs_f64()
|
||||
} else {
|
||||
log::warn!("Could not acquire lock to read start_time for requests_per_second calculation on scan: {self:?}");
|
||||
0.0
|
||||
};
|
||||
|
||||
reqs.checked_div(seconds).unwrap_or(0)
|
||||
if seconds == 0.0 || !seconds.is_finite() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let rate = reqs as f64 / seconds;
|
||||
|
||||
if rate > u64::MAX as f64 {
|
||||
u64::MAX
|
||||
} else {
|
||||
rate as u64
|
||||
}
|
||||
}
|
||||
|
||||
/// return the number of requests performed by this scan's scanner
|
||||
@@ -646,11 +669,11 @@ mod tests {
|
||||
status: Mutex::new(ScanStatus::Running),
|
||||
task: Default::default(),
|
||||
progress_bar: Mutex::new(None),
|
||||
output_level: Default::default(),
|
||||
output_level: OutputLevel::Silent,
|
||||
status_403s: Default::default(),
|
||||
status_429s: Default::default(),
|
||||
errors: Default::default(),
|
||||
start_time: Instant::now(),
|
||||
start_time: Mutex::new(Instant::now()),
|
||||
handles: None,
|
||||
};
|
||||
|
||||
@@ -661,7 +684,13 @@ mod tests {
|
||||
|
||||
let req_sec = scan.requests_per_second();
|
||||
|
||||
assert_eq!(req_sec, 100);
|
||||
// allow for timing imprecision: sleep overhead makes elapsed time slightly > 1 second
|
||||
// e.g., 100 reqs / 1.01s = 99 req/s
|
||||
assert!(
|
||||
(99..=101).contains(&req_sec),
|
||||
"Expected ~100 req/s, got {}",
|
||||
req_sec
|
||||
);
|
||||
|
||||
scan.finish(0).unwrap();
|
||||
assert_eq!(scan.requests_per_second(), 0);
|
||||
|
||||
@@ -617,7 +617,7 @@ fn feroxscan_display() {
|
||||
num_requests: 0,
|
||||
requests_made_so_far: 0,
|
||||
visible: AtomicBool::new(true),
|
||||
start_time: Instant::now(),
|
||||
start_time: std::sync::Mutex::new(Instant::now()),
|
||||
output_level: OutputLevel::Default,
|
||||
status_403s: Default::default(),
|
||||
status_429s: Default::default(),
|
||||
@@ -663,7 +663,7 @@ async fn ferox_scan_abort() {
|
||||
scan_type: Default::default(),
|
||||
num_requests: 0,
|
||||
requests_made_so_far: 0,
|
||||
start_time: Instant::now(),
|
||||
start_time: std::sync::Mutex::new(Instant::now()),
|
||||
output_level: OutputLevel::Default,
|
||||
visible: AtomicBool::new(true),
|
||||
status_403s: Default::default(),
|
||||
|
||||
@@ -256,6 +256,7 @@ impl FeroxScanner {
|
||||
ferox_scan.set_status(ScanStatus::Waiting)?;
|
||||
let _permit = self.scan_limiter.acquire().await;
|
||||
ferox_scan.set_status(ScanStatus::Running)?;
|
||||
ferox_scan.set_start_time(Instant::now())?;
|
||||
|
||||
if self.handles.config.scan_limit > 0 {
|
||||
scan_timer = Instant::now();
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::cmp::max;
|
||||
use std::fmt::{Debug, Formatter, Result};
|
||||
|
||||
/// bespoke variation on an array-backed max-heap
|
||||
@@ -51,7 +52,18 @@ impl LimitHeap {
|
||||
pub(super) fn move_right(&mut self) -> usize {
|
||||
if self.has_children() {
|
||||
let tmp = self.current;
|
||||
self.current = self.current * 2 + 2;
|
||||
let new_index = self.current * 2 + 2;
|
||||
|
||||
// bounds check to prevent overflow
|
||||
if new_index < self.inner.len() {
|
||||
self.current = new_index;
|
||||
} else {
|
||||
log::warn!(
|
||||
"Heap navigation out of bounds: move_right from {} would go to {}",
|
||||
tmp,
|
||||
new_index
|
||||
);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
self.current
|
||||
@@ -61,7 +73,18 @@ impl LimitHeap {
|
||||
pub(super) fn move_left(&mut self) -> usize {
|
||||
if self.has_children() {
|
||||
let tmp = self.current;
|
||||
self.current = self.current * 2 + 1;
|
||||
let new_index = self.current * 2 + 1;
|
||||
|
||||
// Bounds check to prevent overflow
|
||||
if new_index < self.inner.len() {
|
||||
self.current = new_index;
|
||||
} else {
|
||||
log::warn!(
|
||||
"Heap navigation out of bounds: move_left from {} would go to {}",
|
||||
tmp,
|
||||
new_index
|
||||
);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
self.current
|
||||
@@ -79,17 +102,42 @@ impl LimitHeap {
|
||||
|
||||
/// move directly to the given index
|
||||
pub(super) fn move_to(&mut self, index: usize) {
|
||||
self.current = index;
|
||||
if index < self.inner.len() {
|
||||
self.current = index;
|
||||
} else {
|
||||
log::warn!(
|
||||
"Heap navigation out of bounds: move_to({}) exceeds array length {}",
|
||||
index,
|
||||
self.inner.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// get the current node's value
|
||||
pub(super) fn value(&self) -> i32 {
|
||||
self.inner[self.current]
|
||||
if self.current < self.inner.len() {
|
||||
self.inner[self.current]
|
||||
} else {
|
||||
log::error!(
|
||||
"Heap index out of bounds in value(): current={}, len={}",
|
||||
self.current,
|
||||
self.inner.len()
|
||||
);
|
||||
0 // Return safe default
|
||||
}
|
||||
}
|
||||
|
||||
/// set the current node's value
|
||||
pub(super) fn set_value(&mut self, value: i32) {
|
||||
self.inner[self.current] = value;
|
||||
if self.current < self.inner.len() {
|
||||
self.inner[self.current] = value;
|
||||
} else {
|
||||
log::error!(
|
||||
"Heap index out of bounds in set_value(): current={}, len={}",
|
||||
self.current,
|
||||
self.inner.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// check that this node has a parent (true for all except root)
|
||||
@@ -144,17 +192,35 @@ impl LimitHeap {
|
||||
self.move_up();
|
||||
}
|
||||
|
||||
/// clamp all heap values to a maximum limit
|
||||
///
|
||||
/// this is used when --rate-limit is set alongside --auto-tune to ensure
|
||||
/// that no auto-tuning adjustment can exceed the user's specified rate limit.
|
||||
/// only clamps non-zero values to preserve the "unset" marker (0) used during
|
||||
/// heap construction.
|
||||
pub(super) fn clamp_to_max(&mut self, max: i32) {
|
||||
for i in 0..self.inner.len() {
|
||||
if self.inner[i] > 0 && self.inner[i] > max {
|
||||
self.inner[i] = max;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// iterate over the backing array, filling in each child's value based on the original value
|
||||
pub(super) fn build(&mut self) {
|
||||
// ex: original is 400
|
||||
// arr[0] == 200
|
||||
// arr[1] (left child) == 300
|
||||
// arr[2] (right child) == 100
|
||||
let root = self.original / 2;
|
||||
|
||||
// safety: ensure original is at least 2 so root = original/2 >= 1
|
||||
// this prevents heap from producing limit=0 which would panic in rate limiter
|
||||
let original = max(self.original, 2);
|
||||
let root = original / 2;
|
||||
|
||||
self.inner[0] = root; // set root node to half of the original value
|
||||
self.inner[1] = ((self.original - root).abs() / 2) + root;
|
||||
self.inner[2] = root - ((self.original - root).abs() / 2);
|
||||
self.inner[1] = ((original - root).abs() / 2) + root;
|
||||
self.inner[2] = root - ((original - root).abs() / 2);
|
||||
|
||||
// start with index 1 and fill in each child below that node
|
||||
for i in 1..self.inner.len() {
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
|
||||
use crate::{atomic_load, atomic_store, config::RequesterPolicy};
|
||||
|
||||
use super::limit_heap::LimitHeap;
|
||||
use super::{limit_heap::LimitHeap, PolicyTrigger};
|
||||
|
||||
/// data regarding policy and metadata about last enforced trigger etc...
|
||||
#[derive(Default, Debug)]
|
||||
@@ -19,8 +19,11 @@ pub struct PolicyData {
|
||||
/// rate limit (at last interval)
|
||||
limit: AtomicUsize,
|
||||
|
||||
/// whether the heap has been initialized
|
||||
pub(super) heap_initialized: AtomicBool,
|
||||
|
||||
/// number of errors (at last interval)
|
||||
pub(super) errors: AtomicUsize,
|
||||
pub(super) errors: [AtomicUsize; 3],
|
||||
|
||||
/// whether or not the owning Requester should remove the rate_limiter, happens when a scan
|
||||
/// has been limited and moves back up to the point of its original scan speed
|
||||
@@ -28,6 +31,11 @@ pub struct PolicyData {
|
||||
|
||||
/// heap of values used for adjusting # of requests/second
|
||||
pub(super) heap: std::sync::RwLock<LimitHeap>,
|
||||
|
||||
/// maximum limit for requests per second; optionally set by --rate-limit
|
||||
/// if not set, the maximum limit during auto-tuning is unbounded and determined
|
||||
/// dynamically based on the observed request rate
|
||||
pub(super) rate_limit: Option<usize>,
|
||||
}
|
||||
|
||||
/// implementation of PolicyData
|
||||
@@ -35,7 +43,10 @@ impl PolicyData {
|
||||
/// given a RequesterPolicy, create a new PolicyData
|
||||
pub fn new(policy: RequesterPolicy, timeout: u64) -> Self {
|
||||
// can use this as a tweak for how aggressively adjustments should be made when tuning
|
||||
// cap at 30 seconds to prevent unbounded waits (e.g., with timeout=100000)
|
||||
const MAX_WAIT_TIME_MS: u64 = 30_000;
|
||||
let wait_time = ((timeout as f64 / 2.0) * 1000.0) as u64;
|
||||
let wait_time = wait_time.min(MAX_WAIT_TIME_MS);
|
||||
|
||||
Self {
|
||||
policy,
|
||||
@@ -44,18 +55,62 @@ impl PolicyData {
|
||||
}
|
||||
}
|
||||
|
||||
/// builder for rate limit
|
||||
///
|
||||
/// builder method chosen to not conflict with existing `new` api
|
||||
pub fn with_rate_limit(mut self, rate_limit: usize) -> Self {
|
||||
self.rate_limit = Some(rate_limit);
|
||||
self
|
||||
}
|
||||
|
||||
/// setter for requests / second; populates the underlying heap with values from req/sec seed
|
||||
pub(super) fn set_reqs_sec(&self, reqs_sec: usize) {
|
||||
if let Ok(mut guard) = self.heap.write() {
|
||||
guard.original = reqs_sec as i32;
|
||||
guard.build();
|
||||
|
||||
if let Some(cap) = self.rate_limit {
|
||||
// if a rate limit was set, clamp the heap to that maximum
|
||||
// this method is only called from tune, which implies that auto-tune is enabled
|
||||
guard.clamp_to_max(cap as i32);
|
||||
}
|
||||
|
||||
self.set_limit(guard.inner[0] as usize); // set limit to 1/2 of current request rate
|
||||
self.heap_initialized.store(true, Ordering::Release);
|
||||
} else {
|
||||
log::warn!("Could not acquire heap write lock in set_reqs_sec; heap not initialized");
|
||||
}
|
||||
}
|
||||
|
||||
/// setter for errors
|
||||
pub(super) fn set_errors(&self, errors: usize) {
|
||||
atomic_store!(self.errors, errors);
|
||||
/// setter for errors (trigger-specific)
|
||||
pub(super) fn set_errors(&self, trigger: PolicyTrigger, errors: usize) {
|
||||
if trigger == PolicyTrigger::TryAdjustUp {
|
||||
return;
|
||||
}
|
||||
atomic_store!(self.errors[trigger.as_index()], errors);
|
||||
}
|
||||
|
||||
/// getter for errors (trigger-specific)
|
||||
pub(super) fn get_errors(&self, trigger: PolicyTrigger) -> usize {
|
||||
if trigger == PolicyTrigger::TryAdjustUp {
|
||||
return 0;
|
||||
}
|
||||
atomic_load!(self.errors[trigger.as_index()])
|
||||
}
|
||||
|
||||
/// status of heap initialization
|
||||
pub(super) fn heap_initialized(&self) -> bool {
|
||||
atomic_load!(self.heap_initialized, Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// reset the heap and initialization flag, called when auto-tune is being disabled
|
||||
pub(super) fn reset_heap(&self) {
|
||||
if let Ok(mut guard) = self.heap.write() {
|
||||
*guard = LimitHeap::default();
|
||||
self.heap_initialized.store(false, Ordering::Release);
|
||||
} else {
|
||||
log::warn!("Could not acquire heap write lock in reset_heap");
|
||||
}
|
||||
}
|
||||
|
||||
/// setter for limit
|
||||
@@ -106,6 +161,8 @@ impl PolicyData {
|
||||
atomic_store!(self.remove_limit, true);
|
||||
}
|
||||
self.set_limit(heap.value() as usize);
|
||||
} else {
|
||||
log::debug!("Could not acquire heap write lock in adjust_up; rate limit unchanged");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,6 +173,8 @@ impl PolicyData {
|
||||
heap.move_right();
|
||||
self.set_limit(heap.value() as usize);
|
||||
}
|
||||
} else {
|
||||
log::debug!("Could not acquire heap write lock in adjust_down; rate limit unchanged");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -142,8 +201,12 @@ mod tests {
|
||||
/// PolicyData setters/getters tests for code coverage / sanity
|
||||
fn policy_data_getters_and_setters() {
|
||||
let pd = PolicyData::new(RequesterPolicy::AutoBail, 7);
|
||||
pd.set_errors(20);
|
||||
assert_eq!(pd.errors.load(Ordering::Relaxed), 20);
|
||||
pd.set_errors(PolicyTrigger::Errors, 20);
|
||||
assert_eq!(pd.get_errors(PolicyTrigger::Errors), 20);
|
||||
pd.set_errors(PolicyTrigger::Status403, 15);
|
||||
assert_eq!(pd.get_errors(PolicyTrigger::Status403), 15);
|
||||
pd.set_errors(PolicyTrigger::Status429, 10);
|
||||
assert_eq!(pd.get_errors(PolicyTrigger::Status429), 10);
|
||||
pd.set_limit(200);
|
||||
assert_eq!(pd.get_limit(), 200);
|
||||
}
|
||||
|
||||
@@ -80,17 +80,18 @@ impl Requester {
|
||||
pub fn from(scanner: &FeroxScanner, ferox_scan: Arc<FeroxScan>) -> Result<Self> {
|
||||
let limit = scanner.handles.config.rate_limit;
|
||||
|
||||
let mut policy_data = PolicyData::new(
|
||||
scanner.handles.config.requester_policy,
|
||||
scanner.handles.config.timeout,
|
||||
);
|
||||
|
||||
let rate_limiter = if limit > 0 {
|
||||
policy_data = policy_data.with_rate_limit(limit);
|
||||
Some(Self::build_a_bucket(limit)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let policy_data = PolicyData::new(
|
||||
scanner.handles.config.requester_policy,
|
||||
scanner.handles.config.timeout,
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
ferox_scan,
|
||||
policy_data,
|
||||
@@ -105,39 +106,41 @@ impl Requester {
|
||||
|
||||
/// build a RateLimiter, given a rate limit (as requests per second)
|
||||
fn build_a_bucket(limit: usize) -> Result<RateLimiter> {
|
||||
let refill = max((limit as f64 / 10.0).round() as usize, 1); // minimum of 1 per second
|
||||
// safety: ensure limit is at least 1 to prevent panic from .initial > .max
|
||||
let limit = max(limit, 1);
|
||||
|
||||
// For accurate rate limiting across all integer values (including low rates like 1-14 req/s),
|
||||
// we use a 1-second interval and refill with exactly `limit` tokens per interval.
|
||||
// This ensures refill/interval == limit for any value, avoiding the previous bug where
|
||||
// limits <15 collapsed to 1 req/s due to rounding.
|
||||
let refill = limit;
|
||||
let tokens = max((limit as f64 / 2.0).round() as usize, 1);
|
||||
let interval = if refill == 1 { 1000 } else { 100 }; // 1 second if refill is 1
|
||||
let interval = 1000; // 1 second interval for all rates
|
||||
|
||||
Ok(RateLimiter::builder()
|
||||
.interval(Duration::from_millis(interval)) // add tokens every 0.1s
|
||||
.refill(refill) // ex: 100 req/s -> 10 tokens per 0.1s
|
||||
.initial(tokens) // reduce initial burst, 2 is arbitrary, but felt good
|
||||
.interval(Duration::from_millis(interval))
|
||||
.refill(refill)
|
||||
.initial(tokens) // start with half capacity to reduce initial burst
|
||||
.max(limit)
|
||||
.build())
|
||||
}
|
||||
|
||||
/// sleep and set a flag that can be checked by other threads
|
||||
async fn cool_down(&self) {
|
||||
if atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst) {
|
||||
// prevents a few racy threads making it in here and doubling the wait time erroneously
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_store!(self.policy_data.cooling_down, true, Ordering::SeqCst);
|
||||
|
||||
// should_enforce_policy=>tune call chain has already acquired cooling_down flag
|
||||
// just need to sleep and reset
|
||||
sleep(Duration::from_millis(self.policy_data.wait_time)).await;
|
||||
self.ferox_scan.progress_bar().set_message("");
|
||||
|
||||
atomic_store!(self.policy_data.cooling_down, false, Ordering::SeqCst);
|
||||
atomic_store!(self.policy_data.cooling_down, false, Ordering::Release);
|
||||
}
|
||||
|
||||
/// limit the number of requests per second
|
||||
pub async fn limit(&self) -> Result<()> {
|
||||
let guard = self.rate_limiter.read().await;
|
||||
|
||||
if guard.is_some() {
|
||||
guard.as_ref().unwrap().acquire_one().await;
|
||||
if let Some(limiter) = guard.as_ref() {
|
||||
limiter.acquire_one().await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -174,16 +177,26 @@ impl Requester {
|
||||
/// - 90% of requests are 403
|
||||
/// - 30% of requests are 429
|
||||
fn should_enforce_policy(&self) -> Option<PolicyTrigger> {
|
||||
if atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst) {
|
||||
// prevents a few racy threads making it in here and doubling the wait time erroneously
|
||||
// use compare_exchange to ensure only one thread can proceed with policy enforcement
|
||||
// this prevents multiple threads from simultaneously deciding to enforce policy
|
||||
// AcqRel provides necessary synchronization
|
||||
if self
|
||||
.policy_data
|
||||
.cooling_down
|
||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_err()
|
||||
{
|
||||
// Another thread is already enforcing policy or cooling down
|
||||
return None;
|
||||
}
|
||||
|
||||
let requests = atomic_load!(self.handles.stats.data.requests);
|
||||
let requests = self.ferox_scan.requests() as usize;
|
||||
|
||||
if requests < max(self.handles.config.threads, 50) {
|
||||
// check whether at least a full round of threads has made requests or 50 (default # of
|
||||
// threads), whichever is higher
|
||||
// check whether at least a full round of threads has made requests for this specific
|
||||
// scan (not globally), or 50 (default # of threads), whichever is higher
|
||||
// need to reset the flag since we're not actually enforcing
|
||||
atomic_store!(self.policy_data.cooling_down, false, Ordering::Release);
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -199,48 +212,98 @@ impl Requester {
|
||||
return Some(PolicyTrigger::Status429);
|
||||
}
|
||||
|
||||
// No policy trigger found, reset the flag
|
||||
atomic_store!(self.policy_data.cooling_down, false, Ordering::Release);
|
||||
None
|
||||
}
|
||||
|
||||
/// wrapper for adjust_[up,down] functions, checks error levels to determine adjustment direction
|
||||
async fn adjust_limit(&self, trigger: PolicyTrigger, create_limiter: bool) -> Result<()> {
|
||||
let scan_errors = self.ferox_scan.num_errors(trigger);
|
||||
let policy_errors = atomic_load!(self.policy_data.errors, Ordering::SeqCst);
|
||||
let policy_errors = self.policy_data.get_errors(trigger);
|
||||
|
||||
// track if we need to update the progress bar message outside the lock
|
||||
let pb_message: Option<String>;
|
||||
|
||||
// Scope the lock so it's dropped before any async operations
|
||||
{
|
||||
// Use blocking lock instead of try_lock to avoid spurious warnings and ensure
|
||||
// adjustments are properly serialized
|
||||
let mut guard = match self.tuning_lock.lock() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
log::error!("tuning_lock poisoned in adjust_limit: {}", e);
|
||||
return Ok(()); // Skip this adjustment
|
||||
}
|
||||
};
|
||||
|
||||
if let Ok(mut guard) = self.tuning_lock.try_lock() {
|
||||
if scan_errors > policy_errors {
|
||||
// errors have increased, need to reduce the requests/sec limit
|
||||
*guard = 0; // reset streak counter to 0
|
||||
if atomic_load!(self.policy_data.errors) != 0 {
|
||||
if policy_errors != 0 {
|
||||
self.policy_data.adjust_down();
|
||||
|
||||
log::info!(
|
||||
"auto-tune: errors increased; reducing speed to {} reqs/sec for {}",
|
||||
self.policy_data.get_limit(),
|
||||
self.target_url
|
||||
);
|
||||
|
||||
let styled_direction = style("reduced").red();
|
||||
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
|
||||
pb_message = Some(format!(
|
||||
"=> 🚦 {styled_direction} scan speed ({}/s)",
|
||||
self.policy_data.get_limit()
|
||||
));
|
||||
} else {
|
||||
pb_message = None;
|
||||
}
|
||||
self.policy_data.set_errors(scan_errors);
|
||||
self.policy_data.set_errors(trigger, scan_errors);
|
||||
} else {
|
||||
// errors can only be incremented, so an else is sufficient
|
||||
*guard += 1;
|
||||
|
||||
self.policy_data.adjust_up(&guard);
|
||||
|
||||
log::info!(
|
||||
"auto-tune: errors decreased; increasing speed to {} reqs/sec for {}",
|
||||
self.policy_data.get_limit(),
|
||||
self.target_url
|
||||
);
|
||||
|
||||
let styled_direction = style("increased").green();
|
||||
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message(format!("=> 🚦 {styled_direction} scan speed",));
|
||||
pb_message = Some(format!(
|
||||
"=> 🚦 {styled_direction} scan speed ({}/s)",
|
||||
self.policy_data.get_limit()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// update progress bar while still holding the lock to prevent races
|
||||
if let Some(ref msg) = pb_message {
|
||||
self.ferox_scan.progress_bar().set_message(msg.clone());
|
||||
}
|
||||
} // guard is dropped here automatically
|
||||
|
||||
if atomic_load!(self.policy_data.remove_limit) {
|
||||
self.set_rate_limiter(None).await?;
|
||||
if let Some(rate_limit) = self.policy_data.rate_limit {
|
||||
self.set_rate_limiter(Some(rate_limit)).await?;
|
||||
} else {
|
||||
self.set_rate_limiter(None).await?;
|
||||
}
|
||||
|
||||
atomic_store!(self.policy_data.remove_limit, false);
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message("=> 🚦 removed rate limiter 🚀");
|
||||
|
||||
// reset the auto-tune state machine so it can be re-triggered if needed
|
||||
atomic_store!(self.policy_triggered, false, Ordering::Release);
|
||||
self.policy_data.reset_heap();
|
||||
|
||||
// acquire lock just for the progress bar update to prevent races
|
||||
if let Ok(_guard) = self.tuning_lock.try_lock() {
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
.set_message("=> 🚦 removed rate limiter 🚀");
|
||||
}
|
||||
} else if create_limiter {
|
||||
// create_limiter is really just used for unit testing situations, it's true anytime
|
||||
// during actual execution
|
||||
@@ -274,16 +337,48 @@ impl Requester {
|
||||
|
||||
/// enforce auto-tune policy
|
||||
async fn tune(&self, trigger: PolicyTrigger) -> Result<()> {
|
||||
if atomic_load!(self.policy_data.errors) == 0 {
|
||||
// set original number of reqs/second the first time tune is called, skip otherwise
|
||||
if !self.policy_data.heap_initialized() {
|
||||
// keep attempting to set original number of reqs/second when tune is called
|
||||
let reqs_sec = self.ferox_scan.requests_per_second() as usize;
|
||||
self.policy_data.set_reqs_sec(reqs_sec);
|
||||
|
||||
// guard against req/sec < 2, which would create heap with root=0 and cause panic
|
||||
// when building rate limiter (.initial > .max). need at least 2 req/sec for stable
|
||||
// rate limiting (original/2 = 1, which is minimum viable limit)
|
||||
if reqs_sec < 2 {
|
||||
log::debug!("auto-tune: {} reqs/sec is too low; not initializing heap and resetting cooldown period", reqs_sec);
|
||||
|
||||
// reset heap and initialization flags since we need the should_enforce_policy->tune
|
||||
// flow to execute again
|
||||
self.policy_data.reset_heap();
|
||||
atomic_store!(self.policy_data.cooling_down, false, Ordering::Release);
|
||||
atomic_store!(self.policy_triggered, false, Ordering::Release);
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// cap the initial reqs/sec to the user-specified rate limit if it exists
|
||||
// this ensures that the heap is built in such a way that clamping occurs correctly
|
||||
let seed = if let Some(cap) = self.policy_data.rate_limit {
|
||||
reqs_sec.min(cap)
|
||||
} else {
|
||||
reqs_sec
|
||||
};
|
||||
|
||||
self.policy_data.set_reqs_sec(seed);
|
||||
|
||||
// set the flag to indicate that we have triggered the rate limiter
|
||||
// at least once
|
||||
atomic_store!(self.policy_triggered, true);
|
||||
|
||||
let new_limit = self.policy_data.get_limit();
|
||||
|
||||
log::info!(
|
||||
"auto-tune: {} reqs/sec was too fast; enforcing limit {} reqs/sec for {}",
|
||||
reqs_sec,
|
||||
new_limit,
|
||||
self.target_url
|
||||
);
|
||||
|
||||
self.set_rate_limiter(Some(new_limit)).await?;
|
||||
self.ferox_scan
|
||||
.progress_bar()
|
||||
@@ -362,7 +457,14 @@ impl Requester {
|
||||
|
||||
for url in urls {
|
||||
for method in self.handles.config.methods.iter() {
|
||||
// auto_tune is true, or rate_limit was set (mutually exclusive to user)
|
||||
// Check denylist BEFORE consuming rate limit tokens to avoid wasting permits
|
||||
// on URLs that will be skipped anyway
|
||||
if should_test_deny && should_deny_url(&url, self.handles.clone())? {
|
||||
// can't allow a denied url to be requested
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if rate limiting should be applied (either via --rate-limit or auto-tune)
|
||||
// and a rate_limiter has been created
|
||||
// short-circuiting the lock access behind the first boolean check
|
||||
let should_tune =
|
||||
@@ -377,11 +479,6 @@ impl Requester {
|
||||
}
|
||||
}
|
||||
|
||||
if should_test_deny && should_deny_url(&url, self.handles.clone())? {
|
||||
// can't allow a denied url to be requested
|
||||
continue;
|
||||
}
|
||||
|
||||
let data = if self.handles.config.data.is_empty() {
|
||||
None
|
||||
} else {
|
||||
@@ -392,7 +489,7 @@ impl Requester {
|
||||
logged_request(&url, method.as_str(), data, self.handles.clone()).await?;
|
||||
|
||||
if (should_tune || self.handles.config.auto_bail)
|
||||
&& !atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst)
|
||||
&& !atomic_load!(self.policy_data.cooling_down, Ordering::Acquire)
|
||||
{
|
||||
// only check for policy enforcement when the trigger isn't on cooldown and tuning
|
||||
// or bailing is in place (should_tune used here because when auto-tune is on, we'll
|
||||
@@ -400,15 +497,46 @@ impl Requester {
|
||||
match self.policy_data.policy {
|
||||
RequesterPolicy::AutoTune => {
|
||||
if let Some(trigger) = self.should_enforce_policy() {
|
||||
self.tune(trigger).await?;
|
||||
if let Err(e) = self.tune(trigger).await {
|
||||
// reset cooling_down flag on error to prevent permanent lockout
|
||||
atomic_store!(
|
||||
self.policy_data.cooling_down,
|
||||
false,
|
||||
Ordering::Release
|
||||
);
|
||||
atomic_store!(self.policy_triggered, false, Ordering::Release);
|
||||
return Err(e);
|
||||
}
|
||||
} else if atomic_load!(self.policy_triggered) {
|
||||
self.adjust_limit(PolicyTrigger::TryAdjustUp, true).await?;
|
||||
self.cool_down().await;
|
||||
// Use compare_exchange to ensure only one thread attempts upward adjustment
|
||||
// at a time, preventing races and duplicate adjustments
|
||||
if self
|
||||
.policy_data
|
||||
.cooling_down
|
||||
.compare_exchange(
|
||||
false,
|
||||
true,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
self.adjust_limit(PolicyTrigger::TryAdjustUp, true).await?;
|
||||
self.cool_down().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
RequesterPolicy::AutoBail => {
|
||||
if let Some(trigger) = self.should_enforce_policy() {
|
||||
self.bail(trigger).await?;
|
||||
if let Err(e) = self.bail(trigger).await {
|
||||
// reset cooling_down flag on error to prevent permanent lockout
|
||||
atomic_store!(
|
||||
self.policy_data.cooling_down,
|
||||
false,
|
||||
Ordering::Release
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
RequesterPolicy::Default => {}
|
||||
@@ -599,6 +727,8 @@ mod tests {
|
||||
for _ in 0..num_errors {
|
||||
handles.stats.send(AddError(StatError::Other)).unwrap();
|
||||
scan.add_error();
|
||||
// Also increment the progress bar to represent a request being made
|
||||
scan.progress_bar().inc(1);
|
||||
}
|
||||
|
||||
handles.stats.sync().await.unwrap();
|
||||
@@ -635,6 +765,8 @@ mod tests {
|
||||
) {
|
||||
for _ in 0..num_codes {
|
||||
handles.stats.send(AddStatus(code)).unwrap();
|
||||
// Also increment the progress bar to represent a request being made
|
||||
scan.progress_bar().inc(1);
|
||||
if code == StatusCode::FORBIDDEN {
|
||||
scan.add_403();
|
||||
} else {
|
||||
@@ -933,8 +1065,9 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// cooldown should pause execution and prevent others calling it by setting cooling_down flag
|
||||
async fn cooldown_pauses_and_sets_flag() {
|
||||
/// cooldown should pause execution for the specified wait_time
|
||||
/// note: cooling_down flag is now set by should_enforce_policy, not cool_down itself
|
||||
async fn cooldown_pauses_for_wait_time() {
|
||||
let (handles, _) = setup_requester_test(None).await;
|
||||
|
||||
let requester = Arc::new(Requester {
|
||||
@@ -949,17 +1082,14 @@ mod tests {
|
||||
});
|
||||
|
||||
let start = Instant::now();
|
||||
let clone = requester.clone();
|
||||
let resp = tokio::task::spawn(async move {
|
||||
sleep(Duration::new(1, 0)).await;
|
||||
clone.policy_data.cooling_down.load(Ordering::Relaxed)
|
||||
});
|
||||
|
||||
requester.cool_down().await;
|
||||
|
||||
assert!(resp.await.unwrap());
|
||||
println!("{}", start.elapsed().as_millis());
|
||||
// verify cooldown paused for wait_time (3500ms for timeout=7s)
|
||||
assert!(start.elapsed().as_millis() >= 3500);
|
||||
|
||||
// verify flag was reset to false after cooldown completes
|
||||
assert!(!requester.policy_data.cooling_down.load(Ordering::Relaxed));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
@@ -1019,7 +1149,7 @@ mod tests {
|
||||
};
|
||||
|
||||
requester.policy_data.set_reqs_sec(400);
|
||||
requester.policy_data.set_errors(1);
|
||||
requester.policy_data.set_errors(PolicyTrigger::Errors, 1);
|
||||
|
||||
{
|
||||
let mut guard = requester.tuning_lock.lock().unwrap();
|
||||
@@ -1033,7 +1163,7 @@ mod tests {
|
||||
|
||||
assert_eq!(*requester.tuning_lock.lock().unwrap(), 0);
|
||||
assert_eq!(requester.policy_data.get_limit(), 100);
|
||||
assert_eq!(requester.policy_data.errors.load(Ordering::Relaxed), 2);
|
||||
assert_eq!(requester.policy_data.get_errors(PolicyTrigger::Errors), 2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
@@ -1182,18 +1312,446 @@ mod tests {
|
||||
pb.set_position(400);
|
||||
sleep(Duration::new(1, 0)).await; // used to get req/sec up to 400
|
||||
|
||||
assert_eq!(requester.policy_data.errors.load(Ordering::Relaxed), 0);
|
||||
assert_eq!(
|
||||
requester.policy_data.get_errors(PolicyTrigger::Status429),
|
||||
0
|
||||
);
|
||||
|
||||
requester.tune(PolicyTrigger::Status429).await.unwrap();
|
||||
|
||||
assert_eq!(requester.policy_data.heap.read().unwrap().original, 400);
|
||||
assert_eq!(requester.policy_data.get_limit(), 200);
|
||||
assert_eq!(
|
||||
requester.rate_limiter.read().await.as_ref().unwrap().max(),
|
||||
200
|
||||
let original = requester.policy_data.heap.read().unwrap().original;
|
||||
// Allow for timing imprecision: 400 reqs / 1.01s elapsed = 399 req/s
|
||||
assert!(
|
||||
(399..=401).contains(&original),
|
||||
"Expected ~400 req/s original, got {}",
|
||||
original
|
||||
);
|
||||
|
||||
let limit = requester.policy_data.get_limit();
|
||||
// Limit is original/2, so with original 399-401, limit is 199-200
|
||||
assert!(
|
||||
(199..=201).contains(&limit),
|
||||
"Expected limit ~200, got {}",
|
||||
limit
|
||||
);
|
||||
|
||||
let rate_limiter_max = requester.rate_limiter.read().await.as_ref().unwrap().max();
|
||||
assert!(
|
||||
(199..=201).contains(&rate_limiter_max),
|
||||
"Expected rate limiter max ~200, got {}",
|
||||
rate_limiter_max
|
||||
);
|
||||
|
||||
scan.finish(0).unwrap();
|
||||
assert!(start.elapsed().as_millis() >= 2000);
|
||||
}
|
||||
|
||||
#[test]
/// for every limit from 1 through 20, `build_a_bucket` must succeed and hand back a bucket
/// whose `max()` equals the requested limit
/// regression coverage for Bug #1, where limits < 15 collapsed to 1 req/s
fn build_a_bucket_handles_low_rates_correctly() {
    (1..=20).for_each(|limit| {
        // construction itself must not fail for any low rate
        let bucket = match Requester::build_a_bucket(limit) {
            Ok(bucket) => bucket,
            Err(_) => panic!("build_a_bucket failed for limit {}", limit),
        };

        // per the fix under test (interval=1000ms, refill=limit), the bucket's max is
        // expected to mirror the requested limit exactly
        let observed = bucket.max();
        assert_eq!(
            observed, limit,
            "Bucket max should equal requested limit {} but got {}",
            limit, observed
        );
    });
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify that policy_triggered flag is reset when rate limiter is removed
|
||||
/// This test validates the fix for Bug #2 where auto-tune never disengaged
|
||||
async fn policy_triggered_reset_when_limiter_removed() {
|
||||
let (handles, _) = setup_requester_test(None).await;
|
||||
let ferox_scan = Arc::new(FeroxScan::default());
|
||||
|
||||
let requester = Requester {
|
||||
handles,
|
||||
seen_links: RwLock::new(HashSet::<String>::new()),
|
||||
tuning_lock: Mutex::new(0),
|
||||
ferox_scan,
|
||||
target_url: "http://localhost".to_string(),
|
||||
rate_limiter: RwLock::new(None),
|
||||
policy_data: PolicyData::new(RequesterPolicy::AutoTune, 7),
|
||||
policy_triggered: AtomicBool::new(false),
|
||||
};
|
||||
|
||||
// Set policy_triggered to true (as if auto-tune was triggered)
|
||||
atomic_store!(requester.policy_triggered, true, Ordering::Release);
|
||||
|
||||
// Initialize heap to simulate auto-tune being active
|
||||
requester.policy_data.set_reqs_sec(100);
|
||||
assert!(requester.policy_data.heap_initialized());
|
||||
|
||||
// Simulate the condition where limiter should be removed
|
||||
atomic_store!(requester.policy_data.remove_limit, true);
|
||||
|
||||
// Call adjust_limit which should remove the limiter and reset state
|
||||
requester
|
||||
.adjust_limit(PolicyTrigger::Errors, true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify policy_triggered was reset
|
||||
assert!(
|
||||
!atomic_load!(requester.policy_triggered),
|
||||
"policy_triggered should be reset to false when limiter is removed"
|
||||
);
|
||||
|
||||
// Verify heap was reset
|
||||
assert!(
|
||||
!requester.policy_data.heap_initialized(),
|
||||
"heap should be reset when limiter is removed"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify should_enforce_policy uses per-scan request counts, not global
|
||||
/// This test validates the fix for Bug #4 where global counters caused false positives
|
||||
async fn should_enforce_policy_uses_per_scan_requests() {
|
||||
let mut config = Configuration::new().unwrap_or_default();
|
||||
config.threads = 50;
|
||||
|
||||
let (handles, _) = setup_requester_test(Some(Arc::new(config))).await;
|
||||
let ferox_scan = Arc::new(FeroxScan::default());
|
||||
|
||||
let requester = Requester {
|
||||
handles: handles.clone(),
|
||||
seen_links: RwLock::new(HashSet::<String>::new()),
|
||||
tuning_lock: Mutex::new(0),
|
||||
ferox_scan: ferox_scan.clone(),
|
||||
target_url: "http://localhost".to_string(),
|
||||
rate_limiter: RwLock::new(None),
|
||||
policy_data: PolicyData::new(RequesterPolicy::AutoTune, 7),
|
||||
policy_triggered: AtomicBool::new(false),
|
||||
};
|
||||
|
||||
// Add many errors globally (simulating previous scans)
|
||||
for _ in 0..100 {
|
||||
handles.stats.send(AddError(StatError::Other)).unwrap();
|
||||
}
|
||||
handles.stats.sync().await.unwrap();
|
||||
|
||||
// But this scan has only made a few requests
|
||||
ferox_scan.progress_bar().inc(5);
|
||||
for _ in 0..5 {
|
||||
ferox_scan.add_error();
|
||||
}
|
||||
|
||||
// should_enforce_policy should return None because THIS scan hasn't made enough requests
|
||||
// even though global request count is high
|
||||
assert_eq!(
|
||||
requester.should_enforce_policy(),
|
||||
None,
|
||||
"should_enforce_policy should use per-scan requests, not global"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify heap values are clamped when rate_limit cap is set
|
||||
async fn heap_values_clamped_to_rate_limit_cap() {
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
// Set a high RPS that exceeds the cap
|
||||
policy_data.set_reqs_sec(500);
|
||||
|
||||
// All heap values should be clamped to 100
|
||||
let heap = policy_data.heap.read().unwrap();
|
||||
for i in 0..heap.inner.len() {
|
||||
if heap.inner[i] > 0 {
|
||||
assert!(
|
||||
heap.inner[i] <= 100,
|
||||
"Heap value at index {} is {}, expected <= 100",
|
||||
i,
|
||||
heap.inner[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Root should be 100 (clamped from 250)
|
||||
assert_eq!(heap.inner[0], 100, "Root should be clamped to cap");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify auto-tune with cap adjusts down correctly on errors
|
||||
async fn auto_tune_with_cap_adjusts_down_on_errors() {
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
// Build heap with cap of 100
|
||||
policy_data.set_reqs_sec(100);
|
||||
|
||||
// Initial limit should be 50 (half of 100)
|
||||
assert_eq!(policy_data.get_limit(), 50);
|
||||
|
||||
// Adjust down (simulating errors)
|
||||
policy_data.adjust_down();
|
||||
|
||||
// Should move to right child, which is 25
|
||||
assert_eq!(policy_data.get_limit(), 25);
|
||||
|
||||
// Adjust down again
|
||||
policy_data.adjust_down();
|
||||
|
||||
// Should continue moving down the tree
|
||||
let new_limit = policy_data.get_limit();
|
||||
assert!(new_limit < 25, "Limit should decrease further");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify auto-tune with cap never exceeds cap on upward adjustment
|
||||
async fn auto_tune_with_cap_never_exceeds_cap_on_upward_adjustment() {
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
// Build heap with cap of 100
|
||||
policy_data.set_reqs_sec(100);
|
||||
|
||||
// Move to a low value in the tree
|
||||
{
|
||||
let mut heap = policy_data.heap.write().unwrap();
|
||||
heap.move_to(15); // Deep in the tree
|
||||
}
|
||||
|
||||
// Continuously adjust up with streak counter to reach root
|
||||
for _ in 0..10 {
|
||||
policy_data.adjust_up(&3); // Use high streak to move up faster
|
||||
let current_limit = policy_data.get_limit();
|
||||
assert!(
|
||||
current_limit <= 100,
|
||||
"Limit {} exceeded cap of 100",
|
||||
current_limit
|
||||
);
|
||||
}
|
||||
|
||||
// Should be at or near the cap, but heap navigation may not reach exact root
|
||||
let final_limit = policy_data.get_limit();
|
||||
assert!(
|
||||
(50..=100).contains(&final_limit),
|
||||
"Final limit {} should be between 50 and 100",
|
||||
final_limit
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify remove_limit with cap sets to cap instead of removing
|
||||
async fn remove_limit_with_cap_sets_to_cap_instead_of_removing() {
|
||||
let mut config = Configuration::new().unwrap_or_default();
|
||||
config.rate_limit = 100;
|
||||
config.auto_tune = true;
|
||||
config.requester_policy = RequesterPolicy::AutoTune;
|
||||
|
||||
let (handles, _) = setup_requester_test(Some(Arc::new(config))).await;
|
||||
let ferox_scan = Arc::new(FeroxScan::default());
|
||||
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
let requester = Requester {
|
||||
handles: handles.clone(),
|
||||
seen_links: RwLock::new(HashSet::<String>::new()),
|
||||
tuning_lock: Mutex::new(0),
|
||||
ferox_scan: ferox_scan.clone(),
|
||||
target_url: "http://localhost".to_string(),
|
||||
rate_limiter: RwLock::new(Some(Requester::build_a_bucket(50).unwrap())),
|
||||
policy_data,
|
||||
policy_triggered: AtomicBool::new(true),
|
||||
};
|
||||
|
||||
// Set remove_limit flag
|
||||
atomic_store!(requester.policy_data.remove_limit, true);
|
||||
|
||||
// Call adjust_limit
|
||||
requester
|
||||
.adjust_limit(PolicyTrigger::Errors, true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify limiter was set to cap, not removed
|
||||
let limiter = requester.rate_limiter.read().await;
|
||||
assert!(
|
||||
limiter.is_some(),
|
||||
"Limiter should not be removed when cap exists"
|
||||
);
|
||||
assert_eq!(
|
||||
limiter.as_ref().unwrap().max(),
|
||||
100,
|
||||
"Limiter should be set to cap value"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// verify initial limiter set to cap when both rate_limit and auto_tune are present
|
||||
async fn initial_limiter_set_to_cap_when_both_flags_present() {
|
||||
let mut config = Configuration::new().unwrap_or_default();
|
||||
config.rate_limit = 100;
|
||||
config.auto_tune = true;
|
||||
|
||||
let (handles, _) = setup_requester_test(Some(Arc::new(config))).await;
|
||||
let ferox_scan = Arc::new(FeroxScan::default());
|
||||
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
// Manually construct requester to verify initialization
|
||||
let requester = Requester {
|
||||
handles: handles.clone(),
|
||||
seen_links: RwLock::new(HashSet::<String>::new()),
|
||||
tuning_lock: Mutex::new(0),
|
||||
ferox_scan: ferox_scan.clone(),
|
||||
target_url: "http://localhost".to_string(),
|
||||
rate_limiter: RwLock::new(Some(Requester::build_a_bucket(100).unwrap())),
|
||||
policy_data,
|
||||
policy_triggered: AtomicBool::new(false),
|
||||
};
|
||||
|
||||
// Verify initial limiter is set
|
||||
let limiter = requester.rate_limiter.read().await;
|
||||
assert!(limiter.is_some(), "Limiter should be initialized");
|
||||
assert_eq!(
|
||||
limiter.as_ref().unwrap().max(),
|
||||
100,
|
||||
"Initial limiter should be set to rate_limit value"
|
||||
);
|
||||
|
||||
// Verify policy_data has the cap
|
||||
assert_eq!(
|
||||
requester.policy_data.rate_limit,
|
||||
Some(100),
|
||||
"PolicyData should have rate_limit set"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
/// Full lifecycle test: --rate-limit 100 --auto-tune
|
||||
/// Simulates errors triggering reduction, then success allowing increase, never exceeding cap
|
||||
async fn capped_auto_tune_full_lifecycle() {
|
||||
let mut config = Configuration::new().unwrap_or_default();
|
||||
config.rate_limit = 100;
|
||||
config.auto_tune = true;
|
||||
config.requester_policy = RequesterPolicy::AutoTune;
|
||||
config.threads = 50;
|
||||
|
||||
let (handles, _) = setup_requester_test(Some(Arc::new(config))).await;
|
||||
|
||||
// Create a proper Directory scan that will report as active
|
||||
let ferox_scan = FeroxScan::new(
|
||||
"http://localhost",
|
||||
ScanType::Directory,
|
||||
ScanOrder::Latest,
|
||||
0,
|
||||
OutputLevel::Default,
|
||||
None,
|
||||
true,
|
||||
handles.clone(),
|
||||
);
|
||||
|
||||
// Simulate scan running - need at least 2 req/s for tune() to initialize
|
||||
ferox_scan.set_status(ScanStatus::Running).unwrap();
|
||||
ferox_scan.set_start_time(Instant::now()).unwrap();
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
|
||||
// Add enough requests to get RPS >= 2 (100 requests in 0.1s = 1000 req/s)
|
||||
ferox_scan.progress_bar().inc(100);
|
||||
|
||||
let policy_data = PolicyData::new(RequesterPolicy::AutoTune, 7).with_rate_limit(100);
|
||||
|
||||
let requester = Requester {
|
||||
handles: handles.clone(),
|
||||
seen_links: RwLock::new(HashSet::<String>::new()),
|
||||
tuning_lock: Mutex::new(0),
|
||||
ferox_scan: ferox_scan.clone(),
|
||||
target_url: "http://localhost".to_string(),
|
||||
rate_limiter: RwLock::new(Some(Requester::build_a_bucket(100).unwrap())),
|
||||
policy_data,
|
||||
policy_triggered: AtomicBool::new(false),
|
||||
};
|
||||
|
||||
// Step 1: Trigger auto-tune due to errors
|
||||
for _ in 0..50 {
|
||||
ferox_scan.add_error();
|
||||
}
|
||||
|
||||
requester.tune(PolicyTrigger::Errors).await.unwrap();
|
||||
|
||||
// Heap should be initialized now (RPS is high, capped to 100)
|
||||
assert!(
|
||||
requester.policy_data.heap_initialized(),
|
||||
"Heap should be initialized after tune()"
|
||||
);
|
||||
|
||||
let initial_limit = requester.policy_data.get_limit();
|
||||
assert!(
|
||||
initial_limit <= 100,
|
||||
"Initial limit {} should not exceed cap",
|
||||
initial_limit
|
||||
);
|
||||
assert_eq!(
|
||||
initial_limit, 50,
|
||||
"Initial limit should be 50 (half of capped seed 100)"
|
||||
);
|
||||
|
||||
// Step 2: More errors - adjust down
|
||||
// Don't reset policy errors - they're already set to 50 from tune()
|
||||
// Add more scan errors so scan_errors (75) > policy_errors (50)
|
||||
for _ in 0..25 {
|
||||
ferox_scan.add_error();
|
||||
}
|
||||
|
||||
requester
|
||||
.adjust_limit(PolicyTrigger::Errors, true)
|
||||
.await
|
||||
.unwrap();
|
||||
let reduced_limit = requester.policy_data.get_limit();
|
||||
assert!(
|
||||
reduced_limit < initial_limit,
|
||||
"Limit should decrease on errors: {} < {}",
|
||||
reduced_limit,
|
||||
initial_limit
|
||||
);
|
||||
|
||||
// Step 3: Success - adjust up multiple times
|
||||
// Set policy errors higher than scan errors to trigger upward adjustment
|
||||
requester.policy_data.set_errors(PolicyTrigger::Errors, 200);
|
||||
for i in 0..5 {
|
||||
requester
|
||||
.adjust_limit(PolicyTrigger::Errors, true)
|
||||
.await
|
||||
.unwrap();
|
||||
let current_limit = requester.policy_data.get_limit();
|
||||
|
||||
// Should never exceed cap
|
||||
assert!(
|
||||
current_limit <= 100,
|
||||
"Iteration {}: Limit {} exceeded cap of 100",
|
||||
i,
|
||||
current_limit
|
||||
);
|
||||
}
|
||||
|
||||
// Step 4: Verify limiter stays at cap (not removed)
|
||||
atomic_store!(requester.policy_data.remove_limit, true);
|
||||
requester
|
||||
.adjust_limit(PolicyTrigger::Errors, true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let final_limiter = requester.rate_limiter.read().await;
|
||||
assert!(
|
||||
final_limiter.is_some(),
|
||||
"Limiter should not be removed when cap exists"
|
||||
);
|
||||
assert_eq!(
|
||||
final_limiter.as_ref().unwrap().max(),
|
||||
100,
|
||||
"Limiter should be at cap value"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,17 @@ pub enum PolicyTrigger {
|
||||
/// dummy error for upward rate adjustment
|
||||
TryAdjustUp,
|
||||
}
|
||||
|
||||
impl PolicyTrigger {
|
||||
/// get the index into the `PolicyData.errors` array for this trigger
|
||||
pub fn as_index(&self) -> usize {
|
||||
match self {
|
||||
PolicyTrigger::Status403 => 0,
|
||||
PolicyTrigger::Status429 => 1,
|
||||
PolicyTrigger::Errors => 2,
|
||||
PolicyTrigger::TryAdjustUp => {
|
||||
unreachable!("TryAdjustUp should never be used to access the errors array");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,11 +230,22 @@ fn auto_tune_slows_scan_with_429s() {
|
||||
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
|
||||
assert!(normal_reqs_mock.hits() + error_mock.hits() > 25); // must have at least 50 reqs fly
|
||||
let normal_hits = normal_reqs_mock.hits();
|
||||
let error_hits = error_mock.hits();
|
||||
|
||||
println!("elapsed: {}", start.elapsed().as_millis()); // 3523ms without tuning
|
||||
assert!(normal_reqs_mock.hits() < 500);
|
||||
assert!(error_mock.hits() <= 180); // may or may not see all other error requests
|
||||
println!("normal_reqs_mock.hits(): {}", normal_hits);
|
||||
println!("error_mock.hits(): {}", error_hits);
|
||||
|
||||
assert!(normal_hits + error_hits > 25); // must have at least 50 reqs fly
|
||||
|
||||
println!("elapsed: {}", start.elapsed().as_millis());
|
||||
// With auto-tune and 429s, the scan should be slowed down but may still process
|
||||
// ~1800-2000 requests in 7 seconds. The key is that it hits the time limit.
|
||||
assert!(
|
||||
normal_hits < 3000,
|
||||
"Should process fewer than 3000 requests due to rate limiting"
|
||||
);
|
||||
assert!(error_hits <= 180); // may or may not see all other error requests
|
||||
assert!(start.elapsed().as_millis() >= 7000); // scan should hit time limit due to limiting
|
||||
}
|
||||
|
||||
@@ -283,11 +294,22 @@ fn auto_tune_slows_scan_with_403s() {
|
||||
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
|
||||
assert!(normal_reqs_mock.hits() + error_mock.hits() > 25); // must have at least 50 reqs fly
|
||||
let normal_hits = normal_reqs_mock.hits();
|
||||
let error_hits = error_mock.hits();
|
||||
|
||||
println!("elapsed: {}", start.elapsed().as_millis()); // 3523ms without tuning
|
||||
assert!(normal_reqs_mock.hits() < 500);
|
||||
assert!(error_mock.hits() <= 180); // may or may not see all other error requests
|
||||
println!("normal_reqs_mock.hits(): {}", normal_hits);
|
||||
println!("error_mock.hits(): {}", error_hits);
|
||||
|
||||
assert!(normal_hits + error_hits > 25); // must have at least 50 reqs fly
|
||||
|
||||
println!("elapsed: {}", start.elapsed().as_millis());
|
||||
// With auto-tune and 403s, the scan should be slowed down but may still process
|
||||
// ~1800-2000 requests in 7 seconds. The key is that it hits the time limit.
|
||||
assert!(
|
||||
normal_hits < 3000,
|
||||
"Should process fewer than 3000 requests due to rate limiting"
|
||||
);
|
||||
assert!(error_hits <= 180); // may or may not see all other error requests
|
||||
assert!(start.elapsed().as_millis() >= 7000); // scan should hit time limit due to limiting
|
||||
}
|
||||
|
||||
@@ -339,8 +361,19 @@ fn auto_tune_slows_scan_with_general_errors() {
|
||||
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
|
||||
println!("elapsed: {}", start.elapsed().as_millis()); // 3523ms without tuning
|
||||
assert!(normal_reqs_mock.hits() < 500);
|
||||
assert!(error_mock.hits() <= 180); // may or may not see all other error requests
|
||||
let normal_hits = normal_reqs_mock.hits();
|
||||
let error_hits = error_mock.hits();
|
||||
|
||||
println!("normal_reqs_mock.hits(): {}", normal_hits);
|
||||
println!("error_mock.hits(): {}", error_hits);
|
||||
println!("elapsed: {}", start.elapsed().as_millis());
|
||||
|
||||
// Normal requests timeout (3s delay with 2s timeout), triggering error policy
|
||||
// The scan should be rate-limited and hit the time limit
|
||||
assert!(
|
||||
normal_hits < 3000,
|
||||
"Should process fewer requests due to rate limiting and timeouts"
|
||||
);
|
||||
assert!(error_hits <= 180); // may or may not see all other error requests
|
||||
assert!(start.elapsed().as_millis() >= 7000); // scan should hit time limit due to limiting
|
||||
}
|
||||
|
||||
402
tests/test_rate_limiting.rs
Normal file
402
tests/test_rate_limiting.rs
Normal file
@@ -0,0 +1,402 @@
|
||||
mod utils;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use httpmock::prelude::*;
|
||||
use httpmock::MockServer;
|
||||
use regex::Regex;
|
||||
use std::fs::{read_to_string, write};
|
||||
use utils::{setup_tmp_directory, teardown_tmp_directory};
|
||||
|
||||
/// Build a newline-separated wordlist made of four groups, emitted in this order:
/// `normal_NNNNNN`, `error_NNNNNN`, `s403_NNNNNN`, `s429_NNNNNN`
///
/// Each argument is the number of words in the corresponding group; the numeric suffix
/// counts up from 0 and is zero-padded to six digits. No trailing newline is appended.
fn create_test_wordlist(
    normal: usize,
    errors: usize,
    status403: usize,
    status429: usize,
) -> String {
    // (prefix, how many) pairs, in output order
    let groups = [
        ("normal", normal),
        ("error", errors),
        ("s403", status403),
        ("s429", status429),
    ];

    groups
        .iter()
        .flat_map(|&(prefix, count)| (0..count).map(move |i| format!("{}_{:06}", prefix, i)))
        .collect::<Vec<String>>()
        .join("\n")
}
|
||||
|
||||
/// Scenario 1: a wordlist dominated by 403 responses must make auto-tune enforce a limit
///
/// Runs the binary with `--auto-tune` against a mock server and then looks for an
/// "auto-tune: ... enforcing limit" message in the JSON debug log.
#[test]
fn scenario_high_403_rate() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&[], "wordlist").unwrap();
    let (log_dir, logfile) = setup_tmp_directory(&[], "debug-log").unwrap();

    // 100 normal words and 900 words answered with 403, i.e. 900/(900+100) = 90% 403s;
    // the original author's note cites a 90%+ ratio as what the policy needs
    write(&file, create_test_wordlist(100, 0, 900, 0)).unwrap();

    let _ok_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/normal_.*").unwrap());
        then.status(200).body("OK");
    });

    let _denied_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s403_.*").unwrap());
        then.status(403).body("Forbidden");
    });

    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .args(["--auto-tune", "--dont-filter", "--threads", "10", "--debug-log"])
        .arg(logfile.as_os_str())
        .args(["--json", "-vv"])
        .assert()
        .success();

    let debug_log = read_to_string(&logfile).unwrap();

    // lines that are not valid JSON, or that lack a string "message", are ignored
    let found_403_policy = debug_log
        .lines()
        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
        .any(|entry| {
            entry
                .get("message")
                .and_then(|m| m.as_str())
                .map_or(false, |msg| {
                    msg.contains("auto-tune:") && msg.contains("enforcing limit")
                })
        });

    // clean up before asserting so a failure does not leave temp directories behind
    teardown_tmp_directory(tmp_dir);
    teardown_tmp_directory(log_dir);

    assert!(found_403_policy, "High 403 rate should trigger policy");
}
|
||||
|
||||
/// Scenario 2: a wordlist dominated by 429 responses must make auto-tune enforce a limit
///
/// Runs the binary with `--auto-tune` against a mock server and then looks for an
/// "auto-tune: ... enforcing limit" message in the JSON debug log.
#[test]
fn scenario_high_429_rate() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&[], "wordlist").unwrap();
    let (log_dir, logfile) = setup_tmp_directory(&[], "debug-log").unwrap();

    // 150 normal words and 450 words answered with 429, i.e. 450/(450+150) = 75% 429s;
    // the original author's note cites a 30%+ ratio as what the policy needs
    write(&file, create_test_wordlist(150, 0, 0, 450)).unwrap();

    let _ok_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/normal_.*").unwrap());
        then.status(200).body("OK");
    });

    let _throttled_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s429_.*").unwrap());
        then.status(429).body("Too Many Requests");
    });

    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .args(["--auto-tune", "--dont-filter", "--threads", "10", "--debug-log"])
        .arg(logfile.as_os_str())
        .args(["--json", "-vv"])
        .assert()
        .success();

    let debug_log = read_to_string(&logfile).unwrap();

    // lines that are not valid JSON, or that lack a string "message", are ignored
    let found_429_policy = debug_log
        .lines()
        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
        .any(|entry| {
            entry
                .get("message")
                .and_then(|m| m.as_str())
                .map_or(false, |msg| {
                    msg.contains("auto-tune:") && msg.contains("enforcing limit")
                })
        });

    // clean up before asserting so a failure does not leave temp directories behind
    teardown_tmp_directory(tmp_dir);
    teardown_tmp_directory(log_dir);

    assert!(found_429_policy, "High 429 rate should trigger policy");
}
|
||||
|
||||
/// Scenario 3: a burst of 403s followed by normal responses must still trigger auto-tune
///
/// The wordlist places 100 `s403_` words ahead of 300 `normal_` words; the test then
/// looks for an "auto-tune: ... enforcing limit" message in the JSON debug log.
#[test]
fn scenario_recovery_pattern() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&[], "wordlist").unwrap();
    let (log_dir, logfile) = setup_tmp_directory(&[], "debug-log").unwrap();

    // errors first, normal words afterwards (four-digit, zero-padded suffixes)
    let wordlist: Vec<String> = (0..100)
        .map(|i| format!("s403_{:04}", i))
        .chain((0..300).map(|i| format!("normal_{:04}", i)))
        .collect();

    write(&file, wordlist.join("\n")).unwrap();

    let _ok_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/normal_.*").unwrap());
        then.status(200).body("OK");
    });

    let _denied_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s403_.*").unwrap());
        then.status(403).body("Forbidden");
    });

    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .args(["--auto-tune", "--dont-filter", "--threads", "10", "--debug-log"])
        .arg(logfile.as_os_str())
        .args(["--json", "-vv"])
        .assert()
        .success();

    let debug_log = read_to_string(&logfile).unwrap();

    // lines that are not valid JSON, or that lack a string "message", are ignored
    let auto_tune_triggered = debug_log
        .lines()
        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
        .any(|entry| {
            entry
                .get("message")
                .and_then(|m| m.as_str())
                .map_or(false, |msg| {
                    msg.contains("auto-tune:") && msg.contains("enforcing limit")
                })
        });

    // clean up before asserting so a failure does not leave temp directories behind
    teardown_tmp_directory(tmp_dir);
    teardown_tmp_directory(log_dir);

    assert!(
        auto_tune_triggered,
        "Should trigger auto-tune due to errors"
    );
}
|
||||
|
||||
/// Scenario 4: Mixed steady state - balanced errors and normal
///
/// Starts a mock server that answers `/normal_*` with 200, `/error_*` with 504,
/// `/s403_*` with 403 and `/s429_*` with 429, then runs feroxbuster with
/// `--auto-tune` and 10 threads against it. The test passes when the scan
/// succeeds and the four mocks together received more than 100 requests.
#[test]
fn scenario_mixed_steady_state() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&[], "wordlist").unwrap();
    let (log_dir, logfile) = setup_tmp_directory(&[], "debug-log").unwrap();

    // Evenly mixed - not enough to trigger bail, but enough for tuning
    // Need 25+ general errors to trigger: 30 >= 25
    write(&file, create_test_wordlist(150, 30, 10, 10)).unwrap();

    let ok_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/normal_.*").unwrap());
        then.status(200).body("OK");
    });

    let timeout_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/error_.*").unwrap());
        then.status(504).body("Gateway Timeout");
    });

    let denied_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s403_.*").unwrap());
        then.status(403).body("Forbidden");
    });

    let throttled_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s429_.*").unwrap());
        then.status(429).body("Too Many Requests");
    });

    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .arg("--auto-tune")
        .arg("--threads")
        .arg("10")
        .arg("--debug-log")
        .arg(logfile.as_os_str())
        .arg("-vv")
        .assert()
        .success();

    // Reading the log also verifies that the debug log file was actually created.
    let debug_log = read_to_string(&logfile).unwrap();

    // Number of log entries that mention a speed / rate-limit change. The value
    // is informational only; nothing below asserts on it.
    // NOTE(review): this run does not pass `--json`; confirm the debug log is
    // still line-delimited JSON, otherwise no line parses and this stays at 0.
    let _policy_adjustments = debug_log
        .lines()
        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
        .filter(|entry| {
            matches!(
                entry.get("message").and_then(|m| m.as_str()),
                Some(msg) if msg.contains("scan speed") || msg.contains("set rate limit")
            )
        })
        .count();

    let total =
        ok_mock.hits() + timeout_mock.hits() + denied_mock.hits() + throttled_mock.hits();

    // Clean up before asserting so a failure does not leave temp directories behind.
    teardown_tmp_directory(tmp_dir);
    teardown_tmp_directory(log_dir);

    // With mixed but not extreme errors, the scan should get through most of the wordlist
    assert!(total > 100, "Should complete significant portion of scan");
}
|
||||
|
||||
/// Scenario 5: Capped auto-tune - `--rate-limit` caps `--auto-tune` adjustments
///
/// Feeds feroxbuster 200 words that the mock server answers with 403, followed
/// by 400 words answered with 200, while running with both `--auto-tune` and
/// `--rate-limit 50`. The JSON debug log is then scanned to check that
/// auto-tune started enforcing a limit and that no logged `(N/s)` rate is
/// greater than 50.
#[test]
fn scenario_capped_auto_tune() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&[], "wordlist").unwrap();
    let (log_dir, logfile) = setup_tmp_directory(&[], "debug-log").unwrap();

    // Pattern: errors first to trigger rate limiting, then normal responses to allow upward
    // adjustment. The rate limit cap should prevent exceeding the specified limit.
    let wordlist: Vec<String> = (0..200)
        .map(|i| format!("s403_{:04}", i))
        .chain((0..400).map(|i| format!("normal_{:04}", i)))
        .collect();

    write(&file, wordlist.join("\n")).unwrap();

    let _normal_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/normal_.*").unwrap());
        then.status(200).body("OK");
    });

    let _error_mock = srv.mock(|when, then| {
        when.method(GET)
            .path_matches(Regex::new("/s403_.*").unwrap());
        then.status(403).body("Forbidden");
    });

    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .arg("--auto-tune")
        .arg("--rate-limit")
        .arg("50") // Cap at 50 req/s
        .arg("--dont-filter")
        .arg("--threads")
        .arg("10")
        .arg("--debug-log")
        .arg(logfile.as_os_str())
        .arg("--json")
        .arg("-vv")
        .assert()
        .success();

    let debug_log = read_to_string(&logfile).unwrap();

    let mut auto_tune_triggered = false;
    let mut max_rate_seen = 0usize;

    for line in debug_log.lines() {
        if let Ok(log) = serde_json::from_str::<serde_json::Value>(line) {
            if let Some(msg) = log.get("message").and_then(|m| m.as_str()) {
                // Check for auto-tune activation
                if msg.contains("auto-tune:") && msg.contains("enforcing limit") {
                    auto_tune_triggered = true;
                }

                // Extract rate values from messages like "set rate limit (25/s)" or
                // "scan speed (30/s)". Anchor on the last "/s)" and look for the opening
                // paren only in the text before it; searching the whole message for '('
                // could pick up a later paren and make the slice range invalid (panic).
                let rate = msg.rfind("/s)").and_then(|end| {
                    let head = &msg[..end];
                    head.rfind('(')
                        .and_then(|start| head[start + 1..].parse::<usize>().ok())
                });

                if let Some(rate) = rate {
                    max_rate_seen = max_rate_seen.max(rate);
                }
            }
        }
    }

    // Clean up before asserting so a failure does not leave temp directories behind.
    teardown_tmp_directory(tmp_dir);
    teardown_tmp_directory(log_dir);

    assert!(
        auto_tune_triggered,
        "Auto-tune should be triggered by errors"
    );

    // NOTE: `max_rate_seen` starts at 0, so this check also passes when no rate value
    // could be parsed from the log at all.
    assert!(
        max_rate_seen <= 50,
        "Auto-tune should never exceed rate-limit cap of 50, but saw {}",
        max_rate_seen
    );
}
|
||||
Reference in New Issue
Block a user