mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-04-19 06:31:13 -03:00
Added MinHash algorithm as a fallback when the response is too short for ssdeep
This commit is contained in:
@@ -11,7 +11,7 @@ pub(crate) use self::empty::EmptyFilter;
|
||||
pub use self::init::initialize;
|
||||
pub use self::lines::LinesFilter;
|
||||
pub use self::regex::RegexFilter;
|
||||
pub use self::similarity::SimilarityFilter;
|
||||
pub use self::similarity::{SimilarityFilter, HashValueType};
|
||||
pub use self::size::SizeFilter;
|
||||
pub use self::status_code::StatusCodeFilter;
|
||||
pub(crate) use self::utils::{create_similarity_filter, filter_lookup};
|
||||
|
||||
@@ -1,12 +1,33 @@
|
||||
use std::hash::BuildHasherDefault;
|
||||
|
||||
use super::*;
|
||||
use crate::MIN_SSDEEP_SIZE;
|
||||
use fuzzyhash::FuzzyHash;
|
||||
use gaoya::minhash::{MinHash, MinHasher, MinHasher16};
|
||||
use gaoya::text::whitespace_split;
|
||||
|
||||
/// enum wrapper for two distinct hashing signature types
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub(crate) enum HashValueType {
|
||||
/// String value for FuzzyHash
|
||||
String(String),
|
||||
|
||||
/// Vec<u16> value for minhash
|
||||
Vec(Vec<u16>),
|
||||
}
|
||||
|
||||
impl Default for HashValueType {
|
||||
fn default() -> Self {
|
||||
Self::String(String::new())
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple implementor of FeroxFilter; used to filter out responses based on the similarity of a
|
||||
/// Response body with a known response; specified using --filter-similar-to
|
||||
#[derive(Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SimilarityFilter {
|
||||
/// Hash of Response's body to be used during similarity comparison
|
||||
pub hash: String,
|
||||
pub hash: HashValueType,
|
||||
|
||||
/// Percentage of similarity at which a page is determined to be a near-duplicate of another
|
||||
pub threshold: u32,
|
||||
@@ -20,11 +41,23 @@ impl FeroxFilter for SimilarityFilter {
|
||||
/// Check `FeroxResponse::text` against what was requested from the site passed in via
|
||||
/// --filter-similar-to
|
||||
fn should_filter_response(&self, response: &FeroxResponse) -> bool {
|
||||
match self.hash {
|
||||
HashValueType::String(ref hash) => {
|
||||
// original response size was over the minimum required to effectively use ssdeep
|
||||
let other = FuzzyHash::new(response.text());
|
||||
|
||||
if let Ok(result) = FuzzyHash::compare(&self.hash, other.to_string()) {
|
||||
if let Ok(result) = FuzzyHash::compare(hash, other.to_string()) {
|
||||
return result >= self.threshold;
|
||||
}
|
||||
}
|
||||
HashValueType::Vec(ref hash) => {
|
||||
// original response was too small for ssdeep, so minhash was used as an alternative
|
||||
let hasher = MinHasher16::new(256);
|
||||
let other = hasher.create_signature(whitespace_split(response.text()));
|
||||
let result = hasher.compute_similarity(hash.iter(), other.iter());
|
||||
return (result * 100.0) as u32 >= self.threshold;
|
||||
}
|
||||
}
|
||||
|
||||
// couldn't hash the response, don't filter
|
||||
log::warn!(
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::*;
|
||||
use ::fuzzyhash::FuzzyHash;
|
||||
use ::regex::Regex;
|
||||
use super::similarity::HashValueType;
|
||||
|
||||
#[test]
|
||||
/// simply test the default values for wildcardfilter, expect 0, 0
|
||||
@@ -186,7 +187,7 @@ fn similarity_filter_is_accurate() {
|
||||
resp.set_text("sitting");
|
||||
|
||||
let mut filter = SimilarityFilter {
|
||||
hash: FuzzyHash::new("kitten").to_string(),
|
||||
hash: HashValueType::String(FuzzyHash::new("kitten").to_string()),
|
||||
threshold: 95,
|
||||
original_url: "".to_string(),
|
||||
};
|
||||
@@ -195,14 +196,14 @@ fn similarity_filter_is_accurate() {
|
||||
assert!(!filter.should_filter_response(&resp));
|
||||
|
||||
resp.set_text("");
|
||||
filter.hash = String::new();
|
||||
filter.hash = HashValueType::String(String::new());
|
||||
filter.threshold = 100;
|
||||
|
||||
// two empty strings are the same, however ssdeep doesn't accept empty strings, expect false
|
||||
assert!(!filter.should_filter_response(&resp));
|
||||
|
||||
resp.set_text("some data to hash for the purposes of running a test");
|
||||
filter.hash = FuzzyHash::new("some data to hash for the purposes of running a te").to_string();
|
||||
filter.hash = HashValueType::String(FuzzyHash::new("some data to hash for the purposes of running a te").to_string());
|
||||
filter.threshold = 17;
|
||||
|
||||
assert!(filter.should_filter_response(&resp));
|
||||
@@ -212,20 +213,19 @@ fn similarity_filter_is_accurate() {
|
||||
/// just a simple test to increase code coverage by hitting as_any and the inner value
|
||||
fn similarity_filter_as_any() {
|
||||
let filter = SimilarityFilter {
|
||||
hash: String::from("stuff"),
|
||||
hash: HashValueType::String(String::from("stuff")),
|
||||
threshold: 95,
|
||||
original_url: "".to_string(),
|
||||
};
|
||||
|
||||
let filter2 = SimilarityFilter {
|
||||
hash: String::from("stuff"),
|
||||
hash: HashValueType::String(String::from("stuff")),
|
||||
threshold: 95,
|
||||
original_url: "".to_string(),
|
||||
};
|
||||
|
||||
assert!(filter.box_eq(filter2.as_any()));
|
||||
|
||||
assert_eq!(filter.hash, "stuff");
|
||||
assert_eq!(
|
||||
*filter.as_any().downcast_ref::<SimilarityFilter>().unwrap(),
|
||||
filter
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
use super::similarity::HashValueType;
|
||||
use super::FeroxFilter;
|
||||
use super::SimilarityFilter;
|
||||
use crate::event_handlers::Handles;
|
||||
use crate::response::FeroxResponse;
|
||||
use crate::utils::logged_request;
|
||||
use crate::{DEFAULT_METHOD, SIMILARITY_THRESHOLD};
|
||||
use crate::{DEFAULT_METHOD, MIN_SSDEEP_SIZE, SIMILARITY_THRESHOLD};
|
||||
use anyhow::Result;
|
||||
use fuzzyhash::FuzzyHash;
|
||||
use gaoya::minhash::{MinHasher, MinHasher16};
|
||||
use gaoya::text::whitespace_split;
|
||||
use regex::Regex;
|
||||
use reqwest::Url;
|
||||
use std::sync::Arc;
|
||||
@@ -41,7 +44,14 @@ pub(crate) async fn create_similarity_filter(
|
||||
}
|
||||
|
||||
// hash the response body and store the resulting hash in the filter object
|
||||
let hash = FuzzyHash::new(fr.text()).to_string();
|
||||
let hash = if fr.content_length() <= MIN_SSDEEP_SIZE {
|
||||
// response too small for ssdeep
|
||||
let hasher = MinHasher16::new(256);
|
||||
HashValueType::Vec(hasher.create_signature(whitespace_split(fr.text())))
|
||||
} else {
|
||||
// size over ssdeep's minimum value
|
||||
HashValueType::String(FuzzyHash::new(fr.text()).to_string())
|
||||
};
|
||||
|
||||
Ok(SimilarityFilter {
|
||||
hash,
|
||||
@@ -95,7 +105,7 @@ pub(crate) fn filter_lookup(filter_type: &str, filter_value: &str) -> Option<Box
|
||||
}
|
||||
"similarity" => {
|
||||
return Some(Box::new(SimilarityFilter {
|
||||
hash: String::new(),
|
||||
hash: HashValueType::String(String::new()),
|
||||
threshold: SIMILARITY_THRESHOLD,
|
||||
original_url: filter_value.to_string(),
|
||||
}));
|
||||
@@ -157,7 +167,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
filter.as_any().downcast_ref::<SimilarityFilter>().unwrap(),
|
||||
&SimilarityFilter {
|
||||
hash: String::new(),
|
||||
hash: HashValueType::String(String::new()),
|
||||
threshold: SIMILARITY_THRESHOLD,
|
||||
original_url: "http://localhost".to_string()
|
||||
}
|
||||
@@ -195,7 +205,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
filter,
|
||||
SimilarityFilter {
|
||||
hash: "3:YKEpn:Yfp".to_string(),
|
||||
hash: HashValueType::String("3:YKEpn:Yfp".to_string()),
|
||||
threshold: SIMILARITY_THRESHOLD,
|
||||
original_url: srv.url("/")
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use console::style;
|
||||
use scraper::{Html, Selector};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::filters::SimilarityFilter;
|
||||
use crate::message::FeroxMessage;
|
||||
use crate::{
|
||||
config::OutputLevel,
|
||||
|
||||
@@ -55,6 +55,11 @@ pub const DEFAULT_OPEN_FILE_LIMIT: u64 = 8192;
|
||||
/// Default value used to determine near-duplicate web pages (equivalent to 95%)
|
||||
pub const SIMILARITY_THRESHOLD: u32 = 95;
|
||||
|
||||
/// Minimum size of response body for ssdeep to produce meaningful results
|
||||
///
|
||||
/// ref: https://github.com/glaslos/ssdeep/issues/17
///
/// `create_similarity_filter` compares a response's `content_length()` against this value
/// with `<=`: responses at or below it are hashed with minhash (`MinHasher16`), larger ones
/// with ssdeep (`FuzzyHash`).
|
||||
pub(crate) const MIN_SSDEEP_SIZE: u64 = 4096;
|
||||
|
||||
/// Default set of extensions to Ignore when auto-collecting extensions during scans
|
||||
pub(crate) const DEFAULT_IGNORED_EXTENSIONS: [&str; 38] = [
|
||||
"tif", "tiff", "ico", "cur", "bmp", "webp", "svg", "png", "jpg", "jpeg", "jfif", "gif", "avif",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use super::*;
|
||||
use crate::filters::{
|
||||
FeroxFilters, LinesFilter, RegexFilter, SimilarityFilter, SizeFilter, StatusCodeFilter,
|
||||
WordsFilter,
|
||||
WordsFilter,HashValueType
|
||||
};
|
||||
use crate::{
|
||||
config::{Configuration, OutputLevel},
|
||||
@@ -399,7 +399,7 @@ fn feroxstates_feroxserialize_implementation() {
|
||||
.unwrap();
|
||||
filters
|
||||
.push(Box::new(SimilarityFilter {
|
||||
hash: "3:YKEpn:Yfp".to_string(),
|
||||
hash: HashValueType::String("3:YKEpn:Yfp".to_string()),
|
||||
threshold: SIMILARITY_THRESHOLD,
|
||||
original_url: "http://localhost:12345/".to_string(),
|
||||
}))
|
||||
|
||||
Reference in New Issue
Block a user