mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-06-08 02:31:16 -03:00
Merge pull request #936 from epi052/935-fix-scan-menu-range-issue
935 fix scan menu range issue
This commit is contained in:
885
Cargo.lock
generated
885
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
37
Cargo.toml
37
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "feroxbuster"
|
||||
version = "2.10.0"
|
||||
version = "2.10.1"
|
||||
authors = ["Ben 'epi' Risher (@epi052)"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
@@ -22,40 +22,45 @@ build = "build.rs"
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
[build-dependencies]
|
||||
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
|
||||
clap_complete = "4.2"
|
||||
regex = "1.8"
|
||||
clap = { version = "4.3", features = ["wrap_help", "cargo"] }
|
||||
clap_complete = "4.3"
|
||||
regex = "1.9"
|
||||
lazy_static = "1.4"
|
||||
dirs = "5.0"
|
||||
|
||||
[dependencies]
|
||||
scraper = "0.16"
|
||||
scraper = "0.17"
|
||||
futures = "0.3"
|
||||
tokio = { version = "1.28", features = ["full"] }
|
||||
tokio = { version = "1.29", features = ["full"] }
|
||||
tokio-util = { version = "0.7", features = ["codec"] }
|
||||
log = "0.4"
|
||||
env_logger = "0.10"
|
||||
reqwest = { version = "0.11", features = ["socks", "native-tls"] }
|
||||
# uses feature unification to add 'serde' to reqwest::Url
|
||||
url = { version = "2.3", features = ["serde"] }
|
||||
url = { version = "2.4", features = ["serde"] }
|
||||
serde_regex = "1.1"
|
||||
clap = { version = "4.2", features = ["wrap_help", "cargo"] }
|
||||
clap = { version = "4.3", features = ["wrap_help", "cargo"] }
|
||||
lazy_static = "1.4"
|
||||
toml = "0.7"
|
||||
serde = { version = "1.0", features = ["derive", "rc"] }
|
||||
serde_json = "1.0"
|
||||
uuid = { version = "1.3", features = ["v4"] }
|
||||
indicatif = "0.17"
|
||||
uuid = { version = "1.4", features = ["v4"] }
|
||||
# last known working version of indicatif; 0.17.5 has a bug that causes the
|
||||
# scan menu to fail spectacularly
|
||||
indicatif = { version = "0.17.3" }
|
||||
console = "0.15"
|
||||
openssl = { version = "0.10", features = ["vendored"] }
|
||||
dirs = "5.0"
|
||||
regex = "1.8"
|
||||
regex = "1.9"
|
||||
crossterm = "0.26"
|
||||
rlimit = "0.9"
|
||||
ctrlc = "3.2"
|
||||
rlimit = "0.10"
|
||||
ctrlc = "3.4"
|
||||
anyhow = "1.0"
|
||||
leaky-bucket = "0.12"
|
||||
gaoya = "0.1"
|
||||
leaky-bucket = "1.0"
|
||||
gaoya = "0.2"
|
||||
# 0.37+ relies on the broken version of indicatif and forces
|
||||
# the broken version to be used regardless of the version
|
||||
# specified above
|
||||
self_update = { version = "0.36", features = [
|
||||
"archive-tar",
|
||||
"compression-flate2",
|
||||
@@ -64,7 +69,7 @@ self_update = { version = "0.36", features = [
|
||||
] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.5"
|
||||
tempfile = "3.6"
|
||||
httpmock = "0.6"
|
||||
assert_cmd = "2.0"
|
||||
predicates = "3.0"
|
||||
|
||||
@@ -11,7 +11,7 @@ rm ferox-*.state
|
||||
# dependency management
|
||||
[tasks.upgrade-deps]
|
||||
command = "cargo"
|
||||
args = ["upgrade", "--to-lockfile"]
|
||||
args = ["upgrade", "--to-lockfile", "--exclude", "indicatif", "self_update"]
|
||||
|
||||
[tasks.update]
|
||||
command = "cargo"
|
||||
|
||||
@@ -24,8 +24,8 @@ _feroxbuster() {
|
||||
'--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
|
||||
'*-R+[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE: ' \
|
||||
'*--replay-codes=[Status Codes to send through a Replay Proxy when found (default\: --status-codes value)]:REPLAY_CODE: ' \
|
||||
'-a+[Sets the User-Agent (default\: feroxbuster/2.10.0)]:USER_AGENT: ' \
|
||||
'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.10.0)]:USER_AGENT: ' \
|
||||
'-a+[Sets the User-Agent (default\: feroxbuster/2.10.1)]:USER_AGENT: ' \
|
||||
'--user-agent=[Sets the User-Agent (default\: feroxbuster/2.10.1)]:USER_AGENT: ' \
|
||||
'*-x+[File extension(s) to search for (ex\: -x php -x pdf js)]:FILE_EXTENSION: ' \
|
||||
'*--extensions=[File extension(s) to search for (ex\: -x php -x pdf js)]:FILE_EXTENSION: ' \
|
||||
'*-m+[Which HTTP request method(s) should be sent (default\: GET)]:HTTP_METHODS: ' \
|
||||
|
||||
@@ -26,38 +26,38 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--resume-from', 'resume-from', [CompletionResultType]::ParameterName, 'State file from which to resume a partially complete scan (ex. --resume-from ferox-1606586780.state)')
|
||||
[CompletionResult]::new('-p', 'p', [CompletionResultType]::ParameterName, 'Proxy to use for requests (ex: http(s)://host:port, socks5(h)://host:port)')
|
||||
[CompletionResult]::new('--proxy', 'proxy', [CompletionResultType]::ParameterName, 'Proxy to use for requests (ex: http(s)://host:port, socks5(h)://host:port)')
|
||||
[CompletionResult]::new('-P', 'P', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
|
||||
[CompletionResult]::new('-P', 'P ', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
|
||||
[CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
|
||||
[CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('-R', 'R ', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
|
||||
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.10.0)')
|
||||
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.10.0)')
|
||||
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.10.1)')
|
||||
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.10.1)')
|
||||
[CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
|
||||
[CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
|
||||
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
|
||||
[CompletionResult]::new('--methods', 'methods', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
|
||||
[CompletionResult]::new('--data', 'data', [CompletionResultType]::ParameterName, 'Request''s Body; can read data from a file if input starts with an @ (ex: @post.bin)')
|
||||
[CompletionResult]::new('-H', 'H', [CompletionResultType]::ParameterName, 'Specify HTTP headers to be used in each request (ex: -H Header:val -H ''stuff: things'')')
|
||||
[CompletionResult]::new('-H', 'H ', [CompletionResultType]::ParameterName, 'Specify HTTP headers to be used in each request (ex: -H Header:val -H ''stuff: things'')')
|
||||
[CompletionResult]::new('--headers', 'headers', [CompletionResultType]::ParameterName, 'Specify HTTP headers to be used in each request (ex: -H Header:val -H ''stuff: things'')')
|
||||
[CompletionResult]::new('-b', 'b', [CompletionResultType]::ParameterName, 'Specify HTTP cookies to be used in each request (ex: -b stuff=things)')
|
||||
[CompletionResult]::new('--cookies', 'cookies', [CompletionResultType]::ParameterName, 'Specify HTTP cookies to be used in each request (ex: -b stuff=things)')
|
||||
[CompletionResult]::new('-Q', 'Q', [CompletionResultType]::ParameterName, 'Request''s URL query parameters (ex: -Q token=stuff -Q secret=key)')
|
||||
[CompletionResult]::new('-Q', 'Q ', [CompletionResultType]::ParameterName, 'Request''s URL query parameters (ex: -Q token=stuff -Q secret=key)')
|
||||
[CompletionResult]::new('--query', 'query', [CompletionResultType]::ParameterName, 'Request''s URL query parameters (ex: -Q token=stuff -Q secret=key)')
|
||||
[CompletionResult]::new('--dont-scan', 'dont-scan', [CompletionResultType]::ParameterName, 'URL(s) or Regex Pattern(s) to exclude from recursion/scans')
|
||||
[CompletionResult]::new('-S', 'S', [CompletionResultType]::ParameterName, 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)')
|
||||
[CompletionResult]::new('-S', 'S ', [CompletionResultType]::ParameterName, 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)')
|
||||
[CompletionResult]::new('--filter-size', 'filter-size', [CompletionResultType]::ParameterName, 'Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)')
|
||||
[CompletionResult]::new('-X', 'X', [CompletionResultType]::ParameterName, 'Filter out messages via regular expression matching on the response''s body (ex: -X ''^ignore me$'')')
|
||||
[CompletionResult]::new('-X', 'X ', [CompletionResultType]::ParameterName, 'Filter out messages via regular expression matching on the response''s body (ex: -X ''^ignore me$'')')
|
||||
[CompletionResult]::new('--filter-regex', 'filter-regex', [CompletionResultType]::ParameterName, 'Filter out messages via regular expression matching on the response''s body (ex: -X ''^ignore me$'')')
|
||||
[CompletionResult]::new('-W', 'W', [CompletionResultType]::ParameterName, 'Filter out messages of a particular word count (ex: -W 312 -W 91,82)')
|
||||
[CompletionResult]::new('-W', 'W ', [CompletionResultType]::ParameterName, 'Filter out messages of a particular word count (ex: -W 312 -W 91,82)')
|
||||
[CompletionResult]::new('--filter-words', 'filter-words', [CompletionResultType]::ParameterName, 'Filter out messages of a particular word count (ex: -W 312 -W 91,82)')
|
||||
[CompletionResult]::new('-N', 'N', [CompletionResultType]::ParameterName, 'Filter out messages of a particular line count (ex: -N 20 -N 31,30)')
|
||||
[CompletionResult]::new('-N', 'N ', [CompletionResultType]::ParameterName, 'Filter out messages of a particular line count (ex: -N 20 -N 31,30)')
|
||||
[CompletionResult]::new('--filter-lines', 'filter-lines', [CompletionResultType]::ParameterName, 'Filter out messages of a particular line count (ex: -N 20 -N 31,30)')
|
||||
[CompletionResult]::new('-C', 'C', [CompletionResultType]::ParameterName, 'Filter out status codes (deny list) (ex: -C 200 -C 401)')
|
||||
[CompletionResult]::new('-C', 'C ', [CompletionResultType]::ParameterName, 'Filter out status codes (deny list) (ex: -C 200 -C 401)')
|
||||
[CompletionResult]::new('--filter-status', 'filter-status', [CompletionResultType]::ParameterName, 'Filter out status codes (deny list) (ex: -C 200 -C 401)')
|
||||
[CompletionResult]::new('--filter-similar-to', 'filter-similar-to', [CompletionResultType]::ParameterName, 'Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)')
|
||||
[CompletionResult]::new('-s', 's', [CompletionResultType]::ParameterName, 'Status Codes to include (allow list) (default: All Status Codes)')
|
||||
[CompletionResult]::new('--status-codes', 'status-codes', [CompletionResultType]::ParameterName, 'Status Codes to include (allow list) (default: All Status Codes)')
|
||||
[CompletionResult]::new('-T', 'T', [CompletionResultType]::ParameterName, 'Number of seconds before a client''s request times out (default: 7)')
|
||||
[CompletionResult]::new('-T', 'T ', [CompletionResultType]::ParameterName, 'Number of seconds before a client''s request times out (default: 7)')
|
||||
[CompletionResult]::new('--timeout', 'timeout', [CompletionResultType]::ParameterName, 'Number of seconds before a client''s request times out (default: 7)')
|
||||
[CompletionResult]::new('--server-certs', 'server-certs', [CompletionResultType]::ParameterName, 'Add custom root certificate(s) for servers with unknown certificates')
|
||||
[CompletionResult]::new('--client-cert', 'client-cert', [CompletionResultType]::ParameterName, 'Add a PEM encoded certificate for mutual authentication (mTLS)')
|
||||
@@ -66,14 +66,14 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--threads', 'threads', [CompletionResultType]::ParameterName, 'Number of concurrent threads (default: 50)')
|
||||
[CompletionResult]::new('-d', 'd', [CompletionResultType]::ParameterName, 'Maximum recursion depth, a depth of 0 is infinite recursion (default: 4)')
|
||||
[CompletionResult]::new('--depth', 'depth', [CompletionResultType]::ParameterName, 'Maximum recursion depth, a depth of 0 is infinite recursion (default: 4)')
|
||||
[CompletionResult]::new('-L', 'L', [CompletionResultType]::ParameterName, 'Limit total number of concurrent scans (default: 0, i.e. no limit)')
|
||||
[CompletionResult]::new('-L', 'L ', [CompletionResultType]::ParameterName, 'Limit total number of concurrent scans (default: 0, i.e. no limit)')
|
||||
[CompletionResult]::new('--scan-limit', 'scan-limit', [CompletionResultType]::ParameterName, 'Limit total number of concurrent scans (default: 0, i.e. no limit)')
|
||||
[CompletionResult]::new('--parallel', 'parallel', [CompletionResultType]::ParameterName, 'Run parallel feroxbuster instances (one child process per url passed via stdin)')
|
||||
[CompletionResult]::new('--rate-limit', 'rate-limit', [CompletionResultType]::ParameterName, 'Limit number of requests per second (per directory) (default: 0, i.e. no limit)')
|
||||
[CompletionResult]::new('--time-limit', 'time-limit', [CompletionResultType]::ParameterName, 'Limit total run time of all scans (ex: --time-limit 10m)')
|
||||
[CompletionResult]::new('-w', 'w', [CompletionResultType]::ParameterName, 'Path or URL of the wordlist')
|
||||
[CompletionResult]::new('--wordlist', 'wordlist', [CompletionResultType]::ParameterName, 'Path or URL of the wordlist')
|
||||
[CompletionResult]::new('-I', 'I', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)')
|
||||
[CompletionResult]::new('-I', 'I ', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)')
|
||||
[CompletionResult]::new('--dont-collect', 'dont-collect', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)')
|
||||
[CompletionResult]::new('-o', 'o', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)')
|
||||
[CompletionResult]::new('--output', 'output', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)')
|
||||
@@ -83,7 +83,7 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--burp-replay', 'burp-replay', [CompletionResultType]::ParameterName, 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true')
|
||||
[CompletionResult]::new('--smart', 'smart', [CompletionResultType]::ParameterName, 'Set --auto-tune, --collect-words, and --collect-backups to true')
|
||||
[CompletionResult]::new('--thorough', 'thorough', [CompletionResultType]::ParameterName, 'Use the same settings as --smart and set --collect-extensions to true')
|
||||
[CompletionResult]::new('-A', 'A', [CompletionResultType]::ParameterName, 'Use a random User-Agent')
|
||||
[CompletionResult]::new('-A', 'A ', [CompletionResultType]::ParameterName, 'Use a random User-Agent')
|
||||
[CompletionResult]::new('--random-agent', 'random-agent', [CompletionResultType]::ParameterName, 'Use a random User-Agent')
|
||||
[CompletionResult]::new('-f', 'f', [CompletionResultType]::ParameterName, 'Append / to each request''s URL')
|
||||
[CompletionResult]::new('--add-slash', 'add-slash', [CompletionResultType]::ParameterName, 'Append / to each request''s URL')
|
||||
@@ -99,11 +99,11 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--dont-extract-links', 'dont-extract-links', [CompletionResultType]::ParameterName, 'Don''t extract links from response body (html, javascript, etc...)')
|
||||
[CompletionResult]::new('--auto-tune', 'auto-tune', [CompletionResultType]::ParameterName, 'Automatically lower scan rate when an excessive amount of errors are encountered')
|
||||
[CompletionResult]::new('--auto-bail', 'auto-bail', [CompletionResultType]::ParameterName, 'Automatically stop scanning when an excessive amount of errors are encountered')
|
||||
[CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses')
|
||||
[CompletionResult]::new('-D', 'D ', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses')
|
||||
[CompletionResult]::new('--dont-filter', 'dont-filter', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses')
|
||||
[CompletionResult]::new('-E', 'E', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)')
|
||||
[CompletionResult]::new('-E', 'E ', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)')
|
||||
[CompletionResult]::new('--collect-extensions', 'collect-extensions', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)')
|
||||
[CompletionResult]::new('-B', 'B', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls')
|
||||
[CompletionResult]::new('-B', 'B ', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls')
|
||||
[CompletionResult]::new('--collect-backups', 'collect-backups', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls')
|
||||
[CompletionResult]::new('-g', 'g', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist')
|
||||
[CompletionResult]::new('--collect-words', 'collect-words', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist')
|
||||
@@ -114,11 +114,11 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
|
||||
[CompletionResult]::new('--quiet', 'quiet', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)')
|
||||
[CompletionResult]::new('--json', 'json', [CompletionResultType]::ParameterName, 'Emit JSON logs to --output and --debug-log instead of normal text')
|
||||
[CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state)')
|
||||
[CompletionResult]::new('-U', 'U', [CompletionResultType]::ParameterName, 'Update feroxbuster to the latest version')
|
||||
[CompletionResult]::new('-U', 'U ', [CompletionResultType]::ParameterName, 'Update feroxbuster to the latest version')
|
||||
[CompletionResult]::new('--update', 'update', [CompletionResultType]::ParameterName, 'Update feroxbuster to the latest version')
|
||||
[CompletionResult]::new('-h', 'h', [CompletionResultType]::ParameterName, 'Print help (see more with ''--help'')')
|
||||
[CompletionResult]::new('--help', 'help', [CompletionResultType]::ParameterName, 'Print help (see more with ''--help'')')
|
||||
[CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version')
|
||||
[CompletionResult]::new('-V', 'V ', [CompletionResultType]::ParameterName, 'Print version')
|
||||
[CompletionResult]::new('--version', 'version', [CompletionResultType]::ParameterName, 'Print version')
|
||||
break
|
||||
}
|
||||
|
||||
@@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
|
||||
cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
|
||||
cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.10.0)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.10.0)'
|
||||
cand -a 'Sets the User-Agent (default: feroxbuster/2.10.1)'
|
||||
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.10.1)'
|
||||
cand -x 'File extension(s) to search for (ex: -x php -x pdf js)'
|
||||
cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)'
|
||||
cand -m 'Which HTTP request method(s) should be sent (default: GET)'
|
||||
|
||||
@@ -40,15 +40,16 @@ impl Document {
|
||||
}
|
||||
|
||||
/// create a new `Document` from the given HTML string
|
||||
pub(crate) fn from_html(raw_html: &str) -> Self {
|
||||
pub(crate) fn from_html(raw_html: &str) -> Option<Self> {
|
||||
let selector = Selector::parse("body").unwrap();
|
||||
|
||||
let html = Html::parse_document(raw_html);
|
||||
|
||||
let text = html
|
||||
.select(&selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
let Some(element) = html.select(&selector).next() else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let text = element
|
||||
.descendants()
|
||||
.filter_map(|node| {
|
||||
if !node.value().is_text() && !node.value().is_comment() {
|
||||
@@ -95,7 +96,7 @@ impl Document {
|
||||
|
||||
// call `new` to push the parsed html through the pre-processing pipeline and process all
|
||||
// the words
|
||||
Self::new(&text)
|
||||
Some(Self::new(&text))
|
||||
}
|
||||
|
||||
/// Log normalized weighting scheme for term frequency
|
||||
@@ -146,19 +147,20 @@ mod tests {
|
||||
#[test]
|
||||
/// `Document::new` should preprocess html and generate a hashmap of `Term, TermMetadata`
|
||||
fn nlp_document_creation_from_html() {
|
||||
let empty = Document::from_html("<html></html>");
|
||||
let empty = Document::from_html("<html></html>").unwrap();
|
||||
assert_eq!(empty.number_of_terms, 0);
|
||||
|
||||
let other_empty = Document::from_html("<html><body><p></p></body></html>");
|
||||
let other_empty = Document::from_html("<html><body><p></p></body></html>").unwrap();
|
||||
assert_eq!(other_empty.number_of_terms, 0);
|
||||
|
||||
let third_empty = Document::from_html("<!DOCTYPE html><html><!DOCTYPE html><p></p></html>");
|
||||
let third_empty =
|
||||
Document::from_html("<!DOCTYPE html><html><!DOCTYPE html><p></p></html>").unwrap();
|
||||
assert_eq!(third_empty.number_of_terms, 0);
|
||||
|
||||
// p tag for is_text check and comment for is_comment
|
||||
let doc = Document::from_html(
|
||||
"<html><body><p>The air quality in Singapore.</p><!--got worse on Wednesday--></body></html>",
|
||||
);
|
||||
).unwrap();
|
||||
|
||||
let expected_terms = ["air", "quality", "singapore", "worse", "wednesday"];
|
||||
|
||||
@@ -209,7 +211,7 @@ mod tests {
|
||||
/// ensure words in script/style tags aren't processed
|
||||
fn document_creation_skips_script_and_style_tags() {
|
||||
let html = "<body><script>The air quality</script><style>in Singapore</style><p>got worse on Wednesday.</p></body>";
|
||||
let doc = Document::from_html(html);
|
||||
let doc = Document::from_html(html).unwrap();
|
||||
let keys = doc.terms().keys().map(|key| key.raw()).collect::<Vec<_>>();
|
||||
|
||||
let expected = ["worse", "wednesday"];
|
||||
|
||||
@@ -110,7 +110,7 @@ impl FeroxScan {
|
||||
|
||||
match self.task.try_lock() {
|
||||
Ok(mut guard) => {
|
||||
if let Some(task) = std::mem::replace(&mut *guard, None) {
|
||||
if let Some(task) = guard.take() {
|
||||
log::trace!("aborting {:?}", self);
|
||||
task.abort();
|
||||
self.set_status(ScanStatus::Cancelled)?;
|
||||
@@ -268,7 +268,7 @@ impl FeroxScan {
|
||||
let mut guard = self.task.lock().await;
|
||||
|
||||
if guard.is_some() {
|
||||
if let Some(task) = std::mem::replace(&mut *guard, None) {
|
||||
if let Some(task) = guard.take() {
|
||||
task.await.unwrap();
|
||||
self.set_status(ScanStatus::Complete)
|
||||
.unwrap_or_else(|e| log::warn!("Could not mark scan complete: {}", e))
|
||||
|
||||
@@ -330,6 +330,13 @@ impl FeroxScans {
|
||||
self.menu
|
||||
.println(&format!("{}:", style("Scans").bright().blue()));
|
||||
}
|
||||
|
||||
if let Ok(guard) = scan.status.lock() {
|
||||
if matches!(*guard, ScanStatus::Cancelled) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// we're only interested in displaying directory scans, as those are
|
||||
// the only ones that make sense to be stopped
|
||||
let scan_msg = format!("{i:3}: {scan}");
|
||||
@@ -360,7 +367,14 @@ impl FeroxScans {
|
||||
sleep(menu_pause_duration);
|
||||
continue;
|
||||
}
|
||||
u_scans.index(num).clone()
|
||||
|
||||
let selected = u_scans.index(num);
|
||||
|
||||
if matches!(selected.scan_type, ScanType::File) {
|
||||
continue;
|
||||
}
|
||||
|
||||
selected.clone()
|
||||
}
|
||||
Err(..) => continue,
|
||||
};
|
||||
|
||||
@@ -202,6 +202,7 @@ fn partial_eq_compares_the_id_field() {
|
||||
|
||||
assert!(!scan.eq(&scan_two));
|
||||
|
||||
#[allow(clippy::redundant_clone)]
|
||||
let scan_two = scan.clone();
|
||||
|
||||
assert!(scan.eq(&scan_two));
|
||||
|
||||
@@ -475,12 +475,13 @@ impl Requester {
|
||||
|
||||
if self.handles.config.collect_words {
|
||||
if let Ok(mut guard) = TF_IDF.write() {
|
||||
let doc = Document::from_html(ferox_response.text());
|
||||
guard.add_document(doc);
|
||||
if guard.num_documents() % 12 == 0
|
||||
|| (guard.num_documents() < 5 && guard.num_documents() % 2 == 0)
|
||||
{
|
||||
guard.calculate_tf_idf_scores();
|
||||
if let Some(doc) = Document::from_html(ferox_response.text()) {
|
||||
guard.add_document(doc);
|
||||
if guard.num_documents() % 12 == 0
|
||||
|| (guard.num_documents() < 5 && guard.num_documents() % 2 == 0)
|
||||
{
|
||||
guard.calculate_tf_idf_scores();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user