mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-06-08 10:51:13 -03:00
Merge pull request #464 from godylockz/ferox-parsehtml
Directory Listing & Web Scraping Links
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -9,6 +9,9 @@ target/
|
||||
# jetbrains metadata folder
|
||||
.idea/
|
||||
|
||||
# vscode metadata folder
|
||||
.vscode/
|
||||
|
||||
# personal feroxbuster config for testing
|
||||
ferox-config.toml
|
||||
|
||||
|
||||
@@ -166,7 +166,7 @@ primarily related to continuous integration and release deployment.
|
||||
|
||||
feroxbuster uses the [`clippy`](https://rust-lang.github.io/rust-clippy/) code linter.
|
||||
|
||||
The command that will ultimately be used in the CI pipeline for linting is `cargo clippy --all-targets --all-features -- -D warnings -A clippy::unnecessary_unwrap`.
|
||||
The command that will ultimately be used in the CI pipeline for linting is `cargo clippy --all-targets --all-features -- -D warnings -A clippy::mutex-atomic`.
|
||||
|
||||
Before submitting a Pull Request, the above command should be run. Please do not ignore any linting errors in code you write or modify, as they are meant to **help** by ensuring a clean and simple code base.
|
||||
|
||||
|
||||
376
Cargo.lock
generated
376
Cargo.lock
generated
@@ -291,6 +291,12 @@ version = "3.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a45a46ab1f2412e53d3a0ade76ffad2025804294569aae387231a0cd6e0899"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.1.0"
|
||||
@@ -371,6 +377,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.2"
|
||||
@@ -428,6 +440,33 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.27.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa 0.4.8",
|
||||
"matches",
|
||||
"phf",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"smallvec",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.21"
|
||||
@@ -479,6 +518,19 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustc_version",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.12"
|
||||
@@ -538,6 +590,27 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
@@ -621,6 +694,7 @@ dependencies = [
|
||||
"regex",
|
||||
"reqwest",
|
||||
"rlimit",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_regex",
|
||||
@@ -678,6 +752,16 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.19"
|
||||
@@ -788,6 +872,35 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fb6c4351f4f134772edf9bcd17de13b7fbcb2c56928b440d6823bd4dc9ebd80"
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.9.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.4"
|
||||
@@ -796,7 +909,7 @@ checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasi 0.10.2+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -846,6 +959,20 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.6"
|
||||
@@ -1170,6 +1297,26 @@ dependencies = [
|
||||
"value-bag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.9"
|
||||
@@ -1256,6 +1403,12 @@ dependencies = [
|
||||
"memoffset",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||
|
||||
[[package]]
|
||||
name = "normalize-line-endings"
|
||||
version = "0.3.0"
|
||||
@@ -1401,6 +1554,51 @@ dependencies = [
|
||||
"indexmap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.8.0"
|
||||
@@ -1467,6 +1665,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
@@ -1503,6 +1707,12 @@ dependencies = [
|
||||
"termtree",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.36"
|
||||
@@ -1521,6 +1731,57 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
"rand_hc",
|
||||
"rand_pcg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.10"
|
||||
@@ -1536,7 +1797,7 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.4",
|
||||
"redox_syscall",
|
||||
]
|
||||
|
||||
@@ -1618,6 +1879,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
|
||||
dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.6"
|
||||
@@ -1646,6 +1916,22 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "scraper"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48e02aa790c80c2e494130dec6a522033b6a23603ffc06360e9fe6c611ea2c12"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"ego-tree",
|
||||
"getopts",
|
||||
"html5ever",
|
||||
"matches",
|
||||
"selectors",
|
||||
"smallvec",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.4.2"
|
||||
@@ -1669,6 +1955,32 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log",
|
||||
"matches",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
"thin-slice",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.133"
|
||||
@@ -1722,6 +2034,16 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
|
||||
dependencies = [
|
||||
"nodrop",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook"
|
||||
version = "0.3.13"
|
||||
@@ -1797,6 +2119,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.2"
|
||||
@@ -1808,6 +2136,19 @@ dependencies = [
|
||||
"parking_lot",
|
||||
"phf_shared",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1841,6 +2182,17 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.7.0"
|
||||
@@ -1886,6 +2238,12 @@ dependencies = [
|
||||
"terminal_size",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thin-slice"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.30"
|
||||
@@ -2111,13 +2469,19 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2167,6 +2531,12 @@ dependencies = [
|
||||
"try-lock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.9.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
|
||||
@@ -23,6 +23,7 @@ lazy_static = "1.4"
|
||||
dirs = "4.0"
|
||||
|
||||
[dependencies]
|
||||
scraper = "0.12"
|
||||
futures = { version = "0.3"}
|
||||
tokio = { version = "1.15", features = ["full"] }
|
||||
tokio-util = {version = "0.6", features = ["codec"]}
|
||||
|
||||
@@ -21,6 +21,9 @@ pub enum ExtractionTarget {
|
||||
|
||||
/// Examine robots.txt (specifically) and extract links
|
||||
RobotsTxt,
|
||||
|
||||
/// Parse HTML and extract links
|
||||
ParseHtml,
|
||||
}
|
||||
|
||||
/// responsible for building an `Extractor`
|
||||
@@ -28,7 +31,7 @@ pub struct ExtractorBuilder<'a> {
|
||||
/// Response from which to extract links
|
||||
response: Option<&'a FeroxResponse>,
|
||||
|
||||
/// Response from which to extract links
|
||||
/// URL of where to extract links
|
||||
url: String,
|
||||
|
||||
/// Handles object to house the underlying mpsc transmitters
|
||||
|
||||
@@ -18,6 +18,7 @@ use crate::{
|
||||
};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use reqwest::{StatusCode, Url};
|
||||
use scraper::{Html, Selector};
|
||||
use std::collections::HashSet;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
@@ -43,7 +44,7 @@ pub struct Extractor<'a> {
|
||||
/// Response from which to extract links
|
||||
pub(super) response: Option<&'a FeroxResponse>,
|
||||
|
||||
/// Response from which to extract links
|
||||
/// URL of where to extract links
|
||||
pub(super) url: String,
|
||||
|
||||
/// Handles object to house the underlying mpsc transmitters
|
||||
@@ -56,11 +57,12 @@ pub struct Extractor<'a> {
|
||||
/// Extractor implementation
|
||||
impl<'a> Extractor<'a> {
|
||||
/// perform extraction from the given target and return any links found
|
||||
pub async fn extract(&self) -> Result<HashSet<String>> {
|
||||
pub async fn extract(&self) -> Result<(HashSet<String>, bool)> {
|
||||
log::trace!("enter: extract (this fn has associated trace exit msg)");
|
||||
match self.target {
|
||||
ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?),
|
||||
ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?),
|
||||
ExtractionTarget::ParseHtml => Ok(self.parse_html().await?),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,11 +94,11 @@ impl<'a> Extractor<'a> {
|
||||
continue;
|
||||
}
|
||||
|
||||
if resp.is_file() {
|
||||
// very likely a file, simply request and report
|
||||
log::debug!("Extracted file: {}", resp);
|
||||
// request and report assumed file
|
||||
if resp.is_file() || !resp.is_directory() {
|
||||
log::debug!("Extracted File: {}", resp);
|
||||
|
||||
scanned_urls.add_file_scan(&resp.url().to_string(), ScanOrder::Latest);
|
||||
scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest);
|
||||
|
||||
if let Err(e) = resp.send_report(self.handles.output.tx.clone()) {
|
||||
log::warn!("Could not send FeroxResponse to output handler: {}", e);
|
||||
@@ -143,12 +145,28 @@ impl<'a> Extractor<'a> {
|
||||
/// - homepage/assets/img/
|
||||
/// - homepage/assets/
|
||||
/// - homepage/
|
||||
pub(super) async fn extract_from_body(&self) -> Result<HashSet<String>> {
|
||||
log::trace!("enter: get_links");
|
||||
pub(super) async fn extract_from_body(&self) -> Result<(HashSet<String>, bool)> {
|
||||
log::trace!("enter: extract_from_body");
|
||||
|
||||
let mut links = HashSet::<String>::new();
|
||||
let dirlist_flag = false;
|
||||
|
||||
let body = self.response.unwrap().text();
|
||||
// Response
|
||||
let response = self.response.unwrap();
|
||||
let resp_url = response.url();
|
||||
let body = response.text();
|
||||
let html = Html::parse_document(body);
|
||||
|
||||
// Extract Links
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
|
||||
|
||||
for capture in self.links_regex.captures_iter(body) {
|
||||
// remove single & double quotes from both ends of the capture
|
||||
@@ -188,17 +206,16 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
self.update_stats(links.len())?;
|
||||
|
||||
log::trace!("exit: get_links -> {:?}", links);
|
||||
|
||||
Ok(links)
|
||||
log::trace!("exit: extract_from_body -> {:?} {}", links, dirlist_flag);
|
||||
Ok((links, dirlist_flag))
|
||||
}
|
||||
|
||||
/// take a url fragment like homepage/assets/img/icons/handshake.svg and
|
||||
/// incrementally add
|
||||
/// - homepage/assets/img/icons/
|
||||
/// - homepage/assets/img/
|
||||
/// - homepage/assets/
|
||||
/// - homepage/
|
||||
/// - homepage/assets/img/icons/
|
||||
/// - homepage/assets/img/
|
||||
/// - homepage/assets/
|
||||
/// - homepage/
|
||||
fn add_all_sub_paths(&self, url_path: &str, links: &mut HashSet<String>) -> Result<()> {
|
||||
log::trace!("enter: add_all_sub_paths({}, {:?})", url_path, links);
|
||||
|
||||
@@ -267,12 +284,14 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
let old_url = match self.target {
|
||||
ExtractionTarget::ResponseBody => self.response.unwrap().url().clone(),
|
||||
ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
ExtractionTarget::ParseHtml | ExtractionTarget::RobotsTxt => {
|
||||
match Url::parse(&self.url) {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
let new_url = old_url
|
||||
@@ -287,11 +306,6 @@ impl<'a> Extractor<'a> {
|
||||
}
|
||||
|
||||
/// Wrapper around link extraction logic
|
||||
/// currently used in two places:
|
||||
/// - links from response bodies
|
||||
/// - links from robots.txt responses
|
||||
///
|
||||
/// general steps taken:
|
||||
/// - create a new Url object based on cli options/args
|
||||
/// - check if the new Url has already been seen/scanned -> None
|
||||
/// - make a request to the new Url ? -> Some(response) : None
|
||||
@@ -350,14 +364,17 @@ impl<'a> Extractor<'a> {
|
||||
/// http://localhost/stuff/things
|
||||
/// this function requests:
|
||||
/// http://localhost/robots.txt
|
||||
pub(super) async fn extract_from_robots(&self) -> Result<HashSet<String>> {
|
||||
pub(super) async fn extract_from_robots(&self) -> Result<(HashSet<String>, bool)> {
|
||||
log::trace!("enter: extract_robots_txt");
|
||||
|
||||
let mut links: HashSet<String> = HashSet::new();
|
||||
let dirlist_flag = false;
|
||||
|
||||
let response = self.request_robots_txt().await?;
|
||||
// request
|
||||
let response = self.make_extract_request("/robots.txt").await?;
|
||||
let body = response.text();
|
||||
|
||||
for capture in self.robots_regex.captures_iter(response.text()) {
|
||||
for capture in self.robots_regex.captures_iter(body) {
|
||||
if let Some(new_path) = capture.name("url_path") {
|
||||
let mut new_url = Url::parse(&self.url)?;
|
||||
new_url.set_path(new_path.as_str());
|
||||
@@ -369,19 +386,126 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
self.update_stats(links.len())?;
|
||||
|
||||
log::trace!("exit: extract_robots_txt -> {:?}", links);
|
||||
Ok(links)
|
||||
log::trace!("exit: extract_robots_txt -> {:?} {}", links, dirlist_flag);
|
||||
Ok((links, dirlist_flag))
|
||||
}
|
||||
|
||||
/// helper function that simply requests /robots.txt on the given url's base url
|
||||
/// Entry point to parse html for links (i.e. webscraping, directory listings)
|
||||
/// this function requests:
|
||||
/// http://localhost/<location>
|
||||
pub(super) async fn parse_html(&self) -> Result<(HashSet<String>, bool)> {
|
||||
log::trace!("enter: parse_html");
|
||||
|
||||
let mut links: HashSet<String> = HashSet::new();
|
||||
let mut dirlist_flag = false;
|
||||
|
||||
// Response
|
||||
let url = Url::parse(&self.url)?;
|
||||
let response = self.make_extract_request(url.path()).await?;
|
||||
let resp_url = response.url();
|
||||
let body = response.text();
|
||||
let html = Html::parse_document(body);
|
||||
|
||||
// Directory listing heuristic detection to not continue scanning
|
||||
// Index of /: apache
|
||||
// Directory Listing for /: tomcat,
|
||||
// Directory Listing -- /: ASP.NET
|
||||
// <host> - /: iis, azure, skipping due to loose heuristic
|
||||
let title_selector = Selector::parse("title").unwrap();
|
||||
for t in html.select(&title_selector) {
|
||||
let title = t.inner_html().to_lowercase();
|
||||
if title.contains("directory listing for /")
|
||||
|| title.contains("index of /")
|
||||
|| title.contains("directory listing -- /")
|
||||
{
|
||||
log::debug!("Directory listing heuristic detection from \"{}\"", title);
|
||||
dirlist_flag = true;
|
||||
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
|
||||
self.update_stats(links.len())?;
|
||||
|
||||
log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag);
|
||||
return Ok((links, dirlist_flag));
|
||||
}
|
||||
}
|
||||
|
||||
// Extract Links
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src");
|
||||
self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
|
||||
|
||||
self.update_stats(links.len())?;
|
||||
|
||||
log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag);
|
||||
Ok((links, dirlist_flag))
|
||||
}
|
||||
|
||||
/// simple helper to get html links by tag/attribute and add it to the `links` HashSet
|
||||
fn extract_links_by_attr(
|
||||
&self,
|
||||
resp_url: &Url,
|
||||
links: &mut HashSet<String>,
|
||||
html: &Html,
|
||||
html_tag: &str,
|
||||
html_attr: &str,
|
||||
) {
|
||||
log::trace!("enter: extract_links_by_attr");
|
||||
|
||||
let selector = Selector::parse(html_tag).unwrap();
|
||||
let tags = html
|
||||
.select(&selector)
|
||||
.filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr));
|
||||
for t in tags {
|
||||
if let Some(link) = t.value().attr(html_attr) {
|
||||
log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str());
|
||||
|
||||
match Url::parse(link) {
|
||||
Ok(absolute) => {
|
||||
if absolute.domain() != resp_url.domain()
|
||||
|| absolute.host() != resp_url.host()
|
||||
{
|
||||
// domains/ips are not the same, don't scan things that aren't part of the original
|
||||
// target url
|
||||
continue;
|
||||
}
|
||||
|
||||
if self.add_all_sub_paths(absolute.path(), links).is_err() {
|
||||
log::warn!("could not add sub-paths from {} to {:?}", absolute, links);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// this is the expected error that happens when we try to parse a url fragment
|
||||
// ex: Url::parse("/login") -> Err("relative URL without a base")
|
||||
// while this is technically an error, these are good results for us
|
||||
if e.to_string().contains("relative URL without a base") {
|
||||
if self.add_all_sub_paths(link, links).is_err() {
|
||||
log::warn!("could not add sub-paths from {} to {:?}", link, links);
|
||||
}
|
||||
} else {
|
||||
// unexpected error has occurred
|
||||
log::warn!("Could not parse given url: {}", e);
|
||||
self.handles.stats.send(AddError(Other)).unwrap_or_default();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::trace!("exit: extract_links_by_attr");
|
||||
}
|
||||
|
||||
/// helper function that simply requests at <location> on the given url's base url
|
||||
///
|
||||
/// example:
|
||||
/// http://localhost/api/users -> http://localhost/robots.txt
|
||||
///
|
||||
/// The length of the given path has no effect on what's requested; it's always
|
||||
/// base url + /robots.txt
|
||||
pub(super) async fn request_robots_txt(&self) -> Result<FeroxResponse> {
|
||||
log::trace!("enter: get_robots_file");
|
||||
/// http://localhost/api/users -> http://localhost/<location>
|
||||
pub(super) async fn make_extract_request(&self, location: &str) -> Result<FeroxResponse> {
|
||||
log::trace!("enter: make_extract_request");
|
||||
|
||||
// more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something
|
||||
// similar; to account for that, create a client that will follow redirects, regardless of
|
||||
@@ -405,7 +529,7 @@ impl<'a> Extractor<'a> {
|
||||
)?;
|
||||
|
||||
let mut url = Url::parse(&self.url)?;
|
||||
url.set_path("/robots.txt"); // overwrite existing path with /robots.txt
|
||||
url.set_path(location); // overwrite existing path
|
||||
|
||||
// purposefully not using logged_request here due to using the special client
|
||||
let response = make_request(
|
||||
@@ -428,7 +552,7 @@ impl<'a> Extractor<'a> {
|
||||
)
|
||||
.await;
|
||||
|
||||
log::trace!("exit: get_robots_file -> {}", ferox_response);
|
||||
log::trace!("exit: make_extract_request -> {}", ferox_response);
|
||||
Ok(ferox_response)
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,9 @@ lazy_static! {
|
||||
/// Extractor for testing response bodies
|
||||
static ref BODY_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ResponseBody, Arc::new(FeroxScans::default()));
|
||||
|
||||
/// Extractor for testing parsing html
|
||||
static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ParseHtml, Arc::new(FeroxScans::default()));
|
||||
|
||||
/// FeroxResponse for Extractor
|
||||
static ref RESPONSE: FeroxResponse = get_test_response();
|
||||
}
|
||||
@@ -42,6 +45,9 @@ fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc<FeroxScans>) -> E
|
||||
ExtractionTarget::RobotsTxt => builder
|
||||
.url("http://localhost")
|
||||
.target(ExtractionTarget::RobotsTxt),
|
||||
ExtractionTarget::ParseHtml => builder
|
||||
.url("http://localhost")
|
||||
.target(ExtractionTarget::ParseHtml),
|
||||
};
|
||||
|
||||
let config = Arc::new(Configuration::new().unwrap());
|
||||
@@ -252,7 +258,7 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain()
|
||||
handles: handles.clone(),
|
||||
};
|
||||
|
||||
let links = extractor.extract_from_body().await?;
|
||||
let links = (extractor.extract_from_body().await?).0;
|
||||
|
||||
assert!(links.is_empty());
|
||||
assert_eq!(mock.hits(), 1);
|
||||
@@ -280,7 +286,7 @@ async fn request_robots_txt_without_proxy() -> Result<()> {
|
||||
handles,
|
||||
};
|
||||
|
||||
let resp = extractor.request_robots_txt().await?;
|
||||
let resp = extractor.make_extract_request("/robots.txt").await?;
|
||||
|
||||
assert!(matches!(resp.status(), &StatusCode::OK));
|
||||
println!("{}", resp);
|
||||
@@ -313,7 +319,7 @@ async fn request_robots_txt_with_proxy() -> Result<()> {
|
||||
.handles(handles)
|
||||
.build()?;
|
||||
|
||||
let resp = extractor.request_robots_txt().await?;
|
||||
let resp = extractor.make_extract_request("/robots.txt").await?;
|
||||
|
||||
assert!(matches!(resp.status(), &StatusCode::OK));
|
||||
assert_eq!(resp.content_length(), 19);
|
||||
|
||||
@@ -35,10 +35,11 @@ pub fn add_bar(prefix: &str, length: u64, bar_type: BarType) -> ProgressBar {
|
||||
|
||||
style = match bar_type {
|
||||
BarType::Hidden => style.template(""),
|
||||
BarType::Default => style
|
||||
.template("[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {per_sec:7} {prefix}"),
|
||||
BarType::Default => style.template(
|
||||
"[{bar:.cyan/blue}] - {elapsed:<4} {pos:>7}/{len:7} {per_sec:7} {prefix} {msg}",
|
||||
),
|
||||
BarType::Message => style.template(&format!(
|
||||
"[{{bar:.cyan/blue}}] - {{elapsed:<4}} {{pos:>7}}/{{len:7}} {:7} {{prefix}}",
|
||||
"[{{bar:.cyan/blue}}] - {{elapsed:<4}} {{pos:>7}}/{{len:7}} {:7} {{prefix}} {{msg}}",
|
||||
"-"
|
||||
)),
|
||||
BarType::Total => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::{ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use console::style;
|
||||
use futures::{stream, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use tokio::sync::Semaphore;
|
||||
@@ -10,7 +11,7 @@ use crate::{
|
||||
Command::{AddError, AddToF64Field, SubtractFromUsizeField},
|
||||
Handles,
|
||||
},
|
||||
extractor::{ExtractionTarget::RobotsTxt, ExtractorBuilder},
|
||||
extractor::{ExtractionTarget, ExtractorBuilder},
|
||||
heuristics,
|
||||
scan_manager::{FeroxResponses, MenuCmdResult, ScanOrder, ScanStatus, PAUSE_SCAN},
|
||||
statistics::{
|
||||
@@ -43,7 +44,7 @@ pub struct FeroxScanner {
|
||||
/// wordlist that's already been read from disk
|
||||
wordlist: Arc<Vec<String>>,
|
||||
|
||||
/// limiter that restricts the number of active FeroxScanners
|
||||
/// limiter that restricts the number of active FeroxScanners
|
||||
scan_limiter: Arc<Semaphore>,
|
||||
}
|
||||
|
||||
@@ -74,22 +75,33 @@ impl FeroxScanner {
|
||||
log::info!("Starting scan against: {}", self.target_url);
|
||||
|
||||
let scan_timer = Instant::now();
|
||||
let mut dirlist_flag = false;
|
||||
|
||||
if matches!(self.order, ScanOrder::Initial) && self.handles.config.extract_links {
|
||||
// only grab robots.txt on the initial scan_url calls. all fresh dirs will be passed
|
||||
// to try_recursion
|
||||
if self.handles.config.extract_links {
|
||||
// parse html for links (i.e. web scraping)
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.target(ExtractionTarget::ParseHtml)
|
||||
.url(&self.target_url)
|
||||
.handles(self.handles.clone())
|
||||
.target(RobotsTxt)
|
||||
.build()?;
|
||||
|
||||
let links = extractor.extract().await?;
|
||||
let extract_out = extractor.extract().await?;
|
||||
let links = extract_out.0;
|
||||
dirlist_flag = extract_out.1;
|
||||
extractor.request_links(links).await?;
|
||||
|
||||
if matches!(self.order, ScanOrder::Initial) {
|
||||
// check for robots.txt (cannot be in subdirs)
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.target(ExtractionTarget::RobotsTxt)
|
||||
.url(&self.target_url)
|
||||
.handles(self.handles.clone())
|
||||
.build()?;
|
||||
let links = (extractor.extract().await?).0;
|
||||
extractor.request_links(links).await?;
|
||||
}
|
||||
}
|
||||
|
||||
let scanned_urls = self.handles.ferox_scans()?;
|
||||
|
||||
let ferox_scan = match scanned_urls.get_scan_by_url(&self.target_url) {
|
||||
Some(scan) => {
|
||||
scan.set_status(ScanStatus::Running)?;
|
||||
@@ -106,6 +118,28 @@ impl FeroxScanner {
|
||||
|
||||
let progress_bar = ferox_scan.progress_bar();
|
||||
|
||||
// Directory listing heuristic detection to not continue scanning
|
||||
if dirlist_flag {
|
||||
log::trace!("exit: scan_url -> Directory listing heuristic");
|
||||
|
||||
self.handles.stats.send(AddToF64Field(
|
||||
DirScanTimes,
|
||||
scan_timer.elapsed().as_secs_f64(),
|
||||
))?;
|
||||
|
||||
self.handles.stats.send(SubtractFromUsizeField(
|
||||
TotalExpected,
|
||||
progress_bar.length() as usize,
|
||||
))?;
|
||||
|
||||
progress_bar.reset_eta();
|
||||
progress_bar.finish_with_message(&format!("=> {}", style("Directory listing").green()));
|
||||
|
||||
ferox_scan.finish()?;
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// When acquire is called and the semaphore has remaining permits, the function immediately
|
||||
// returns a permit. However, if no remaining permits are available, acquire (asynchronously)
|
||||
// waits until an outstanding permit is dropped, at which point, the freed permit is assigned
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::{
|
||||
Command::{self, AddError, SubtractFromUsizeField},
|
||||
Handles,
|
||||
},
|
||||
extractor::{ExtractionTarget::ResponseBody, ExtractorBuilder},
|
||||
extractor::{ExtractionTarget, ExtractorBuilder},
|
||||
response::FeroxResponse,
|
||||
scan_manager::{FeroxScan, ScanStatus},
|
||||
statistics::{StatError::Other, StatField::TotalExpected},
|
||||
@@ -395,13 +395,12 @@ impl Requester {
|
||||
|
||||
if self.handles.config.extract_links && !ferox_response.status().is_redirection() {
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.target(ResponseBody)
|
||||
.target(ExtractionTarget::ResponseBody)
|
||||
.response(&ferox_response)
|
||||
.handles(self.handles.clone())
|
||||
.build()?;
|
||||
|
||||
let new_links: HashSet<_>;
|
||||
let extracted = extractor.extract().await?;
|
||||
let extracted = (extractor.extract().await?).0;
|
||||
|
||||
{
|
||||
// gain and quickly drop the read lock on seen_links, using it while unlocked
|
||||
|
||||
@@ -288,7 +288,7 @@ fn extractor_finds_robots_txt_links_and_displays_files_or_scans_directories() {
|
||||
);
|
||||
|
||||
assert_eq!(mock.hits(), 1);
|
||||
assert_eq!(mock_dir.hits(), 1);
|
||||
assert_eq!(mock_dir.hits(), 2);
|
||||
assert_eq!(mock_two.hits(), 1);
|
||||
assert_eq!(mock_file.hits(), 1);
|
||||
assert_eq!(mock_disallowed.hits(), 1);
|
||||
@@ -370,6 +370,226 @@ fn extractor_finds_robots_txt_links_and_displays_files_non_recursive() {
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
}
|
||||
|
||||
#[test]
/// serve a directory listing that contains a file and a folder; with recursion left enabled,
/// ferox should extract both links, request each one, and then follow the redirecting folder
/// into its own directory listing, reporting the files found there as well.
fn extractor_finds_directory_listing_links_and_displays_files() {
    let server = MockServer::start();
    let (tmp_dir, wordlist) =
        setup_tmp_directory(&["invalid".to_string()], "wordlist").unwrap();

    // listing served at the web root: two sub-directories and one file
    let root_listing = r#"
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
            <title>Directory listing for /</title>
            </head>
            <body>
            <h1>Directory listing for /</h1>
            <hr>
            <ul>
            <li><a href="disallowed-subdir/">disallowed-subdir/</a></li>
            <li><a href="LICENSE">LICENSE</a></li>
            <li><a href="misc/">misc/</a></li>
            </ul>
            <hr>
            </body>
            </html>
            "#;

    // listing served at /misc/: two files
    let misc_listing = r#"
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
            <title>Directory listing for /misc</title>
            </head>
            <body>
            <h1>Directory listing for /misc</h1>
            <hr>
            <ul>
            <li><a href="LICENSE">LICENSE</a></li>
            <li><a href="stupidfile.php">stupidfile.php</a></li>
            </ul>
            <hr>
            </body>
            </html>
            "#;

    let root = server.mock(|when, then| {
        when.method(GET).path("/");
        then.status(200).body(root_listing);
    });

    let root_license = server.mock(|when, then| {
        when.method(GET).path("/LICENSE");
        then.status(200).body("im a little teapot"); // 18
    });

    let disallowed = server.mock(|when, then| {
        when.method(GET).path("/disallowed-subdir");
        then.status(404);
    });

    // /misc (no trailing slash) redirects to the real listing at /misc/
    let misc_redirect = server.mock(|when, then| {
        when.method(GET).path("/misc");
        then.status(301).header("Location", &server.url("/misc/"));
    });
    let misc = server.mock(|when, then| {
        when.method(GET).path("/misc/");
        then.status(200).body(misc_listing);
    });

    let misc_license = server.mock(|when, then| {
        when.method(GET).path("/misc/LICENSE");
        then.status(200).body("i too, am a container for tea"); // 29
    });

    let misc_php = server.mock(|when, then| {
        when.method(GET).path("/misc/stupidfile.php");
        then.status(200).body("im a little teapot too"); // 22
    });

    let output = Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(server.url("/"))
        .arg("--wordlist")
        .arg(wordlist.as_os_str())
        .arg("--extract-links")
        .arg("--redirects")
        .unwrap();

    // every extracted file shows up with its own content length and a 200 status
    let expected_stdout = predicate::str::contains("/LICENSE") // 2 directories contain LICENSE
        .count(2)
        .and(predicate::str::contains("18c"))
        .and(predicate::str::contains("/misc/stupidfile.php"))
        .and(predicate::str::contains("22c"))
        .and(predicate::str::contains("/misc/LICENSE"))
        .and(predicate::str::contains("29c"))
        .and(predicate::str::contains("200").count(3));

    output.assert().success().stdout(expected_stdout);

    assert_eq!(root.hits(), 2);
    assert_eq!(root_license.hits(), 1);
    assert_eq!(disallowed.hits(), 1);
    assert_eq!(misc_redirect.hits(), 1);
    assert_eq!(misc.hits(), 2);
    assert_eq!(misc_license.hits(), 1);
    assert_eq!(misc_php.hits(), 1);
    teardown_tmp_directory(tmp_dir);
}
|
||||
|
||||
#[test]
/// serve a directory listing with a file and a folder contained within it, with recursion
/// disabled via --no-recursion. ferox should find both links in the root listing and request
/// each one, but must not request or report the files that live inside the /misc/ folder.
///
/// This is the non-recursive version of
/// `extractor_finds_directory_listing_links_and_displays_files`.
fn extractor_finds_directory_listing_links_and_displays_files_non_recursive() {
    let srv = MockServer::start();
    let (tmp_dir, file) = setup_tmp_directory(&["invalid".to_string()], "wordlist").unwrap();

    let mock_root = srv.mock(|when, then| {
        when.method(GET).path("/");
        then.status(200).body(
            r#"
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
            <title>Directory listing for /</title>
            </head>
            <body>
            <h1>Directory listing for /</h1>
            <hr>
            <ul>
            <li><a href="disallowed-subdir/">disallowed-subdir/</a></li>
            <li><a href="LICENSE">LICENSE</a></li>
            <li><a href="misc/">misc/</a></li>
            </ul>
            <hr>
            </body>
            </html>
            "#,
        );
    });

    let mock_root_file = srv.mock(|when, then| {
        when.method(GET).path("/LICENSE");
        then.status(200).body("im a little teapot"); // 18
    });

    let mock_dir_disallowed = srv.mock(|when, then| {
        when.method(GET).path("/disallowed-subdir");
        then.status(404);
    });

    // /misc (no trailing slash) redirects to the listing at /misc/
    let mock_dir_redir = srv.mock(|when, then| {
        when.method(GET).path("/misc");
        then.status(301).header("Location", &srv.url("/misc/"));
    });
    let mock_dir = srv.mock(|when, then| {
        when.method(GET).path("/misc/");
        then.status(200).body(
            r#"
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
            <title>Directory listing for /misc</title>
            </head>
            <body>
            <h1>Directory listing for /misc</h1>
            <hr>
            <ul>
            <li><a href="LICENSE">LICENSE</a></li>
            <li><a href="stupidfile.php">stupidfile.php</a></li>
            </ul>
            <hr>
            </body>
            </html>
            "#,
        );
    });

    let mock_dir_file = srv.mock(|when, then| {
        when.method(GET).path("/misc/LICENSE");
        then.status(200).body("i too, am a container for tea"); // 29
    });

    let mock_dir_file_ext = srv.mock(|when, then| {
        when.method(GET).path("/misc/stupidfile.php");
        then.status(200).body("im a little teapot too"); // 22
    });

    let cmd = Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--url")
        .arg(srv.url("/"))
        .arg("--wordlist")
        .arg(file.as_os_str())
        .arg("--extract-links")
        .arg("--redirects")
        .arg("--no-recursion")
        .unwrap();

    cmd.assert().success().stdout(
        // the root-level file must be reported ...
        predicate::str::contains("/LICENSE")
            .and(predicate::str::contains("18c"))
            // ... while nothing from inside /misc/ may appear. Each negation is scoped to its
            // own predicate: calling `.not()` on the chain itself would negate everything
            // accumulated so far and silently drop the positive checks above.
            .and(predicate::str::contains("/misc/stupidfile.php").not())
            .and(predicate::str::contains("22c").not())
            .and(predicate::str::contains("/misc/LICENSE").not())
            .and(predicate::str::contains("29c").not())
            .and(predicate::str::contains("200").count(1)),
    );

    assert_eq!(mock_root.hits(), 2);
    assert_eq!(mock_root_file.hits(), 1);
    assert_eq!(mock_dir_disallowed.hits(), 1);
    assert_eq!(mock_dir_redir.hits(), 1);
    assert_eq!(mock_dir.hits(), 1);
    // recursion is disabled, so the files inside /misc/ are never requested
    assert_eq!(mock_dir_file.hits(), 0);
    assert_eq!(mock_dir_file_ext.hits(), 0);
    teardown_tmp_directory(tmp_dir);
}
|
||||
|
||||
#[test]
|
||||
/// send a request to a page that contains a link that contains a directory that returns a 403
|
||||
/// --extract-links should find the link and recurse into the 403 directory, finding LICENSE
|
||||
@@ -416,7 +636,7 @@ fn extractor_recurses_into_403_directories() -> Result<(), Box<dyn std::error::E
|
||||
|
||||
assert_eq!(mock.hits(), 1);
|
||||
assert_eq!(mock_two.hits(), 1);
|
||||
assert_eq!(forbidden_dir.hits(), 1);
|
||||
assert_eq!(forbidden_dir.hits(), 2);
|
||||
teardown_tmp_directory(tmp_dir);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user