fixed test

Merge pull request #511 from epi052/all-contributors/add-0dayCTF
docs: add 0dayCTF as a contributor for ideas
2026-05-23 04:51:13 -03:00 · 2022-03-05 07:09:23 -06:00 · 2022-03-05 07:02:49 -06:00 · 2022-03-05 13:02:17 +00:00 · 2022-03-05 13:02:16 +00:00 · 2022-03-05 06:57:33 -06:00
52 changed files with 3782 additions and 770 deletions
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -357,6 +357,15 @@
        "ideas",
        "code"
      ]
+    },
+    {
+      "login": "0dayCTF",
+      "name": "Ryan Montgomery",
+      "avatar_url": "https://avatars.githubusercontent.com/u/44453666?v=4",
+      "profile": "http://ryanmontgomery.me",
+      "contributions": [
+        "ideas"
+      ]
    }
  ],
  "contributorsPerLine": 7,
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -13,9 +13,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.52"
+version = "1.0.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3"
+checksum = "159bb86af3a200e19a068f4224eae4c8bb2d0fa054c7e5d1cacd5cef95e684cd"

 [[package]]
 name = "ascii-canvas"
@@ -77,9 +77,9 @@ dependencies = [

 [[package]]
 name = "async-global-executor"
-version = "2.0.2"
+version = "2.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9586ec52317f36de58453159d48351bc244bc24ced3effc1fce22f3d48664af6"
+checksum = "c026b7e44f1316b567ee750fea85103f87fcb80792b860e979f221259796ca0a"
 dependencies = [
 "async-channel",
 "async-executor",
@@ -112,9 +112,9 @@ dependencies = [

 [[package]]
 name = "async-lock"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b"
+checksum = "e97a171d191782fba31bb902b14ad94e24a68145032b7eedf871ab0bc0d077b6"
 dependencies = [
 "event-listener",
 ]
@@ -184,9 +184,9 @@ dependencies = [

 [[package]]
 name = "async-task"
-version = "4.0.3"
+version = "4.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e91831deabf0d6d7ec49552e489aed63b7456a7a3c46cff62adad428110b0af0"
+checksum = "677d306121baf53310a3fd342d88dc0824f6bbeace68347593658525565abee8"

 [[package]]
 name = "async-trait"
@@ -218,9 +218,9 @@ dependencies = [

 [[package]]
 name = "autocfg"
-version = "1.0.1"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "base64"
@@ -317,9 +317,9 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6"

 [[package]]
 name = "cc"
-version = "1.0.72"
+version = "1.0.73"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee"
+checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"

 [[package]]
 name = "cfg-if"
@@ -329,9 +329,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

 [[package]]
 name = "clap"
-version = "3.0.7"
+version = "3.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12e8611f9ae4e068fa3e56931fded356ff745e70987ff76924a6e0ab1c8ef2e3"
+checksum = "ced1892c55c910c1219e98d6fc8d71f6bddba7905866ce740066d8bfea859312"
 dependencies = [
 "atty",
 "bitflags",
@@ -346,9 +346,9 @@ dependencies = [

 [[package]]
 name = "clap_complete"
-version = "3.0.4"
+version = "3.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d044e9db8cd0f68191becdeb5246b7462e4cf0c069b19ae00d1bf3fa9889498d"
+checksum = "df6f3613c0a3cddfd78b41b10203eb322cb29b600cbdf808a7d3db95691b8e25"
 dependencies = [
 "clap",
 ]
@@ -385,9 +385,9 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"

 [[package]]
 name = "core-foundation"
-version = "0.9.2"
+version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6888e10551bb93e424d8df1d07f1a8b4fceb0001a3a4b048bfc47554946f47b3"
+checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
 dependencies = [
 "core-foundation-sys",
 "libc",
@@ -401,9 +401,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.6"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120"
+checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6"
 dependencies = [
 "cfg-if",
 "lazy_static",
@@ -411,15 +411,15 @@ dependencies = [

 [[package]]
 name = "crossterm"
-version = "0.20.0"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0ebde6a9dd5e331cd6c6f48253254d117642c31653baa475e394657c59c1f7d"
+checksum = "77b75a27dc8d220f1f8521ea69cd55a34d720a200ebb3a624d9aa19193d3b432"
 dependencies = [
 "bitflags",
 "crossterm_winapi",
 "libc",
- "mio",
- "parking_lot",
+ "mio 0.7.14",
+ "parking_lot 0.12.0",
 "signal-hook",
 "signal-hook-mio",
 "winapi",
@@ -427,9 +427,9 @@ dependencies = [

 [[package]]
 name = "crossterm_winapi"
-version = "0.8.0"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a6966607622438301997d3dac0d2f6e9a90c68bb6bc1785ea98456ab93c0507"
+checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
 dependencies = [
 "winapi",
 ]
@@ -656,22 +656,22 @@ dependencies = [

 [[package]]
 name = "event-listener"
-version = "2.5.1"
+version = "2.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7531096570974c3a9dcf9e4b8e1cede1ec26cf5046219fb3b9d897503b9be59"
+checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71"

 [[package]]
 name = "fastrand"
-version = "1.6.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2"
+checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
 dependencies = [
 "instant",
 ]

 [[package]]
 name = "feroxbuster"
-version = "2.5.0"
+version = "2.6.0"
 dependencies = [
 "anyhow",
 "assert_cmd",
@@ -700,7 +700,7 @@ dependencies = [
 "serde_regex",
 "tempfile",
 "tokio",
- "tokio-util",
+ "tokio-util 0.7.0",
 "toml",
 "url",
 "uuid",
@@ -754,9 +754,9 @@ dependencies = [

 [[package]]
 name = "futf"
-version = "0.1.4"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
+checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
 dependencies = [
 "mac",
 "new_debug_unreachable",
@@ -764,9 +764,9 @@ dependencies = [

 [[package]]
 name = "futures"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4"
+checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e"
 dependencies = [
 "futures-channel",
 "futures-core",
@@ -779,9 +779,9 @@ dependencies = [

 [[package]]
 name = "futures-channel"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b"
+checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010"
 dependencies = [
 "futures-core",
 "futures-sink",
@@ -789,15 +789,15 @@ dependencies = [

 [[package]]
 name = "futures-core"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7"
+checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"

 [[package]]
 name = "futures-executor"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a"
+checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6"
 dependencies = [
 "futures-core",
 "futures-task",
@@ -806,9 +806,9 @@ dependencies = [

 [[package]]
 name = "futures-io"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2"
+checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"

 [[package]]
 name = "futures-lite"
@@ -827,9 +827,9 @@ dependencies = [

 [[package]]
 name = "futures-macro"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c"
+checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -838,21 +838,21 @@ dependencies = [

 [[package]]
 name = "futures-sink"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508"
+checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868"

 [[package]]
 name = "futures-task"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72"
+checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"

 [[package]]
 name = "futures-util"
-version = "0.3.19"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164"
+checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
 dependencies = [
 "futures-channel",
 "futures-core",
@@ -903,9 +903,9 @@ dependencies = [

 [[package]]
 name = "getrandom"
-version = "0.2.4"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c"
+checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77"
 dependencies = [
 "cfg-if",
 "libc",
@@ -914,22 +914,21 @@ dependencies = [

 [[package]]
 name = "gloo-timers"
-version = "0.2.2"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f16c88aa13d2656ef20d1c042086b8767bbe2bdb62526894275a1b062161b2e"
+checksum = "4d12a7f4e95cfe710f1d624fb1210b7d961a5fb05c4fd942f4feab06e61f590e"
 dependencies = [
 "futures-channel",
 "futures-core",
 "js-sys",
 "wasm-bindgen",
- "web-sys",
 ]

 [[package]]
 name = "h2"
-version = "0.3.10"
+version = "0.3.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c9de88456263e249e241fcd211d3954e2c9b0ef7ccfc235a444eb367cae3689"
+checksum = "d9f1f717ddc7b2ba36df7e871fd88db79326551d3d6f1fc406fbfd28b582ff8e"
 dependencies = [
 "bytes",
 "fnv",
@@ -940,7 +939,7 @@ dependencies = [
 "indexmap",
 "slab",
 "tokio",
- "tokio-util",
+ "tokio-util 0.6.9",
 "tracing",
 ]

@@ -997,9 +996,9 @@ dependencies = [

 [[package]]
 name = "httparse"
-version = "1.5.1"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503"
+checksum = "9100414882e15fb7feccb4897e5f0ff0ff1ca7d1a86a23208ada4d7a18e6c6c4"

 [[package]]
 name = "httpdate"
@@ -1043,9 +1042,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"

 [[package]]
 name = "hyper"
-version = "0.14.16"
+version = "0.14.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7ec3e62bdc98a2f0393a5048e4c30ef659440ea6e0e572965103e72bd836f55"
+checksum = "043f0e083e9901b6cc658a77d1eb86f4fc650bbb977a4337dd63192826aa85dd"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -1056,7 +1055,7 @@ dependencies = [
 "http-body",
 "httparse",
 "httpdate",
- "itoa 0.4.8",
+ "itoa 1.0.1",
 "pin-project-lite",
 "socket2",
 "tokio",
@@ -1176,9 +1175,9 @@ checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"

 [[package]]
 name = "js-sys"
-version = "0.3.55"
+version = "0.3.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84"
+checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04"
 dependencies = [
 "wasm-bindgen",
 ]
@@ -1194,9 +1193,9 @@ dependencies = [

 [[package]]
 name = "lalrpop"
-version = "0.19.6"
+version = "0.19.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15174f1c529af5bf1283c3bc0058266b483a67156f79589fab2a25e23cf8988"
+checksum = "852b75a095da6b69da8c5557731c3afd06525d4f655a4fc1c799e2ec8bc4dce4"
 dependencies = [
 "ascii-canvas",
 "atty",
@@ -1217,9 +1216,9 @@ dependencies = [

 [[package]]
 name = "lalrpop-util"
-version = "0.19.6"
+version = "0.19.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3e58cce361efcc90ba8a0a5f982c741ff86b603495bb15a998412e957dcd278"
+checksum = "d6d265705249fe209280676d8f68887859fa42e1d34f342fc05bd47726a5e188"
 dependencies = [
 "regex",
 ]
@@ -1252,9 +1251,9 @@ checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"

 [[package]]
 name = "libc"
-version = "0.2.112"
+version = "0.2.119"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
+checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4"

 [[package]]
 name = "libnghttp2-sys"
@@ -1280,9 +1279,9 @@ dependencies = [

 [[package]]
 name = "lock_api"
-version = "0.4.5"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109"
+checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b"
 dependencies = [
 "scopeguard",
 ]
@@ -1357,6 +1356,19 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "mio"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2"
+dependencies = [
+ "libc",
+ "log",
+ "miow",
+ "ntapi",
+ "winapi",
+]
+
 [[package]]
 name = "miow"
 version = "0.3.7"
@@ -1417,9 +1429,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"

 [[package]]
 name = "ntapi"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
+checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f"
 dependencies = [
 "winapi",
 ]
@@ -1451,9 +1463,9 @@ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"

 [[package]]
 name = "once_cell"
-version = "1.9.0"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
+checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"

 [[package]]
 name = "openssl"
@@ -1521,7 +1533,17 @@ checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
 dependencies = [
 "instant",
 "lock_api",
- "parking_lot_core",
+ "parking_lot_core 0.8.5",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58"
+dependencies = [
+ "lock_api",
+ "parking_lot_core 0.9.1",
 ]

 [[package]]
@@ -1538,6 +1560,19 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "parking_lot_core"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-sys",
+]
+
 [[package]]
 name = "percent-encoding"
 version = "2.1.0"
@@ -1561,7 +1596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
 dependencies = [
 "phf_macros",
- "phf_shared",
+ "phf_shared 0.8.0",
 "proc-macro-hack",
 ]

@@ -1572,7 +1607,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
 dependencies = [
 "phf_generator",
- "phf_shared",
+ "phf_shared 0.8.0",
 ]

 [[package]]
@@ -1581,7 +1616,7 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
 dependencies = [
- "phf_shared",
+ "phf_shared 0.8.0",
 "rand",
 ]

@@ -1592,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
 dependencies = [
 "phf_generator",
- "phf_shared",
+ "phf_shared 0.8.0",
 "proc-macro-hack",
 "proc-macro2",
 "quote",
@@ -1608,6 +1643,15 @@ dependencies = [
 "siphasher",
 ]

+[[package]]
+name = "phf_shared"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+dependencies = [
+ "siphasher",
+]
+
 [[package]]
 name = "pico-args"
 version = "0.4.2"
@@ -1724,9 +1768,9 @@ dependencies = [

 [[package]]
 name = "quote"
-version = "1.0.14"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47aa80447ce4daf1717500037052af176af5d38cc3e571d9ec1c7353fc10c87d"
+checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
 dependencies = [
 "proc-macro2",
 ]
@@ -1784,9 +1828,9 @@ dependencies = [

 [[package]]
 name = "redox_syscall"
-version = "0.2.10"
+version = "0.2.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
+checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c"
 dependencies = [
 "bitflags",
 ]
@@ -1797,7 +1841,7 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
 dependencies = [
- "getrandom 0.2.4",
+ "getrandom 0.2.5",
 "redox_syscall",
 ]

@@ -1872,9 +1916,9 @@ dependencies = [

 [[package]]
 name = "rlimit"
-version = "0.6.2"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc0bf25554376fd362f54332b8410a625c71f15445bca32ffdfdf4ec9ac91726"
+checksum = "347703a5ae47adf1e693144157be231dde38c72bd485925cae7407ad3e52480b"
 dependencies = [
 "libc",
 ]
@@ -1934,9 +1978,9 @@ dependencies = [

 [[package]]
 name = "security-framework"
-version = "2.5.0"
+version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d09d3c15d814eda1d6a836f2f2b56a6abc1446c8a34351cb3180d3db92ffe4ce"
+checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc"
 dependencies = [
 "bitflags",
 "core-foundation",
@@ -1947,9 +1991,9 @@ dependencies = [

 [[package]]
 name = "security-framework-sys"
-version = "2.5.0"
+version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e90dd10c41c6bfc633da6e0c659bd25d31e0791e5974ac42970267d59eba87f7"
+checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556"
 dependencies = [
 "core-foundation-sys",
 "libc",
@@ -1977,24 +2021,24 @@ dependencies = [

 [[package]]
 name = "semver"
-version = "1.0.4"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012"
+checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d"

 [[package]]
 name = "serde"
-version = "1.0.133"
+version = "1.0.136"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a"
+checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
 dependencies = [
 "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.133"
+version = "1.0.136"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537"
+checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2003,9 +2047,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.75"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79"
+checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
 dependencies = [
 "itoa 1.0.1",
 "ryu",
@@ -2061,7 +2105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "29fd5867f1c4f2c5be079aee7a2adf1152ebb04a4bc4d341f504b7dece607ed4"
 dependencies = [
 "libc",
- "mio",
+ "mio 0.7.14",
 "signal-hook",
 ]

@@ -2082,9 +2126,9 @@ checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"

 [[package]]
 name = "siphasher"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba1eead9e94aa5a2e02de9e7839f96a007f686ae7a1d57c7797774810d24908a"
+checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e"

 [[package]]
 name = "slab"
@@ -2111,9 +2155,9 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"

 [[package]]
 name = "socket2"
-version = "0.4.2"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516"
+checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0"
 dependencies = [
 "libc",
 "winapi",
@@ -2127,14 +2171,14 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"

 [[package]]
 name = "string_cache"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6"
+checksum = "33994d0838dc2d152d17a62adf608a869b5e846b65b389af7f3dbc1de45c5b26"
 dependencies = [
 "lazy_static",
 "new_debug_unreachable",
- "parking_lot",
- "phf_shared",
+ "parking_lot 0.11.2",
+ "phf_shared 0.10.0",
 "precomputed-hash",
 "serde",
 ]
@@ -2146,7 +2190,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
 dependencies = [
 "phf_generator",
- "phf_shared",
+ "phf_shared 0.8.0",
 "proc-macro2",
 "quote",
 ]
@@ -2159,9 +2203,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

 [[package]]
 name = "syn"
-version = "1.0.85"
+version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a684ac3dcd8913827e18cd09a68384ee66c1de24157e3c556c9ab16d85695fb7"
+checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2206,9 +2250,9 @@ dependencies = [

 [[package]]
 name = "termcolor"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
+checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
 dependencies = [
 "winapi-util",
 ]
@@ -2231,9 +2275,9 @@ checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b"

 [[package]]
 name = "textwrap"
-version = "0.14.2"
+version = "0.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"
+checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
 dependencies = [
 "terminal_size",
 ]
@@ -2290,19 +2334,20 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"

 [[package]]
 name = "tokio"
-version = "1.15.0"
+version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbbf1c778ec206785635ce8ad57fe52b3009ae9e0c9f574a728f3049d3e55838"
+checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee"
 dependencies = [
 "bytes",
 "libc",
 "memchr",
- "mio",
+ "mio 0.8.0",
 "num_cpus",
 "once_cell",
- "parking_lot",
+ "parking_lot 0.12.0",
 "pin-project-lite",
 "signal-hook-registry",
+ "socket2",
 "tokio-macros",
 "winapi",
 ]
@@ -2365,6 +2410,20 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tokio-util"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64910e1b9c1901aaf5375561e35b9c057d95ff41a44ede043a03e09279eabaf1"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "log",
+ "pin-project-lite",
+ "tokio",
+]
+
 [[package]]
 name = "toml"
 version = "0.5.8"
@@ -2382,9 +2441,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"

 [[package]]
 name = "tracing"
-version = "0.1.29"
+version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
+checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f"
 dependencies = [
 "cfg-if",
 "log",
@@ -2395,9 +2454,9 @@ dependencies = [

 [[package]]
 name = "tracing-attributes"
-version = "0.1.18"
+version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
+checksum = "8276d9a4a3a558d7b7ad5303ad50b53d58264641b82914b7ada36bd762e7a716"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2406,9 +2465,9 @@ dependencies = [

 [[package]]
 name = "tracing-core"
-version = "0.1.21"
+version = "0.1.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
+checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23"
 dependencies = [
 "lazy_static",
 ]
@@ -2481,7 +2540,7 @@ version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
 dependencies = [
- "getrandom 0.2.4",
+ "getrandom 0.2.5",
 ]

 [[package]]
@@ -2545,9 +2604,9 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"

 [[package]]
 name = "wasm-bindgen"
-version = "0.2.78"
+version = "0.2.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce"
+checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06"
 dependencies = [
 "cfg-if",
 "wasm-bindgen-macro",
@@ -2555,9 +2614,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.78"
+version = "0.2.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b"
+checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca"
 dependencies = [
 "bumpalo",
 "lazy_static",
@@ -2570,9 +2629,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.28"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39"
+checksum = "2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395"
 dependencies = [
 "cfg-if",
 "js-sys",
@@ -2582,9 +2641,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.78"
+version = "0.2.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9"
+checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01"
 dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
@@ -2592,9 +2651,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.78"
+version = "0.2.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab"
+checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2605,15 +2664,15 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.78"
+version = "0.2.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc"
+checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2"

 [[package]]
 name = "web-sys"
-version = "0.3.55"
+version = "0.3.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb"
+checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb"
 dependencies = [
 "js-sys",
 "wasm-bindgen",
@@ -2659,6 +2718,49 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

+[[package]]
+name = "windows-sys"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6"
+dependencies = [
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316"
+
 [[package]]
 name = "winreg"
 version = "0.7.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "feroxbuster"
-version = "2.5.0"
+version = "2.6.0"
 authors = ["Ben 'epi' Risher (@epi052)"]
 license = "MIT"
 edition = "2021"
@@ -16,45 +16,46 @@ build = "build.rs"
 maintenance = { status = "actively-developed" }

 [build-dependencies]
-clap = {version = "3.0", features = ["cargo"]}
-clap_complete = "3.0"
-regex = "1"
-lazy_static = "1.4"
-dirs = "4.0"
+clap = { version = "3.1.5", features = ["wrap_help", "cargo"] }
+clap_complete = "3.1.1"
+regex = "1.5.4"
+lazy_static = "1.4.0"
+dirs = "4.0.0"

 [dependencies]
-scraper = "0.12"
-futures = { version = "0.3"}
-tokio = { version = "1.15", features = ["full"] }
-tokio-util = {version = "0.6", features = ["codec"]}
-log = "0.4"
-env_logger = "0.9"
-reqwest = { version = "0.11", features = ["socks"] }
-url = { version = "2.2", features = ["serde"]}  # uses feature unification to add 'serde' to reqwest::Url
-serde_regex = "1.1"
-clap = {version = "3.0", features = ["wrap_help", "cargo"]}
-lazy_static = "1.4"
-toml = "0.5"
-serde = { version = "1.0", features = ["derive", "rc"] }
-serde_json = "1.0"
-uuid = { version = "0.8", features = ["v4"] }
+scraper = "0.12.0"
+futures = "0.3.21"
+tokio = { version = "1.17.0", features = ["full"] }
+tokio-util = { version = "0.7.0", features = ["codec"] }
+log = "0.4.14"
+env_logger = "0.9.0"
+reqwest = { version = "0.11.9", features = ["socks"] }
+# uses feature unification to add 'serde' to reqwest::Url
+url = { version = "2.2.2", features = ["serde"] }
+serde_regex = "1.1.0"
+clap = { version = "3.1.5", features = ["wrap_help", "cargo"] }
+lazy_static = "1.4.0"
+toml = "0.5.8"
+serde = { version = "1.0.136", features = ["derive", "rc"] }
+serde_json = "1.0.79"
+uuid = { version = "0.8.2", features = ["v4"] }
 indicatif = "0.15"
-console = "0.15"
-openssl = { version = "0.10", features = ["vendored"] }
-dirs = "4.0"
-regex = "1"
-crossterm = "0.20"
-rlimit = "0.6"
-ctrlc = "3.2"
+console = "0.15.0"
+openssl = { version = "0.10.38", features = ["vendored"] }
+dirs = "4.0.0"
+regex = "1.5.4"
+crossterm = "0.23.0"
+rlimit = "0.7.0"
+ctrlc = "3.2.1"
 fuzzyhash = "0.2.1"
-anyhow = "1.0"
-leaky-bucket = "0.10.0"
+anyhow = "1.0.55"
+leaky-bucket = "0.10.0"  # todo: upgrade, will take a little work/thought since api changed

 [dev-dependencies]
-tempfile = "3.3"
-httpmock = "0.6"
-assert_cmd = "2.0"
-predicates = "2.1"
+tempfile = "3.3.0"
+httpmock = "0.6.6"
+assert_cmd = "2.0.4"
+predicates = "2.1.1"

 [profile.release]
 lto = true
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -0,0 +1,18 @@
+# composite tasks
+[tasks.upgrade]
+dependencies = ["upgrade-deps", "update"]
+
+# cleaning
+[tasks.clean-state]
+script = """
+rm ferox-*.state
+"""
+
+# dependency management
+[tasks.upgrade-deps]
+command = "cargo"
+args = ["upgrade", "--exclude", "indicatif", "leaky-bucket"]
+
+[tasks.update]
+command = "cargo"
+args = ["update"]
--- a/README.md
+++ b/README.md
@@ -230,6 +230,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
    <td align="center"><a href="https://ironwort.me/"><img src="https://avatars.githubusercontent.com/u/15280042?v=4?s=100" width="100px;" alt=""/><br /><sub><b>0x08</b></sub></a><br /><a href="#ideas-its0x08" title="Ideas, Planning, & Feedback">🤔</a></td>
    <td align="center"><a href="https://github.com/MD-Levitan"><img src="https://avatars.githubusercontent.com/u/12116508?v=4?s=100" width="100px;" alt=""/><br /><sub><b>kusok</b></sub></a><br /><a href="#ideas-MD-Levitan" title="Ideas, Planning, & Feedback">🤔</a> <a href="https://github.com/epi052/feroxbuster/commits?author=MD-Levitan" title="Code">💻</a></td>
    <td align="center"><a href="https://github.com/godylockz"><img src="https://avatars.githubusercontent.com/u/81207744?v=4?s=100" width="100px;" alt=""/><br /><sub><b>godylockz</b></sub></a><br /><a href="#ideas-godylockz" title="Ideas, Planning, & Feedback">🤔</a> <a href="https://github.com/epi052/feroxbuster/commits?author=godylockz" title="Code">💻</a></td>
+    <td align="center"><a href="http://ryanmontgomery.me"><img src="https://avatars.githubusercontent.com/u/44453666?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ryan Montgomery</b></sub></a><br /><a href="#ideas-0dayCTF" title="Ideas, Planning, & Feedback">🤔</a></td>
  </tr>
 </table>

--- a/ferox-config.toml.example
+++ b/ferox-config.toml.example
@@ -30,7 +30,11 @@
 # random_agent = false
 # redirects = true
 # insecure = true
+# collect_words = true
+# collect_backups = true
+# collect_extensions = true
 # extensions = ["php", "html"]
+# dont_collect = ["png", "gif", "jpg", "jpeg"]
 # methods = ["GET", "POST"]
 # data = [11, 12, 13, 14, 15]
 # url_denylist = ["http://dont-scan.me", "https://also-not.me"]
--- a/shell_completions/_feroxbuster
+++ b/shell_completions/_feroxbuster
@@ -24,8 +24,8 @@ _feroxbuster() {
 '--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
 '*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
 '*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
-'-a+[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \
-'--user-agent=[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \
+'-a+[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \
+'--user-agent=[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \
 '*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
 '*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
 '*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \
@@ -64,6 +64,8 @@ _feroxbuster() {
 '--time-limit=[Limit total run time of all scans (ex: --time-limit 10m)]:TIME_SPEC: ' \
 '-w+[Path to the wordlist]:FILE:_files' \
 '--wordlist=[Path to the wordlist]:FILE:_files' \
+'*-I+[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \
+'*--dont-collect=[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \
 '-o+[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \
 '--output=[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \
 '--debug-log=[Output file to write log entries (use w/ --json for JSON entries)]:FILE:_files' \
@@ -72,6 +74,10 @@ _feroxbuster() {
 '-V[Print version information]' \
 '--version[Print version information]' \
 '(-u --url)--stdin[Read url(s) from STDIN]' \
+'(-p --proxy -k --insecure --burp-replay)--burp[Set --proxy to http://127.0.0.1:8080 and set --insecure to true]' \
+'(-P --replay-proxy -k --insecure)--burp-replay[Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true]' \
+'--smart[Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true]' \
+'--thorough[Use the same settings as --smart and set --collect-extensions to true]' \
 '-A[Use a random User-Agent]' \
 '--random-agent[Use a random User-Agent]' \
 '-f[Append / to each request'\''s URL]' \
@@ -88,12 +94,19 @@ _feroxbuster() {
 '--auto-bail[Automatically stop scanning when an excessive amount of errors are encountered]' \
 '-D[Don'\''t auto-filter wildcard responses]' \
 '--dont-filter[Don'\''t auto-filter wildcard responses]' \
+'-E[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \
+'--collect-extensions[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \
+'-B[Automatically request likely backup extensions for "found" urls]' \
+'--collect-backups[Automatically request likely backup extensions for "found" urls]' \
+'-g[Automatically discover important words from within responses and add them to the wordlist]' \
+'--collect-words[Automatically discover important words from within responses and add them to the wordlist]' \
 '(--silent)*-v[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \
 '(--silent)*--verbosity[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \
 '(-q --quiet)--silent[Only print URLs + turn off logging (good for piping a list of urls to other commands)]' \
 '-q[Hide progress bars and banner (good for tmux windows w/ notifications)]' \
 '--quiet[Hide progress bars and banner (good for tmux windows w/ notifications)]' \
 '--json[Emit JSON logs to --output and --debug-log instead of normal text]' \
+'--no-state[Disable state output file (*.state)]' \
 && ret=0
 }

--- a/shell_completions/_feroxbuster.ps1
+++ b/shell_completions/_feroxbuster.ps1
@@ -12,7 +12,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
            $element = $commandElements[$i]
            if ($element -isnot [StringConstantExpressionAst] -or
                $element.StringConstantType -ne [StringConstantType]::BareWord -or
-                $element.Value.StartsWith('-')) {
+                $element.Value.StartsWith('-') -or
+                $element.Value -eq $wordToComplete) {
                break
        }
        $element.Value
@@ -29,8 +30,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
            [CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
            [CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
            [CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
-            [CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)')
-            [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)')
+            [CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)')
+            [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)')
            [CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
            [CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
            [CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
@@ -69,6 +70,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
            [CompletionResult]::new('--time-limit', 'time-limit', [CompletionResultType]::ParameterName, 'Limit total run time of all scans (ex: --time-limit 10m)')
            [CompletionResult]::new('-w', 'w', [CompletionResultType]::ParameterName, 'Path to the wordlist')
            [CompletionResult]::new('--wordlist', 'wordlist', [CompletionResultType]::ParameterName, 'Path to the wordlist')
+            [CompletionResult]::new('-I', 'I', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)')
+            [CompletionResult]::new('--dont-collect', 'dont-collect', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)')
            [CompletionResult]::new('-o', 'o', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)')
            [CompletionResult]::new('--output', 'output', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)')
            [CompletionResult]::new('--debug-log', 'debug-log', [CompletionResultType]::ParameterName, 'Output file to write log entries (use w/ --json for JSON entries)')
@@ -77,6 +80,10 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
            [CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version information')
            [CompletionResult]::new('--version', 'version', [CompletionResultType]::ParameterName, 'Print version information')
            [CompletionResult]::new('--stdin', 'stdin', [CompletionResultType]::ParameterName, 'Read url(s) from STDIN')
+            [CompletionResult]::new('--burp', 'burp', [CompletionResultType]::ParameterName, 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true')
+            [CompletionResult]::new('--burp-replay', 'burp-replay', [CompletionResultType]::ParameterName, 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true')
+            [CompletionResult]::new('--smart', 'smart', [CompletionResultType]::ParameterName, 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true')
+            [CompletionResult]::new('--thorough', 'thorough', [CompletionResultType]::ParameterName, 'Use the same settings as --smart and set --collect-extensions to true')
            [CompletionResult]::new('-A', 'A', [CompletionResultType]::ParameterName, 'Use a random User-Agent')
            [CompletionResult]::new('--random-agent', 'random-agent', [CompletionResultType]::ParameterName, 'Use a random User-Agent')
            [CompletionResult]::new('-f', 'f', [CompletionResultType]::ParameterName, 'Append / to each request''s URL')
@@ -93,12 +100,19 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
            [CompletionResult]::new('--auto-bail', 'auto-bail', [CompletionResultType]::ParameterName, 'Automatically stop scanning when an excessive amount of errors are encountered')
            [CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses')
            [CompletionResult]::new('--dont-filter', 'dont-filter', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses')
+            [CompletionResult]::new('-E', 'E', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)')
+            [CompletionResult]::new('--collect-extensions', 'collect-extensions', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)')
+            [CompletionResult]::new('-B', 'B', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls')
+            [CompletionResult]::new('--collect-backups', 'collect-backups', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls')
+            [CompletionResult]::new('-g', 'g', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist')
+            [CompletionResult]::new('--collect-words', 'collect-words', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist')
            [CompletionResult]::new('-v', 'v', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)')
            [CompletionResult]::new('--verbosity', 'verbosity', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)')
            [CompletionResult]::new('--silent', 'silent', [CompletionResultType]::ParameterName, 'Only print URLs + turn off logging (good for piping a list of urls to other commands)')
            [CompletionResult]::new('-q', 'q', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)')
            [CompletionResult]::new('--quiet', 'quiet', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)')
            [CompletionResult]::new('--json', 'json', [CompletionResultType]::ParameterName, 'Emit JSON logs to --output and --debug-log instead of normal text')
+            [CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state)')
            break
        }
    })
--- a/shell_completions/feroxbuster.bash
+++ b/shell_completions/feroxbuster.bash
@@ -19,7 +19,7 @@ _feroxbuster() {

    case "${cmd}" in
        feroxbuster)
-            opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --verbosity --silent --quiet --json --output --debug-log"
+            opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o --help --version --url --stdin --resume-from --burp --burp-replay --smart --thorough --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --verbosity --silent --quiet --json --output --debug-log --no-state"
            if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
                COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
                return 0
@@ -221,6 +221,14 @@ _feroxbuster() {
                    COMPREPLY=($(compgen -f "${cur}"))
                    return 0
                    ;;
+                --dont-collect)
+                    COMPREPLY=($(compgen -f "${cur}"))
+                    return 0
+                    ;;
+                -I)
+                    COMPREPLY=($(compgen -f "${cur}"))
+                    return 0
+                    ;;
                --output)
                    COMPREPLY=($(compgen -f "${cur}"))
                    return 0
--- a/shell_completions/feroxbuster.elv
+++ b/shell_completions/feroxbuster.elv
@@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
            cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
            cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
            cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
-            cand -a 'Sets the User-Agent (default: feroxbuster/2.5.0)'
-            cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.5.0)'
+            cand -a 'Sets the User-Agent (default: feroxbuster/2.6.0)'
+            cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.6.0)'
            cand -x 'File extension(s) to search for (ex: -x php -x pdf js)'
            cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)'
            cand -m 'Which HTTP request method(s) should be sent (default: GET)'
@@ -67,6 +67,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
            cand --time-limit 'Limit total run time of all scans (ex: --time-limit 10m)'
            cand -w 'Path to the wordlist'
            cand --wordlist 'Path to the wordlist'
+            cand -I 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)'
+            cand --dont-collect 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)'
            cand -o 'Output file to write results to (use w/ --json for JSON entries)'
            cand --output 'Output file to write results to (use w/ --json for JSON entries)'
            cand --debug-log 'Output file to write log entries (use w/ --json for JSON entries)'
@@ -75,6 +77,10 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
            cand -V 'Print version information'
            cand --version 'Print version information'
            cand --stdin 'Read url(s) from STDIN'
+            cand --burp 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true'
+            cand --burp-replay 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true'
+            cand --smart 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true'
+            cand --thorough 'Use the same settings as --smart and set --collect-extensions to true'
            cand -A 'Use a random User-Agent'
            cand --random-agent 'Use a random User-Agent'
            cand -f 'Append / to each request''s URL'
@@ -91,12 +97,19 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
            cand --auto-bail 'Automatically stop scanning when an excessive amount of errors are encountered'
            cand -D 'Don''t auto-filter wildcard responses'
            cand --dont-filter 'Don''t auto-filter wildcard responses'
+            cand -E 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)'
+            cand --collect-extensions 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)'
+            cand -B 'Automatically request likely backup extensions for "found" urls'
+            cand --collect-backups 'Automatically request likely backup extensions for "found" urls'
+            cand -g 'Automatically discover important words from within responses and add them to the wordlist'
+            cand --collect-words 'Automatically discover important words from within responses and add them to the wordlist'
            cand -v 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)'
            cand --verbosity 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)'
            cand --silent 'Only print URLs + turn off logging (good for piping a list of urls to other commands)'
            cand -q 'Hide progress bars and banner (good for tmux windows w/ notifications)'
            cand --quiet 'Hide progress bars and banner (good for tmux windows w/ notifications)'
            cand --json 'Emit JSON logs to --output and --debug-log instead of normal text'
+            cand --no-state 'Disable state output file (*.state)'
        }
    ]
    $completions[$command]
--- a/src/banner/container.rs
+++ b/src/banner/container.rs
@@ -3,7 +3,7 @@ use crate::{
    config::Configuration,
    event_handlers::Handles,
    utils::{logged_request, status_colorizer},
-    DEFAULT_METHOD, VERSION,
+    DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, VERSION,
 };
 use anyhow::{bail, Result};
 use console::{style, Emoji};
@@ -151,6 +151,18 @@ pub struct Banner {

    /// whether or not there is a known new version
    pub(super) update_status: UpdateStatus,
+
+    /// represents Configuration.collect_extensions
+    collect_extensions: BannerEntry,
+
+    /// represents Configuration.dont_collect
+    dont_collect: BannerEntry,
+
+    /// represents Configuration.collect_backups
+    collect_backups: BannerEntry,
+
+    /// represents Configuration.collect_words
+    collect_words: BannerEntry,
 }

 /// implementation of Banner
@@ -314,6 +326,21 @@ impl Banner {
            &format!("[{}]", config.methods.join(", ")),
        );

+        let dont_collect = if config.dont_collect == DEFAULT_IGNORED_EXTENSIONS {
+            // the default list has 30+ extensions; show a short summary instead of printing them all
+            BannerEntry::new(
+                "💸",
+                "Ignored Extensions",
+                "[Images, Movies, Audio, etc...]",
+            )
+        } else {
+            BannerEntry::new(
+                "💸",
+                "Ignored Extensions",
+                &format!("[{}]", config.dont_collect.join(", ")),
+            )
+        };
+
        let offset = std::cmp::min(config.data.len(), 30);
        let data = String::from_utf8(config.data[..offset].to_vec())
            .unwrap_or_else(|_err| {
@@ -322,8 +349,8 @@ impl Banner {
                    &config.data[..std::cmp::min(config.data.len(), 13)]
                )
            })
-            .replace("\n", " ")
-            .replace("\r", "");
+            .replace('\n', " ")
+            .replace('\r', "");
        let data = BannerEntry::new("💣", "HTTP Body", &data);
        let insecure = BannerEntry::new("🔓", "Insecure", &config.insecure.to_string());
        let redirects = BannerEntry::new("📍", "Follow Redirects", &config.redirects.to_string());
@@ -334,6 +361,16 @@ impl Banner {
        let parallel = BannerEntry::new("🛤", "Parallel Scans", &config.parallel.to_string());
        let rate_limit =
            BannerEntry::new("🚧", "Requests per Second", &config.rate_limit.to_string());
+        let collect_extensions = BannerEntry::new(
+            "💰",
+            "Collect Extensions",
+            &config.collect_extensions.to_string(),
+        );
+        let collect_backups =
+            BannerEntry::new("🏦", "Collect Backups", &config.collect_backups.to_string());
+
+        let collect_words =
+            BannerEntry::new("🤑", "Collect Words", &config.collect_words.to_string());

        Self {
            targets,
@@ -374,6 +411,10 @@ impl Banner {
            scan_limit,
            time_limit,
            url_denylist,
+            collect_extensions,
+            collect_backups,
+            collect_words,
+            dont_collect,
            config: cfg,
            version: VERSION.to_string(),
            update_status: UpdateStatus::Unknown,
@@ -550,6 +591,20 @@ by Ben "epi" Risher {}                 ver: {}"#,
            writeln!(&mut writer, "{}", self.extensions)?;
        }

+        if config.collect_extensions {
+            // dont-collect is active only when collect-extensions is used
+            writeln!(&mut writer, "{}", self.collect_extensions)?;
+            writeln!(&mut writer, "{}", self.dont_collect)?;
+        }
+
+        if config.collect_backups {
+            writeln!(&mut writer, "{}", self.collect_backups)?;
+        }
+
+        if config.collect_words {
+            writeln!(&mut writer, "{}", self.collect_words)?;
+        }
+
        if !config.methods.is_empty() {
            writeln!(&mut writer, "{}", self.methods)?;
        }
--- a/src/client.rs
+++ b/src/client.rs
@@ -27,7 +27,8 @@ pub fn initialize(
        .user_agent(user_agent)
        .danger_accept_invalid_certs(insecure)
        .default_headers(header_map)
-        .redirect(policy);
+        .redirect(policy)
+        .http1_title_case_headers();

    if let Some(some_proxy) = proxy {
        if !some_proxy.is_empty() {
--- a/src/config/container.rs
+++ b/src/config/container.rs
@@ -1,6 +1,6 @@
 use super::utils::{
-    depth, methods, report_and_exit, save_state, serialized_type, status_codes, threads, timeout,
-    user_agent, wordlist, OutputLevel, RequesterPolicy,
+    depth, ignored_extensions, methods, report_and_exit, save_state, serialized_type, status_codes,
+    threads, timeout, user_agent, wordlist, OutputLevel, RequesterPolicy,
 };
 use crate::config::determine_output_level;
 use crate::config::utils::determine_requester_policy;
@@ -26,7 +26,7 @@ macro_rules! update_config_if_present {
        match $matches.value_of_t($arg_name) {
            Ok(value) => *$conf_val = value, // Update value
            Err(err) => {
-                if !matches!(err.kind, clap::ErrorKind::ArgumentNotFound) {
+                if !matches!(err.kind(), clap::ErrorKind::ArgumentNotFound) {
                    // Do nothing if argument not found
                    err.exit() // Exit with error on any other parse error
                }
@@ -246,8 +246,6 @@ pub struct Configuration {
    pub resume_from: String,

    /// Whether or not a scan's current state should be saved when user presses Ctrl+C
-    ///
-    /// Not configurable from CLI; can only be set from a config file
    #[serde(default = "save_state")]
    pub save_state: bool,

@@ -264,8 +262,25 @@ pub struct Configuration {
    #[serde(default)]
    pub url_denylist: Vec<Url>,

+    /// URLs that should never be scanned/recursed into based on a regular expression
    #[serde(with = "serde_regex", default)]
    pub regex_denylist: Vec<Regex>,
+
+    /// Automatically discover extensions and add them to --extensions (unless they're in --dont-collect)
+    #[serde(default)]
+    pub collect_extensions: bool,
+
+    /// don't collect any of these extensions when --collect-extensions is used
+    #[serde(default = "ignored_extensions")]
+    pub dont_collect: Vec<String>,
+
+    /// Automatically request likely backup extensions on "found" urls
+    #[serde(default)]
+    pub collect_backups: bool,
+
+    /// Automatically discover important words from within responses and add them to the wordlist
+    #[serde(default)]
+    pub collect_words: bool,
 }

 impl Default for Configuration {
@@ -310,6 +325,9 @@ impl Default for Configuration {
            no_recursion: false,
            extract_links: false,
            random_agent: false,
+            collect_extensions: false,
+            collect_backups: false,
+            collect_words: false,
            save_state: true,
            proxy: String::new(),
            config: String::new(),
@@ -335,6 +353,7 @@ impl Default for Configuration {
            depth: depth(),
            threads: threads(),
            wordlist: wordlist(),
+            dont_collect: ignored_extensions(),
        }
    }
 }
@@ -365,7 +384,11 @@ impl Configuration {
    /// - **random_agent**: `false`
    /// - **insecure**: `false` (don't be insecure, i.e. don't allow invalid certs)
    /// - **extensions**: `None`
-    /// - **methods**: [`DEFAULT_METHOD`]
+    /// - **collect_extensions**: `false`
+    /// - **collect_backups**: `false`
+    /// - **collect_words**: `false`
+    /// - **dont_collect**: [`DEFAULT_IGNORED_EXTENSIONS`](constant.DEFAULT_IGNORED_EXTENSIONS.html)
+    /// - **methods**: [`DEFAULT_METHOD`](constant.DEFAULT_METHOD.html)
    /// - **data**: `None`
    /// - **url_denylist**: `None`
    /// - **regex_denylist**: `None`
@@ -566,6 +589,10 @@ impl Configuration {
            config.extensions = arg.map(|val| val.to_string()).collect();
        }

+        if let Some(arg) = args.values_of("dont_collect") {
+            config.dont_collect = arg.map(|val| val.to_string()).collect();
+        }
+
        if let Some(arg) = args.values_of("methods") {
            config.methods = arg
                .map(|val| {
@@ -686,7 +713,7 @@ impl Configuration {
            config.output_level = OutputLevel::Quiet;
        }

-        if args.is_present("auto_tune") {
+        if args.is_present("auto_tune") || args.is_present("smart") || args.is_present("thorough") {
            config.auto_tune = true;
            config.requester_policy = RequesterPolicy::AutoTune;
        }
@@ -696,10 +723,32 @@ impl Configuration {
            config.requester_policy = RequesterPolicy::AutoBail;
        }

+        if args.is_present("no_state") {
+            config.save_state = false;
+        }
+
        if args.is_present("dont_filter") {
            config.dont_filter = true;
        }

+        if args.is_present("collect_extensions") || args.is_present("thorough") {
+            config.collect_extensions = true;
+        }
+
+        if args.is_present("collect_backups")
+            || args.is_present("smart")
+            || args.is_present("thorough")
+        {
+            config.collect_backups = true;
+        }
+
+        if args.is_present("collect_words")
+            || args.is_present("smart")
+            || args.is_present("thorough")
+        {
+            config.collect_words = true;
+        }
+
        if args.occurrences_of("verbosity") > 0 {
            // occurrences_of returns 0 if none are found; this is protected in
            // an if block for the same reason as the quiet option
@@ -714,7 +763,10 @@ impl Configuration {
            config.add_slash = true;
        }

-        if args.is_present("extract_links") {
+        if args.is_present("extract_links")
+            || args.is_present("smart")
+            || args.is_present("thorough")
+        {
            config.extract_links = true;
        }

@@ -730,6 +782,14 @@ impl Configuration {
        update_config_if_present!(&mut config.user_agent, args, "user_agent");
        update_config_if_present!(&mut config.timeout, args, "timeout");

+        if args.is_present("burp") {
+            config.proxy = String::from("http://127.0.0.1:8080");
+        }
+
+        if args.is_present("burp_replay") {
+            config.replay_proxy = String::from("http://127.0.0.1:8080");
+        }
+
        if args.is_present("random_agent") {
            config.random_agent = true;
        }
@@ -738,7 +798,8 @@ impl Configuration {
            config.redirects = true;
        }

-        if args.is_present("insecure") {
+        if args.is_present("insecure") || args.is_present("burp") || args.is_present("burp_replay")
+        {
            config.insecure = true;
        }

@@ -872,6 +933,9 @@ impl Configuration {
        update_if_not_default!(&mut conf.quiet, new.quiet, false);
        update_if_not_default!(&mut conf.auto_bail, new.auto_bail, false);
        update_if_not_default!(&mut conf.auto_tune, new.auto_tune, false);
+        update_if_not_default!(&mut conf.collect_extensions, new.collect_extensions, false);
+        update_if_not_default!(&mut conf.collect_backups, new.collect_backups, false);
+        update_if_not_default!(&mut conf.collect_words, new.collect_words, false);
        // use updated quiet/silent values to determine output level; same for requester policy
        conf.output_level = determine_output_level(conf.quiet, conf.silent);
        conf.requester_policy = determine_requester_policy(conf.auto_tune, conf.auto_bail);
@@ -941,6 +1005,11 @@ impl Configuration {
        // status_codes() is the default for replay_codes, if they're not provided
        update_if_not_default!(&mut conf.replay_codes, new.replay_codes, status_codes());
        update_if_not_default!(&mut conf.save_state, new.save_state, save_state());
+        update_if_not_default!(
+            &mut conf.dont_collect,
+            new.dont_collect,
+            ignored_extensions()
+        );
    }

    /// If present, read in `DEFAULT_CONFIG_NAME` and deserialize the specified values
--- a/src/config/tests.rs
+++ b/src/config/tests.rs
@@ -30,7 +30,11 @@ fn setup_config_test() -> Configuration {
            resume_from = "/some/state/file"
            redirects = true
            insecure = true
+            collect_backups = true
+            collect_extensions = true
+            collect_words = true
            extensions = ["html", "php", "js"]
+            dont_collect = ["png", "gif", "jpg", "jpeg"]
            methods = ["GET", "PUT", "DELETE"]
            data = [31, 32, 33, 34]
            url_denylist = ["http://dont-scan.me", "https://also-not.me"]
@@ -94,6 +98,9 @@ fn default_configuration() {
    assert!(!config.redirects);
    assert!(!config.extract_links);
    assert!(!config.insecure);
+    assert!(!config.collect_extensions);
+    assert!(!config.collect_backups);
+    assert!(!config.collect_words);
    assert!(config.regex_denylist.is_empty());
    assert_eq!(config.queries, Vec::new());
    assert_eq!(config.filter_size, Vec::<u64>::new());
@@ -101,6 +108,7 @@ fn default_configuration() {
    assert_eq!(config.methods, vec!["GET"]);
    assert_eq!(config.data, Vec::<u8>::new());
    assert_eq!(config.url_denylist, Vec::<Url>::new());
+    assert_eq!(config.dont_collect, ignored_extensions());
    assert_eq!(config.filter_regex, Vec::<String>::new());
    assert_eq!(config.filter_similar, Vec::<String>::new());
    assert_eq!(config.filter_word_count, Vec::<usize>::new());
@@ -291,6 +299,27 @@ fn config_reads_extract_links() {
    assert!(config.extract_links);
 }

+#[test]
+/// parse the test config and see that the value parsed is correct
+fn config_reads_collect_extensions() {
+    let config = setup_config_test();
+    assert!(config.collect_extensions);
+}
+
+#[test]
+/// parse the test config and see that the value parsed is correct
+fn config_reads_collect_backups() {
+    let config = setup_config_test();
+    assert!(config.collect_backups);
+}
+
+#[test]
+/// parse the test config and see that the value parsed is correct
+fn config_reads_collect_words() {
+    let config = setup_config_test();
+    assert!(config.collect_words);
+}
+
 #[test]
 /// parse the test config and see that the value parsed is correct
 fn config_reads_extensions() {
@@ -298,6 +327,13 @@ fn config_reads_extensions() {
    assert_eq!(config.extensions, vec!["html", "php", "js"]);
 }

+#[test]
+/// parse the test config and see that the value parsed is correct
+fn config_reads_dont_collect() {
+    let config = setup_config_test();
+    assert_eq!(config.dont_collect, vec!["png", "gif", "jpg", "jpeg"]);
+}
+
 #[test]
 /// parse the test config and see that the value parsed is correct
 fn config_reads_methods() {
--- a/src/config/utils.rs
+++ b/src/config/utils.rs
@@ -1,6 +1,6 @@
 use crate::{
    utils::{module_colorizer, status_colorizer},
-    DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION,
+    DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION,
 };
 #[cfg(not(test))]
 use std::process::exit;
@@ -57,6 +57,14 @@ pub(super) fn methods() -> Vec<String> {
    vec![DEFAULT_METHOD.to_owned()]
 }

+/// default extensions to ignore while auto-collecting
+pub(super) fn ignored_extensions() -> Vec<String> {
+    DEFAULT_IGNORED_EXTENSIONS
+        .iter()
+        .map(|s| s.to_string())
+        .collect()
+}
+
 /// default wordlist
 pub(super) fn wordlist() -> String {
    String::from(DEFAULT_WORDLIST)
--- a/src/event_handlers/command.rs
+++ b/src/event_handlers/command.rs
@@ -5,6 +5,7 @@ use tokio::sync::oneshot::Sender;

 use crate::response::FeroxResponse;
 use crate::{
+    message::FeroxMessage,
    statistics::{StatError, StatField},
    traits::FeroxFilter,
 };
@@ -66,6 +67,12 @@ pub enum Command {
    /// Just receive a sender and reply, used for slowing down the main thread
    Sync(Sender<bool>),

+    /// Notify event handler that a new extension has been seen
+    AddDiscoveredExtension(String),
+
+    /// Write an arbitrary `FeroxMessage` to disk
+    WriteToDisk(Box<FeroxMessage>),
+
    /// Break out of the (infinite) mpsc receive loop
    Exit,
 }
--- a/src/event_handlers/container.rs
+++ b/src/event_handlers/container.rs
@@ -6,6 +6,7 @@ use crate::Joiner;
 #[cfg(test)]
 use crate::{filters::FeroxFilters, statistics::Stats, Command};
 use anyhow::{bail, Result};
+use std::collections::HashSet;
 use std::sync::{Arc, RwLock};
 #[cfg(test)]
 use tokio::sync::mpsc::{self, UnboundedReceiver};
@@ -56,6 +57,9 @@ pub struct Handles {

    /// Handle for recursion
    pub scans: RwLock<Option<ScanHandle>>,
+
+    /// Pointer to the list of words generated from reading in the wordlist
+    pub wordlist: Arc<Vec<String>>,
 }

 /// implementation of Handles
@@ -66,6 +70,7 @@ impl Handles {
        filters: FiltersHandle,
        output: TermOutHandle,
        config: Arc<Configuration>,
+        wordlist: Arc<Vec<String>>,
    ) -> Self {
        Self {
            stats,
@@ -73,6 +78,7 @@ impl Handles {
            output,
            config,
            scans: RwLock::new(None),
+            wordlist,
        }
    }

@@ -87,7 +93,14 @@ impl Handles {
        let terminal_handle = TermOutHandle::new(tx.clone(), tx.clone());
        let stats_handle = StatsHandle::new(Arc::new(Stats::new(configuration.json)), tx.clone());
        let filters_handle = FiltersHandle::new(Arc::new(FeroxFilters::default()), tx.clone());
-        let handles = Self::new(stats_handle, filters_handle, terminal_handle, configuration);
+        let wordlist = Arc::new(vec![String::from("this_is_a_test")]);
+        let handles = Self::new(
+            stats_handle,
+            filters_handle,
+            terminal_handle,
+            configuration,
+            wordlist,
+        );
        if let Some(sh) = scanned_urls {
            let scan_handle = ScanHandle::new(sh, tx);
            handles.set_scan_handle(scan_handle);
@@ -116,6 +129,46 @@ impl Handles {
        bail!("Could not get underlying CommandSender object")
    }

+    /// wrapper to reach into `FeroxScans` and yank out the length of `collected_extensions`
+    pub fn num_collected_extensions(&self) -> usize {
+        if !self.config.collect_extensions {
+            // if --collect-extensions wasn't used, simply return 0 and forego unlocking
+            return 0;
+        }
+
+        self.collected_extensions().len()
+    }
+
+    /// wrapper to reach into `FeroxScans` and yank out a copy of `collected_extensions`
+    pub fn collected_extensions(&self) -> HashSet<String> {
+        if let Ok(scans) = self.ferox_scans() {
+            if let Ok(extensions) = scans.collected_extensions.read() {
+                return extensions.clone();
+            }
+        }
+
+        HashSet::new()
+    }
+
+    /// number of words in the wordlist, multiplied by `expected_num_requests_multiplier`
+    pub fn expected_num_requests_per_dir(&self) -> usize {
+        let num_words = self.wordlist.len();
+        let multiplier = self.expected_num_requests_multiplier();
+        multiplier * num_words
+    }
+
+    /// number of extensions plus the number of request method types plus any dynamically collected
+    /// extensions
+    pub fn expected_num_requests_multiplier(&self) -> usize {
+        let multiplier = self.config.extensions.len()
+            + self.config.methods.len()
+            + self.num_collected_extensions();
+
+        // methods should always have at least 1 member, likely making this .max call unneeded
+        // but leaving it for 'just in case' reasons
+        multiplier.max(1)
+    }
+
    /// Helper to easily get the (locked) underlying FeroxScans object
    pub fn ferox_scans(&self) -> Result<Arc<FeroxScans>> {
        if let Ok(guard) = self.scans.read().as_ref() {
--- a/src/event_handlers/outputs.rs
+++ b/src/event_handlers/outputs.rs
@@ -2,11 +2,14 @@ use super::Command::AddToUsizeField;
 use super::*;

 use anyhow::{Context, Result};
+use futures::future::{BoxFuture, FutureExt};
 use tokio::sync::{mpsc, oneshot};

+use crate::statistics::StatField::TotalExpected;
 use crate::{
    config::Configuration,
    progress::PROGRESS_PRINTER,
+    response::FeroxResponse,
    scanner::RESPONSES,
    send_command, skip_fail,
    statistics::StatField::ResourcesDiscovered,
@@ -15,6 +18,17 @@ use crate::{
    CommandReceiver, CommandSender, Joiner,
 };
 use std::sync::Arc;
+use url::Url;
+
+#[derive(Debug, Copy, Clone)]
+/// Simple enum for semantic clarity around calling expectations for `process_response`
+enum ProcessResponseCall {
+    /// call should allow recursion
+    Recursive,
+
+    /// call should not allow recursion
+    NotRecursive,
+}

 #[derive(Debug)]
 /// Container for terminal output transmitter
@@ -90,6 +104,12 @@ impl FileOutHandler {
                Command::Report(response) => {
                    skip_fail!(write_to(&*response, &mut file, self.config.json));
                }
+                Command::WriteToDisk(message) => {
+                    // todo consider making report accept dyn FeroxSerialize; would mean adding
+                    //  as_any/box_eq/PartialEq to the trait and then adding them to the
+                    //  implementing structs
+                    skip_fail!(write_to(&*message, &mut file, self.config.json));
+                }
                Command::Exit => {
                    break;
                }
@@ -185,56 +205,9 @@ impl TermOutHandler {

        while let Some(command) = self.receiver.recv().await {
            match command {
-                Command::Report(mut resp) => {
-                    let contains_sentry =
-                        self.config.status_codes.contains(&resp.status().as_u16());
-                    let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown
-                    let should_process_response = contains_sentry && unknown_sentry;
-
-                    if should_process_response {
-                        // print to stdout
-                        ferox_print(&resp.as_str(), &PROGRESS_PRINTER);
-
-                        send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1));
-
-                        if self.file_task.is_some() {
-                            // -o used, need to send the report to be written out to disk
-                            self.tx_file
-                                .send(Command::Report(resp.clone()))
-                                .with_context(|| {
-                                    fmt_err(&format!("Could not send {} to file handler", resp))
-                                })?;
-                        }
-                    }
-                    log::trace!("report complete: {}", resp.url());
-
-                    if self.config.replay_client.is_some() && should_process_response {
-                        // replay proxy specified/client created and this response's status code is one that
-                        // should be replayed; not using logged_request due to replay proxy client
-                        make_request(
-                            self.config.replay_client.as_ref().unwrap(),
-                            resp.url(),
-                            resp.method().as_str(),
-                            None,
-                            self.config.output_level,
-                            &self.config,
-                            tx_stats.clone(),
-                        )
-                        .await
-                        .with_context(|| "Could not replay request through replay proxy")?;
-                    }
-
-                    if should_process_response {
-                        // add response to RESPONSES for serialization in case of ctrl+c
-                        // placed all by its lonesome like this so that RESPONSES can take ownership
-                        // of the FeroxResponse
-
-                        // before ownership is transferred, there's no real reason to keep the body anymore
-                        // so we can free that piece of data, reducing memory usage
-                        resp.drop_text();
-
-                        RESPONSES.insert(*resp);
-                    }
+                Command::Report(resp) => {
+                    self.process_response(tx_stats.clone(), resp, ProcessResponseCall::Recursive)
+                        .await?;
                }
                Command::Sync(sender) => {
                    sender.send(true).unwrap_or_default();
@@ -251,6 +224,175 @@ impl TermOutHandler {
        log::trace!("exit: start");
        Ok(())
    }
+
+    /// upon receiving a `FeroxResponse` from the mpsc, handle printing, sending to the replay
+    /// proxy, checking for backups of the `FeroxResponse`'s url, and tracking the response.
+    fn process_response(
+        &self,
+        tx_stats: CommandSender,
+        mut resp: Box<FeroxResponse>,
+        call_type: ProcessResponseCall,
+    ) -> BoxFuture<'_, Result<()>> {
+        log::trace!("enter: process_response({:?}, {:?})", resp, call_type);
+
+        async move {
+            let contains_sentry = self.config.status_codes.contains(&resp.status().as_u16());
+            let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown
+            let should_process_response = contains_sentry && unknown_sentry;
+
+            if should_process_response {
+                // print to stdout
+                ferox_print(&resp.as_str(), &PROGRESS_PRINTER);
+
+                send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1));
+
+                if self.file_task.is_some() {
+                    // -o used, need to send the report to be written out to disk
+                    self.tx_file
+                        .send(Command::Report(resp.clone()))
+                        .with_context(|| {
+                            fmt_err(&format!("Could not send {} to file handler", resp))
+                        })?;
+                }
+            }
+            log::trace!("report complete: {}", resp.url());
+
+            if self.config.replay_client.is_some() && should_process_response {
+                // replay proxy specified/client created and this response's status code is one that
+                // should be replayed; not using logged_request due to replay proxy client
+                let data = if self.config.data.is_empty() {
+                    None
+                } else {
+                    Some(self.config.data.as_slice())
+                };
+
+                make_request(
+                    self.config.replay_client.as_ref().unwrap(),
+                    resp.url(),
+                    resp.method().as_str(),
+                    data,
+                    self.config.output_level,
+                    &self.config,
+                    tx_stats.clone(),
+                )
+                .await
+                .with_context(|| "Could not replay request through replay proxy")?;
+            }
+
+            if self.config.collect_backups
+                && should_process_response
+                && matches!(call_type, ProcessResponseCall::Recursive)
+            {
+                // --collect-backups was used; the response is one we care about, and the function
+                // call came from the loop in `.start` (i.e. recursive was specified)
+                let backup_urls = self.generate_backup_urls(&resp).await;
+
+                // need to manually adjust stats
+                send_command!(tx_stats, AddToUsizeField(TotalExpected, backup_urls.len()));
+
+                for backup_url in &backup_urls {
+                    let backup_response = make_request(
+                        &self.config.client,
+                        backup_url,
+                        resp.method().as_str(),
+                        None,
+                        self.config.output_level,
+                        &self.config,
+                        tx_stats.clone(),
+                    )
+                    .await
+                    .with_context(|| {
+                        format!("Could not request backup of {}", resp.url().as_str())
+                    })?;
+
+                    let ferox_response = FeroxResponse::from(
+                        backup_response,
+                        resp.url().as_str(),
+                        resp.method().as_str(),
+                        resp.output_level,
+                    )
+                    .await;
+
+                    self.process_response(
+                        tx_stats.clone(),
+                        Box::new(ferox_response),
+                        ProcessResponseCall::NotRecursive,
+                    )
+                    .await?;
+                }
+            }
+
+            if should_process_response {
+                // add response to RESPONSES for serialization in case of ctrl+c
+                // placed all by its lonesome like this so that RESPONSES can take ownership
+                // of the FeroxResponse
+
+                // before ownership is transferred, there's no real reason to keep the body anymore
+                // so we can free that piece of data, reducing memory usage
+                resp.drop_text();
+
+                RESPONSES.insert(*resp);
+            }
+            log::trace!("exit: process_response");
+            Ok(())
+        }
+        .boxed()
+    }
+
+    /// internal helper to stay DRY
+    ///
+    /// clones `url`, swaps only its final path segment for `new_name`, and pushes the result
+    /// onto `urls`. everything up to the last `/` of the original path is kept, so a backup
+    /// candidate for `/admin/derp.php` is `/admin/derp.php~` rather than `/derp.php~`
+    fn add_new_url_to_vec(&self, url: &Url, new_name: &str, urls: &mut Vec<Url>) {
+        // `set_path` replaces the *entire* path, so the parent directory has to be carried
+        // over explicitly; "" covers the (unexpected) case of a path without any `/`
+        let parent = url.path().rsplit_once('/').map_or("", |(dir, _)| dir);
+
+        let mut new_url = url.clone();
+        new_url.set_path(&format!("{}/{}", parent, new_name));
+        urls.push(new_url);
+    }
+
+    /// given a `FeroxResponse`, generate either 6 or 7 urls that are likely backups of the
+    /// original.
+    ///
+    /// example:
+    ///     original: LICENSE.txt
+    ///     backups:    
+    ///         - LICENSE.txt~
+    ///         - LICENSE.txt.bak
+    ///         - LICENSE.txt.bak2
+    ///         - LICENSE.txt.old
+    ///         - LICENSE.txt.1
+    ///         - LICENSE.bak
+    ///         - .LICENSE.txt.swp
+    async fn generate_backup_urls(&self, response: &FeroxResponse) -> Vec<Url> {
+        log::trace!("enter: generate_backup_urls({:?})", response);
+
+        let mut urls = vec![];
+        let url = response.url();
+
+        // confirmed safe: see src/response.rs for comments
+        let filename = url.path_segments().unwrap().last().unwrap();
+
+        if !filename.is_empty() {
+            // append rules
+            for suffix in ["~", ".bak", ".bak2", ".old", ".1"] {
+                self.add_new_url_to_vec(url, &format!("{}{}", filename, suffix), &mut urls);
+            }
+
+            // vim swap rule
+            self.add_new_url_to_vec(url, &format!(".{}.swp", filename), &mut urls);
+
+            // replace original extension rule
+            let parts: Vec<_> = filename
+                .split('.')
+                // keep things like /.bash_history out of results
+                .filter(|part| !part.is_empty())
+                .collect();
+
+            if parts.len() > 1 {
+                // filename + at least one extension, i.e. whatever.js becomes ["whatever", "js"]
+                self.add_new_url_to_vec(url, &format!("{}.bak", parts.first().unwrap()), &mut urls);
+            }
+        }
+
+        log::trace!("exit: generate_backup_urls -> {:?}", urls);
+        urls
+    }
 }

 #[cfg(test)]
@@ -286,4 +428,89 @@ mod tests {
        println!("{:?}", toh);
        tx.send(Command::Exit).unwrap();
    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+    /// when the feroxresponse's url contains an extension, there should be 7 urls returned
+    async fn generate_backup_urls_creates_correct_urls_when_extension_present() {
+        let (tx, rx) = mpsc::unbounded_channel::<Command>();
+        let (tx_file, _) = mpsc::unbounded_channel::<Command>();
+        let config = Arc::new(Configuration::new().unwrap());
+
+        let toh = TermOutHandler {
+            config,
+            file_task: None,
+            receiver: rx,
+            tx_file,
+        };
+
+        let expected: Vec<_> = vec![
+            "derp.php~",
+            "derp.php.bak",
+            "derp.php.bak2",
+            "derp.php.old",
+            "derp.php.1",
+            ".derp.php.swp",
+            "derp.bak",
+        ];
+
+        let mut fr = FeroxResponse::default();
+        fr.set_url("http://localhost/derp.php");
+
+        let urls = toh.generate_backup_urls(&fr).await;
+
+        let paths: Vec<_> = urls
+            .iter()
+            .map(|url| url.path_segments().unwrap().last().unwrap())
+            .collect();
+
+        assert_eq!(urls.len(), 7);
+
+        for path in paths {
+            assert!(expected.contains(&path));
+        }
+
+        tx.send(Command::Exit).unwrap();
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+    /// when the feroxresponse's url doesn't contain an extension, there should be 6 urls returned
+    async fn generate_backup_urls_creates_correct_urls_when_extension_not_present() {
+        let (tx, rx) = mpsc::unbounded_channel::<Command>();
+        let (tx_file, _) = mpsc::unbounded_channel::<Command>();
+        let config = Arc::new(Configuration::new().unwrap());
+
+        let toh = TermOutHandler {
+            config,
+            file_task: None,
+            receiver: rx,
+            tx_file,
+        };
+
+        let expected: Vec<_> = vec![
+            "derp~",
+            "derp.bak",
+            "derp.bak2",
+            "derp.old",
+            "derp.1",
+            ".derp.swp",
+        ];
+
+        let mut fr = FeroxResponse::default();
+        fr.set_url("http://localhost/derp");
+
+        let urls = toh.generate_backup_urls(&fr).await;
+
+        let paths: Vec<_> = urls
+            .iter()
+            .map(|url| url.path_segments().unwrap().last().unwrap())
+            .collect();
+
+        assert_eq!(urls.len(), 6);
+
+        for path in paths {
+            assert!(expected.contains(&path));
+        }
+
+        tx.send(Command::Exit).unwrap();
+    }
 }
--- a/src/event_handlers/scans.rs
+++ b/src/event_handlers/scans.rs
@@ -15,6 +15,7 @@ use crate::{

 use super::command::Command::AddToUsizeField;
 use super::*;
+use crate::statistics::StatField;
 use reqwest::Url;
 use tokio::time::Duration;

@@ -176,6 +177,23 @@ impl ScanHandler {
                Command::Sync(sender) => {
                    sender.send(true).unwrap_or_default();
                }
+                Command::AddDiscoveredExtension(new_extension) => {
+                    // if --collect-extensions was used, AND the new extension isn't in
+                    // the --dont-collect list AND it's also not in the --extensions list, AND
+                    // we actually added a new extension (i.e. wasn't previously known), add
+                    // it to FeroxScans.collected_extensions
+                    if self.handles.config.collect_extensions
+                        && !self.handles.config.dont_collect.contains(&new_extension)
+                        && !self.handles.config.extensions.contains(&new_extension)
+                        && self.data.add_discovered_extension(new_extension)
+                    {
+                        self.update_all_bar_lengths()?;
+                        self.handles
+                            .stats
+                            .send(Command::AddToUsizeField(StatField::ExtensionsCollected, 1))
+                            .unwrap_or_default();
+                    }
+                }
                _ => {} // no other commands needed for RecursionHandler
            }
        }
@@ -184,6 +202,93 @@ impl ScanHandler {
        Ok(())
    }

+    /// update all current and future bar lengths
+    ///
+    /// updating all bar lengths correctly requires a few different actions on our part.
+    /// - get the current number of requests expected per scan (dynamic when --collect-extensions
+    ///     is used)
+    /// - update the overall progress bar via the statistics handler (total expected)
+    /// - update the expected per scan value tracked in the statistics handler
+    /// - update progress bars on each FeroxScan (type::directory) that are running/not-started
+    /// - update progress bar length on FeroxScans (this is used when creating a new FeroxScan and
+    ///     determines the new scan's progress bar length)
+    ///
+    /// returns an error if the wordlist can't be retrieved or if either command can't be sent
+    /// to the statistics handler
+    fn update_all_bar_lengths(&self) -> Result<()> {
+        log::trace!("enter: update_all_bar_lengths");
+
+        // current number of requests expected per scan
+        // ExpectedPerScan and TotalExpected are a += action, so we need the wordlist length to
+        // update them while the other updates use expected_num_requests_per_dir
+        let num_words = self.get_wordlist()?.len();
+        let current_expectation = self.handles.expected_num_requests_per_dir() as u64;
+
+        // used in the calculation of bar width down below, see explanation there
+        //
+        // the divisor is the multiplier minus the newly added extension. with no --extensions
+        // and an empty methods list, the first collected extension gives a multiplier of 1, so
+        // the result is clamped to 1 to keep the division below from panicking on zero
+        let multiplier = self.handles.expected_num_requests_multiplier() as u64;
+        let divisor = multiplier.saturating_sub(1).max(1);
+
+        // add another `wordlist.len` to the expected per scan tracker in the statistics handler
+        self.handles
+            .stats
+            .send(AddToUsizeField(StatField::ExpectedPerScan, num_words))?;
+
+        // since we're adding extensions in the middle of scans (potentially), we need to take
+        // current number of requests into account, new_total will be used as an accumulator
+        // used to increment the overall progress bar
+        let mut new_total = 0;
+
+        if let Ok(ferox_scans) = self.handles.ferox_scans() {
+            // update progress bar length on FeroxScans, which is used when creating a new
+            // FeroxScan's progress bar and should mirror the expected_per_scan field on Statistics
+            ferox_scans.set_bar_length(current_expectation);
+
+            if let Ok(scans_guard) = ferox_scans.scans.read() {
+                // update progress bars on each FeroxScan where its scan type is directory and
+                // scan status is either running or not-started
+                for scan in scans_guard.iter() {
+                    if scan.is_active() {
+                        // current number of words left in the 'to-scan' bin, for example:
+                        //
+                        // say we have a 2000 word wordlist, have `-x js` on the command line, and
+                        // just found `php` as a new extension
+                        //
+                        // that puts our state at:
+                        // - wordlist length: 2000
+                        // - total expected: 4000 (original length * 2 for -x js)
+                        //
+                        // let's assume the current scan has sent 3000 requests so far
+                        // that means to get the number of `words` left to send, we need to take
+                        // the difference of 4000 and 3000 and then divide that by the current
+                        // multiplier (2 in the example)
+                        //
+                        // (4000 - 3000) / 2 => 500 words left to send
+                        //
+                        // the remaining 500 words will be sent as 3 variations (word, word.js,
+                        // word.php). So, we would then need to increment the bar by 500 to
+                        // reflect the dynamism of adding extensions mid-scan.
+                        let bar = scan.progress_bar();
+
+                        // saturating_sub guards against an underflow panic should the bar's
+                        // position ever be ahead of its length
+                        let length = bar.length();
+                        let num_words_left = length.saturating_sub(bar.position()) / divisor;
+
+                        // accumulate each bar's increment value for incrementing the total bar
+                        new_total += num_words_left;
+
+                        bar.inc_length(num_words_left);
+                    }
+                }
+            }
+
+            // add the total number of newly expected requests to the overall progress bar
+            // via the statistics handler
+            self.handles.stats.send(AddToUsizeField(
+                StatField::TotalExpected,
+                new_total as usize,
+            ))?;
+        }
+
+        log::trace!("exit: update_all_bar_lengths");
+        Ok(())
+    }
+
    /// Helper to easily get the (locked) underlying wordlist
    pub fn get_wordlist(&self) -> Result<Arc<Vec<String>>> {
        if let Ok(guard) = self.wordlist.lock().as_ref() {
--- a/src/event_handlers/statistics.rs
+++ b/src/event_handlers/statistics.rs
@@ -103,7 +103,7 @@ impl StatsHandler {
                Command::AddToUsizeField(field, value) => {
                    self.stats.update_usize_field(field, value);

-                    if matches!(field, StatField::TotalScans) {
+                    if matches!(field, StatField::TotalScans | StatField::TotalExpected) {
                        self.bar.set_length(self.stats.total_expected() as u64);
                    }
                }
--- a/src/extractor/builder.rs
+++ b/src/extractor/builder.rs
@@ -16,14 +16,14 @@ pub(super) const ROBOTS_TXT_REGEX: &str =
 /// Which type of extraction should be performed
 #[derive(Debug, Copy, Clone)]
 pub enum ExtractionTarget {
-    /// Examine a response body and extract links
+    /// Examine a response body and extract javascript and html links (multiple tags)
    ResponseBody,

    /// Examine robots.txt (specifically) and extract links
    RobotsTxt,

-    // Parse HTML and extract links
-    ParseHtml,
+    /// Extract all <a> tags from a page
+    DirectoryListing,
 }

 /// responsible for building an `Extractor`
@@ -79,9 +79,9 @@ impl<'a> ExtractorBuilder<'a> {
        self
    }

-    /// finalize configuration of ExtratorBuilder and return an Extractor
+    /// finalize configuration of `ExtractorBuilder` and return an `Extractor`
    ///
-    /// requires either with_url or with_response to have been used in the build process
+    /// requires either `with_url` or `with_response` to have been used in the build process
    pub fn build(&self) -> Result<Extractor<'a>> {
        if (self.url.is_empty() && self.response.is_none()) || self.handles.is_none() {
            bail!("Extractor requires a URL or a FeroxResponse be specified as well as a Handles object")
--- a/src/extractor/container.rs
+++ b/src/extractor/container.rs
@@ -1,5 +1,4 @@
 use super::*;
-use crate::utils::should_deny_url;
 use crate::{
    client,
    event_handlers::{
@@ -13,11 +12,11 @@ use crate::{
        StatField::{LinksExtracted, TotalExpected},
    },
    url::FeroxUrl,
-    utils::{logged_request, make_request},
-    DEFAULT_METHOD,
+    utils::{logged_request, make_request, should_deny_url},
+    ExtractionResult, DEFAULT_METHOD,
 };
 use anyhow::{bail, Context, Result};
-use reqwest::{StatusCode, Url};
+use reqwest::{Client, StatusCode, Url};
 use scraper::{Html, Selector};
 use std::collections::HashSet;
 use tokio::sync::oneshot;
@@ -57,19 +56,77 @@ pub struct Extractor<'a> {
 /// Extractor implementation
 impl<'a> Extractor<'a> {
    /// perform extraction from the given target and return any links found
-    pub async fn extract(&self) -> Result<(HashSet<String>, bool)> {
-        log::trace!("enter: extract (this fn has associated trace exit msg)");
+    pub async fn extract(&self) -> Result<ExtractionResult> {
+        log::trace!(
+            "enter: extract({:?}) (this fn has no associated trace exit msg)",
+            self.target
+        );
        match self.target {
            ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?),
            ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?),
-            ExtractionTarget::ParseHtml => Ok(self.parse_html().await?),
+            ExtractionTarget::DirectoryListing => Ok(self.extract_from_dir_listing().await?),
        }
    }

+    /// wrapper around logic that performs the following:
+    /// - parses `url_to_parse`
+    /// - bails if the parsed url doesn't belong to the original host/domain
+    /// - otherwise (or when it's a relative url), calls `add_all_sub_paths` with the path
+    fn parse_url_and_add_subpaths(
+        &self,
+        url_to_parse: &str,
+        original_url: &Url,
+        links: &mut HashSet<String>,
+    ) -> Result<()> {
+        log::trace!("enter: parse_url_and_add_subpaths({}, {:?})", url_to_parse, links);
+
+        match Url::parse(url_to_parse) {
+            Ok(absolute) => {
+                if absolute.domain() != original_url.domain()
+                    || absolute.host() != original_url.host()
+                {
+                    // domains/ips are not the same, don't scan things that aren't part of the original
+                    // target url
+                    bail!("parsed url does not belong to original domain/host");
+                }
+
+                if self.add_all_sub_paths(absolute.path(), links).is_err() {
+                    log::warn!("could not add sub-paths from {} to {:?}", absolute, links);
+                }
+            }
+            Err(e) => {
+                // this is the expected error that happens when we try to parse a url fragment
+                //     ex: Url::parse("/login") -> Err("relative URL without a base")
+                // while this is technically an error, these are good results for us
+                if e.to_string().contains("relative URL without a base") {
+                    if self.add_all_sub_paths(url_to_parse, links).is_err() {
+                        log::warn!(
+                            "could not add sub-paths from {} to {:?}",
+                            url_to_parse,
+                            links
+                        );
+                    }
+                } else {
+                    // unexpected error has occurred
+                    log::warn!("Could not parse given url: {}", e);
+                    self.handles.stats.send(AddError(Other)).unwrap_or_default();
+                }
+            }
+        }
+
+        log::trace!("exit: parse_url_and_add_subpaths");
+        Ok(())
+    }
+
    /// given a set of links from a normal http body response, task the request handler to make
    /// the requests
-    pub async fn request_links(&self, links: HashSet<String>) -> Result<()> {
+    pub async fn request_links(&mut self, links: HashSet<String>) -> Result<()> {
        log::trace!("enter: request_links({:?})", links);
+
+        if links.is_empty() {
+            return Ok(());
+        }
+
        let recursive = if self.handles.config.no_recursion {
            RecursionStatus::NotRecursive
        } else {
@@ -77,6 +134,7 @@ impl<'a> Extractor<'a> {
        };

        let scanned_urls = self.handles.ferox_scans()?;
+        self.update_stats(links.len())?;

        for link in links {
            let mut resp = match self.request_link(&link).await {
@@ -100,6 +158,10 @@ impl<'a> Extractor<'a> {

                scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest);

+                if self.handles.config.collect_extensions {
+                    resp.parse_extension(self.handles.clone())?;
+                }
+
                if let Err(e) = resp.send_report(self.handles.output.tx.clone()) {
                    log::warn!("Could not send FeroxResponse to output handler: {}", e);
                }
@@ -135,8 +197,26 @@ impl<'a> Extractor<'a> {
        Ok(())
    }

-    /// Given a `reqwest::Response`, perform the following actions
-    ///   - parse the response's text for links using the linkfinder regex
+    /// wrapper around link extraction via html attributes
+    fn extract_all_links_from_html_tags(
+        &self,
+        resp_url: &Url,
+        links: &mut HashSet<String>,
+        html: &Html,
+    ) {
+        // one pass per (tag, attribute) pair; a repeated pair adds nothing new to the set
+        self.extract_links_by_attr(resp_url, links, html, "a", "href");
+        self.extract_links_by_attr(resp_url, links, html, "img", "src");
+        self.extract_links_by_attr(resp_url, links, html, "form", "action");
+        self.extract_links_by_attr(resp_url, links, html, "script", "src");
+        self.extract_links_by_attr(resp_url, links, html, "iframe", "src");
+        self.extract_links_by_attr(resp_url, links, html, "div", "src");
+        self.extract_links_by_attr(resp_url, links, html, "frame", "src");
+        self.extract_links_by_attr(resp_url, links, html, "embed", "src");
+    }
+
+    /// Given the body of a `reqwest::Response`, perform the following actions
+    ///   - parse the body for links using the linkfinder regex
    ///   - for every link found take its url path and parse each sub-path
    ///     - example: Response contains a link fragment `homepage/assets/img/icons/handshake.svg`
    ///       with a base url of http://localhost, the following urls would be returned:
@@ -145,69 +225,32 @@ impl<'a> Extractor<'a> {
    ///         - homepage/assets/img/
    ///         - homepage/assets/
    ///         - homepage/
-    pub(super) async fn extract_from_body(&self) -> Result<(HashSet<String>, bool)> {
-        log::trace!("enter: extract_from_body");
+    fn extract_all_links_from_javascript(
+        &self,
+        response_body: &str,
+        response_url: &Url,
+        links: &mut HashSet<String>,
+    ) {
+        log::trace!(
+            "enter: extract_all_links_from_javascript(html body..., {}, {:?})",
+            response_url.as_str(),
+            links
+        );

-        let mut links = HashSet::<String>::new();
-        let dirlist_flag = false;
-
-        // Response
-        let response = self.response.unwrap();
-        let resp_url = response.url();
-        let body = response.text();
-        let html = Html::parse_document(body);
-
-        // Extract Links
-        self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
-
-        for capture in self.links_regex.captures_iter(body) {
+        for capture in self.links_regex.captures_iter(response_body) {
            // remove single & double quotes from both ends of the capture
            // capture[0] is the entire match, additional capture groups start at [1]
            let link = capture[0].trim_matches(|c| c == '\'' || c == '"');

-            match Url::parse(link) {
-                Ok(absolute) => {
-                    if absolute.domain() != self.response.unwrap().url().domain()
-                        || absolute.host() != self.response.unwrap().url().host()
-                    {
-                        // domains/ips are not the same, don't scan things that aren't part of the original
-                        // target url
-                        continue;
-                    }
-
-                    if self.add_all_sub_paths(absolute.path(), &mut links).is_err() {
-                        log::warn!("could not add sub-paths from {} to {:?}", absolute, links);
-                    }
-                }
-                Err(e) => {
-                    // this is the expected error that happens when we try to parse a url fragment
-                    //     ex: Url::parse("/login") -> Err("relative URL without a base")
-                    // while this is technically an error, these are good results for us
-                    if e.to_string().contains("relative URL without a base") {
-                        if self.add_all_sub_paths(link, &mut links).is_err() {
-                            log::warn!("could not add sub-paths from {} to {:?}", link, links);
-                        }
-                    } else {
-                        // unexpected error has occurred
-                        log::warn!("Could not parse given url: {}", e);
-                        self.handles.stats.send(AddError(Other)).unwrap_or_default();
-                    }
-                }
+            if self
+                .parse_url_and_add_subpaths(link, response_url, links)
+                .is_err()
+            {
+                // purposely not logging the error here, due to the frequency with which it gets hit
            }
        }

-        self.update_stats(links.len())?;
-
-        log::trace!("exit: extract_from_body -> {:?} {}", links, dirlist_flag);
-        Ok((links, dirlist_flag))
+        log::trace!("exit: extract_all_links_from_javascript");
    }

    /// take a url fragment like homepage/assets/img/icons/handshake.svg and
@@ -227,6 +270,45 @@ impl<'a> Extractor<'a> {
        Ok(())
    }

+    /// given a url path, trim whitespace, remove leading slashes, and drop any query/fragment;
+    /// return the normalized string
+    pub(super) fn normalize_url_path(&self, path: &str) -> String {
+        log::trace!("enter: normalize_url_path({})", path);
+
+        // remove whitespace and leading '/'
+        let path_str: String = path
+            .trim()
+            .trim_start_matches('/')
+            .chars()
+            .filter(|char| !char.is_whitespace())
+            .collect();
+
+        // snippets from rfc-3986:
+        //
+        //          foo://example.com:8042/over/there?name=ferret#nose
+        //          \_/   \______________/\_________/ \_________/ \__/
+        //           |           |            |            |        |
+        //        scheme     authority       path        query   fragment
+        //
+        // The path component is terminated
+        //    by the first question mark ("?") or number sign ("#") character, or
+        //    by the end of the URI.
+        //
+        // The query component is indicated by the first question
+        //    mark ("?") character and terminated by a number sign ("#") character
+        //    or by the end of the URI.
+        //
+        // a fragment may itself contain '?' (e.g. `page#top?x`), so don't look for '?' first;
+        // cut at whichever of the two delimiters appears earliest
+        let path_end = path_str
+            .find(|c: char| c == '?' || c == '#')
+            .unwrap_or(path_str.len());
+        let path_str = &path_str[..path_end];
+
+        log::trace!("exit: normalize_url_path -> {}", path_str);
+        path_str.into()
+    }
+
    /// Iterate over a given path, return a list of every sub-path found
    ///
    /// example: `path` contains a link fragment `homepage/assets/img/icons/handshake.svg`
@@ -240,8 +322,13 @@ impl<'a> Extractor<'a> {
        log::trace!("enter: get_sub_paths_from_path({})", path);
        let mut paths = vec![];

+        let normalized_path = self.normalize_url_path(path);
+
        // filter out any empty strings caused by .split
-        let mut parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
+        let mut parts: Vec<&str> = normalized_path
+            .split('/')
+            .filter(|s| !s.is_empty())
+            .collect();

        let length = parts.len();

@@ -274,7 +361,7 @@ impl<'a> Extractor<'a> {
        paths
    }

-    /// simple helper to stay DRY, trys to join a url + fragment and add it to the `links` HashSet
+    /// simple helper to stay DRY, tries to join a url + fragment and add it to the `links` HashSet
    pub(super) fn add_link_to_set_of_links(
        &self,
        link: &str,
@@ -283,15 +370,15 @@ impl<'a> Extractor<'a> {
        log::trace!("enter: add_link_to_set_of_links({}, {:?})", link, links);

        let old_url = match self.target {
-            ExtractionTarget::ResponseBody => self.response.unwrap().url().clone(),
-            ExtractionTarget::ParseHtml | ExtractionTarget::RobotsTxt => {
-                match Url::parse(&self.url) {
-                    Ok(u) => u,
-                    Err(e) => {
-                        bail!("Could not parse {}: {}", self.url, e);
-                    }
-                }
+            ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => {
+                self.response.unwrap().url().clone()
            }
+            ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
+                Ok(u) => u,
+                Err(e) => {
+                    bail!("Could not parse {}: {}", self.url, e);
+                }
+            },
        };

        let new_url = old_url
@@ -346,7 +433,6 @@ impl<'a> Extractor<'a> {
            new_response,
            url,
            DEFAULT_METHOD,
-            true,
            self.handles.config.output_level,
        )
        .await;
@@ -364,11 +450,10 @@ impl<'a> Extractor<'a> {
    ///     http://localhost/stuff/things
    /// this function requests:
    ///     http://localhost/robots.txt
-    pub(super) async fn extract_from_robots(&self) -> Result<(HashSet<String>, bool)> {
+    pub(super) async fn extract_from_robots(&self) -> Result<ExtractionResult> {
        log::trace!("enter: extract_robots_txt");

-        let mut links: HashSet<String> = HashSet::new();
-        let dirlist_flag = false;
+        let mut result: HashSet<_> = ExtractionResult::new();

        // request
        let response = self.make_extract_request("/robots.txt").await?;
@@ -377,73 +462,60 @@ impl<'a> Extractor<'a> {
        for capture in self.robots_regex.captures_iter(body) {
            if let Some(new_path) = capture.name("url_path") {
                let mut new_url = Url::parse(&self.url)?;
+
                new_url.set_path(new_path.as_str());
-                if self.add_all_sub_paths(new_url.path(), &mut links).is_err() {
-                    log::warn!("could not add sub-paths from {} to {:?}", new_url, links);
+
+                if self.add_all_sub_paths(new_url.path(), &mut result).is_err() {
+                    log::warn!("could not add sub-paths from {} to {:?}", new_url, result);
                }
            }
        }

-        self.update_stats(links.len())?;
-
-        log::trace!("exit: extract_robots_txt -> {:?} {}", links, dirlist_flag);
-        Ok((links, dirlist_flag))
+        log::trace!("exit: extract_robots_txt -> {:?}", result);
+        Ok(result)
    }

-    /// Entry point to parse html for links (i.e. webscraping, directory listings)
-    /// this function requests:
-    ///     http://localhost/<location>
-    pub(super) async fn parse_html(&self) -> Result<(HashSet<String>, bool)> {
-        log::trace!("enter: parse_html");
+    /// outer-most wrapper for parsing html response bodies in search of additional content.
+    /// performs the following high-level steps:
+    /// - requests the page, if necessary
+    /// - checks the page to see if directory listing is enabled and sucks up all the links, if so
+    /// - uses the linkfinder regex to grab links from embedded javascript/javascript files
+    /// - extracts many different types of link sources from the html itself
+    pub(super) async fn extract_from_body(&self) -> Result<ExtractionResult> {
+        log::trace!("enter: extract_from_body");

-        let mut links: HashSet<String> = HashSet::new();
-        let mut dirlist_flag = false;
+        let mut result = ExtractionResult::new();

-        // Response
-        let url = Url::parse(&self.url)?;
-        let response = self.make_extract_request(url.path()).await?;
+        let response = self.response.unwrap();
        let resp_url = response.url();
        let body = response.text();
        let html = Html::parse_document(body);

-        // Directory listing heuristic detection to not continue scanning
-        // Index of /: apache
-        // Directory Listing for /: tomcat,
-        // Directory Listing -- /: ASP.NET
-        // <host> - /: iis, azure, skipping due to loose heuristic
-        let title_selector = Selector::parse("title").unwrap();
-        for t in html.select(&title_selector) {
-            let title = t.inner_html().to_lowercase();
-            if title.contains("directory listing for /")
-                || title.contains("index of /")
-                || title.contains("directory listing -- /")
-            {
-                log::debug!("Directory listing heuristic detection from \"{}\"", title);
-                dirlist_flag = true;
+        // extract links from html tags/attributes and embedded javascript
+        self.extract_all_links_from_html_tags(resp_url, &mut result, &html);
+        self.extract_all_links_from_javascript(body, resp_url, &mut result);

-                self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
-                self.update_stats(links.len())?;
+        log::trace!("exit: extract_from_body -> {:?}", result);
+        Ok(result)
+    }

-                log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag);
-                return Ok((links, dirlist_flag));
-            }
-        }
+    /// parses html response bodies in search of <a> tags.
+    ///
+    /// the assumption is that directory listing is turned on and this extraction target simply
+    /// scoops up all the links for the given directory. The test to detect a directory listing
+    /// is located in `HeuristicTests`
+    pub async fn extract_from_dir_listing(&self) -> Result<ExtractionResult> {
+        log::trace!("enter: extract_from_dir_listing");

-        // Extract Links
-        self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src");
-        self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src");
+        let mut result = ExtractionResult::new();

-        self.update_stats(links.len())?;
+        let response = self.response.unwrap();
+        let html = Html::parse_document(response.text());

-        log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag);
-        Ok((links, dirlist_flag))
+        self.extract_links_by_attr(response.url(), &mut result, &html, "a", "href");
+
+        log::trace!("exit: extract_from_dir_listing -> {:?}", result);
+        Ok(result)
    }

    /// simple helper to get html links by tag/attribute and add it to the `links` HashSet
@@ -458,41 +530,20 @@ impl<'a> Extractor<'a> {
        log::trace!("enter: extract_links_by_attr");

        let selector = Selector::parse(html_tag).unwrap();
+
        let tags = html
            .select(&selector)
            .filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr));
-        for t in tags {
-            if let Some(link) = t.value().attr(html_attr) {
+
+        for tag in tags {
+            if let Some(link) = tag.value().attr(html_attr) {
                log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str());

-                match Url::parse(link) {
-                    Ok(absolute) => {
-                        if absolute.domain() != resp_url.domain()
-                            || absolute.host() != resp_url.host()
-                        {
-                            // domains/ips are not the same, don't scan things that aren't part of the original
-                            // target url
-                            continue;
-                        }
-
-                        if self.add_all_sub_paths(absolute.path(), links).is_err() {
-                            log::warn!("could not add sub-paths from {} to {:?}", absolute, links);
-                        }
-                    }
-                    Err(e) => {
-                        // this is the expected error that happens when we try to parse a url fragment
-                        //     ex: Url::parse("/login") -> Err("relative URL without a base")
-                        // while this is technically an error, these are good results for us
-                        if e.to_string().contains("relative URL without a base") {
-                            if self.add_all_sub_paths(link, links).is_err() {
-                                log::warn!("could not add sub-paths from {} to {:?}", link, links);
-                            }
-                        } else {
-                            // unexpected error has occurred
-                            log::warn!("Could not parse given url: {}", e);
-                            self.handles.stats.send(AddError(Other)).unwrap_or_default();
-                        }
-                    }
+                if self
+                    .parse_url_and_add_subpaths(link, resp_url, links)
+                    .is_err()
+                {
+                    log::debug!("link didn't belong to the target domain/host: {}", link);
                }
            }
        }
@@ -507,33 +558,45 @@ impl<'a> Extractor<'a> {
    pub(super) async fn make_extract_request(&self, location: &str) -> Result<FeroxResponse> {
        log::trace!("enter: make_extract_request");

-        // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something
-        // similar; to account for that, create a client that will follow redirects, regardless of
-        // what the user specified for the scanning client. Other than redirects, it will respect
-        // all other user specified settings
-        let follow_redirects = true;
+        // need late binding here to avoid 'creates a temporary which is freed...' in the
+        // `let ... if` below to avoid cloning the client out of config
+        let mut client = Client::new();

-        let proxy = if self.handles.config.proxy.is_empty() {
-            None
+        if location == "/robots.txt" {
+            // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something
+            // similar; to account for that, create a client that will follow redirects, regardless of
+            // what the user specified for the scanning client. Other than redirects, it will respect
+            // all other user specified settings
+            let follow_redirects = true;
+
+            let proxy = if self.handles.config.proxy.is_empty() {
+                None
+            } else {
+                Some(self.handles.config.proxy.as_str())
+            };
+
+            client = client::initialize(
+                self.handles.config.timeout,
+                &self.handles.config.user_agent,
+                follow_redirects,
+                self.handles.config.insecure,
+                &self.handles.config.headers,
+                proxy,
+            )?;
+        }
+
+        let client = if location != "/robots.txt" {
+            &self.handles.config.client
        } else {
-            Some(self.handles.config.proxy.as_str())
+            &client
        };

-        let client = client::initialize(
-            self.handles.config.timeout,
-            &self.handles.config.user_agent,
-            follow_redirects,
-            self.handles.config.insecure,
-            &self.handles.config.headers,
-            proxy,
-        )?;
-
        let mut url = Url::parse(&self.url)?;
        url.set_path(location); // overwrite existing path

        // purposefully not using logged_request here due to using the special client
        let response = make_request(
-            &client,
+            client,
            &url,
            DEFAULT_METHOD,
            None,
@@ -547,10 +610,10 @@ impl<'a> Extractor<'a> {
            response,
            &self.url,
            DEFAULT_METHOD,
-            true,
            self.handles.config.output_level,
        )
        .await;
+        // note: don't call parse_extension here. If we call it here, it gets called on robots.txt

        log::trace!("exit: make_extract_request -> {}", ferox_response);
        Ok(ferox_response)
@@ -558,7 +621,7 @@ impl<'a> Extractor<'a> {

    /// update total number of links extracted and expected responses
    fn update_stats(&self, num_links: usize) -> Result<()> {
-        let multiplier = self.handles.config.extensions.len().max(1);
+        let multiplier = self.handles.expected_num_requests_multiplier();

        self.handles
            .stats
--- a/src/extractor/tests.rs
+++ b/src/extractor/tests.rs
@@ -21,7 +21,7 @@ lazy_static! {
    static ref BODY_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ResponseBody, Arc::new(FeroxScans::default()));

    /// Extractor for testing parsing html
-    static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ParseHtml, Arc::new(FeroxScans::default()));
+    static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::DirectoryListing, Arc::new(FeroxScans::default()));

    /// FeroxResponse for Extractor
    static ref RESPONSE: FeroxResponse = get_test_response();
@@ -45,9 +45,9 @@ fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc<FeroxScans>) -> E
        ExtractionTarget::RobotsTxt => builder
            .url("http://localhost")
            .target(ExtractionTarget::RobotsTxt),
-        ExtractionTarget::ParseHtml => builder
+        ExtractionTarget::DirectoryListing => builder
            .url("http://localhost")
-            .target(ExtractionTarget::ParseHtml),
+            .target(ExtractionTarget::DirectoryListing),
    };

    let config = Arc::new(Configuration::new().unwrap());
@@ -195,7 +195,6 @@ fn extractor_add_link_to_set_of_links_happy_path() {
 fn extractor_add_link_to_set_of_links_with_non_base_url() {
    let mut links = HashSet::<String>::new();
    let link = "\\\\\\\\";
-
    assert_eq!(links.len(), 0);
    assert!(ROBOTS_EXT
        .add_link_to_set_of_links(link, &mut links)
@@ -206,6 +205,34 @@ fn extractor_add_link_to_set_of_links_with_non_base_url() {
    assert!(links.is_empty());
 }

+#[test]
+/// queries, fragments, and surrounding whitespace are all stripped, leaving only the path
+fn normalize_url_path_filters_queries_and_fragments() {
+    let testing_handles = Arc::new(Handles::for_testing(None, None).0);
+    let extractor = ExtractorBuilder::default()
+        .url("doesnt matter")
+        .target(ExtractionTarget::RobotsTxt)
+        .handles(testing_handles)
+        .build()
+        .unwrap();
+
+    let inputs = [
+        "over/there?name=ferret#nose",
+        "over/there?name=ferret",
+        "over/there#nose",
+        "over/there",
+        "over/there?name#nose",
+        "over/there?name",
+        "   over/there?name=ferret#nose  ",
+        "over/there?name=ferret   ",
+        "   over/there#nose",
+    ];
+    for &input in inputs.iter() {
+        let normalized = extractor.normalize_url_path(input);
+        assert_eq!(normalized, "over/there");
+    }
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
 /// use make_request to generate a Response, and use the Response to test get_links;
 /// the response will contain an absolute path to a domain that is not part of the scanned
@@ -240,14 +267,8 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain()
    let (handles, _rx) = Handles::for_testing(None, None);

    let handles = Arc::new(handles);
-    let ferox_response = FeroxResponse::from(
-        response,
-        &srv.url(""),
-        DEFAULT_METHOD,
-        true,
-        OutputLevel::Default,
-    )
-    .await;
+    let ferox_response =
+        FeroxResponse::from(response, &srv.url(""), DEFAULT_METHOD, OutputLevel::Default).await;

    let extractor = Extractor {
        links_regex: Regex::new(LINKFINDER_REGEX).unwrap(),
@@ -258,7 +279,7 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain()
        handles: handles.clone(),
    };

-    let links = (extractor.extract_from_body().await?).0;
+    let links = extractor.extract_from_body().await?;

    assert!(links.is_empty());
    assert_eq!(mock.hits(), 1);
--- a/src/filters/init.rs
+++ b/src/filters/init.rs
@@ -75,15 +75,18 @@ pub async fn initialize(handles: Arc<Handles>) -> Result<()> {
        let resp = skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await);

        // if successful, create a filter based on the response's body
-        let fr = FeroxResponse::from(
+        let mut fr = FeroxResponse::from(
            resp,
            similarity_filter,
            DEFAULT_METHOD,
-            true,
            handles.config.output_level,
        )
        .await;

+        if handles.config.collect_extensions {
+            fr.parse_extension(handles.clone())?;
+        }
+
        // hash the response body and store the resulting hash in the filter object
        let hash = FuzzyHash::new(&fr.text()).to_string();

--- a/src/heuristics.rs
+++ b/src/heuristics.rs
@@ -2,8 +2,10 @@ use std::sync::Arc;

 use anyhow::{bail, Result};
 use console::style;
+use scraper::{Html, Selector};
 use uuid::Uuid;

+use crate::message::FeroxMessage;
 use crate::{
    config::OutputLevel,
    event_handlers::{Command, Handles},
@@ -36,6 +38,36 @@ macro_rules! format_template {
    };
 }

+/// server families that `HeuristicTests::detect_directory_listing` can recognize, based on the
+/// page's `<title>`, when directory listing is enabled
+#[derive(Copy, Debug, Clone)]
+pub enum DirListingType {
+    /// apache server, title contains `Index of /` (compared case-insensitively)
+    Apache,
+
+    /// tomcat/python server, title contains `Directory Listing for /` (case-insensitive)
+    TomCatOrPython,
+
+    /// ASP.NET server, title contains `Directory Listing -- /` (case-insensitive)
+    AspDotNet,
+
+    // /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used)
+    // IIS_AZURE,
+    /// variant that represents the absence of directory listing
+    None,
+}
+
+/// Wrapper around the results of running a directory listing detection against a target web page
+#[derive(Debug, Clone)]
+pub struct DirListingResult {
+    /// type of server where directory listing was detected; background on the issue itself:
+    /// https://portswigger.net/kb/issues/00600100_directory-listing
+    pub dir_list_type: Option<DirListingType>,
+
+    /// the `FeroxResponse` built from the request that `directory_listing` sent to the target
+    pub response: FeroxResponse,
+}
+
 /// container for heuristics related info
 pub struct HeuristicTests {
    /// Handles object for event handler interaction
@@ -198,11 +230,11 @@ impl HeuristicTests {
            .contains(&response.status().as_u16())
        {
            // found a wildcard response
+
            let mut ferox_response = FeroxResponse::from(
                response,
                &target.target,
                method,
-                true,
                self.handles.config.output_level,
            )
            .await;
@@ -282,6 +314,111 @@ impl HeuristicTests {
        log::trace!("exit: connectivity_test -> {:?}", good_urls);
        Ok(good_urls)
    }
+
+    /// heuristic designed to detect when a server has directory listing enabled
+    ///
+    /// requests `target_url` (a trailing slash is appended when absent) and inspects the
+    /// response's `<title>`; a detection is sent to the file-output handler and logged.
+    /// returns `Ok(None)` when no known title is found; url/request errors are propagated
+    pub async fn directory_listing(&self, target_url: &str) -> Result<Option<DirListingResult>> {
+        log::trace!("enter: directory_listing({})", target_url);
+
+        // without the trailing slash, this function would be called against redirects that point
+        // to valid directories for most, if not all, directories beyond the initial urls.
+        // so, instead of `directory_listing("http://localhost") -> None` we get
+        // `directory_listing("http://localhost/") -> Some(DirListingResult)` if there is
+        // directory listing beyond the redirect
+        let tgt = if target_url.ends_with('/') {
+            target_url.to_string()
+        } else {
+            format!("{}/", target_url)
+        };
+
+        let url = FeroxUrl::from_string(&tgt, self.handles.clone());
+        let request = url.format("", None)?;
+
+        let raw_response =
+            logged_request(&request, DEFAULT_METHOD, None, self.handles.clone()).await?;
+
+        let output_level = self.handles.config.output_level;
+        let ferox_response =
+            FeroxResponse::from(raw_response, &url.target, DEFAULT_METHOD, output_level).await;
+
+        let html = Html::parse_document(ferox_response.text());
+
+        let detected = match self.detect_directory_listing(&html) {
+            Some(kind) => kind,
+            None => {
+                log::trace!("exit: directory_listing -> None");
+                return Ok(None);
+            }
+        };
+
+        // folks that run things and step away/rely on logs need to be notified of directory
+        // listing, since they won't see the message on the bar; bastardizing FeroxMessage
+        // for ease of implementation. This could use a bit of polish at some point.
+        let msg = format!("detected directory listing: {} ({:?})", target_url, detected);
+
+        let ferox_msg = FeroxMessage {
+            kind: "log".to_string(),
+            message: msg.clone(),
+            level: "MSG".to_string(),
+            time_offset: 0.0,
+            module: "feroxbuster::heuristics".to_string(),
+        };
+
+        // a failed send is deliberately ignored (`unwrap_or_default`); detection still gets logged
+        self.handles
+            .output
+            .tx_file
+            .send(Command::WriteToDisk(Box::new(ferox_msg)))
+            .unwrap_or_default();
+
+        log::info!("{}", msg);
+
+        let result = DirListingResult {
+            dir_list_type: Some(detected),
+            response: ferox_response,
+        };
+
+        log::trace!("exit: directory_listing -> {:?}", result);
+        Ok(Some(result))
+    }
+
+    /// Directory listing heuristic detection, driven entirely by the page's <title> tag(s). The
+    /// lowercased inner html of each <title> is checked for the markers below; the first <title>
+    /// containing one of them determines the `DirListingType` that's returned.
+    /// - tomcat/python: `Directory Listing for /`
+    /// - apache: `Index of /`
+    /// - ASP.NET: `Directory Listing -- /`
+    /// - <host> - /: iis, azure, skipping due to loose heuristic
+    fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> {
+        log::trace!("enter: detect_directory_listing(html body...)");
+
+        let title_selector = Selector::parse("title").expect("couldn't parse title selector");
+
+        let detected = html.select(&title_selector).find_map(|element| {
+            let title = element.inner_html().to_lowercase();
+            // markers are tried in this order; IIS_AZURE purposely skipped for now
+            let markers = [
+                ("directory listing for /", DirListingType::TomCatOrPython),
+                ("index of /", DirListingType::Apache),
+                ("directory listing -- /", DirListingType::AspDotNet),
+            ];
+
+            markers
+                .iter()
+                .find(|(marker, _)| title.contains(*marker))
+                .map(|&(_, kind)| kind)
+        });
+
+        match detected {
+            Some(_) => log::trace!("exit: detect_directory_listing -> {:?}", detected),
+            None => log::trace!("exit: detect_directory_listing -> None"),
+        }
+
+        detected
+    }
 }

 #[cfg(test)]
@@ -297,4 +434,51 @@ mod tests {
            assert_eq!(tester.unique_string(i).len(), i * 32);
        }
    }
+
+    #[test]
+    /// a `<title>` holding `directory listing for /` is reported as tomcat/python
+    fn detect_directory_listing_finds_tomcat_python() {
+        let (handles, _rx) = Handles::for_testing(None, None);
+        let tester = HeuristicTests::new(Arc::new(handles));
+
+        let document = Html::parse_document("<title>directory listing for /</title>");
+        let detected = tester.detect_directory_listing(&document);
+        assert!(matches!(
+            detected,
+            Some(DirListingType::TomCatOrPython)
+        ));
+    }
+
+    #[test]
+    /// a `<title>` holding `index of /` is reported as apache
+    fn detect_directory_listing_finds_apache() {
+        let (handles, _rx) = Handles::for_testing(None, None);
+        let tester = HeuristicTests::new(Arc::new(handles));
+
+        let document = Html::parse_document("<title>index of /</title>");
+        let detected = tester.detect_directory_listing(&document);
+        assert!(matches!(detected, Some(DirListingType::Apache)));
+    }
+
+    #[test]
+    /// a `<title>` holding `directory listing -- /` is reported as ASP.NET
+    fn detect_directory_listing_finds_asp_dot_net() {
+        let (handles, _rx) = Handles::for_testing(None, None);
+        let tester = HeuristicTests::new(Arc::new(handles));
+
+        let document = Html::parse_document("<title>directory listing -- /</title>");
+        let detected = tester.detect_directory_listing(&document);
+        assert!(matches!(detected, Some(DirListingType::AspDotNet)));
+    }
+
+    #[test]
+    /// a `<title>` that matches none of the known markers yields `None`
+    fn detect_directory_listing_returns_none_as_default() {
+        let (handles, _rx) = Handles::for_testing(None, None);
+        let tester = HeuristicTests::new(Arc::new(handles));
+
+        let document = Html::parse_document("<title>derp listing -- /</title>");
+        let detected = tester.detect_directory_listing(&document);
+        assert!(detected.is_none());
+    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,8 @@
+#![deny(clippy::all)]
+#![allow(clippy::mutex_atomic)]
 use anyhow::Result;
 use reqwest::StatusCode;
+use std::collections::HashSet;
 use tokio::{
    sync::mpsc::{UnboundedReceiver, UnboundedSender},
    task::JoinHandle,
@@ -26,6 +29,7 @@ mod macros;
 mod url;
 mod response;
 mod message;
+mod nlp;

 /// Alias for tokio::sync::mpsc::UnboundedSender<Command>
 pub(crate) type CommandSender = UnboundedSender<Command>;
@@ -39,6 +43,9 @@ pub(crate) type Joiner = JoinHandle<Result<()>>;
 /// Generic mpsc::unbounded_channel type to tidy up some code
 pub(crate) type FeroxChannel<T> = (UnboundedSender<T>, UnboundedReceiver<T>);

+/// Wrapper around the results of performing any kind of extraction against a target web page
+pub(crate) type ExtractionResult = HashSet<String>;
+
 /// Version pulled from Cargo.toml at compile time
 pub(crate) const VERSION: &str = env!("CARGO_PKG_VERSION");

@@ -48,6 +55,13 @@ pub const DEFAULT_OPEN_FILE_LIMIT: u64 = 8192;
 /// Default value used to determine near-duplicate web pages (equivalent to 95%)
 pub const SIMILARITY_THRESHOLD: u32 = 95;

+/// Default set of extensions to ignore when auto-collecting extensions during scans
+pub(crate) const DEFAULT_IGNORED_EXTENSIONS: [&str; 38] = [
+    "tif", "tiff", "ico", "cur", "bmp", "webp", "svg", "png", "jpg", "jpeg", "jfif", "gif", "avif",
+    "apng", "pjpeg", "pjp", "mov", "wav", "mpg", "mpeg", "mp3", "mp4", "m4a", "m4p", "m4v", "ogg",
+    "webm", "ogv", "oga", "flac", "aac", "3gp", "css", "zip", "xls", "xml", "gz", "tgz",
+];
+
 /// Default wordlist to use when `-w|--wordlist` isn't specified and not `wordlist` isn't set
 /// in a [ferox-config.toml](constant.DEFAULT_CONFIG_NAME.html) config file.
 ///
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use std::io::stdin;
 use std::{
    env::args,
    fs::{create_dir, remove_file, File},
@@ -16,6 +17,7 @@ use tokio::{
 };
 use tokio_util::codec::{FramedRead, LinesCodec};

+use feroxbuster::scan_manager::ScanType;
 use feroxbuster::{
    banner::{Banner, UPDATE_URL},
    config::{Configuration, OutputLevel},
@@ -48,7 +50,11 @@ fn get_unique_words_from_wordlist(path: &str) -> Result<Arc<Vec<String>>> {

    let reader = BufReader::new(file);

-    let mut words = Vec::new();
+    // this empty string ensures that we call Requester::request with the base url, i.e.
+    // `http://localhost/` instead of going straight into `http://localhost/WORD.EXT`.
+    // for vanilla scans, it doesn't matter all that much, but it can be a significant difference
+    // when `-e` is used, depending on the content at the base url.
+    let mut words = vec![String::from("")];

    for line in reader.lines() {
        line.map(|result| {
@@ -70,21 +76,12 @@ fn get_unique_words_from_wordlist(path: &str) -> Result<Arc<Vec<String>>> {
 /// Determine whether it's a single url scan or urls are coming from stdin, then scan as needed
 async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> {
    log::trace!("enter: scan({:?}, {:?})", targets, handles);
-    // cloning an Arc is cheap (it's basically a pointer into the heap)
-    // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans
-    // as well as additional directories found as part of recursion
-
-    let words = get_unique_words_from_wordlist(&handles.config.wordlist)?;
-
-    if words.len() == 0 {
-        bail!("Did not find any words in {}", handles.config.wordlist);
-    }

    let scanned_urls = handles.ferox_scans()?;

-    handles.send_scan_command(UpdateWordlist(words.clone()))?;
+    handles.send_scan_command(UpdateWordlist(handles.wordlist.clone()))?;

-    scanner::initialize(words.len(), handles.clone()).await?;
+    scanner::initialize(handles.wordlist.len(), handles.clone()).await?;

    // at this point, the stat thread's progress bar can be created; things that needed to happen
    // first:
@@ -103,7 +100,7 @@ async fn scan(targets: Vec<String>, handles: Arc<Handles>) -> Result<()> {
    if handles.config.resumed {
        // display what has already been completed
        scanned_urls.print_known_responses();
-        scanned_urls.print_completed_bars(words.len())?;
+        scanned_urls.print_completed_bars(handles.wordlist.len())?;
    }

    log::debug!("sending {:?} to be scanned as initial targets", targets);
@@ -138,8 +135,8 @@ async fn get_targets(handles: Arc<Handles>) -> Result<Vec<String>> {
            for scan in scans.iter() {
                // ferox_scans gets deserialized scans added to it at program start if --resume-from
                // is used, so scans that aren't marked complete still need to be scanned
-                if scan.is_complete() {
-                    // this one's already done, ignore it
+                if scan.is_complete() || matches!(scan.scan_type, ScanType::File) {
+                    // this one's already done, or it's not a directory, ignore it
                    continue;
                }

@@ -193,6 +190,18 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> {
        PROGRESS_BAR.join().unwrap();
    });

+    // cloning an Arc is cheap (it's basically a pointer into the heap)
+    // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans
+    // as well as additional directories found as part of recursion
+    let words = get_unique_words_from_wordlist(&config.wordlist)?;
+
+    if words.len() <= 1 {
+        // the check is now <= 1 due to the initial empty string added in 2.6.0
+        // 1 -> empty wordlist
+        // 0 -> error
+        bail!("Did not find any words in {}", config.wordlist);
+    }
+
    // spawn all event handlers, expect back a JoinHandle and a *Handle to the specific event
    let (stats_task, stats_handle) = StatsHandler::initialize(config.clone());
    let (filters_task, filters_handle) = FiltersHandler::initialize();
@@ -205,6 +214,7 @@ async fn wrapped_main(config: Arc<Configuration>) -> Result<()> {
        filters_handle,
        out_handle,
        config.clone(),
+        words,
    ));

    let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone());
@@ -494,9 +504,39 @@ fn main() -> Result<()> {
        .enable_all()
        .build()
    {
-        let future = wrapped_main(config);
+        let future = wrapped_main(config.clone());
        if let Err(e) = runtime.block_on(future) {
            eprintln!("{}", e);
+
+            // the code below is to facilitate testing tests/test_banner entries. Since it's an
+            // integration test, normal test detection (cfg!(test), etc...) won't work. So, in
+            // the tests themselves, we pass
+            // `--wordlist /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676`
+            // and look for that here to print the banner.
+            //
+            // this change became a necessity once we moved wordlist parsing out of `scan` and into
+            // `wrapped_main`.
+            if e.to_string()
+                .contains("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+            {
+                // support the handful of tests that use `--stdin`
+                let targets: Vec<_> = if config.stdin {
+                    stdin().lock().lines().map(|tgt| tgt.unwrap()).collect()
+                } else {
+                    vec!["http://localhost".to_string()]
+                };
+
+                // print the banner to stderr
+                let std_stderr = stderr(); // std::io::stderr
+                let banner = Banner::new(&targets, &config);
+                if !config.quiet && !config.silent {
+                    banner.print_to(std_stderr, config).unwrap();
+                }
+            }
+
+            // if we've encountered an error before clean_up can be called (i.e. a wordlist error)
+            // we need to at least spin-down the progress bar
+            PROGRESS_PRINTER.finish();
        };
    }

--- a/src/message.rs
+++ b/src/message.rs
@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
 use crate::traits::FeroxSerialize;
 use crate::utils::fmt_err;

-#[derive(Serialize, Deserialize, Default)]
+#[derive(Serialize, Deserialize, Default, Debug)]
 /// Representation of a log entry, can be represented as a human readable string or JSON
 pub struct FeroxMessage {
    #[serde(rename = "type")]
@@ -38,7 +38,7 @@ impl FeroxSerialize for FeroxMessage {
            "DEBUG" => ("DBG", Color::Yellow),
            "TRACE" => ("TRC", Color::Magenta),
            "WILDCARD" => ("WLD", Color::Cyan),
-            _ => ("UNK", Color::White),
+            _ => ("MSG", Color::White),
        };

        format!(
@@ -143,6 +143,6 @@ mod tests {
        assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("WLD"));

        msg.level = "UNKNOWN".to_string();
-        assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("UNK"));
+        assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("MSG"));
    }
 }
--- a/src/nlp/constants.rs
+++ b/src/nlp/constants.rs
@@ -0,0 +1,334 @@
+use lazy_static::lazy_static;
+use regex::Regex;
+
+lazy_static! {
+    /// regular expression to match on words with numbers, underscores, and hyphens
+    pub(super) static ref BOUNDED_WORD_REGEX: Regex = Regex::new(r"\b[a-zA-Z0-9_-]+\b").unwrap();
+}
+
+/// collection of stop words from spaCy with small modifications
+pub(super) static STOP_WORDS: [&str; 323] = [
+    "'d",
+    "'ll",
+    "'m",
+    "'re",
+    "'s",
+    "'ve",
+    "a",
+    "about",
+    "above",
+    "across",
+    "after",
+    "afterwards",
+    "again",
+    "against",
+    "almost",
+    "alone",
+    "along",
+    "already",
+    "also",
+    "although",
+    "always",
+    "am",
+    "among",
+    "amongst",
+    "amount",
+    "an",
+    "and",
+    "another",
+    "any",
+    "anyhow",
+    "anyone",
+    "anything",
+    "anyway",
+    "anywhere",
+    "are",
+    "around",
+    "as",
+    "at",
+    "back",
+    "be",
+    "became",
+    "because",
+    "become",
+    "becomes",
+    "becoming",
+    "been",
+    "before",
+    "beforehand",
+    "behind",
+    "being",
+    "below",
+    "beside",
+    "besides",
+    "between",
+    "beyond",
+    "both",
+    "bottom",
+    "but",
+    "by",
+    "ca",
+    "call",
+    "can",
+    "cannot",
+    "could",
+    "did",
+    "do",
+    "does",
+    "doing",
+    "done",
+    "down",
+    "due",
+    "during",
+    "each",
+    "eight",
+    "either",
+    "eleven",
+    "else",
+    "elsewhere",
+    "empty",
+    "enough",
+    "even",
+    "ever",
+    "every",
+    "everyone",
+    "everything",
+    "everywhere",
+    "except",
+    "few",
+    "fifteen",
+    "fifty",
+    "first",
+    "five",
+    "for",
+    "former",
+    "formerly",
+    "forty",
+    "four",
+    "from",
+    "front",
+    "full",
+    "further",
+    "get",
+    "got",
+    "give",
+    "go",
+    "had",
+    "has",
+    "have",
+    "he",
+    "hence",
+    "her",
+    "here",
+    "hereafter",
+    "hereby",
+    "herein",
+    "hereupon",
+    "hers",
+    "herself",
+    "him",
+    "himself",
+    "his",
+    "how",
+    "however",
+    "hundred",
+    "i",
+    "if",
+    "in",
+    "indeed",
+    "into",
+    "is",
+    "it",
+    "its",
+    "itself",
+    "just",
+    "keep",
+    "last",
+    "latter",
+    "latterly",
+    "least",
+    "less",
+    "made",
+    "make",
+    "many",
+    "may",
+    "me",
+    "meanwhile",
+    "might",
+    "mine",
+    "more",
+    "moreover",
+    "most",
+    "mostly",
+    "move",
+    "much",
+    "must",
+    "my",
+    "myself",
+    "n't",
+    "name",
+    "namely",
+    "neither",
+    "never",
+    "nevertheless",
+    "next",
+    "nine",
+    "no",
+    "nobody",
+    "none",
+    "noone",
+    "nor",
+    "not",
+    "nothing",
+    "now",
+    "nowhere",
+    "n\u{2018}t",
+    "n\u{2019}t",
+    "of",
+    "off",
+    "often",
+    "on",
+    "once",
+    "one",
+    "only",
+    "onto",
+    "or",
+    "other",
+    "others",
+    "otherwise",
+    "our",
+    "ours",
+    "ourselves",
+    "out",
+    "over",
+    "own",
+    "part",
+    "per",
+    "perhaps",
+    "please",
+    "put",
+    "quite",
+    "rather",
+    "re",
+    "really",
+    "regarding",
+    "same",
+    "say",
+    "see",
+    "seem",
+    "seemed",
+    "seeming",
+    "seems",
+    "serious",
+    "several",
+    "she",
+    "should",
+    "side",
+    "since",
+    "six",
+    "sixty",
+    "so",
+    "some",
+    "somehow",
+    "someone",
+    "something",
+    "sometime",
+    "sometimes",
+    "somewhere",
+    "still",
+    "such",
+    "take",
+    "ten",
+    "than",
+    "that",
+    "the",
+    "their",
+    "them",
+    "themselves",
+    "then",
+    "thence",
+    "there",
+    "thereafter",
+    "thereby",
+    "therefore",
+    "therein",
+    "thereupon",
+    "these",
+    "they",
+    "third",
+    "this",
+    "those",
+    "though",
+    "three",
+    "through",
+    "throughout",
+    "thru",
+    "thus",
+    "to",
+    "together",
+    "too",
+    "toward",
+    "towards",
+    "twelve",
+    "twenty",
+    "two",
+    "under",
+    "unless",
+    "until",
+    "up",
+    "upon",
+    "used",
+    "using",
+    "various",
+    "very",
+    "via",
+    "was",
+    "we",
+    "well",
+    "were",
+    "what",
+    "whatever",
+    "when",
+    "whence",
+    "whenever",
+    "where",
+    "whereafter",
+    "whereas",
+    "whereby",
+    "wherein",
+    "whereupon",
+    "wherever",
+    "whether",
+    "which",
+    "while",
+    "whither",
+    "who",
+    "whoever",
+    "whole",
+    "whom",
+    "whose",
+    "why",
+    "will",
+    "with",
+    "within",
+    "without",
+    "would",
+    "yet",
+    "you",
+    "your",
+    "yours",
+    "yourself",
+    "yourselves",
+    "\u{2018}d",
+    "\u{2018}ll",
+    "\u{2018}m",
+    "\u{2018}re",
+    "\u{2018}s",
+    "\u{2018}ve",
+    "\u{2019}d",
+    "\u{2019}ll",
+    "\u{2019}m",
+    "\u{2019}re",
+    "\u{2019}s",
+    "\u{2019}ve",
+];
--- a/src/nlp/document.rs
+++ b/src/nlp/document.rs
@@ -0,0 +1,223 @@
+use super::term::{Term, TermMetaData};
+use super::utils::preprocess;
+use scraper::{Html, Node, Selector};
+use std::collections::HashMap;
+
+/// data container representing a single document, in the nlp sense
+#[derive(Debug, Default)]
+pub(crate) struct Document {
+    /// collection of `Term`s and their associated metadata
+    terms: HashMap<Term, TermMetaData>,
+
+    /// count of every word returned by pre-processing, repeats and untracked short words included
+    number_of_terms: usize,
+}
+
+impl Document {
+    /// create a new `Document` from the given string
+    ///
+    /// every word that survives pre-processing counts toward `number_of_terms`, but only words
+    /// whose `len()` exceeds 2 are tracked as `Term`s
+    pub(super) fn new(text: &str) -> Self {
+        let mut document = Self::default();
+
+        let processed = preprocess(text);
+
+        // counted before the length filter below, so short words still affect term frequency
+        document.number_of_terms += processed.len();
+
+        for normalized in processed {
+            if normalized.len() > 2 {
+                document.add_term(&normalized)
+            }
+        }
+        document
+    }
+
+    /// add a `Term` to the document if it's not already tracked, otherwise increment the number
+    /// of times the term has been seen
+    fn add_term(&mut self, word: &str) {
+        let term = Term::new(word);
+
+        let metadata = self.terms.entry(term).or_insert_with(TermMetaData::new);
+        *metadata.count_mut() += 1;
+    }
+
+    /// create a new `Document` from the given HTML string
+    ///
+    /// only non-empty Text and Comment nodes found beneath `<body>` contribute words; nodes whose
+    /// parent is a `<script>` or `<style>` element are skipped. When the parsed page has no
+    /// `<body>` element (e.g. a `<frameset>` page), the result is an empty `Document` rather
+    /// than a panic
+    pub(crate) fn from_html(raw_html: &str) -> Self {
+        // "body" is a constant, valid selector, so this `unwrap` can't fail at runtime
+        let selector = Selector::parse("body").unwrap();
+
+        let html = Html::parse_document(raw_html);
+
+        let text = html
+            .select(&selector)
+            .next()
+            // Option -> Iterator: a missing <body> simply yields no nodes to walk
+            .into_iter()
+            .flat_map(|body| body.descendants())
+            .filter_map(|node| {
+                // only Text||Comment nodes are of interest; trim whitespace to test for all
+                // whitespace stuff
+                let trimmed = match node.value() {
+                    Node::Text(inner) => inner.text.trim(),
+                    Node::Comment(inner) => inner.comment.trim(),
+                    _ => return None,
+                };
+
+                if trimmed.is_empty() {
+                    return None;
+                }
+
+                // found a non-empty Text||Comment node, need to check its parent to determine if
+                // it's a <script>||<style> tag. We're assuming text within a script||style tag is
+                // uninteresting
+                match node.parent()?.value() {
+                    Node::Element(element)
+                        if element.name() != "script" && element.name() != "style" =>
+                    {
+                        // non-empty Text||Comment with a non-script|style parent; return the
+                        // trimmed up string, space-terminated so adjacent nodes don't merge
+                        Some(format!("{} ", trimmed))
+                    }
+                    // parent is a script/style tag, or isn't an Element node at all
+                    _ => None,
+                }
+            })
+            .collect::<String>();
+
+        // call `new` to push the parsed html through the pre-processing pipeline and process all
+        // the words
+        Self::new(&text)
+    }
+
+    /// relative frequency of `term` within this document: the number of times the term was seen
+    /// divided by the document's total number of terms; 0.0 when the term isn't tracked
+    pub(super) fn term_frequency(&self, term: &Term) -> f32 {
+        if let Some(metadata) = self.terms.get(term) {
+            metadata.count() as f32 / self.number_of_terms() as f32
+        } else {
+            0.0
+        }
+    }
+
+    /// immutable reference to the collection of terms and their metadata
+    pub(super) fn terms(&self) -> &HashMap<Term, TermMetaData> {
+        &self.terms
+    }
+
+    /// total number of pre-processed words in the document, repeats included; used as the
+    /// denominator in `term_frequency`
+    fn number_of_terms(&self) -> usize {
+        self.number_of_terms
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    /// `Document::new` should preprocess text and generate a hashmap of `Term, TermMetadata`
+    fn nlp_document_creation_from_text() {
+        let document = Document::new("The air quality in Singapore got worse on Wednesday.");
+        assert_eq!(document.number_of_terms, 5);
+
+        // term frequencies aren't calculated on `new`, so the term_frequencies slice is empty
+        // and document frequency is zero
+        let no_frequencies: &[f32] = &[];
+
+        for word in ["air", "quality", "singapore", "worse", "wednesday"] {
+            let term = Term::new(word);
+            assert!(document.terms().contains_key(&term));
+
+            let metadata = &document.terms()[&term];
+            assert_eq!(metadata.count(), 1);
+            assert_eq!(metadata.term_frequencies(), no_frequencies);
+            assert_eq!(metadata.document_frequency(), 0);
+        }
+    }
+
+    #[test]
+    /// `Document::from_html` should preprocess html and generate a hashmap of `Term, TermMetadata`
+    fn nlp_document_creation_from_html() {
+        let wordless_pages = [
+            "<html></html>",
+            "<html><body><p></p></body></html>",
+            "<!DOCTYPE html><html><!DOCTYPE html><p></p></html>",
+        ];
+        for page in wordless_pages {
+            assert_eq!(Document::from_html(page).number_of_terms, 0);
+        }
+
+        // p tag for is_text check and comment for is_comment
+        let document = Document::from_html(
+            "<html><body><p>The air quality in Singapore.</p><!--got worse on Wednesday--></body></html>",
+        );
+        assert_eq!(document.number_of_terms, 5);
+
+        // term frequencies aren't calculated by `from_html`, so the term_frequencies slice is
+        // empty and document frequency is zero
+        let no_frequencies: &[f32] = &[];
+
+        for word in ["air", "quality", "singapore", "worse", "wednesday"] {
+            let term = Term::new(word);
+            assert!(document.terms().contains_key(&term));
+
+            let metadata = &document.terms()[&term];
+            assert_eq!(metadata.count(), 1);
+            assert_eq!(metadata.term_frequencies(), no_frequencies);
+            assert_eq!(metadata.document_frequency(), 0);
+        }
+    }
+
+    #[test]
+    /// `term_frequency` is count / total terms for known terms and 0.0 for unknown ones
+    fn term_frequency_validation() {
+        let document =
+            Document::new("The air quality in Singapore got worse on Wednesday. Air Jordan.");
+
+        // "air" shows up twice among the seven terms that survive pre-processing
+        let air = Term::new("air");
+        assert!((document.term_frequency(&air) - 0.2857143).abs() <= f32::EPSILON);
+
+        let unknown = Term::new("derpatronic");
+        assert_eq!(document.term_frequency(&unknown), 0.0);
+    }
+
+    #[test]
+    /// `terms` and `number_of_terms` expose what `Document::new` collected
+    fn document_accessor_test() {
+        let allowed = ["air", "quality", "singapore", "worse", "wednesday"];
+
+        let document = Document::new("The air quality in Singapore got worse on Wednesday.");
+
+        assert_eq!(document.number_of_terms(), 5);
+
+        // every tracked term must be one of the allowed words
+        for term in document.terms().keys() {
+            assert!(allowed.contains(&term.raw()));
+        }
+    }
+
+    #[test]
+    /// text inside <script> and <style> tags must not end up in the document
+    fn document_creation_skips_script_and_style_tags() {
+        let allowed = ["worse", "wednesday"];
+
+        let document = Document::from_html(
+            "<body><script>The air quality</script><style>in Singapore</style><p>got worse on Wednesday.</p></body>",
+        );
+
+        assert_eq!(document.number_of_terms(), 2);
+
+        for term in document.terms().keys() {
+            assert!(allowed.contains(&term.raw()));
+        }
+    }
+}
--- a/src/nlp/mod.rs
+++ b/src/nlp/mod.rs
@@ -0,0 +1,10 @@
+//! small stand-alone tf-idf library, specifically designed for use in feroxbuster
+
+mod constants;
+mod document;
+mod model;
+mod term;
+mod utils;
+
+pub(crate) use self::document::Document;
+pub(crate) use self::model::TfIdf;
--- a/src/nlp/model.rs
+++ b/src/nlp/model.rs
@@ -0,0 +1,185 @@
+use super::document::Document;
+use super::term::{Term, TermMetaData};
+use super::utils::{inverse_document_frequency, tf_idf_score};
+use std::borrow::{Borrow, BorrowMut};
+use std::collections::HashMap;
+
+/// data container for the TF-IDF model
+///
+/// `Document`s are not retained by the model; only per-term metadata and a running count of
+/// the documents seen so far are kept
+#[derive(Debug, Default)]
+pub(crate) struct TfIdf {
+    /// collection of `Term`s and their associated metadata
+    terms: HashMap<Term, TermMetaData>,
+
+    /// number of documents processed by the model
+    num_documents: usize,
+}
+
+impl TfIdf {
+    /// create an empty TF-IDF model; must be populated with `add_document` prior to use
+    pub(crate) fn new() -> Self {
+        Self::default()
+    }
+
+    /// accessor method for the collection of `Term`s and `TermMetaData`
+    fn terms(&self) -> &HashMap<Term, TermMetaData> {
+        self.terms.borrow()
+    }
+
+    /// accessor method for the number of `Document`s the model has processed
+    pub(crate) fn num_documents(&self) -> usize {
+        self.num_documents
+    }
+
+    /// add a `Document` to the model
+    ///
+    /// for every term in the document, the term's frequency within that document is appended
+    /// to the model's metadata for the term. Scores are NOT updated here; call
+    /// `calculate_tf_idf_scores` once all documents of interest have been added
+    pub(crate) fn add_document(&mut self, document: Document) {
+        // the document itself isn't preserved, so this counter is the only record of the corpus
+        // size; `calculate_tf_idf_scores` relies on it being up to date
+        self.num_documents += 1;
+
+        for (term, doc_metadata) in document.terms().iter() {
+            // an incoming `Term` from a `Document` only has a valid `count` for that particular
+            // document; need to get the term frequency while both are known/valid
+            let term_frequency = document.term_frequency(term);
+
+            // first sighting of a term seeds the model's entry with the document's metadata
+            let metadata = self
+                .terms
+                .entry(term.clone())
+                .or_insert_with(|| doc_metadata.to_owned());
+
+            metadata.term_frequencies_mut().push(term_frequency);
+        }
+    }
+
+    /// (re)-calculate tf-idf scores for all terms, given the current number of documents
+    ///
+    /// a term's score is the average of `term frequency * idf` over every term frequency
+    /// recorded for it
+    ///
+    /// # Notes
+    ///
+    /// previously calculated scores are overwritten, which keeps old `Term`s at the same
+    /// relative level as new ones WRT corpus size
+    pub(crate) fn calculate_tf_idf_scores(&mut self) {
+        // corpus size is the same for every term, convert it once
+        let num_documents = self.num_documents as f32;
+
+        for metadata in self.terms.borrow_mut().values_mut() {
+            let average = {
+                let frequencies = metadata.term_frequencies();
+
+                if frequencies.is_empty() {
+                    // nothing recorded for this term; averaging would be 0.0 / 0.0 (NaN)
+                    0.0
+                } else {
+                    // idf only depends on the term, not on the individual frequency
+                    let idf = inverse_document_frequency(
+                        num_documents,
+                        metadata.document_frequency() as f32,
+                    );
+
+                    let total: f32 = frequencies
+                        .iter()
+                        .map(|frequency| tf_idf_score(*frequency, idf))
+                        .sum();
+
+                    total / frequencies.len() as f32
+                }
+            };
+
+            *metadata.tf_idf_score_mut() = average;
+        }
+    }
+
+    /// select all terms with a tf-idf score greater than zero
+    ///
+    /// scores are only meaningful after `calculate_tf_idf_scores` has run
+    pub(crate) fn all_words(&self) -> Vec<String> {
+        self.terms()
+            .iter()
+            .filter(|(_, metadata)| metadata.tf_idf_score() > 0.0)
+            .map(|(term, _)| term.raw().to_owned())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// helper for this test suite: look up the tf-idf score of `word` in `model`
+    ///
+    /// panics if `word` is not a term known to the model
+    fn get_score(word: &str, model: &TfIdf) -> f32 {
+        model.terms().get(&Term::new(word)).unwrap().tf_idf_score()
+    }
+
+    #[test]
+    /// given the example data at https://remykarem.github.io/tfidf-demo/, ensure the model
+    /// produces the same results
+    fn model_generates_expected_tf_idf_scores() {
+        let one = "Air quality in the sunny island improved gradually throughout Wednesday.";
+        let two =
+            "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.";
+        let three = "The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island";
+        let four = "The air quality in Singapore got worse on Wednesday.";
+
+        let docs = [one, two, three, four];
+        let mut model = TfIdf::new();
+
+        for doc in docs.iter() {
+            let d = Document::new(doc);
+            model.add_document(d);
+        }
+
+        // number of distinct terms across all four documents
+        assert_eq!(model.terms().len(), 19);
+
+        model.calculate_tf_idf_scores();
+
+        // scores are compared exactly, so any change to the order of float operations in the
+        // model can break these assertions
+        assert_eq!(get_score("quality", &model), 0.0);
+        assert_eq!(get_score("air", &model), 0.0);
+        assert_eq!(get_score("wednesday", &model), 0.018906077);
+        assert_eq!(get_score("island", &model), 0.014047348);
+        assert_eq!(get_score("singapore", &model), 0.016427131);
+        assert_eq!(get_score("sunny", &model), 0.08600858);
+        assert_eq!(get_score("monitoring", &model), 0.05017167);
+        assert_eq!(get_score("stations", &model), 0.05017167);
+        assert_eq!(get_score("parts", &model), 0.05017167);
+        assert_eq!(get_score("haze", &model), 0.06689556);
+        assert_eq!(get_score("hit", &model), 0.06689556);
+        assert_eq!(get_score("worse", &model), 0.04682689);
+    }
+
+    #[test]
+    /// given the example data at https://remykarem.github.io/tfidf-demo/, ensure `all_words`
+    /// returns every word that is expected to have a tf-idf score above zero
+    fn select_n_words_grabs_correct_words() {
+        let one = "Air quality in the sunny island improved gradually throughout Wednesday.";
+        let two =
+            "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.";
+        let three = "The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island";
+        let four = "The air quality in Singapore got worse on Wednesday.";
+
+        let docs = [one, two, three, four];
+        let mut model = TfIdf::new();
+
+        for doc in docs.iter() {
+            let d = Document::new(doc);
+            model.add_document(d);
+        }
+
+        assert_eq!(model.num_documents(), 4);
+
+        model.calculate_tf_idf_scores();
+
+        let non_zero_words = model.all_words();
+
+        // `all_words` makes no ordering promise here, so only membership is checked
+        [
+            "gradually",
+            "network",
+            "hit",
+            "located",
+            "continued",
+            "island",
+            "worse",
+            "monitored",
+            "monitoring",
+            "haze",
+            "different",
+            "stations",
+            "sunny",
+            "singapore",
+            "improved",
+            "parts",
+            "wednesday",
+        ]
+        .iter()
+        .for_each(|word| {
+            assert!(non_zero_words.contains(&word.to_string()));
+        });
+    }
+}
--- a/src/nlp/term.rs
+++ b/src/nlp/term.rs
@@ -0,0 +1,105 @@
+use std::borrow::BorrowMut;
+
+/// a single word, wrapped so it can be used as a key during text processing
+#[derive(Debug, Hash, Eq, PartialEq, Default, Clone)]
+pub(crate) struct Term {
+    /// the word itself
+    raw: String,
+}
+
+impl Term {
+    /// build a `Term` that owns a copy of the given word
+    pub(super) fn new(word: &str) -> Self {
+        let raw = String::from(word);
+
+        Self { raw }
+    }
+
+    /// borrow the word this `Term` was built from
+    pub(super) fn raw(&self) -> &str {
+        self.raw.as_str()
+    }
+}
+
+/// bookkeeping kept alongside a `Term`
+#[derive(Debug, Clone, Default)]
+pub(super) struct TermMetaData {
+    /// number of times the associated `Term` was seen in a single document
+    count: u32,
+
+    /// term frequencies recorded for the associated `Term`
+    term_frequencies: Vec<f32>,
+
+    /// tf-idf score for the associated `Term`
+    tf_idf_score: f32,
+}
+
+impl TermMetaData {
+    /// create an empty metadata container (zero count, no frequencies, zero score)
+    pub(super) fn new() -> Self {
+        Default::default()
+    }
+
+    /// number of times a `Term` was seen in a particular `Document`
+    pub(super) fn count(&self) -> u32 {
+        self.count
+    }
+
+    /// mutable reference to the number of times a `Term` was seen in a particular `Document`
+    pub(super) fn count_mut(&mut self) -> &mut u32 {
+        self.count.borrow_mut()
+    }
+
+    /// the recorded term frequencies, as a read-only slice
+    pub(super) fn term_frequencies(&self) -> &[f32] {
+        self.term_frequencies.as_slice()
+    }
+
+    /// mutable reference to the collection of term frequencies
+    pub(super) fn term_frequencies_mut(&mut self) -> &mut Vec<f32> {
+        self.term_frequencies.borrow_mut()
+    }
+
+    /// document frequency of the `Term`, i.e. how many term frequencies have been recorded
+    pub(super) fn document_frequency(&self) -> usize {
+        self.term_frequencies.len()
+    }
+
+    /// the term's current tf-idf score (returned by value)
+    pub(super) fn tf_idf_score(&self) -> f32 {
+        self.tf_idf_score
+    }
+
+    /// mutable reference to the term's tf-idf score
+    pub(super) fn tf_idf_score_mut(&mut self) -> &mut f32 {
+        self.tf_idf_score.borrow_mut()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    /// a `Term` hands back the exact word it was built from
+    fn nlp_term_accessor_test() {
+        assert_eq!(Term::new("stuff").raw(), "stuff");
+    }
+
+    #[test]
+    /// every write through a mutable accessor is visible through the matching read accessor
+    fn nlp_term_metadata_accessor_test() {
+        let mut meta = TermMetaData::new();
+
+        // count
+        *meta.count_mut() += 1;
+        assert_eq!(meta.count(), 1);
+
+        // term frequencies, and the document frequency derived from them
+        meta.term_frequencies_mut().push(1.0);
+        assert_eq!(meta.document_frequency(), 1);
+        assert_eq!(meta.term_frequencies().first(), Some(&1.0));
+
+        // tf-idf score
+        *meta.tf_idf_score_mut() = 1.0_f32;
+        assert_eq!(meta.tf_idf_score(), 1.0);
+    }
+}
--- a/src/nlp/utils.rs
+++ b/src/nlp/utils.rs
@@ -0,0 +1,158 @@
+use super::constants::{BOUNDED_WORD_REGEX, STOP_WORDS};
+use regex::Captures;
+use std::borrow::Cow;
+
+/// pre-processing pipeline: strips punctuation, normalizes word case (utf-8 included) to
+/// lowercase, drops stop words, and finally splits what's left on whitespace
+pub(super) fn preprocess(text: &str) -> Vec<String> {
+    let lowered = normalize_case(remove_punctuation(text));
+    let filtered = remove_stop_words(&lowered);
+
+    filtered.split_whitespace().map(String::from).collect()
+}
+
+/// optimized version of `str::to_lowercase`
+///
+/// input that contains no uppercase characters is handed back untouched, which avoids an
+/// allocation when the caller passed borrowed data. Otherwise, an owned copy is returned in
+/// which every character reported as uppercase by `char::is_uppercase` has been replaced by
+/// its full lowercase mapping
+fn normalize_case<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
+    let input = input.into();
+
+    let first_idx = match input.find(char::is_uppercase) {
+        Some(idx) => idx,
+        // nothing to convert
+        None => return input,
+    };
+
+    // everything before the first uppercase character can be copied verbatim
+    let mut output = String::with_capacity(input.len());
+    output.push_str(&input[..first_idx]);
+
+    for c in input[first_idx..].chars() {
+        if c.is_uppercase() {
+            // a single char may lowercase to more than one char (ex: 'İ' -> "i\u{307}"), so
+            // the whole mapping is appended instead of just its first char
+            output.extend(c.to_lowercase());
+        } else {
+            output.push(c);
+        }
+    }
+
+    Cow::Owned(output)
+}
+
+/// strip ascii and some utf-8 punctuation characters from the given string
+fn remove_punctuation(text: &str) -> String {
+    // non-separator type chars: these simply vanish
+    let dropped = [
+        '!', '\\', '"', '#', '$', '%', '&', '(', ')', '*', '+', ':', ';', '<', '=', '>', '?',
+        '@', '[', ']', '^', '{', '}', '|', '~', ',', '\'', '“', '”', '’', '‘', '’', '‘',
+    ];
+
+    // separator type chars: these become a space, which attempts to keep things like
+    // 'aboutblogfaqcontactpresstermslexicondisclosure' from happening
+    let separators = ['/', '–', '—', '.'];
+
+    let without_dropped = text.replace(dropped, "");
+
+    without_dropped.replace(separators, " ")
+}
+
+/// blank out every match of `BOUNDED_WORD_REGEX` that is listed in `STOP_WORDS`
+///
+/// matches that aren't stop words are kept as-is; text outside of a match is left untouched
+fn remove_stop_words(text: &str) -> String {
+    let without_stop_words = BOUNDED_WORD_REGEX.replace_all(text, |caps: &Captures| {
+        let candidate = &caps[0];
+
+        if STOP_WORDS.contains(&candidate) {
+            String::new()
+        } else {
+            candidate.to_owned()
+        }
+    });
+
+    without_stop_words.into_owned()
+}
+
+/// inverse document frequency: base-10 logarithm of `num_docs / doc_frequency`
+pub(super) fn inverse_document_frequency(num_docs: f32, doc_frequency: f32) -> f32 {
+    let ratio = num_docs / doc_frequency;
+
+    ratio.log10()
+}
+
+/// term frequency-inverse document frequency (tf-idf): the product of the two inputs
+pub(super) fn tf_idf_score(term_frequency: f32, idf: f32) -> f32 {
+    idf * term_frequency
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    /// ensure all expected punctuation characters are removed
+    fn test_remove_punctuation() {
+        let tester = "!\\\"#$%&()*+/:;<=>?@[]^{}|~,.'“”’‘–—\n‘’";
+        // the four leading spaces in `"    \n"` come from the separator-type characters in the
+        // input, which are replaced with a space instead of being removed
+        assert_eq!(remove_punctuation(tester), "    \n");
+    }
+
+    #[test]
+    /// ensure uppercase characters are swapped to lowercase
+    fn test_normalize_case() {
+        let tester = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        assert_eq!(normalize_case(tester), "abcdefghijklmnopqrstuvwxyz");
+    }
+
+    #[test]
+    /// ensure all stop words are removed from the list of stopwords ... incestuous
+    fn test_remove_stopwords() {
+        // the input is every stop word, joined by a single space
+        let all_words = STOP_WORDS
+            .iter()
+            .map(|&word| word.to_string())
+            .collect::<Vec<_>>()
+            .join(" ");
+
+        let removed = remove_stop_words(&all_words).replace(' ', "");
+
+        // the remaining chars are from the contraction-based stop words
+        assert_eq!(removed, "'d'll'm''s'ven'tn‘tn’t‘d‘ll‘m‘‘s‘ve’d’ll’m’’s’ve");
+    }
+
+    #[test]
+    /// ensure `preprocess` strips punctuation, lowercases, and drops stop words before
+    /// splitting the remaining text into words
+    fn test_preprocess_results() {
+        let tester = "WHY are Y'all YELLing?";
+        assert_eq!(&preprocess(tester), &["yall", "yelling"]);
+    }
+
+    #[test]
+    /// ensure our calculations conform to the example provided at the link below
+    ///
+    /// https://www.kaggle.com/paulrohan2020/tf-idf-tutorial/notebook#TF-IDF-Model
+    ///
+    /// Consider a document containing 100 words wherein the word cat appears 3 times.
+    /// The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. Now, assume we have 10
+    /// million documents and the word cat appears in one thousand of these. Then, the inverse
+    /// document frequency (i.e., idf) is calculated as log(10,000,000 / 1,000) = 4. Thus, the
+    /// Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12.
+    fn idf_returns_expected_value() {
+        let num_docs = 10_000_000_f32;
+        let num_occurrences = 1_000_f32;
+        // compared within float tolerance rather than for exact equality
+        let abs_diff = (inverse_document_frequency(num_docs, num_occurrences) - 4.0).abs();
+
+        assert!(abs_diff <= f32::EPSILON);
+    }
+
+    #[test]
+    /// ensure our calculations conform to the example provided at the link below
+    ///
+    /// https://www.kaggle.com/paulrohan2020/tf-idf-tutorial/notebook#TF-IDF-Model
+    ///
+    /// Consider a document containing 100 words wherein the word cat appears 3 times.
+    /// The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. Now, assume we have 10
+    /// million documents and the word cat appears in one thousand of these. Then, the inverse
+    /// document frequency (i.e., idf) is calculated as log(10,000,000 / 1,000) = 4. Thus, the
+    /// Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12.
+    fn tf_idf_returns_expected_value() {
+        let term_freq = 0.03_f32;
+        let num_docs = 10_000_000_f32;
+        let num_occurrences = 1_000_f32;
+        let idf = inverse_document_frequency(num_docs, num_occurrences);
+        // compared within float tolerance rather than for exact equality
+        let abs_diff = (tf_idf_score(term_freq, idf) - 0.12).abs();
+
+        assert!(abs_diff <= f32::EPSILON);
+    }
+}
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,5 +1,5 @@
 use clap::{
-    crate_authors, crate_description, crate_name, crate_version, App, Arg, ArgGroup, ValueHint,
+    crate_authors, crate_description, crate_name, crate_version, Arg, ArgGroup, Command, ValueHint,
 };
 use lazy_static::lazy_static;
 use regex::Regex;
@@ -25,8 +25,8 @@ lazy_static! {
 }

 /// Create and return an instance of [clap::Command](https://docs.rs/clap/latest/clap/struct.Command.html), i.e. the Command Line Interface's configuration
-pub fn initialize() -> App<'static> {
-    let app = App::new(crate_name!())
+pub fn initialize() -> Command<'static> {
+    let app = Command::new(crate_name!())
        .version(crate_version!())
        .author(crate_authors!())
        .about(crate_description!());
@@ -42,7 +42,7 @@ pub fn initialize() -> App<'static> {
                .required_unless_present_any(&["stdin", "resume_from"])
                .help_heading("Target selection")
                .value_name("URL")
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .value_hint(ValueHint::Url)
                .help("The target URL (required, unless [--stdin || --resume-from] used)"),
        )
@@ -65,6 +65,36 @@ pub fn initialize() -> App<'static> {
                .takes_value(true),
        );

+    /////////////////////////////////////////////////////////////////////
+    // group - composite settings
+    /////////////////////////////////////////////////////////////////////
+    let app = app
+        .arg(
+            Arg::new("burp")
+                .long("burp")
+                .help_heading("Composite settings")
+                .conflicts_with_all(&["proxy", "insecure", "burp_replay"])
+                .help("Set --proxy to http://127.0.0.1:8080 and set --insecure to true"),
+        )
+        .arg(
+            Arg::new("burp_replay")
+                .long("burp-replay")
+                .help_heading("Composite settings")
+                .conflicts_with_all(&["replay_proxy", "insecure"])
+                .help("Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true"),
+        )
+        .arg(
+            Arg::new("smart")
+                .long("smart")
+                .help_heading("Composite settings")
+                .help("Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true"),
+        ).arg(
+            Arg::new("thorough")
+                .long("thorough")
+                .help_heading("Composite settings")
+                .help("Use the same settings as --smart and set --collect-extensions to true"),
+        );
+
    /////////////////////////////////////////////////////////////////////
    // group - proxy settings
    /////////////////////////////////////////////////////////////////////
@@ -101,7 +131,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .requires("replay_proxy")
                .help_heading("Proxy settings")
                .help(
@@ -138,7 +168,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Request settings")
                .help(
                    "File extension(s) to search for (ex: -x php -x pdf js)",
@@ -152,7 +182,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Request settings")
                .help(
                    "Which HTTP request method(s) should be sent (default: GET)",
@@ -177,7 +207,7 @@ pub fn initialize() -> App<'static> {
                .help_heading("Request settings")
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help(
                    "Specify HTTP headers to be used in each request (ex: -H Header:val -H 'stuff: things')",
                ),
@@ -190,7 +220,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Request settings")
                .help(
                    "Specify HTTP cookies to be used in each request (ex: -b stuff=things)",
@@ -204,7 +234,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Request settings")
                .help(
                    "Request's URL query parameters (ex: -Q token=stuff -Q secret=key)",
@@ -229,7 +259,7 @@ pub fn initialize() -> App<'static> {
            .takes_value(true)
            .multiple_values(true)
            .multiple_occurrences(true)
-            .use_delimiter(true)
+            .use_value_delimiter(true)
            .help_heading("Request filters")
            .help("URL(s) or Regex Pattern(s) to exclude from recursion/scans"),
    );
@@ -246,7 +276,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)",
@@ -260,7 +290,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out messages via regular expression matching on the response's body (ex: -X '^ignore me$')",
@@ -274,7 +304,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out messages of a particular word count (ex: -W 312 -W 91,82)",
@@ -288,7 +318,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out messages of a particular line count (ex: -N 20 -N 31,30)",
@@ -302,7 +332,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out status codes (deny list) (ex: -C 200 -C 401)",
@@ -316,7 +346,7 @@ pub fn initialize() -> App<'static> {
                .multiple_values(true)
                .multiple_occurrences(true)
                .value_hint(ValueHint::Url)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)",
@@ -330,7 +360,7 @@ pub fn initialize() -> App<'static> {
                .takes_value(true)
                .multiple_values(true)
                .multiple_occurrences(true)
-                .use_delimiter(true)
+                .use_value_delimiter(true)
                .help_heading("Response filters")
                .help(
                    "Status Codes to include (allow list) (default: 200 204 301 302 307 308 401 403 405)",
@@ -470,6 +500,40 @@ pub fn initialize() -> App<'static> {
                .takes_value(false)
                .help_heading("Scan settings")
                .help("Don't auto-filter wildcard responses")
+        ).arg(
+            Arg::new("collect_extensions")
+                .short('E')
+                .long("collect-extensions")
+                .takes_value(false)
+                .help_heading("Dynamic collection settings")
+                .help("Automatically discover extensions and add them to --extensions (unless they're in --dont-collect)")
+        ).arg(
+            Arg::new("collect_backups")
+                .short('B')
+                .long("collect-backups")
+                .takes_value(false)
+                .help_heading("Dynamic collection settings")
+                .help("Automatically request likely backup extensions for \"found\" urls")
+        ).arg(
+            Arg::new("collect_words")
+                .short('g')
+                .long("collect-words")
+                .takes_value(false)
+                .help_heading("Dynamic collection settings")
+                .help("Automatically discover important words from within responses and add them to the wordlist")
+        ).arg(
+            Arg::new("dont_collect")
+                .short('I')
+                .long("dont-collect")
+                .value_name("FILE_EXTENSION")
+                .takes_value(true)
+                .multiple_values(true)
+                .multiple_occurrences(true)
+                .use_value_delimiter(true)
+                .help_heading("Dynamic collection settings")
+                .help(
+                    "File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)",
+                ),
        );

    /////////////////////////////////////////////////////////////////////
@@ -527,6 +591,13 @@ pub fn initialize() -> App<'static> {
                .help_heading("Output settings")
                .help("Output file to write log entries (use w/ --json for JSON entries)")
                .takes_value(true),
+        )
+        .arg(
+            Arg::new("no_state")
+                .long("no-state")
+                .takes_value(false)
+                .help_heading("Output settings")
+                .help("Disable state output file (*.state)")
        );

    /////////////////////////////////////////////////////////////////////
--- a/src/response.rs
+++ b/src/response.rs
@@ -60,6 +60,9 @@ pub struct FeroxResponse {

    /// whether the user passed --quiet|--silent on the command line
    pub(crate) output_level: OutputLevel,
+
+    /// Url's file extension, if one exists
+    pub(crate) extension: Option<String>,
 }

 /// implement Default trait for FeroxResponse
@@ -78,6 +81,7 @@ impl Default for FeroxResponse {
            headers: Default::default(),
            wildcard: false,
            output_level: Default::default(),
+            extension: None,
        }
    }
 }
@@ -205,7 +209,6 @@ impl FeroxResponse {
        response: Response,
        original_url: &str,
        method: &str,
-        read_body: bool,
        output_level: OutputLevel,
    ) -> Self {
        let url = response.url().clone();
@@ -213,21 +216,12 @@ impl FeroxResponse {
        let headers = response.headers().clone();
        let content_length = response.content_length().unwrap_or(0);

-        let text = if read_body {
-            // .text() consumes the response, must be called last
-            // additionally, --extract-links is currently the only place we use the body of the
-            // response, so we forego the processing if not performing extraction
-            match response.text().await {
-                // await the response's body
-                Ok(text) => text,
-                Err(e) => {
-                    log::warn!("Could not parse body from response: {}", e);
-                    String::new()
-                }
-            }
-        } else {
-            String::new()
-        };
+        // .text() consumes the response, must be called last
+        let text = response
+            .text()
+            .await
+            .with_context(|| "Could not parse body from response")
+            .unwrap_or_default();

        let line_count = text.lines().count();
        let word_count = text.lines().map(|s| s.split_whitespace().count()).sum();
@@ -244,9 +238,65 @@ impl FeroxResponse {
            word_count,
            output_level,
            wildcard: false,
+            extension: None,
        }
    }

+    /// if --collect-extensions is used, examine the response's url and grab the file's extension
+    /// if one is available to be grabbed. If an extension is found, send it to the ScanHandler
+    /// for further processing
+    ///
+    /// the extension is stored on `self.extension` whenever one is found; it is only sent to
+    /// the scan handler when the response's status code is in the configured allow list
+    ///
+    /// # Errors
+    ///
+    /// outside of test builds, a failure to send the scan command is returned to the caller
+    pub(crate) fn parse_extension(&mut self, handles: Arc<Handles>) -> Result<()> {
+        log::trace!("enter: parse_extension");
+
+        if !handles.config.collect_extensions {
+            // early return, --collect-extensions not used
+            return Ok(());
+        }
+
+        // path_segments:
+        //   Return None for cannot-be-a-base URLs.
+        //   When Some is returned, the iterator always contains at least one string
+        //     (which may be empty).
+        //
+        // meaning: a cannot-be-a-base URL is treated like an empty filename instead of
+        // panicking, and the worst outcome otherwise is an empty string
+        let filename = self
+            .url
+            .path_segments()
+            .and_then(|segments| segments.last())
+            .unwrap_or_default();
+
+        let parts: Vec<_> = filename
+            .split('.')
+            // keep things like /.bash_history from becoming an extension
+            .filter(|part| !part.is_empty())
+            .collect();
+
+        // filename + at least one extension, i.e. whatever.js becomes ["whatever", "js"]; an
+        // empty filename yields no parts at all and is skipped by the same pattern
+        if let [_, .., extension] = parts.as_slice() {
+            self.extension = Some((*extension).to_owned());
+        }
+
+        if let Some(extension) = &self.extension {
+            if handles
+                .config
+                .status_codes
+                .contains(&self.status().as_u16())
+            {
+                // only add extensions to those responses that pass our checks; filtered out
+                // status codes are handled by should_filter, but we need to still check against
+                // the allow list for what we want to keep
+                #[cfg(test)]
+                handles
+                    .send_scan_command(Command::AddDiscoveredExtension(extension.to_owned()))
+                    .unwrap_or_default();
+                #[cfg(not(test))]
+                handles.send_scan_command(Command::AddDiscoveredExtension(extension.to_owned()))?;
+            }
+        }
+
+        log::trace!("exit: parse_extension");
+        Ok(())
+    }
+
    /// Helper function that determines if the configured maximum recursion depth has been reached
    ///
    /// Essentially looks at the Url path and determines how many directories are present in the
@@ -484,6 +534,10 @@ impl Serialize for FeroxResponse {
        state.serialize_field("line_count", &self.line_count)?;
        state.serialize_field("word_count", &self.word_count)?;
        state.serialize_field("headers", &headers)?;
+        state.serialize_field(
+            "extension",
+            self.extension.as_ref().unwrap_or(&String::new()),
+        )?;

        state.end()
    }
@@ -508,6 +562,7 @@ impl<'de> Deserialize<'de> for FeroxResponse {
            output_level: Default::default(),
            line_count: 0,
            word_count: 0,
+            extension: None,
        };

        let map: HashMap<String, Value> = HashMap::deserialize(deserializer)?;
@@ -576,6 +631,11 @@ impl<'de> Deserialize<'de> for FeroxResponse {
                        response.wildcard = result;
                    }
                }
+                "extension" => {
+                    if let Some(result) = value.as_str() {
+                        response.extension = Some(result.to_string());
+                    }
+                }
                _ => {}
            }
        }
@@ -587,6 +647,8 @@ impl<'de> Deserialize<'de> for FeroxResponse {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::config::Configuration;
+    use std::default::Default;

    #[test]
    /// call reached_max_depth with max depth of zero, which is infinite recursion, expect false
@@ -595,16 +657,7 @@ mod tests {
        let url = Url::parse("http://localhost").unwrap();
        let response = FeroxResponse {
            url,
-            original_url: String::new(),
-            status: Default::default(),
-            method: Default::default(),
-            text: "".to_string(),
-            content_length: 0,
-            line_count: 0,
-            word_count: 0,
-            headers: Default::default(),
-            wildcard: false,
-            output_level: Default::default(),
+            ..Default::default()
        };
        let result = response.reached_max_depth(0, 0, handles);
        assert!(!result);
@@ -618,16 +671,7 @@ mod tests {
        let url = Url::parse("http://localhost/one/two").unwrap();
        let response = FeroxResponse {
            url,
-            original_url: String::new(),
-            status: Default::default(),
-            method: Default::default(),
-            text: "".to_string(),
-            content_length: 0,
-            line_count: 0,
-            word_count: 0,
-            headers: Default::default(),
-            wildcard: false,
-            output_level: Default::default(),
+            ..Default::default()
        };

        let result = response.reached_max_depth(0, 2, handles);
@@ -641,16 +685,7 @@ mod tests {
        let url = Url::parse("http://localhost").unwrap();
        let response = FeroxResponse {
            url,
-            original_url: String::new(),
-            status: Default::default(),
-            method: Default::default(),
-            text: "".to_string(),
-            content_length: 0,
-            line_count: 0,
-            word_count: 0,
-            headers: Default::default(),
-            wildcard: false,
-            output_level: Default::default(),
+            ..Default::default()
        };

        let result = response.reached_max_depth(0, 2, handles);
@@ -664,16 +699,7 @@ mod tests {
        let url = Url::parse("http://localhost/one/two").unwrap();
        let response = FeroxResponse {
            url,
-            original_url: String::new(),
-            status: Default::default(),
-            method: Default::default(),
-            text: "".to_string(),
-            content_length: 0,
-            line_count: 0,
-            word_count: 0,
-            headers: Default::default(),
-            wildcard: false,
-            output_level: Default::default(),
+            ..Default::default()
        };

        let result = response.reached_max_depth(2, 2, handles);
@@ -687,19 +713,71 @@ mod tests {
        let url = Url::parse("http://localhost/one/two/three").unwrap();
        let response = FeroxResponse {
            url,
-            original_url: String::new(),
-            status: Default::default(),
-            method: Default::default(),
-            text: "".to_string(),
-            content_length: 0,
-            line_count: 0,
-            word_count: 0,
-            headers: Default::default(),
-            wildcard: false,
-            output_level: Default::default(),
+            ..Default::default()
        };

        let result = response.reached_max_depth(0, 2, handles);
        assert!(result);
    }
+
+    #[test]
+    /// a url ending in `derp.js` should result in `js` being stored as the extension on the
+    /// `FeroxResponse` when extension collection is enabled
+    fn parse_extension_finds_simple_extension() {
+        let collecting = Configuration {
+            collect_extensions: true,
+            ..Default::default()
+        };
+        let (handles, _) = Handles::for_testing(None, Some(Arc::new(collecting)));
+
+        let mut response = FeroxResponse {
+            url: Url::parse("http://localhost/derp.js").unwrap(),
+            ..Default::default()
+        };
+
+        response.parse_extension(Arc::new(handles)).unwrap();
+
+        assert_eq!(response.extension.as_deref(), Some("js"));
+    }
+
+    #[test]
+    /// a dotfile such as `/.bash_history` has no real extension, so nothing should be stored
+    fn parse_extension_ignores_hidden_files() {
+        let collecting = Configuration {
+            collect_extensions: true,
+            ..Default::default()
+        };
+        let (handles, _) = Handles::for_testing(None, Some(Arc::new(collecting)));
+
+        let mut response = FeroxResponse {
+            url: Url::parse("http://localhost/.bash_history").unwrap(),
+            ..Default::default()
+        };
+
+        response.parse_extension(Arc::new(handles)).unwrap();
+
+        assert!(response.extension.is_none());
+    }
+
+    #[test]
+    /// when `--collect-extensions` isn't used, `parse_extension` should leave the response's
+    /// extension untouched, even for a url that clearly has one
+    fn parse_extension_early_returns_based_on_config() {
+        // no config override passed, so the testing handles use their own configuration
+        let (handles, _) = Handles::for_testing(None, None);
+
+        let mut response = FeroxResponse {
+            url: Url::parse("http://localhost/derp.js").unwrap(),
+            ..Default::default()
+        };
+
+        response.parse_extension(Arc::new(handles)).unwrap();
+
+        assert!(response.extension.is_none());
+    }
 }
--- a/src/scan_manager/scan.rs
+++ b/src/scan_manager/scan.rs
@@ -33,7 +33,7 @@ pub struct FeroxScan {
    pub(super) url: String,

    /// The type of scan
-    pub(super) scan_type: ScanType,
+    pub scan_type: ScanType,

    /// The order in which the scan was received
    pub(crate) scan_order: ScanOrder,
@@ -42,7 +42,7 @@ pub struct FeroxScan {
    pub(super) num_requests: u64,

    /// Status of this scan
-    pub(super) status: Mutex<ScanStatus>,
+    pub status: Mutex<ScanStatus>,

    /// The spawned tokio task performing this scan (uses tokio::sync::Mutex)
    pub(super) task: sync::Mutex<Option<JoinHandle<()>>>,
--- a/src/scan_manager/scan_container.rs
+++ b/src/scan_manager/scan_container.rs
@@ -13,6 +13,7 @@ use anyhow::Result;
 use reqwest::StatusCode;
 use serde::{ser::SerializeSeq, Serialize, Serializer};
 use std::{
+    collections::HashSet,
    convert::TryInto,
    fs::File,
    io::BufReader,
@@ -47,6 +48,9 @@ pub struct FeroxScans {

    /// whether or not the user passed --silent|--quiet on the command line
    output_level: OutputLevel,
+
+    /// set of unique extensions discovered and collected during scans
+    pub(crate) collected_extensions: RwLock<HashSet<String>>,
 }

 /// Serialize implementation for FeroxScans
@@ -58,17 +62,20 @@ impl Serialize for FeroxScans {
    where
        S: Serializer,
    {
-        if let Ok(scans) = self.scans.read() {
-            let mut seq = serializer.serialize_seq(Some(scans.len()))?;
-            for scan in scans.iter() {
-                seq.serialize_element(&*scan).unwrap_or_default();
-            }
+        match self.scans.read() {
+            Ok(scans) => {
+                let mut seq = serializer.serialize_seq(Some(scans.len() + 1))?;

-            seq.end()
-        } else {
-            // if for some reason we can't unlock the RwLock, just write an empty list
-            let seq = serializer.serialize_seq(Some(0))?;
-            seq.end()
+                for scan in scans.iter() {
+                    seq.serialize_element(&*scan).unwrap_or_default();
+                }
+                seq.end()
+            }
+            Err(_) => {
+                // if for some reason we can't unlock the RwLock, just write an empty list
+                let seq = serializer.serialize_seq(Some(0))?;
+                seq.end()
+            }
        }
    }
 }
@@ -109,7 +116,7 @@ impl FeroxScans {
        sentry
    }

-    /// load serialized FeroxScan(s) into this FeroxScans  
+    /// load serialized FeroxScan(s) and any previously collected extensions into this FeroxScans  
    pub fn add_serialized_scans(&self, filename: &str) -> Result<()> {
        log::trace!("enter: add_serialized_scans({})", filename);
        let file = File::open(filename)?;
@@ -122,18 +129,31 @@ impl FeroxScans {
                for scan in arr_scans {
                    let mut deser_scan: FeroxScan =
                        serde_json::from_value(scan.clone()).unwrap_or_default();
+
                    // FeroxScans gets -q value from config as usual; the FeroxScans themselves
                    // rely on that value being passed in. If the user starts a scan without -q
                    // and resumes the scan but adds -q, FeroxScan will not have the proper value
                    // without the line below
                    deser_scan.output_level = self.output_level;

-                    log::debug!("added: {}", deser_scan);
                    self.insert(Arc::new(deser_scan));
                }
            }
        }

+        if let Some(extensions) = state.get("collected_extensions") {
+            if let Some(arr_exts) = extensions.as_array() {
+                if let Ok(mut guard) = self.collected_extensions.write() {
+                    for ext in arr_exts {
+                        let deser_ext: String =
+                            serde_json::from_value(ext.clone()).unwrap_or_default();
+
+                        guard.insert(deser_ext);
+                    }
+                }
+            }
+        }
+
        log::trace!("exit: add_serialized_scans");
        Ok(())
    }
@@ -163,8 +183,8 @@ impl FeroxScans {
        None
    }

-    pub(super) fn get_base_scan_by_url(&self, url: &str) -> Option<Arc<FeroxScan>> {
-        log::trace!("enter: get_sub_paths_from_path({})", url);
+    pub fn get_base_scan_by_url(&self, url: &str) -> Option<Arc<FeroxScan>> {
+        log::trace!("enter: get_base_scan_by_url({})", url);

        // rmatch_indices returns tuples in index, match form, i.e. (10, "/")
        // with the furthest-right match in the first position in the vector
@@ -188,14 +208,14 @@ impl FeroxScans {
                for scan in guard.iter() {
                    let slice = url.index(0..*idx);
                    if slice == scan.url || format!("{}/", slice).as_str() == scan.url {
-                        log::trace!("enter: get_sub_paths_from_path -> {}", scan);
+                        log::trace!("enter: get_base_scan_by_url -> {}", scan);
                        return Some(scan.clone());
                    }
                }
            }
        }

-        log::trace!("enter: get_sub_paths_from_path -> None");
+        log::trace!("enter: get_base_scan_by_url -> None");
        None
    }
    /// add one to either 403 or 429 tracker in the scan related to the given url
@@ -511,4 +531,67 @@ impl FeroxScans {
        }
        scans
    }
+
+    /// given an extension, add it to `collected_extensions` if it isn't already present
+    ///
+    /// returns `true` only if this call actually inserted the extension, `false` if it was
+    /// already known or if the lock around `collected_extensions` could not be acquired
+    pub fn add_discovered_extension(&self, extension: String) -> bool {
+        log::trace!("enter: add_discovered_extension({})", extension);
+        let mut extension_added = false;
+
+        // note: the filter by --dont-collect happens in the event handler, since it has access
+        // to a Handles object from which it can check the config value. additionally, the check
+        // against --extensions is performed there for the same reason
+
+        if let Ok(extensions) = self.collected_extensions.read() {
+            // quicker to allow most to read and return and then reopen for write if necessary
+            if extensions.contains(&extension) {
+                log::trace!("exit: add_discovered_extension -> {}", extension_added);
+                return extension_added;
+            }
+        }
+
+        if let Ok(mut extensions) = self.collected_extensions.write() {
+            // the read lock above is released before the write lock is taken, so another
+            // caller may have inserted the same extension in between; re-check while holding
+            // the write lock so a duplicate is neither logged nor reported as newly added
+            if !extensions.contains(&extension) {
+                log::info!("discovered new extension: {}", extension);
+                // `insert` reports whether the value was newly added to the set
+                extension_added = extensions.insert(extension);
+            }
+        }
+
+        log::trace!("exit: add_discovered_extension -> {}", extension_added);
+        extension_added
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    /// unknown extension should be added to collected_extensions
+    fn unknown_extension_is_added_to_collected_extensions() {
+        let scans = FeroxScans::new(OutputLevel::Default);
+
+        assert_eq!(0, scans.collected_extensions.read().unwrap().len());
+
+        let added = scans.add_discovered_extension(String::from("js"));
+
+        assert!(added);
+        assert_eq!(1, scans.collected_extensions.read().unwrap().len());
+    }
+
+    #[test]
+    /// known extension should not be added to collected_extensions a second time; the call
+    /// must report `false` and leave the set's size unchanged
+    fn known_extension_is_not_added_to_collected_extensions() {
+        let scans = FeroxScans::new(OutputLevel::Default);
+
+        // seed the set directly so the extension is already known before the call under test
+        scans
+            .collected_extensions
+            .write()
+            .unwrap()
+            .insert(String::from("js"));
+
+        assert_eq!(1, scans.collected_extensions.read().unwrap().len());
+
+        let added = scans.add_discovered_extension(String::from("js"));
+
+        assert!(!added);
+        assert_eq!(1, scans.collected_extensions.read().unwrap().len());
+    }
 }
--- a/src/scan_manager/state.rs
+++ b/src/scan_manager/state.rs
@@ -2,6 +2,7 @@ use super::*;
 use crate::{config::Configuration, statistics::Stats, traits::FeroxSerialize, utils::fmt_err};
 use anyhow::{Context, Result};
 use serde::Serialize;
+use std::collections::HashSet;
 use std::sync::Arc;

 /// Data container for (de)?serialization of multiple items
@@ -18,6 +19,9 @@ pub struct FeroxState {

    /// Gathered statistics
    statistics: Arc<Stats>,
+
+    /// collected extensions
+    collected_extensions: HashSet<String>,
 }

 /// implementation of FeroxState
@@ -29,11 +33,17 @@ impl FeroxState {
        responses: &'static FeroxResponses,
        statistics: Arc<Stats>,
    ) -> Self {
+        let collected_extensions = match scans.collected_extensions.read() {
+            Ok(extensions) => extensions.clone(),
+            Err(_) => HashSet::new(),
+        };
+
        Self {
            scans,
            config,
            responses,
            statistics,
+            collected_extensions,
        }
    }
 }
--- a/src/scan_manager/tests.rs
+++ b/src/scan_manager/tests.rs
@@ -303,7 +303,7 @@ fn ferox_scans_serialize() {
 #[test]
 /// given a FeroxResponses, test that it serializes into the proper JSON entry
 fn ferox_responses_serialize() {
-    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#;
+    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#;
    let response: FeroxResponse = serde_json::from_str(json_response).unwrap();

    let responses = FeroxResponses::default();
@@ -321,7 +321,7 @@ fn ferox_responses_serialize() {
 /// given a FeroxResponse, test that it serializes into the proper JSON entry
 fn ferox_response_serialize_and_deserialize() {
    // deserialize
-    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#;
+    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#;
    let response: FeroxResponse = serde_json::from_str(json_response).unwrap();

    assert_eq!(response.url().as_str(), "https://nerdcore.com/css");
@@ -351,33 +351,42 @@ fn feroxstates_feroxserialize_implementation() {
    );
    let ferox_scans = FeroxScans::default();
    let saved_id = ferox_scan.id.clone();
+
    ferox_scans.insert(ferox_scan);

-    let config = Configuration::new().unwrap();
+    ferox_scans
+        .collected_extensions
+        .write()
+        .unwrap()
+        .insert(String::from("php"));
+
+    let mut config = Configuration::new().unwrap();
+
+    config.collect_extensions = true;
+
    let stats = Arc::new(Stats::new(config.json));

-    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#;
+    let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#;
    let response: FeroxResponse = serde_json::from_str(json_response).unwrap();
    RESPONSES.insert(response);

-    let ferox_state = FeroxState::new(
-        Arc::new(ferox_scans),
-        Arc::new(Configuration::new().unwrap()),
-        &RESPONSES,
-        stats,
-    );
+    let ferox_state = FeroxState::new(Arc::new(ferox_scans), Arc::new(config), &RESPONSES, stats);

    let expected_strs = predicates::str::contains("scans: FeroxScans").and(
        predicate::str::contains("config: Configuration")
            .and(predicate::str::contains("responses: FeroxResponses"))
            .and(predicate::str::contains("nerdcore.com"))
            .and(predicate::str::contains("/css"))
-            .and(predicate::str::contains("https://spiritanimal.com")),
+            .and(predicate::str::contains("https://spiritanimal.com"))
+            .and(predicate::str::contains("php")),
    );

    assert!(expected_strs.eval(&ferox_state.as_str()));

    let json_state = ferox_state.as_json().unwrap();
+
+    println!("echo '{}'|jq", json_state); // for debugging, if the test fails, can see what's going on
+
    for expected in [
        r#""scans""#,
        &format!(r#""id":"{}""#, saved_id),
@@ -445,14 +454,17 @@ fn feroxstates_feroxserialize_implementation() {
        r#""word_count":16"#,
        r#""headers""#,
        r#""server":"nginx/1.16.1"#,
+        r#""collect_extensions":true"#,
+        r#""collect_backups":false"#,
+        r#""collect_words":false"#,
+        r#""collected_extensions":["php"]"#,
+        r#""dont_collect":["tif","tiff","ico","cur","bmp","webp","svg","png","jpg","jpeg","jfif","gif","avif","apng","pjpeg","pjp","mov","wav","mpg","mpeg","mp3","mp4","m4a","m4p","m4v","ogg","webm","ogv","oga","flac","aac","3gp","css","zip","xls","xml","gz","tgz"]"#,
    ]
    .iter()
    {
        assert!(
-            predicates::str::contains(*expected).eval(&json_state),
-            "{}",
-            expected
-        )
+            predicates::str::contains(*expected).eval(&json_state)
+        );
    }
 }

--- a/src/scanner/ferox_scanner.rs
+++ b/src/scanner/ferox_scanner.rs
@@ -1,19 +1,22 @@
+use std::sync::atomic::AtomicBool;
 use std::{ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant};

 use anyhow::{bail, Result};
 use console::style;
 use futures::{stream, StreamExt};
+use indicatif::ProgressBar;
 use lazy_static::lazy_static;
 use tokio::sync::Semaphore;

 use crate::{
    event_handlers::{
-        Command::{AddError, AddToF64Field, SubtractFromUsizeField},
+        Command::{AddError, AddToF64Field, AddToUsizeField, SubtractFromUsizeField},
        Handles,
    },
    extractor::{ExtractionTarget, ExtractorBuilder},
    heuristics,
-    scan_manager::{FeroxResponses, MenuCmdResult, ScanOrder, ScanStatus, PAUSE_SCAN},
+    scan_manager::{FeroxResponses, FeroxScans, MenuCmdResult, ScanOrder, ScanStatus, PAUSE_SCAN},
+    scanner::requester::TF_IDF,
    statistics::{
        StatError::Other,
        StatField::{DirScanTimes, TotalExpected},
@@ -29,6 +32,43 @@ lazy_static! {
    pub static ref RESPONSES: FeroxResponses = FeroxResponses::default();
    // todo consider removing this
 }
+
+/// check whether `pause_flag` is currently set; if it is, hand control to
+/// `scanned_urls.pause(true)` and act on whatever the interactive menu returns
+///
+/// - a new url is forwarded to the scan handler as a `Command::ScanNewUrl`
+/// - a non-zero number of cancelled scans is subtracted from the `TotalExpected` stat
+///
+/// failures to send either message are logged as warnings and otherwise ignored
+async fn check_for_user_input(
+    pause_flag: &AtomicBool,
+    scanned_urls: Arc<FeroxScans>,
+    handles: Arc<Handles>,
+) {
+    log::trace!(
+        "enter: check_for_user_input({:?}, SCANNED_URLS, HANDLES)",
+        pause_flag
+    );
+
+    // todo write a test or two for this function at some point...
+    let menu_result = if pause_flag.load(Ordering::Acquire) {
+        scanned_urls.pause(true).await
+    } else {
+        None
+    };
+
+    match menu_result {
+        Some(MenuCmdResult::Url(url)) => {
+            // user wants to add a new url to be scanned, need to send
+            // it over to the event handler for processing
+            if let Err(e) = handles.send_scan_command(Command::ScanNewUrl(url)) {
+                log::warn!("Could not add scan to scan queue: {}", e);
+            }
+        }
+        Some(MenuCmdResult::NumCancelled(num_canx)) if num_canx > 0 => {
+            let update = SubtractFromUsizeField(TotalExpected, num_canx);
+
+            if let Err(e) = handles.stats.send(update) {
+                log::warn!("Could not update overall scan bar: {}", e);
+            }
+        }
+        _ => {}
+    }
+
+    log::trace!("exit: check_for_user_input");
+}
+
 /// handles the main muscle movement of scanning a url
 pub struct FeroxScanner {
    /// handles to handlers and config
@@ -67,6 +107,57 @@ impl FeroxScanner {
        }
    }

+    /// produces and awaits tasks (mp of mpsc); responsible for making requests
+    ///
+    /// one tokio task is spawned per word in `looping_words`; each task first checks for user
+    /// input (the pause menu) and then asks `requester` to request that word. the spawned
+    /// tasks are awaited with a concurrency limit of `config.threads`, and `progress_bar` is
+    /// advanced once per successfully joined task
+    async fn stream_requests(
+        &self,
+        looping_words: Arc<Vec<String>>,
+        progress_bar: ProgressBar,
+        scanned_urls: Arc<FeroxScans>,
+        requester: Arc<Requester>,
+    ) {
+        log::trace!("enter: stream_requests(params too verbose to print)");
+
+        let max_in_flight = self.handles.config.threads;
+
+        let producers = stream::iter(looping_words.deref().to_owned())
+            .map(|word| {
+                // every spawned task gets its own clones of the shared state
+                let task_scans = scanned_urls.clone();
+                let task_requester = requester.clone();
+                let task_handles = self.handles.clone();
+
+                let task = tokio::spawn(async move {
+                    // for every word in the wordlist, check to see if user has pressed enter
+                    // in order to go into the interactive menu
+                    check_for_user_input(&PAUSE_SCAN, task_scans, task_handles).await;
+
+                    // after checking for user input, send the request
+                    if let Err(e) = task_requester.request(&word).await {
+                        log::warn!("Requester encountered an error: {}", e);
+                    }
+                });
+
+                // progress bar is an Arc around internal state
+                (task, progress_bar.clone())
+            })
+            .for_each_concurrent(max_in_flight, |(task, bar)| async move {
+                match task.await {
+                    Err(e) => {
+                        log::warn!("error awaiting a response: {}", e);
+                        self.handles.stats.send(AddError(Other)).unwrap_or_default();
+                    }
+                    Ok(_) => {
+                        // looked up per response rather than once up front, as in the
+                        // original flow of this function
+                        let step = self.handles.expected_num_requests_multiplier() as u64;
+                        bar.inc(step);
+                    }
+                }
+            });
+
+        // await tx tasks
+        log::trace!("awaiting scan producers");
+        producers.await;
+        log::trace!("done awaiting scan producers");
+        log::trace!("exit: stream_requests");
+    }
+
    /// Scan a given url using a given wordlist
    ///
    /// This is the primary entrypoint for the scanner
@@ -75,30 +166,17 @@ impl FeroxScanner {
        log::info!("Starting scan against: {}", self.target_url);

        let mut scan_timer = Instant::now();
-        let mut dirlist_flag = false;

-        if self.handles.config.extract_links {
-            // parse html for links (i.e. web scraping)
-            let extractor = ExtractorBuilder::default()
-                .target(ExtractionTarget::ParseHtml)
+        if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) {
+            // check for robots.txt (cannot be in sub-directories, so limited to Initial)
+            let mut extractor = ExtractorBuilder::default()
+                .target(ExtractionTarget::RobotsTxt)
                .url(&self.target_url)
                .handles(self.handles.clone())
                .build()?;
-            let extract_out = extractor.extract().await?;
-            let links = extract_out.0;
-            dirlist_flag = extract_out.1;
-            extractor.request_links(links).await?;

-            if matches!(self.order, ScanOrder::Initial) {
-                // check for robots.txt (cannot be in subdirs)
-                let extractor = ExtractorBuilder::default()
-                    .target(ExtractionTarget::RobotsTxt)
-                    .url(&self.target_url)
-                    .handles(self.handles.clone())
-                    .build()?;
-                let links = (extractor.extract().await?).0;
-                extractor.request_links(links).await?;
-            }
+            let result = extractor.extract().await?;
+            extractor.request_links(result).await?;
        }

        let scanned_urls = self.handles.ferox_scans()?;
@@ -118,118 +196,115 @@ impl FeroxScanner {

        let progress_bar = ferox_scan.progress_bar();

-        // Directory listing heuristic detection to not continue scanning
-        if dirlist_flag {
-            log::trace!("exit: scan_url -> Directory listing heuristic");
-
-            self.handles.stats.send(AddToF64Field(
-                DirScanTimes,
-                scan_timer.elapsed().as_secs_f64(),
-            ))?;
-
-            self.handles.stats.send(SubtractFromUsizeField(
-                TotalExpected,
-                progress_bar.length() as usize,
-            ))?;
-
-            progress_bar.reset_eta();
-            progress_bar.finish_with_message(&format!(
-                "=> {}",
-                style("Directory listing").blue().bright()
-            ));
-
-            ferox_scan.finish()?;
-
-            return Ok(());
-        }
-
        // When acquire is called and the semaphore has remaining permits, the function immediately
        // returns a permit. However, if no remaining permits are available, acquire (asynchronously)
        // waits until an outstanding permit is dropped, at which point, the freed permit is assigned
        // to the caller.
        let _permit = self.scan_limiter.acquire().await;
+
        if self.handles.config.scan_limit > 0 {
            scan_timer = Instant::now();
            progress_bar.reset();
        }

-        // Arc clones to be passed around to the various scans
-        let looping_words = self.wordlist.clone();
-
        {
+            // heuristics test block
            let test = heuristics::HeuristicTests::new(self.handles.clone());
+
            if let Ok(num_reqs) = test.wildcard(&self.target_url).await {
                progress_bar.inc(num_reqs);
            }
+
+            if let Ok(dirlist_result) = test.directory_listing(&self.target_url).await {
+                if dirlist_result.is_some() {
+                    let dirlist_result = dirlist_result.unwrap();
+                    // at this point, we have a DirListingType, and it's not the None variant
+                    // which means we found directory listing based on the heuristic; now we need
+                    // to process the links that are available if --extract-links was used
+
+                    if self.handles.config.extract_links {
+                        let mut extractor = ExtractorBuilder::default()
+                            .response(&dirlist_result.response)
+                            .target(ExtractionTarget::DirectoryListing)
+                            .url(&self.target_url)
+                            .handles(self.handles.clone())
+                            .build()?;
+
+                        let result = extractor.extract_from_dir_listing().await?;
+
+                        extractor.request_links(result).await?;
+
+                        log::trace!("exit: scan_url -> Directory listing heuristic");
+
+                        self.handles.stats.send(AddToF64Field(
+                            DirScanTimes,
+                            scan_timer.elapsed().as_secs_f64(),
+                        ))?;
+
+                        self.handles.stats.send(SubtractFromUsizeField(
+                            TotalExpected,
+                            progress_bar.length() as usize,
+                        ))?;
+                    }
+
+                    let mut message = format!("=> {}", style("Directory listing").blue().bright());
+
+                    if !self.handles.config.extract_links {
+                        message
+                            .push_str(&format!(" (add {} to scan)", style("-e").bright().yellow()))
+                    }
+
+                    progress_bar.reset_eta();
+                    progress_bar.finish_with_message(&message);
+
+                    ferox_scan.finish()?;
+
+                    return Ok(());
+                }
+            }
        }

+        // Arc clones to be passed around to the various scans
+        let looping_words = self.wordlist.clone();
+
        let requester = Arc::new(Requester::from(self, ferox_scan.clone())?);
-        let increment_len =
-            ((self.handles.config.extensions.len() + 1) * self.handles.config.methods.len()) as u64;

-        // producer tasks (mp of mpsc); responsible for making requests
-        let producers = stream::iter(looping_words.deref().to_owned())
-            .map(|word| {
-                let pb = progress_bar.clone(); // progress bar is an Arc around internal state
-                let scanned_urls_clone = scanned_urls.clone();
-                let requester_clone = requester.clone();
-                let handles_clone = self.handles.clone();
-                (
-                    tokio::spawn(async move {
-                        if PAUSE_SCAN.load(Ordering::Acquire) {
-                            // for every word in the wordlist, check to see if PAUSE_SCAN is set to true
-                            // when true; enter a busy loop that only exits by setting PAUSE_SCAN back
-                            // to false
-                            match scanned_urls_clone.pause(true).await {
-                                Some(MenuCmdResult::Url(url)) => {
-                                    // user wants to add a new url to be scanned, need to send
-                                    // it over to the event handler for processing
-                                    handles_clone
-                                        .send_scan_command(Command::ScanNewUrl(url))
-                                        .unwrap_or_else(|e| {
-                                            log::warn!("Could not add scan to scan queue: {}", e)
-                                        })
-                                }
-                                Some(MenuCmdResult::NumCancelled(num_canx)) => {
-                                    if num_canx > 0 {
-                                        handles_clone
-                                            .stats
-                                            .send(SubtractFromUsizeField(TotalExpected, num_canx))
-                                            .unwrap_or_else(|e| {
-                                                log::warn!(
-                                                    "Could not update overall scan bar: {}",
-                                                    e
-                                                )
-                                            });
-                                    }
-                                }
-                                _ => {}
-                            }
-                        }
-                        requester_clone
-                            .request(&word)
-                            .await
-                            .unwrap_or_else(|e| log::warn!("Requester encountered an error: {}", e))
-                    }),
-                    pb,
-                )
-            })
-            .for_each_concurrent(self.handles.config.threads, |(resp, bar)| async move {
-                match resp.await {
-                    Ok(_) => {
-                        bar.inc(increment_len);
-                    }
-                    Err(e) => {
-                        log::warn!("error awaiting a response: {}", e);
-                        self.handles.stats.send(AddError(Other)).unwrap_or_default();
-                    }
-                }
-            });
+        self.stream_requests(
+            looping_words.clone(),
+            progress_bar.clone(),
+            scanned_urls.clone(),
+            requester.clone(),
+        )
+        .await;

-        // await tx tasks
-        log::trace!("awaiting scan producers");
-        producers.await;
-        log::trace!("done awaiting scan producers");
+        if self.handles.config.collect_words {
+            let new_words = TF_IDF.read().unwrap().all_words();
+            let new_words_len = new_words.len();
+
+            let cur_length = progress_bar.length();
+            let new_length = cur_length + new_words_len as u64;
+
+            progress_bar.set_length(new_length);
+
+            self.handles
+                .stats
+                .send(AddToUsizeField(TotalExpected, new_words.len()))
+                .unwrap_or_default();
+
+            log::info!(
+                "requesting {} collected words: {:?}...",
+                new_words_len,
+                &new_words[..new_words_len.min(3) as usize]
+            );
+
+            self.stream_requests(
+                Arc::new(new_words),
+                progress_bar.clone(),
+                scanned_urls.clone(),
+                requester.clone(),
+            )
+            .await;
+        }

        self.handles.stats.send(AddToF64Field(
            DirScanTimes,
--- a/src/scanner/init.rs
+++ b/src/scanner/init.rs
@@ -11,9 +11,7 @@ pub async fn initialize(num_words: usize, handles: Arc<Handles>) -> Result<()> {
    log::trace!("enter: initialize({}, {:?})", num_words, handles);

    // number of requests only needs to be calculated once, and then can be reused
-    let num_reqs_expected: u64 =
-        (num_words * (handles.config.extensions.len() + 1) * (handles.config.methods.len()))
-            .try_into()?;
+    let num_reqs_expected: u64 = handles.expected_num_requests_per_dir().try_into()?;

    {
        // no real reason to keep the arc around beyond this call
--- a/src/scanner/requester.rs
+++ b/src/scanner/requester.rs
@@ -1,9 +1,11 @@
 use std::{
    cmp::max,
-    sync::{atomic::Ordering, Arc, Mutex},
+    collections::HashSet,
+    sync::{self, atomic::Ordering, Arc, Mutex},
 };

 use anyhow::Result;
+use lazy_static::lazy_static;
 use leaky_bucket::LeakyBucket;
 use tokio::{
    sync::{oneshot, RwLock},
@@ -18,17 +20,21 @@ use crate::{
        Handles,
    },
    extractor::{ExtractionTarget, ExtractorBuilder},
+    nlp::{Document, TfIdf},
    response::FeroxResponse,
    scan_manager::{FeroxScan, ScanStatus},
    statistics::{StatError::Other, StatField::TotalExpected},
    url::FeroxUrl,
-    utils::logged_request,
+    utils::{logged_request, should_deny_url},
    HIGH_ERROR_RATIO,
 };

 use super::{policy_data::PolicyData, FeroxScanner, PolicyTrigger};
-use crate::utils::should_deny_url;
-use std::collections::HashSet;
+
+lazy_static! {
+    /// crate-wide TF-IDF model; NOTE: guarded by `std::sync::RwLock`, not tokio's `RwLock`
+    pub(crate) static ref TF_IDF: Arc<sync::RwLock<TfIdf>> = Arc::new(sync::RwLock::new(TfIdf::new()));
+}

 /// Makes multiple requests based on the presence of extensions
 pub(super) struct Requester {
@@ -303,8 +309,10 @@ impl Requester {
    pub async fn request(&self, word: &str) -> Result<()> {
        log::trace!("enter: request({})", word);

-        let urls =
-            FeroxUrl::from_string(&self.target_url, self.handles.clone()).formatted_urls(word)?;
+        let collected = self.handles.collected_extensions();
+
+        let urls = FeroxUrl::from_string(&self.target_url, self.handles.clone())
+            .formatted_urls(word, collected)?;

        let should_test_deny = !self.handles.config.url_denylist.is_empty()
            || !self.handles.config.regex_denylist.is_empty();
@@ -331,13 +339,14 @@ impl Requester {
                    continue;
                }

-                let response = logged_request(
-                    &url,
-                    method.as_str(),
-                    Some(self.handles.config.data.as_slice()),
-                    self.handles.clone(),
-                )
-                .await?;
+                let data = if self.handles.config.data.is_empty() {
+                    None
+                } else {
+                    Some(self.handles.config.data.as_slice())
+                };
+
+                let response =
+                    logged_request(&url, method.as_str(), data, self.handles.clone()).await?;

                if (should_tune || self.handles.config.auto_bail)
                    && !atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst)
@@ -361,11 +370,10 @@ impl Requester {
                }

                // response came back without error, convert it to FeroxResponse
-                let ferox_response = FeroxResponse::from(
+                let mut ferox_response = FeroxResponse::from(
                    response,
                    &self.target_url,
                    method,
-                    true,
                    self.handles.config.output_level,
                )
                .await;
@@ -392,20 +400,38 @@ impl Requester {
                    continue;
                }

-                if self.handles.config.extract_links && !ferox_response.status().is_redirection() {
-                    let extractor = ExtractorBuilder::default()
+                if self.handles.config.collect_extensions {
+                    ferox_response.parse_extension(self.handles.clone())?;
+                }
+
+                if self.handles.config.collect_words {
+                    if let Ok(mut guard) = TF_IDF.write() {
+                        let doc = Document::from_html(ferox_response.text());
+                        guard.add_document(doc);
+                        if guard.num_documents() % 12 == 0
+                            || (guard.num_documents() < 5 && guard.num_documents() % 2 == 0)
+                        {
+                            guard.calculate_tf_idf_scores();
+                        }
+                    }
+                }
+
+                if self.handles.config.extract_links {
+                    let mut extractor = ExtractorBuilder::default()
                        .target(ExtractionTarget::ResponseBody)
                        .response(&ferox_response)
                        .handles(self.handles.clone())
                        .build()?;
+
                    let new_links: HashSet<_>;
-                    let extracted = (extractor.extract().await?).0;
+
+                    let result = extractor.extract().await?;

                    {
                        // gain and quickly drop the read lock on seen_links, using it while unlocked
                        // to determine if there are any new links to process
                        let read_links = self.seen_links.read().await;
-                        new_links = extracted.difference(&read_links).cloned().collect();
+                        new_links = result.difference(&read_links).cloned().collect();
                    }

                    if !new_links.is_empty() {
@@ -417,7 +443,9 @@ impl Requester {
                        }
                    }

-                    extractor.request_links(new_links).await?;
+                    if !new_links.is_empty() {
+                        extractor.request_links(new_links).await?;
+                    }
                }

                // everything else should be reported
@@ -458,12 +486,14 @@ mod tests {
        let (filters_task, filters_handle) = FiltersHandler::initialize();
        let (out_task, out_handle) =
            TermOutHandler::initialize(configuration.clone(), stats_handle.tx.clone());
+        let wordlist = Arc::new(vec![String::from("this_is_a_test")]);

        let handles = Arc::new(Handles::new(
            stats_handle,
            filters_handle,
            out_handle,
            configuration.clone(),
+            wordlist,
        ));

        let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone());
@@ -587,10 +617,10 @@ mod tests {

        let requester = Requester {
            handles,
+            target_url: "http://localhost".to_string(),
            seen_links: RwLock::new(HashSet::<String>::new()),
            tuning_lock: Mutex::new(0),
            ferox_scan: Arc::new(FeroxScan::default()),
-            target_url: "http://localhost".to_string(),
            rate_limiter: RwLock::new(None),
            policy_data: Default::default(),
        };
--- a/src/statistics/container.rs
+++ b/src/statistics/container.rs
@@ -69,6 +69,10 @@ pub struct Stats {
    /// response bodies and robots.txt as of v1.11.0
    links_extracted: AtomicUsize,

+    /// tracker for number of extensions discovered when `--collect-extensions` is used; sources
+    /// are response bodies
+    extensions_collected: AtomicUsize,
+
    /// tracker for overall number of 200s seen by the client
    status_200s: AtomicUsize,

@@ -166,6 +170,10 @@ impl Serialize for Stats {
        state.serialize_field("total_scans", &atomic_load!(self.total_scans))?;
        state.serialize_field("initial_targets", &atomic_load!(self.initial_targets))?;
        state.serialize_field("links_extracted", &atomic_load!(self.links_extracted))?;
+        state.serialize_field(
+            "extensions_collected",
+            &atomic_load!(self.extensions_collected),
+        )?;
        state.serialize_field("status_200s", &atomic_load!(self.status_200s))?;
        state.serialize_field("status_301s", &atomic_load!(self.status_301s))?;
        state.serialize_field("status_302s", &atomic_load!(self.status_302s))?;
@@ -290,6 +298,13 @@ impl<'a> Deserialize<'a> for Stats {
                        }
                    }
                }
+                "extensions_collected" => {
+                    if let Some(num) = value.as_u64() {
+                        if let Ok(parsed) = usize::try_from(num) {
+                            atomic_increment!(stats.extensions_collected, parsed);
+                        }
+                    }
+                }
                "status_200s" => {
                    if let Some(num) = value.as_u64() {
                        if let Ok(parsed) = usize::try_from(num) {
@@ -628,6 +643,9 @@ impl Stats {
            StatField::LinksExtracted => {
                atomic_increment!(self.links_extracted, value);
            }
+            StatField::ExtensionsCollected => {
+                atomic_increment!(self.extensions_collected, value);
+            }
            StatField::WildcardsFiltered => {
                atomic_increment!(self.wildcards_filtered, value);
                atomic_increment!(self.responses_filtered, value);
@@ -664,6 +682,10 @@ impl Stats {
            atomic_increment!(self.client_errors, atomic_load!(d_stats.client_errors));
            atomic_increment!(self.server_errors, atomic_load!(d_stats.server_errors));
            atomic_increment!(self.links_extracted, atomic_load!(d_stats.links_extracted));
+            atomic_increment!(
+                self.extensions_collected,
+                atomic_load!(d_stats.extensions_collected)
+            );
            atomic_increment!(self.status_200s, atomic_load!(d_stats.status_200s));
            atomic_increment!(self.status_301s, atomic_load!(d_stats.status_301s));
            atomic_increment!(self.status_302s, atomic_load!(d_stats.status_302s));
@@ -834,7 +856,7 @@ mod tests {
    #[test]
    /// Stats::merge_from should properly increment expected fields and ignore others
    fn stats_merge_from_alters_correct_fields() {
-        let contents = r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#;
+        let contents = r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"extensions_collected":4,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#;
        let config = Configuration::new().unwrap();
        let stats = Stats::new(config.json);

@@ -857,6 +879,7 @@ mod tests {
        assert_eq!(atomic_load!(stats.total_scans), 0); // not updated in merge_from
        assert_eq!(atomic_load!(stats.initial_targets), 0); // not updated in merge_from
        assert_eq!(atomic_load!(stats.links_extracted), 51);
+        assert_eq!(atomic_load!(stats.extensions_collected), 4);
        assert_eq!(atomic_load!(stats.status_200s), 720);
        assert_eq!(atomic_load!(stats.status_301s), 12);
        assert_eq!(atomic_load!(stats.status_302s), 1);
--- a/src/statistics/field.rs
+++ b/src/statistics/field.rs
@@ -13,6 +13,9 @@ pub enum StatField {
    /// Translates to `links_extracted`
    LinksExtracted,

+    /// Translates to `extensions_collected`
+    ExtensionsCollected,
+
    /// Translates to `total_expected`
    TotalExpected,

--- a/src/url.rs
+++ b/src/url.rs
@@ -1,6 +1,7 @@
 use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
 use anyhow::{anyhow, bail, Result};
 use reqwest::Url;
+use std::collections::HashSet;
 use std::{convert::TryInto, fmt, sync::Arc};

 /// abstraction around target urls; collects all Url related shenanigans in one place
@@ -37,7 +38,11 @@ impl FeroxUrl {
    ///
    /// If any extensions were passed to the program, each extension will add a
    /// (base_url + word + ext) Url to the vector
-    pub fn formatted_urls(&self, word: &str) -> Result<Vec<Url>> {
+    pub fn formatted_urls(
+        &self,
+        word: &str,
+        collected_extensions: HashSet<String>,
+    ) -> Result<Vec<Url>> {
        log::trace!("enter: formatted_urls({})", word);

        let mut urls = vec![];
@@ -54,7 +59,13 @@ impl FeroxUrl {
            Err(_) => self.handles.stats.send(AddError(UrlFormat))?,
        }

-        for ext in self.handles.config.extensions.iter() {
+        for ext in self
+            .handles
+            .config
+            .extensions
+            .iter()
+            .chain(collected_extensions.iter())
+        {
            match self.format(word, Some(ext)) {
                // any extensions passed in
                Ok(url) => urls.push(url),
@@ -254,7 +265,7 @@ mod tests {
    fn formatted_urls_no_extension_returns_base_url_with_word() {
        let handles = Arc::new(Handles::for_testing(None, None).0);
        let url = FeroxUrl::from_string("http://localhost", handles);
-        let urls = url.formatted_urls("turbo").unwrap();
+        let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();
        assert_eq!(urls, [Url::parse("http://localhost/turbo").unwrap()])
    }

@@ -268,7 +279,7 @@ mod tests {

        let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
        let url = FeroxUrl::from_string("http://localhost", handles);
-        let urls = url.formatted_urls("turbo").unwrap();
+        let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();

        assert_eq!(
            urls,
@@ -315,7 +326,7 @@ mod tests {
            let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
            let url = FeroxUrl::from_string("http://localhost", handles);

-            let urls = url.formatted_urls("turbo").unwrap();
+            let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();
            assert_eq!(urls, expected[i]);
        }
    }
@@ -502,7 +513,7 @@ mod tests {
        };
        let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
        let url = FeroxUrl::from_string("http://localhost", handles);
-        match url.formatted_urls("ferox") {
+        match url.formatted_urls("ferox", HashSet::new()) {
            Ok(urls) => {
                // 3 = One for the main word + slash and for the two extensions
                assert_eq!(urls.len(), 3);
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -131,7 +131,7 @@ pub async fn make_request(
    client: &Client,
    url: &Url,
    method: &str,
-    data: Option<&[u8]>,
+    mut data: Option<&[u8]>,
    output_level: OutputLevel,
    config: &Configuration,
    tx_stats: UnboundedSender<Command>,
@@ -142,8 +142,30 @@ pub async fn make_request(
        output_level,
        tx_stats
    );
+    let tmp_workaround: Option<&[u8]> = Some(&[0xd_u8, 0xa]); // \r\n

    let mut request = client.request(Method::from_bytes(method.as_bytes())?, url.to_owned());
+
+    if (!config.proxy.is_empty() || !config.replay_proxy.is_empty())
+        && data.is_none()
+        && ["post", "put", "patch"].contains(&method.to_ascii_lowercase().as_str())
+    {
+        // either --proxy or --replay-proxy was specified
+        // AND
+        // --data wasn't used
+        // AND
+        // the method is either post/put/patch (case insensitive)
+        //
+        // this combination of factors results in requests that are delayed for 10 seconds before
+        // being issued. The tracking issues are
+        //   https://github.com/epi052/feroxbuster/issues/501
+        //   https://github.com/seanmonstar/reqwest/issues/1474
+        //
+        // as a (hopefully temporary) workaround, we'll add \r\n to the body so that there's no
+        // delay
+        data = tmp_workaround;
+    }
+
    if let Some(body_data) = data {
        request = request.body(body_data.to_vec());
    }
@@ -482,7 +504,7 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String {
        String::new()
    };

-    let slug = url.replace("://", "_").replace("/", "_").replace(".", "_");
+    let slug = url.replace("://", "_").replace('/', "_").replace('.', "_");

    let filename = format!("{}{}-{}.{}", altered_prefix, slug, ts, suffix);

--- a/tests/test_banner.rs
+++ b/tests/test_banner.rs
@@ -16,10 +16,10 @@ fn banner_prints_proxy() -> Result<(), Box<dyn std::error::Error>> {
    Command::cargo_bin("feroxbuster")
        .unwrap()
        .arg("--stdin")
-        .arg("--wordlist")
-        .arg(file.as_os_str())
        .arg("--proxy")
        .arg("http://127.0.0.1:8080")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .pipe_stdin(file)
        .unwrap()
        .assert()
@@ -57,7 +57,7 @@ fn banner_prints_replay_proxy() -> Result<(), Box<dyn std::error::Error>> {
        .unwrap()
        .arg("--stdin")
        .arg("--wordlist")
-        .arg(file.as_os_str())
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .arg("--replay-proxy")
        .arg("http://127.0.0.1:8081")
        .pipe_stdin(file)
@@ -95,6 +95,8 @@ fn banner_prints_headers() {
        .arg("stuff:things")
        .arg("-H")
        .arg("mostuff:mothings")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -126,6 +128,8 @@ fn banner_prints_denied_urls() {
        .arg("https://also-not.me")
        .arg("https:")
        .arg("/deny.*")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -156,6 +160,8 @@ fn banner_prints_random_agent() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--random-agent")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -192,6 +198,8 @@ fn banner_prints_filter_sizes() {
        .arg("93")
        .arg("--filter-words")
        .arg("94")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -228,6 +236,8 @@ fn banner_prints_queries() {
        .arg("token=supersecret")
        .arg("--query")
        .arg("stuff=things")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -256,6 +266,8 @@ fn banner_prints_status_codes() {
        .arg("http://localhost")
        .arg("-s")
        .arg("201,301,401")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -284,6 +296,8 @@ fn banner_prints_replay_codes() {
        .arg("200,302")
        .arg("--replay-proxy")
        .arg("http://localhost:8081")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -312,6 +326,8 @@ fn banner_prints_output_file() {
        .arg("http://localhost")
        .arg("--output")
        .arg("/super/cool/path")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -326,7 +342,7 @@ fn banner_prints_output_file() {
                .and(predicate::str::contains("Output File"))
                .and(predicate::str::contains("/super/cool/path"))
                .and(predicate::str::contains(
-                    "ERROR: Couldn't start /super/cool/path file handler",
+                    "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676",
                ))
                .and(predicate::str::contains("─┴─")),
        );
@@ -341,6 +357,8 @@ fn banner_prints_insecure() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-k")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -367,6 +385,8 @@ fn banner_prints_redirects() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-r")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -396,6 +416,8 @@ fn banner_prints_extensions() {
        .arg("js")
        .arg("--extensions")
        .arg("pdf")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -422,6 +444,8 @@ fn banner_prints_dont_filter() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--dont-filter")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -448,6 +472,8 @@ fn banner_prints_verbosity_one() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-v")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -474,6 +500,8 @@ fn banner_prints_verbosity_two() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-vv")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -500,6 +528,8 @@ fn banner_prints_verbosity_three() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-vvv")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -526,6 +556,8 @@ fn banner_prints_verbosity_four() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-vvvv")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -552,6 +584,8 @@ fn banner_prints_add_slash() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-f")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -579,6 +613,8 @@ fn banner_prints_infinite_depth() {
        .arg("http://localhost")
        .arg("--depth")
        .arg("0")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -606,6 +642,8 @@ fn banner_prints_recursion_depth() {
        .arg("http://localhost")
        .arg("--depth")
        .arg("343214")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -632,6 +670,8 @@ fn banner_prints_no_recursion() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-n")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -658,10 +698,12 @@ fn banner_doesnt_print() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-q")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(predicate::str::contains(
-            "Could not connect to any target provided",
+            "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676",
        ));
 }

@@ -674,6 +716,8 @@ fn banner_prints_extract_links() {
        .arg("--url")
        .arg("http://localhost")
        .arg("-e")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -701,6 +745,8 @@ fn banner_prints_scan_limit() {
        .arg("http://localhost")
        .arg("-L")
        .arg("4")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -728,6 +774,8 @@ fn banner_prints_filter_status() {
        .arg("http://localhost")
        .arg("-C")
        .arg("200")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -756,6 +804,8 @@ fn banner_prints_json() {
        .arg("--json")
        .arg("--output")
        .arg("/dev/null")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -783,6 +833,8 @@ fn banner_prints_debug_log() {
        .arg("http://localhost")
        .arg("--debug-log")
        .arg("/dev/null")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -810,6 +862,8 @@ fn banner_prints_filter_regex() {
        .arg("http://localhost")
        .arg("--filter-regex")
        .arg("^ignore me$")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -837,6 +891,8 @@ fn banner_prints_time_limit() {
        .arg("http://localhost")
        .arg("--time-limit")
        .arg("10m")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -864,6 +920,8 @@ fn banner_prints_similarity_filter() {
        .arg("http://localhost")
        .arg("--filter-similar-to")
        .arg("https://somesite.com")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -891,6 +949,8 @@ fn banner_prints_rate_limit() {
        .arg("http://localhost")
        .arg("--rate-limit")
        .arg("6735")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -917,6 +977,8 @@ fn banner_prints_auto_tune() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--auto-tune")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -943,6 +1005,8 @@ fn banner_prints_auto_bail() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--auto-bail")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -969,6 +1033,8 @@ fn banner_doesnt_print_when_silent() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--silent")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -993,6 +1059,8 @@ fn banner_doesnt_print_when_quiet() {
        .arg("--url")
        .arg("http://localhost")
        .arg("--quiet")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -1017,18 +1085,19 @@ fn banner_prints_parallel() {
        .arg("--stdin")
        .arg("--parallel")
        .arg("4316")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
            predicate::str::contains("─┬─")
-                .not()
-                .and(predicate::str::contains("Target Url").not())
-                .and(predicate::str::contains("Parallel Scans").not())
-                .and(predicate::str::contains("Threads").not())
-                .and(predicate::str::contains("Wordlist").not())
-                .and(predicate::str::contains("Status Codes").not())
-                .and(predicate::str::contains("Timeout (secs)").not())
-                .and(predicate::str::contains("User-Agent").not()),
+                .and(predicate::str::contains("Parallel Scans"))
+                .and(predicate::str::contains("4316"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent")),
        );
 }

@@ -1044,6 +1113,8 @@ fn banner_prints_methods() {
        .arg("PUT")
        .arg("--methods")
        .arg("OPTIONS")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -1075,6 +1146,8 @@ fn banner_prints_data() {
        .arg("POST")
        .arg("--data")
        .arg("some_data")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
        .assert()
        .success()
        .stderr(
@@ -1091,3 +1164,233 @@ fn banner_prints_data() {
                .and(predicate::str::contains("─┴─")),
        );
 }
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect extensions and the default ignored extensions
+fn banner_prints_collect_extensions_and_dont_collect_default() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--collect-extensions")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Extensions"))
+                .and(predicate::str::contains("Ignored Extensions"))
+                .and(predicate::str::contains("Images, Movies, Audio, etc..."))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect extensions and the user-supplied ignored extensions
+fn banner_prints_collect_extensions_and_dont_collect_with_input() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--collect-extensions")
+        .arg("--dont-collect")
+        .arg("pdf")
+        .arg("xps")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Extensions"))
+                .and(predicate::str::contains("Ignored Extensions"))
+                .and(predicate::str::contains("[pdf, xps]"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect backups
+fn banner_prints_collect_backups() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--collect-backups")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Backups"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect words
+fn banner_prints_collect_words() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--collect-words")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Words"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect words, collect backups, extract links, and auto tune
+fn banner_prints_all_composite_settings_smart() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--smart")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Words"))
+                .and(predicate::str::contains("Collect Backups"))
+                .and(predicate::str::contains("Extract Links"))
+                .and(predicate::str::contains("Auto Tune"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + collect words/extensions/backups, extract links, and auto tune
+fn banner_prints_all_composite_settings_thorough() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--thorough")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Collect Words"))
+                .and(predicate::str::contains("Collect Extensions"))
+                .and(predicate::str::contains("Collect Backups"))
+                .and(predicate::str::contains("Extract Links"))
+                .and(predicate::str::contains("Auto Tune"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + proxy and insecure
+fn banner_prints_all_composite_settings_burp() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--burp")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Proxy"))
+                .and(predicate::str::contains("Insecure"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
+#[test]
+/// test allows non-existent wordlist to trigger the banner printing to stderr
+/// expect to see all mandatory prints + replay proxy and insecure
+fn banner_prints_all_composite_settings_burp_replay() {
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg("http://localhost")
+        .arg("--burp-replay")
+        .arg("--wordlist")
+        .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676")
+        .assert()
+        .success()
+        .stderr(
+            predicate::str::contains("─┬─")
+                .and(predicate::str::contains("Target Url"))
+                .and(predicate::str::contains("http://localhost"))
+                .and(predicate::str::contains("Threads"))
+                .and(predicate::str::contains("Wordlist"))
+                .and(predicate::str::contains("Status Codes"))
+                .and(predicate::str::contains("Timeout (secs)"))
+                .and(predicate::str::contains("User-Agent"))
+                .and(predicate::str::contains("Replay Proxy"))
+                .and(predicate::str::contains("Insecure"))
+                .and(predicate::str::contains("─┴─")),
+        );
+}
--- a/tests/test_deny_list.rs
+++ b/tests/test_deny_list.rs
@@ -131,8 +131,8 @@ fn deny_list_works_during_recursion() {
            .not(),
    );

-    assert_eq!(js_mock.hits(), 1);
-    assert_eq!(js_prod_mock.hits(), 1);
+    assert_eq!(js_mock.hits(), 2);
+    assert_eq!(js_prod_mock.hits(), 2);
    assert_eq!(js_dev_mock.hits(), 0);
    assert_eq!(js_dev_file_mock.hits(), 0);

@@ -202,9 +202,9 @@ fn deny_list_works_during_recursion_with_inverted_parents() {
            .not(),
    );

-    assert_eq!(js_mock.hits(), 1);
-    assert_eq!(js_prod_mock.hits(), 1);
-    assert_eq!(js_dev_mock.hits(), 1);
+    assert_eq!(js_mock.hits(), 2);
+    assert_eq!(js_prod_mock.hits(), 2);
+    assert_eq!(js_dev_mock.hits(), 2);
    assert_eq!(js_dev_file_mock.hits(), 1);
    assert_eq!(api_mock.hits(), 0);

--- a/tests/test_extractor.rs
+++ b/tests/test_extractor.rs
@@ -284,11 +284,11 @@ fn extractor_finds_robots_txt_links_and_displays_files_or_scans_directories() {
            .and(predicate::str::contains("22c"))
            .and(predicate::str::contains("/misc/LICENSE"))
            .and(predicate::str::contains("29c"))
-            .and(predicate::str::contains("200").count(3)),
+            .and(predicate::str::contains("200").count(4)),
    );

    assert_eq!(mock.hits(), 1);
-    assert_eq!(mock_dir.hits(), 2);
+    assert_eq!(mock_dir.hits(), 3);
    assert_eq!(mock_two.hits(), 1);
    assert_eq!(mock_file.hits(), 1);
    assert_eq!(mock_disallowed.hits(), 1);
@@ -636,7 +636,7 @@ fn extractor_recurses_into_403_directories() -> Result<(), Box<dyn std::error::E

    assert_eq!(mock.hits(), 1);
    assert_eq!(mock_two.hits(), 1);
-    assert_eq!(forbidden_dir.hits(), 2);
+    assert_eq!(forbidden_dir.hits(), 3);
    teardown_tmp_directory(tmp_dir);
    Ok(())
 }
--- a/tests/test_main.rs
+++ b/tests/test_main.rs
@@ -25,12 +25,9 @@ fn main_use_root_owned_file_as_wordlist() {
        .arg("-vvvv")
        .assert()
        .success()
-        .stderr(predicate::str::contains(
-            "Failed while scanning: Could not open /etc/shadow",
-        ));
+        .stderr(predicate::str::contains("Could not open /etc/shadow"));

-    // connectivity test hits it once
-    assert_eq!(mock.hits(), 1);
+    assert_eq!(mock.hits(), 0);
 }

 #[test]
@@ -53,11 +50,9 @@ fn main_use_empty_wordlist() -> Result<(), Box<dyn std::error::Error>> {
        .arg("-vvvv")
        .assert()
        .success()
-        .stderr(predicate::str::contains(
-            "Failed while scanning: Did not find any words in",
-        ));
+        .stderr(predicate::str::contains("Did not find any words in"));

-    assert_eq!(mock.hits(), 1);
+    assert_eq!(mock.hits(), 0);

    teardown_tmp_directory(tmp_dir);
    Ok(())
--- a/tests/test_scanner.rs
+++ b/tests/test_scanner.rs
@@ -3,6 +3,8 @@ use assert_cmd::prelude::*;
 use httpmock::Method::GET;
 use httpmock::MockServer;
 use predicates::prelude::*;
+use std::thread::sleep;
+use std::time::Duration;
 use std::{process::Command, time};
 use utils::{setup_tmp_directory, teardown_tmp_directory};

@@ -89,9 +91,9 @@ fn scanner_recursive_request_scan() -> Result<(), Box<dyn std::error::Error>> {
            .and(predicate::str::is_match("200.*js/dev/file.js").unwrap()),
    );

-    assert_eq!(js_mock.hits(), 1);
-    assert_eq!(js_prod_mock.hits(), 1);
-    assert_eq!(js_dev_mock.hits(), 1);
+    assert_eq!(js_mock.hits(), 2);
+    assert_eq!(js_prod_mock.hits(), 2);
+    assert_eq!(js_dev_mock.hits(), 2);
    assert_eq!(js_dev_file_mock.hits(), 1);

    teardown_tmp_directory(tmp_dir);
@@ -153,9 +155,9 @@ fn scanner_recursive_request_scan_using_only_success_responses(
            .and(predicate::str::is_match("200.*js/dev/file.js").unwrap()),
    );

-    assert_eq!(js_mock.hits(), 1);
-    assert_eq!(js_prod_mock.hits(), 1);
-    assert_eq!(js_dev_mock.hits(), 1);
+    assert_eq!(js_mock.hits(), 3);
+    assert_eq!(js_prod_mock.hits(), 3);
+    assert_eq!(js_dev_mock.hits(), 3);
    assert_eq!(js_dev_file_mock.hits(), 1);

    teardown_tmp_directory(tmp_dir);
@@ -596,7 +598,7 @@ fn scanner_recursion_works_with_403_directories() {

    assert_eq!(mock.hits(), 1);
    assert_eq!(found_anyway.hits(), 1);
-    assert_eq!(forbidden_dir.hits(), 1);
+    assert_eq!(forbidden_dir.hits(), 3);

    teardown_tmp_directory(tmp_dir);
 }
@@ -638,3 +640,215 @@ fn rate_limit_enforced_when_specified() {

    teardown_tmp_directory(tmp_dir);
 }
+
+#[test]
+/// ensure that auto-discovered extensions are tracked in statistics and bar lengths are updated
+fn add_discovered_extension_updates_bars_and_stats() {
+    let srv = MockServer::start();
+    let (tmp_dir, file) = setup_tmp_directory(
+        &["LICENSE".to_string(), "stuff.php".to_string()],
+        "wordlist",
+    )
+    .unwrap();
+
+    srv.mock(|when, then| {
+        when.method(GET).path("/stuff.php");
+        then.status(200).body("cool... coolcoolcool");
+    });
+
+    let file_path = tmp_dir.path().join("debug-file.txt");
+
+    assert!(!file_path.exists());
+
+    Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .arg("--extract-links")
+        .arg("--collect-extensions")
+        .arg("-vvvv")
+        .arg("--debug-log")
+        .arg(file_path.as_os_str())
+        .unwrap()
+        .assert()
+        .success();
+
+    let contents = std::fs::read_to_string(file_path).unwrap();
+    println!("{}", contents);
+    assert!(contents.contains("discovered new extension: php"));
+    assert!(contents.contains("extensions_collected: 1"));
+    assert!(contents.contains("expected_per_scan: 6"));
+}
+
+#[test]
+/// send a request to a 200 file, expect pre-configured backup collection rules to be applied
+/// and then requested
+fn collect_backups_makes_appropriate_requests() {
+    let srv = MockServer::start();
+    let (tmp_dir, file) = setup_tmp_directory(&["LICENSE.txt".to_string()], "wordlist").unwrap();
+
+    let valid_paths = vec![
+        "/LICENSE.txt",
+        "/LICENSE.txt~",
+        "/LICENSE.txt.bak",
+        "/LICENSE.txt.bak2",
+        "/LICENSE.txt.old",
+        "/LICENSE.txt.1",
+        "/LICENSE.bak",
+        "/.LICENSE.txt.swp",
+    ];
+
+    let valid_mocks: Vec<_> = valid_paths
+        .iter()
+        .map(|&p| {
+            srv.mock(|when, then| {
+                when.method(GET).path(p);
+                then.status(200).body("this is a valid test");
+            })
+        })
+        .collect();
+
+    let invalid_paths: Vec<_> = vec![
+        "/LICENSE.txt~~",
+        "/LICENSE.txt.bak.bak",
+        "/LICENSE.txt.bak2.bak2",
+        "/LICENSE.txt.old.old",
+        "/LICENSE.txt.1.1",
+        "/..LICENSE.txt.swp.swp",
+    ];
+
+    let invalid_mocks: Vec<_> = invalid_paths
+        .iter()
+        .map(|&p| {
+            srv.mock(|when, then| {
+                when.method(GET).path(p);
+                then.status(200).body("this is an invalid test");
+            })
+        })
+        .collect();
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("--collect-backups")
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .unwrap();
+
+    cmd.assert().success().stdout(
+        predicate::str::contains("/LICENSE.txt")
+            .and(predicate::str::contains("/LICENSE.txt~"))
+            .and(predicate::str::contains("/LICENSE.txt.bak"))
+            .and(predicate::str::contains("/LICENSE.txt.bak2"))
+            .and(predicate::str::contains("/LICENSE.txt.old"))
+            .and(predicate::str::contains("/LICENSE.txt.1"))
+            .and(predicate::str::contains("/LICENSE.bak"))
+            .and(predicate::str::contains("/.LICENSE.txt.swp")),
+    );
+
+    for valid_mock in valid_mocks {
+        assert_eq!(valid_mock.hits(), 1);
+    }
+
+    for invalid_mock in invalid_mocks {
+        assert_eq!(invalid_mock.hits(), 0);
+    }
+
+    teardown_tmp_directory(tmp_dir);
+}
+
+#[test]
+/// send requests to four files that return 200, expect non-zero tf-idf rated words to be requested as well
+fn collect_words_makes_appropriate_requests() {
+    let srv = MockServer::start();
+
+    let wordlist: Vec<_> = [
+        "doc1", "doc2", "doc3", "doc4", "blah", "blah2", "blah3", "blah4",
+    ]
+    .iter()
+    .map(|w| w.to_string())
+    .collect();
+
+    let (tmp_dir, file) = setup_tmp_directory(&wordlist, "wordlist").unwrap();
+
+    srv.mock(|when, then| {
+        when.method(GET).path("/doc1");
+        then.status(200)
+            .body("Air quality in the sunny island improved gradually throughout Wednesday.");
+    });
+    srv.mock(|when, then| {
+        when.method(GET).path("/doc2");
+        then.status(200).body(
+            "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.",
+        );
+    });
+    srv.mock(|when, then| {
+        when.method(GET).path("/doc3");
+        then.status(200).body("The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island");
+    });
+    srv.mock(|when, then| {
+        when.method(GET).path("/doc4");
+        then.status(200)
+            .body("The air quality in Singapore got worse on Wednesday.");
+    });
+
+    let valid_paths = vec![
+        "/gradually",
+        "/network",
+        "/hit",
+        "/located",
+        "/continued",
+        "/island",
+        "/worse",
+        "/monitored",
+        "/monitoring",
+        "/haze",
+        "/different",
+        "/stations",
+        "/sunny",
+        "/singapore",
+        "/improved",
+        "/parts",
+        "/wednesday",
+    ];
+
+    let valid_mocks: Vec<_> = valid_paths
+        .iter()
+        .map(|&p| {
+            srv.mock(|when, then| {
+                when.method(GET).path(p);
+                then.status(200);
+            })
+        })
+        .collect();
+
+    let cmd = Command::cargo_bin("feroxbuster")
+        .unwrap()
+        .arg("--url")
+        .arg(srv.url("/"))
+        .arg("-vv")
+        .arg("--collect-words")
+        .arg("-t")
+        .arg("1")
+        .arg("--wordlist")
+        .arg(file.as_os_str())
+        .unwrap();
+
+    cmd.assert().success().stdout(
+        predicate::str::contains("/doc1")
+            .and(predicate::str::contains("/doc2"))
+            .and(predicate::str::contains("/doc3"))
+            .and(predicate::str::contains("/doc4")),
+    );
+
+    sleep(Duration::new(2, 0));
+
+    for valid_mock in valid_mocks {
+        assert_eq!(valid_mock.hits(), 1);
+    }
+
+    teardown_tmp_directory(tmp_dir);
+}
Author	SHA1	Message	Date
epi	cca3163baf	fixed test	2022-03-05 07:09:23 -06:00
epi	c9013edce8	Merge pull request #511 from epi052/all-contributors/add-0dayCTF docs: add 0dayCTF as a contributor for ideas	2022-03-05 07:02:49 -06:00
allcontributors[bot]	7bdb137fd1	docs: update .all-contributorsrc [skip ci]	2022-03-05 13:02:17 +00:00
allcontributors[bot]	5f0eaf8885	docs: update README.md [skip ci]	2022-03-05 13:02:16 +00:00
epi	2b7002d9cf	Merge pull request #494 from epi052/release-2.6.0-multi-feature 2.6.0 release branch	2022-03-05 06:57:33 -06:00
epi	86b17f226d	removed lint	2022-03-04 21:47:52 -06:00
epi	cbbf9be6c9	added composite flags	2022-03-04 21:15:43 -06:00
epi	f814c4b223	put back stripped comment	2022-03-04 06:54:17 -06:00
epi	7839118379	added cargo make Makefile.toml	2022-03-04 06:52:45 -06:00
epi	8214a2a357	bumped depenedencies	2022-03-04 06:52:23 -06:00
epi	e06e194f77	fixed flaky test	2022-03-04 06:52:13 -06:00
epi	1628ee86a3	added info log for collect words	2022-03-04 06:51:59 -06:00
epi	53d2076176	removed deprecated clap struct/methods	2022-03-04 06:45:22 -06:00
epi	304750fa3f	Merge pull request #507 from epi052/collect-words-feature-2.6 add collect-words feature	2022-03-03 06:55:02 -06:00
epi	6c5c812784	added a few tests	2022-03-03 06:38:37 -06:00
epi	063e7b0420	added --collect-words implementation	2022-03-01 17:55:14 -06:00
epi	eed59e1da5	added nlp module	2022-02-27 13:42:07 -06:00
epi	ca4d8f0c52	replay proxy respects --data now	2022-02-19 17:28:19 -06:00
epi	c1132622cf	replay proxy respects --data now	2022-02-19 15:19:09 -06:00
epi	2d5aeb444e	added temp workaround for +proxy/-data problem	2022-02-19 15:11:22 -06:00
epi	53238a6e5e	Merge pull request #474 from godylockz/Misc-Fixes Add no-state option, filter queries from links, fix headers	2022-02-19 10:18:50 -06:00
epi	7b7eeeebfa	Merge branch 'release-2.6.0-multi-feature' into Misc-Fixes	2022-02-18 08:05:12 -06:00
epi	aed0c41d8f	Merge pull request #498 from epi052/489-discover-backups implemented/tested logic for collecting backups	2022-02-17 20:02:20 -06:00
epi	5edd58a3f4	clippy	2022-02-17 19:46:44 -06:00
epi	44693a3498	added cli/banner/tests etc...	2022-02-17 19:44:29 -06:00
epi	02448e9834	dirlist extraction gated behind -e	2022-02-16 21:25:20 -06:00
epi	368035833c	fixed up implementation/removed todo items	2022-02-16 20:44:07 -06:00
epi	d13bce2261	implemented/tested logic for collecting backups	2022-02-16 17:16:12 -06:00
epi	88d451144c	Merge pull request #495 from epi052/collect-extensions-feature implemented --collect-extensions; numerous bugfixes/code improvements	2022-02-15 21:04:49 -06:00
epi	8d639a17e4	removed read_body param from FeroxResponse::from	2022-02-15 21:03:02 -06:00
epi	7f0dcb6b46	lint	2022-02-15 20:51:01 -06:00
epi	3230f9c276	removed client param from logged_request	2022-02-15 20:42:15 -06:00
epi	b21ea9ce32	removed assay	2022-02-15 20:37:20 -06:00
epi	801413105d	clippy	2022-02-15 16:35:16 -06:00
epi	3030296d1c	added more tests again	2022-02-15 16:34:12 -06:00
epi	88a595fd82	added more tests	2022-02-15 07:14:23 -06:00
epi	9a84c5234f	fixed banner tests	2022-02-14 17:32:40 -06:00
epi	d0d99ebed6	tests passing	2022-02-14 06:29:25 -06:00
epi	7194326cd1	moved dirlist detection to heuristics/fixed initial extract async issue	2022-02-13 13:20:13 -06:00
epi	71885e7e56	implemented --collect-extensions; numerous bugfixes/code improvements	2022-02-12 07:01:25 -06:00
epi	13cfbe152e	bumped version to 2.6.0	2022-02-12 06:57:42 -06:00
godylockz	8b9d640090	Found a bug for redirect links not extracting links properly	2022-02-05 23:34:16 -05:00
godylockz	007bc4a50d	Response URL was not being used in concatenation.	2022-01-30 01:24:53 -05:00
godylockz	71c5b66eb6	Revert, did in wrong place.	2022-01-30 01:07:32 -05:00
godylockz	1498122973	Fix warnings / formatting.	2022-01-29 09:22:23 -05:00
godylockz	c0b4040743	Fix relative pathing	2022-01-29 09:20:44 -05:00
godylockz	3c474920bb	Fix per comments	2022-01-24 23:10:21 -05:00
godylockz	079b8b2176	Update src/parser.rs Co-authored-by: epi <43392618+epi052@users.noreply.github.com>	2022-01-24 22:17:09 -05:00
godylockz	4a678ef65b	Shell completions	2022-01-23 14:30:41 -05:00
godylockz	e9fb9642a8	Add no-state option, filter queries from links, fix headers	2022-01-23 14:27:24 -05:00