diff --git a/Cargo.lock b/Cargo.lock index 09008b8..cf6ed58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.52" +version = "1.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3" +checksum = "159bb86af3a200e19a068f4224eae4c8bb2d0fa054c7e5d1cacd5cef95e684cd" [[package]] name = "ascii-canvas" @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "async-global-executor" -version = "2.0.2" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9586ec52317f36de58453159d48351bc244bc24ced3effc1fce22f3d48664af6" +checksum = "c026b7e44f1316b567ee750fea85103f87fcb80792b860e979f221259796ca0a" dependencies = [ "async-channel", "async-executor", @@ -112,9 +112,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b" +checksum = "e97a171d191782fba31bb902b14ad94e24a68145032b7eedf871ab0bc0d077b6" dependencies = [ "event-listener", ] @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "async-task" -version = "4.0.3" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91831deabf0d6d7ec49552e489aed63b7456a7a3c46cff62adad428110b0af0" +checksum = "677d306121baf53310a3fd342d88dc0824f6bbeace68347593658525565abee8" [[package]] name = "async-trait" @@ -218,9 +218,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" @@ -317,9 +317,9 @@ checksum = 
"a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6" [[package]] name = "cc" -version = "1.0.72" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" [[package]] name = "cfg-if" @@ -329,9 +329,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.0.7" +version = "3.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e8611f9ae4e068fa3e56931fded356ff745e70987ff76924a6e0ab1c8ef2e3" +checksum = "ced1892c55c910c1219e98d6fc8d71f6bddba7905866ce740066d8bfea859312" dependencies = [ "atty", "bitflags", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "3.0.4" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d044e9db8cd0f68191becdeb5246b7462e4cf0c069b19ae00d1bf3fa9889498d" +checksum = "df6f3613c0a3cddfd78b41b10203eb322cb29b600cbdf808a7d3db95691b8e25" dependencies = [ "clap", ] @@ -385,9 +385,9 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" [[package]] name = "core-foundation" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6888e10551bb93e424d8df1d07f1a8b4fceb0001a3a4b048bfc47554946f47b3" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" dependencies = [ "core-foundation-sys", "libc", @@ -401,9 +401,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "crossbeam-utils" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120" +checksum = 
"b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" dependencies = [ "cfg-if", "lazy_static", @@ -411,15 +411,15 @@ dependencies = [ [[package]] name = "crossterm" -version = "0.20.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebde6a9dd5e331cd6c6f48253254d117642c31653baa475e394657c59c1f7d" +checksum = "77b75a27dc8d220f1f8521ea69cd55a34d720a200ebb3a624d9aa19193d3b432" dependencies = [ "bitflags", "crossterm_winapi", "libc", - "mio", - "parking_lot", + "mio 0.7.14", + "parking_lot 0.12.0", "signal-hook", "signal-hook-mio", "winapi", @@ -427,9 +427,9 @@ dependencies = [ [[package]] name = "crossterm_winapi" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6966607622438301997d3dac0d2f6e9a90c68bb6bc1785ea98456ab93c0507" +checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" dependencies = [ "winapi", ] @@ -656,22 +656,22 @@ dependencies = [ [[package]] name = "event-listener" -version = "2.5.1" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7531096570974c3a9dcf9e4b8e1cede1ec26cf5046219fb3b9d897503b9be59" +checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71" [[package]] name = "fastrand" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" dependencies = [ "instant", ] [[package]] name = "feroxbuster" -version = "2.5.0" +version = "2.6.0" dependencies = [ "anyhow", "assert_cmd", @@ -700,7 +700,7 @@ dependencies = [ "serde_regex", "tempfile", "tokio", - "tokio-util", + "tokio-util 0.7.0", "toml", "url", "uuid", @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "futf" -version = "0.1.4" +version = "0.1.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" dependencies = [ "mac", "new_debug_unreachable", @@ -764,9 +764,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" dependencies = [ "futures-channel", "futures-core", @@ -779,9 +779,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" dependencies = [ "futures-core", "futures-sink", @@ -789,15 +789,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" [[package]] name = "futures-executor" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" dependencies = [ "futures-core", "futures-task", @@ -806,9 +806,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2" +checksum = 
"fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" [[package]] name = "futures-lite" @@ -827,9 +827,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" dependencies = [ "proc-macro2", "quote", @@ -838,21 +838,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" [[package]] name = "futures-task" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" [[package]] name = "futures-util" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ "futures-channel", "futures-core", @@ -903,9 +903,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" +checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" dependencies = [ "cfg-if", "libc", @@ -914,22 +914,21 @@ dependencies = [ [[package]] name = "gloo-timers" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "6f16c88aa13d2656ef20d1c042086b8767bbe2bdb62526894275a1b062161b2e" +checksum = "4d12a7f4e95cfe710f1d624fb1210b7d961a5fb05c4fd942f4feab06e61f590e" dependencies = [ "futures-channel", "futures-core", "js-sys", "wasm-bindgen", - "web-sys", ] [[package]] name = "h2" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c9de88456263e249e241fcd211d3954e2c9b0ef7ccfc235a444eb367cae3689" +checksum = "d9f1f717ddc7b2ba36df7e871fd88db79326551d3d6f1fc406fbfd28b582ff8e" dependencies = [ "bytes", "fnv", @@ -940,7 +939,7 @@ dependencies = [ "indexmap", "slab", "tokio", - "tokio-util", + "tokio-util 0.6.9", "tracing", ] @@ -997,9 +996,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" +checksum = "9100414882e15fb7feccb4897e5f0ff0ff1ca7d1a86a23208ada4d7a18e6c6c4" [[package]] name = "httpdate" @@ -1043,9 +1042,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.16" +version = "0.14.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7ec3e62bdc98a2f0393a5048e4c30ef659440ea6e0e572965103e72bd836f55" +checksum = "043f0e083e9901b6cc658a77d1eb86f4fc650bbb977a4337dd63192826aa85dd" dependencies = [ "bytes", "futures-channel", @@ -1056,7 +1055,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 0.4.8", + "itoa 1.0.1", "pin-project-lite", "socket2", "tokio", @@ -1176,9 +1175,9 @@ checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" [[package]] name = "js-sys" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" +checksum = 
"a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04" dependencies = [ "wasm-bindgen", ] @@ -1194,9 +1193,9 @@ dependencies = [ [[package]] name = "lalrpop" -version = "0.19.6" +version = "0.19.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15174f1c529af5bf1283c3bc0058266b483a67156f79589fab2a25e23cf8988" +checksum = "852b75a095da6b69da8c5557731c3afd06525d4f655a4fc1c799e2ec8bc4dce4" dependencies = [ "ascii-canvas", "atty", @@ -1217,9 +1216,9 @@ dependencies = [ [[package]] name = "lalrpop-util" -version = "0.19.6" +version = "0.19.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3e58cce361efcc90ba8a0a5f982c741ff86b603495bb15a998412e957dcd278" +checksum = "d6d265705249fe209280676d8f68887859fa42e1d34f342fc05bd47726a5e188" dependencies = [ "regex", ] @@ -1252,9 +1251,9 @@ checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" [[package]] name = "libc" -version = "0.2.112" +version = "0.2.119" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" +checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" [[package]] name = "libnghttp2-sys" @@ -1280,9 +1279,9 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" dependencies = [ "scopeguard", ] @@ -1357,6 +1356,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "mio" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2" +dependencies = [ + "libc", + "log", + "miow", + "ntapi", + "winapi", +] + [[package]] name = "miow" version = "0.3.7" @@ 
-1417,9 +1429,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "ntapi" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" dependencies = [ "winapi", ] @@ -1451,9 +1463,9 @@ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] name = "once_cell" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" [[package]] name = "openssl" @@ -1521,7 +1533,17 @@ checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ "instant", "lock_api", - "parking_lot_core", + "parking_lot_core 0.8.5", +] + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.1", ] [[package]] @@ -1538,6 +1560,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking_lot_core" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1561,7 +1596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" dependencies = [ "phf_macros", - "phf_shared", + "phf_shared 0.8.0", "proc-macro-hack", ] @@ -1572,7 +1607,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", ] [[package]] @@ -1581,7 +1616,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" dependencies = [ - "phf_shared", + "phf_shared 0.8.0", "rand", ] @@ -1592,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", "proc-macro-hack", "proc-macro2", "quote", @@ -1608,6 +1643,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + [[package]] name = "pico-args" version = "0.4.2" @@ -1724,9 +1768,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47aa80447ce4daf1717500037052af176af5d38cc3e571d9ec1c7353fc10c87d" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" dependencies = [ "proc-macro2", ] @@ -1784,9 +1828,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" dependencies = [ "bitflags", ] @@ -1797,7 +1841,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.4", + "getrandom 0.2.5", "redox_syscall", ] @@ -1872,9 +1916,9 @@ dependencies = [ [[package]] name = "rlimit" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc0bf25554376fd362f54332b8410a625c71f15445bca32ffdfdf4ec9ac91726" +checksum = "347703a5ae47adf1e693144157be231dde38c72bd485925cae7407ad3e52480b" dependencies = [ "libc", ] @@ -1934,9 +1978,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09d3c15d814eda1d6a836f2f2b56a6abc1446c8a34351cb3180d3db92ffe4ce" +checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc" dependencies = [ "bitflags", "core-foundation", @@ -1947,9 +1991,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90dd10c41c6bfc633da6e0c659bd25d31e0791e5974ac42970267d59eba87f7" +checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" dependencies = [ "core-foundation-sys", "libc", @@ -1977,24 +2021,24 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" +checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" [[package]] name = "serde" -version = "1.0.133" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.133" 
+version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" dependencies = [ "proc-macro2", "quote", @@ -2003,9 +2047,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" dependencies = [ "itoa 1.0.1", "ryu", @@ -2061,7 +2105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29fd5867f1c4f2c5be079aee7a2adf1152ebb04a4bc4d341f504b7dece607ed4" dependencies = [ "libc", - "mio", + "mio 0.7.14", "signal-hook", ] @@ -2082,9 +2126,9 @@ checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" [[package]] name = "siphasher" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba1eead9e94aa5a2e02de9e7839f96a007f686ae7a1d57c7797774810d24908a" +checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e" [[package]] name = "slab" @@ -2111,9 +2155,9 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "socket2" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" +checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" dependencies = [ "libc", "winapi", @@ -2127,14 +2171,14 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "string_cache" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6" +checksum = "33994d0838dc2d152d17a62adf608a869b5e846b65b389af7f3dbc1de45c5b26" dependencies = [ "lazy_static", "new_debug_unreachable", - "parking_lot", - "phf_shared", + "parking_lot 0.11.2", + "phf_shared 0.10.0", "precomputed-hash", "serde", ] @@ -2146,7 +2190,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" dependencies = [ "phf_generator", - "phf_shared", + "phf_shared 0.8.0", "proc-macro2", "quote", ] @@ -2159,9 +2203,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a684ac3dcd8913827e18cd09a68384ee66c1de24157e3c556c9ab16d85695fb7" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" dependencies = [ "proc-macro2", "quote", @@ -2206,9 +2250,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ "winapi-util", ] @@ -2231,9 +2275,9 @@ checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" [[package]] name = "textwrap" -version = "0.14.2" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" dependencies = [ "terminal_size", ] @@ -2290,19 +2334,20 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.15.0" +version = "1.17.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbbf1c778ec206785635ce8ad57fe52b3009ae9e0c9f574a728f3049d3e55838" +checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" dependencies = [ "bytes", "libc", "memchr", - "mio", + "mio 0.8.0", "num_cpus", "once_cell", - "parking_lot", + "parking_lot 0.12.0", "pin-project-lite", "signal-hook-registry", + "socket2", "tokio-macros", "winapi", ] @@ -2365,6 +2410,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64910e1b9c1901aaf5375561e35b9c057d95ff41a44ede043a03e09279eabaf1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "log", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.5.8" @@ -2382,9 +2441,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.29" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" +checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f" dependencies = [ "cfg-if", "log", @@ -2395,9 +2454,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e" +checksum = "8276d9a4a3a558d7b7ad5303ad50b53d58264641b82914b7ada36bd762e7a716" dependencies = [ "proc-macro2", "quote", @@ -2406,9 +2465,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" +checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23" dependencies = [ 
"lazy_static", ] @@ -2481,7 +2540,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.4", + "getrandom 0.2.5", ] [[package]] @@ -2545,9 +2604,9 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasm-bindgen" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2555,9 +2614,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca" dependencies = [ "bumpalo", "lazy_static", @@ -2570,9 +2629,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" +checksum = "2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395" dependencies = [ "cfg-if", "js-sys", @@ -2582,9 +2641,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2592,9 +2651,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.78" 
+version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" dependencies = [ "proc-macro2", "quote", @@ -2605,15 +2664,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.78" +version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" +checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2" [[package]] name = "web-sys" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" +checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb" dependencies = [ "js-sys", "wasm-bindgen", @@ -2659,6 +2718,49 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" + +[[package]] +name = "windows_i686_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" + +[[package]] +name = "windows_i686_msvc" +version = "0.32.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" + [[package]] name = "winreg" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 7bc9fb5..9a81a8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "feroxbuster" -version = "2.5.0" +version = "2.6.0" authors = ["Ben 'epi' Risher (@epi052)"] license = "MIT" edition = "2021" @@ -16,45 +16,46 @@ build = "build.rs" maintenance = { status = "actively-developed" } [build-dependencies] -clap = {version = "3.0", features = ["cargo"]} -clap_complete = "3.0" -regex = "1" -lazy_static = "1.4" -dirs = "4.0" +clap = { version = "3.1.5", features = ["wrap_help", "cargo"] } +clap_complete = "3.1.1" +regex = "1.5.4" +lazy_static = "1.4.0" +dirs = "4.0.0" [dependencies] -scraper = "0.12" -futures = { version = "0.3"} -tokio = { version = "1.15", features = ["full"] } -tokio-util = {version = "0.6", features = ["codec"]} -log = "0.4" -env_logger = "0.9" -reqwest = { version = "0.11", features = ["socks"] } -url = { version = "2.2", features = ["serde"]} # uses feature unification to add 'serde' to reqwest::Url -serde_regex = "1.1" -clap = {version = "3.0", features = ["wrap_help", "cargo"]} -lazy_static = "1.4" -toml = "0.5" -serde = { version = "1.0", features = ["derive", "rc"] } -serde_json = "1.0" -uuid = { version = "0.8", features = ["v4"] } +scraper = "0.12.0" +futures = "0.3.21" +tokio = { version = "1.17.0", features = ["full"] } +tokio-util = { version = "0.7.0", features = ["codec"] } +log = 
"0.4.14" +env_logger = "0.9.0" +reqwest = { version = "0.11.9", features = ["socks"] } +# uses feature unification to add 'serde' to reqwest::Url +url = { version = "2.2.2", features = ["serde"] } +serde_regex = "1.1.0" +clap = { version = "3.1.5", features = ["wrap_help", "cargo"] } +lazy_static = "1.4.0" +toml = "0.5.8" +serde = { version = "1.0.136", features = ["derive", "rc"] } +serde_json = "1.0.79" +uuid = { version = "0.8.2", features = ["v4"] } indicatif = "0.15" -console = "0.15" -openssl = { version = "0.10", features = ["vendored"] } -dirs = "4.0" -regex = "1" -crossterm = "0.20" -rlimit = "0.6" -ctrlc = "3.2" +console = "0.15.0" +openssl = { version = "0.10.38", features = ["vendored"] } +dirs = "4.0.0" +regex = "1.5.4" +crossterm = "0.23.0" +rlimit = "0.7.0" +ctrlc = "3.2.1" fuzzyhash = "0.2.1" -anyhow = "1.0" -leaky-bucket = "0.10.0" +anyhow = "1.0.55" +leaky-bucket = "0.10.0" # todo: upgrade, will take a little work/thought since api changed [dev-dependencies] -tempfile = "3.3" -httpmock = "0.6" -assert_cmd = "2.0" -predicates = "2.1" +tempfile = "3.3.0" +httpmock = "0.6.6" +assert_cmd = "2.0.4" +predicates = "2.1.1" [profile.release] lto = true diff --git a/Makefile.toml b/Makefile.toml new file mode 100644 index 0000000..1afc7d4 --- /dev/null +++ b/Makefile.toml @@ -0,0 +1,18 @@ +# composite tasks +[tasks.upgrade] +dependencies = ["upgrade-deps", "update"] + +# cleaning +[tasks.clean-state] +script = """ +rm ferox-*.state +""" + +# dependency management +[tasks.upgrade-deps] +command = "cargo" +args = ["upgrade", "--exclude", "indicatif", "leaky-bucket"] + +[tasks.update] +command = "cargo" +args = ["update"] diff --git a/ferox-config.toml.example b/ferox-config.toml.example index 275f3c8..054ac39 100644 --- a/ferox-config.toml.example +++ b/ferox-config.toml.example @@ -30,7 +30,11 @@ # random_agent = false # redirects = true # insecure = true +# collect_words = true +# collect_backups = true +# collect_extensions = true # extensions = ["php", 
"html"] +# dont_collect = ["png", "gif", "jpg", "jpeg"] # methods = ["GET", "POST"] # data = [11, 12, 13, 14, 15] # url_denylist = ["http://dont-scan.me", "https://also-not.me"] diff --git a/shell_completions/_feroxbuster b/shell_completions/_feroxbuster index f7fb7b9..f863ac4 100644 --- a/shell_completions/_feroxbuster +++ b/shell_completions/_feroxbuster @@ -24,8 +24,8 @@ _feroxbuster() { '--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \ '*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \ '*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \ -'-a+[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \ -'--user-agent=[Sets the User-Agent (default: feroxbuster/2.5.0)]:USER_AGENT: ' \ +'-a+[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \ +'--user-agent=[Sets the User-Agent (default: feroxbuster/2.6.0)]:USER_AGENT: ' \ '*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \ '*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \ '*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \ @@ -64,6 +64,8 @@ _feroxbuster() { '--time-limit=[Limit total run time of all scans (ex: --time-limit 10m)]:TIME_SPEC: ' \ '-w+[Path to the wordlist]:FILE:_files' \ '--wordlist=[Path to the wordlist]:FILE:_files' \ +'*-I+[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \ +'*--dont-collect=[File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)]:FILE_EXTENSION: ' \ '-o+[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \ '--output=[Output file to write results to (use w/ --json for JSON entries)]:FILE:_files' \ '--debug-log=[Output file to write log 
entries (use w/ --json for JSON entries)]:FILE:_files' \ @@ -72,6 +74,10 @@ _feroxbuster() { '-V[Print version information]' \ '--version[Print version information]' \ '(-u --url)--stdin[Read url(s) from STDIN]' \ +'(-p --proxy -k --insecure --burp-replay)--burp[Set --proxy to http://127.0.0.1:8080 and set --insecure to true]' \ +'(-P --replay-proxy -k --insecure)--burp-replay[Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true]' \ +'--smart[Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true]' \ +'--thorough[Use the same settings as --smart and set --collect-extensions to true]' \ '-A[Use a random User-Agent]' \ '--random-agent[Use a random User-Agent]' \ '-f[Append / to each request'\''s URL]' \ @@ -88,12 +94,19 @@ _feroxbuster() { '--auto-bail[Automatically stop scanning when an excessive amount of errors are encountered]' \ '-D[Don'\''t auto-filter wildcard responses]' \ '--dont-filter[Don'\''t auto-filter wildcard responses]' \ +'-E[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ +'--collect-extensions[Automatically discover extensions and add them to --extensions (unless they'\''re in --dont-collect)]' \ +'-B[Automatically request likely backup extensions for "found" urls]' \ +'--collect-backups[Automatically request likely backup extensions for "found" urls]' \ +'-g[Automatically discover important words from within responses and add them to the wordlist]' \ +'--collect-words[Automatically discover important words from within responses and add them to the wordlist]' \ '(--silent)*-v[Increase verbosity level (use -vv or more for greater effect. \[CAUTION\] 4 -v'\''s is probably too much)]' \ '(--silent)*--verbosity[Increase verbosity level (use -vv or more for greater effect. 
\[CAUTION\] 4 -v'\''s is probably too much)]' \ '(-q --quiet)--silent[Only print URLs + turn off logging (good for piping a list of urls to other commands)]' \ '-q[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--quiet[Hide progress bars and banner (good for tmux windows w/ notifications)]' \ '--json[Emit JSON logs to --output and --debug-log instead of normal text]' \ +'--no-state[Disable state output file (*.state)]' \ && ret=0 } diff --git a/shell_completions/_feroxbuster.ps1 b/shell_completions/_feroxbuster.ps1 index a38c7d8..5eaeadb 100644 --- a/shell_completions/_feroxbuster.ps1 +++ b/shell_completions/_feroxbuster.ps1 @@ -12,7 +12,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { $element = $commandElements[$i] if ($element -isnot [StringConstantExpressionAst] -or $element.StringConstantType -ne [StringConstantType]::BareWord -or - $element.Value.StartsWith('-')) { + $element.Value.StartsWith('-') -or + $element.Value -eq $wordToComplete) { break } $element.Value @@ -29,8 +30,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests') [CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') [CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)') - [CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)') - [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.5.0)') + [CompletionResult]::new('-a', 'a', 
[CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)') + [CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.6.0)') [CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)') [CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)') [CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)') @@ -69,6 +70,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--time-limit', 'time-limit', [CompletionResultType]::ParameterName, 'Limit total run time of all scans (ex: --time-limit 10m)') [CompletionResult]::new('-w', 'w', [CompletionResultType]::ParameterName, 'Path to the wordlist') [CompletionResult]::new('--wordlist', 'wordlist', [CompletionResultType]::ParameterName, 'Path to the wordlist') + [CompletionResult]::new('-I', 'I', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)') + [CompletionResult]::new('--dont-collect', 'dont-collect', [CompletionResultType]::ParameterName, 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)') [CompletionResult]::new('-o', 'o', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)') [CompletionResult]::new('--output', 'output', [CompletionResultType]::ParameterName, 'Output file to write results to (use w/ --json for JSON entries)') [CompletionResult]::new('--debug-log', 'debug-log', [CompletionResultType]::ParameterName, 'Output file to write log entries (use w/ --json for JSON entries)') @@ -77,6 +80,10 @@ Register-ArgumentCompleter -Native 
-CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version information') [CompletionResult]::new('--version', 'version', [CompletionResultType]::ParameterName, 'Print version information') [CompletionResult]::new('--stdin', 'stdin', [CompletionResultType]::ParameterName, 'Read url(s) from STDIN') + [CompletionResult]::new('--burp', 'burp', [CompletionResultType]::ParameterName, 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true') + [CompletionResult]::new('--burp-replay', 'burp-replay', [CompletionResultType]::ParameterName, 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true') + [CompletionResult]::new('--smart', 'smart', [CompletionResultType]::ParameterName, 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true') + [CompletionResult]::new('--thorough', 'thorough', [CompletionResultType]::ParameterName, 'Use the same settings as --smart and set --collect-extensions to true') [CompletionResult]::new('-A', 'A', [CompletionResultType]::ParameterName, 'Use a random User-Agent') [CompletionResult]::new('--random-agent', 'random-agent', [CompletionResultType]::ParameterName, 'Use a random User-Agent') [CompletionResult]::new('-f', 'f', [CompletionResultType]::ParameterName, 'Append / to each request''s URL') @@ -93,12 +100,19 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock { [CompletionResult]::new('--auto-bail', 'auto-bail', [CompletionResultType]::ParameterName, 'Automatically stop scanning when an excessive amount of errors are encountered') [CompletionResult]::new('-D', 'D', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') [CompletionResult]::new('--dont-filter', 'dont-filter', [CompletionResultType]::ParameterName, 'Don''t auto-filter wildcard responses') + [CompletionResult]::new('-E', 'E', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add 
them to --extensions (unless they''re in --dont-collect)') + [CompletionResult]::new('--collect-extensions', 'collect-extensions', [CompletionResultType]::ParameterName, 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)') + [CompletionResult]::new('-B', 'B', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls') + [CompletionResult]::new('--collect-backups', 'collect-backups', [CompletionResultType]::ParameterName, 'Automatically request likely backup extensions for "found" urls') + [CompletionResult]::new('-g', 'g', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist') + [CompletionResult]::new('--collect-words', 'collect-words', [CompletionResultType]::ParameterName, 'Automatically discover important words from within responses and add them to the wordlist') [CompletionResult]::new('-v', 'v', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--verbosity', 'verbosity', [CompletionResultType]::ParameterName, 'Increase verbosity level (use -vv or more for greater effect. 
[CAUTION] 4 -v''s is probably too much)') [CompletionResult]::new('--silent', 'silent', [CompletionResultType]::ParameterName, 'Only print URLs + turn off logging (good for piping a list of urls to other commands)') [CompletionResult]::new('-q', 'q', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--quiet', 'quiet', [CompletionResultType]::ParameterName, 'Hide progress bars and banner (good for tmux windows w/ notifications)') [CompletionResult]::new('--json', 'json', [CompletionResultType]::ParameterName, 'Emit JSON logs to --output and --debug-log instead of normal text') + [CompletionResult]::new('--no-state', 'no-state', [CompletionResultType]::ParameterName, 'Disable state output file (*.state)') break } }) diff --git a/shell_completions/feroxbuster.bash b/shell_completions/feroxbuster.bash index fdb9a6d..a3a766a 100644 --- a/shell_completions/feroxbuster.bash +++ b/shell_completions/feroxbuster.bash @@ -19,7 +19,7 @@ _feroxbuster() { case "${cmd}" in feroxbuster) - opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -v -q -o --help --version --url --stdin --resume-from --proxy --replay-proxy --replay-codes --user-agent --random-agent --extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --verbosity --silent --quiet --json --output --debug-log" + opts="-h -V -u -p -P -R -a -A -x -m -H -b -Q -f -S -X -W -N -C -s -T -r -k -t -n -d -e -L -w -D -E -B -g -I -v -q -o --help --version --url --stdin --resume-from --burp --burp-replay --smart --thorough --proxy --replay-proxy --replay-codes --user-agent --random-agent 
--extensions --methods --data --headers --cookies --query --add-slash --dont-scan --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --status-codes --timeout --redirects --insecure --threads --no-recursion --depth --extract-links --scan-limit --parallel --rate-limit --time-limit --wordlist --auto-tune --auto-bail --dont-filter --collect-extensions --collect-backups --collect-words --dont-collect --verbosity --silent --quiet --json --output --debug-log --no-state" if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 @@ -221,6 +221,14 @@ _feroxbuster() { COMPREPLY=($(compgen -f "${cur}")) return 0 ;; + --dont-collect) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; + -I) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --output) COMPREPLY=($(compgen -f "${cur}")) return 0 diff --git a/shell_completions/feroxbuster.elv b/shell_completions/feroxbuster.elv index 7234c7b..4050129 100644 --- a/shell_completions/feroxbuster.elv +++ b/shell_completions/feroxbuster.elv @@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests' cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)' - cand -a 'Sets the User-Agent (default: feroxbuster/2.5.0)' - cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.5.0)' + cand -a 'Sets the User-Agent (default: feroxbuster/2.6.0)' + cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.6.0)' cand -x 'File extension(s) to search for (ex: -x php -x pdf js)' cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)' cand -m 'Which HTTP request method(s) should be sent (default: GET)' @@ -67,6 +67,8 @@ set 
edit:completion:arg-completer[feroxbuster] = {|@words| cand --time-limit 'Limit total run time of all scans (ex: --time-limit 10m)' cand -w 'Path to the wordlist' cand --wordlist 'Path to the wordlist' + cand -I 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)' + cand --dont-collect 'File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)' cand -o 'Output file to write results to (use w/ --json for JSON entries)' cand --output 'Output file to write results to (use w/ --json for JSON entries)' cand --debug-log 'Output file to write log entries (use w/ --json for JSON entries)' @@ -75,6 +77,10 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand -V 'Print version information' cand --version 'Print version information' cand --stdin 'Read url(s) from STDIN' + cand --burp 'Set --proxy to http://127.0.0.1:8080 and set --insecure to true' + cand --burp-replay 'Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true' + cand --smart 'Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true' + cand --thorough 'Use the same settings as --smart and set --collect-extensions to true' cand -A 'Use a random User-Agent' cand --random-agent 'Use a random User-Agent' cand -f 'Append / to each request''s URL' @@ -91,12 +97,19 @@ set edit:completion:arg-completer[feroxbuster] = {|@words| cand --auto-bail 'Automatically stop scanning when an excessive amount of errors are encountered' cand -D 'Don''t auto-filter wildcard responses' cand --dont-filter 'Don''t auto-filter wildcard responses' + cand -E 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' + cand --collect-extensions 'Automatically discover extensions and add them to --extensions (unless they''re in --dont-collect)' + cand -B 'Automatically request likely backup extensions for "found" urls' + cand --collect-backups 'Automatically request 
likely backup extensions for "found" urls' + cand -g 'Automatically discover important words from within responses and add them to the wordlist' + cand --collect-words 'Automatically discover important words from within responses and add them to the wordlist' cand -v 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --verbosity 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v''s is probably too much)' cand --silent 'Only print URLs + turn off logging (good for piping a list of urls to other commands)' cand -q 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --quiet 'Hide progress bars and banner (good for tmux windows w/ notifications)' cand --json 'Emit JSON logs to --output and --debug-log instead of normal text' + cand --no-state 'Disable state output file (*.state)' } ] $completions[$command] diff --git a/src/banner/container.rs b/src/banner/container.rs index 04e54e7..fb613e4 100644 --- a/src/banner/container.rs +++ b/src/banner/container.rs @@ -3,7 +3,7 @@ use crate::{ config::Configuration, event_handlers::Handles, utils::{logged_request, status_colorizer}, - DEFAULT_METHOD, VERSION, + DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, VERSION, }; use anyhow::{bail, Result}; use console::{style, Emoji}; @@ -151,6 +151,18 @@ pub struct Banner { /// whether or not there is a known new version pub(super) update_status: UpdateStatus, + + /// represents Configuration.collect_extensions + collect_extensions: BannerEntry, + + /// represents Configuration.dont_collect + dont_collect: BannerEntry, + + /// represents Configuration.collect_backups + collect_backups: BannerEntry, + + /// represents Configuration.collect_words + collect_words: BannerEntry, } /// implementation of Banner @@ -314,6 +326,21 @@ impl Banner { &format!("[{}]", config.methods.join(", ")), ); + let dont_collect = if config.dont_collect == DEFAULT_IGNORED_EXTENSIONS { + // default has 30+ 
extensions, just trim it up + BannerEntry::new( + "💸", + "Ignored Extensions", + "[Images, Movies, Audio, etc...]", + ) + } else { + BannerEntry::new( + "💸", + "Ignored Extensions", + &format!("[{}]", config.dont_collect.join(", ")), + ) + }; + let offset = std::cmp::min(config.data.len(), 30); let data = String::from_utf8(config.data[..offset].to_vec()) .unwrap_or_else(|_err| { @@ -322,8 +349,8 @@ impl Banner { &config.data[..std::cmp::min(config.data.len(), 13)] ) }) - .replace("\n", " ") - .replace("\r", ""); + .replace('\n', " ") + .replace('\r', ""); let data = BannerEntry::new("💣", "HTTP Body", &data); let insecure = BannerEntry::new("🔓", "Insecure", &config.insecure.to_string()); let redirects = BannerEntry::new("📍", "Follow Redirects", &config.redirects.to_string()); @@ -334,6 +361,16 @@ impl Banner { let parallel = BannerEntry::new("🛤", "Parallel Scans", &config.parallel.to_string()); let rate_limit = BannerEntry::new("🚧", "Requests per Second", &config.rate_limit.to_string()); + let collect_extensions = BannerEntry::new( + "💰", + "Collect Extensions", + &config.collect_extensions.to_string(), + ); + let collect_backups = + BannerEntry::new("🏦", "Collect Backups", &config.collect_backups.to_string()); + + let collect_words = + BannerEntry::new("🤑", "Collect Words", &config.collect_words.to_string()); Self { targets, @@ -374,6 +411,10 @@ impl Banner { scan_limit, time_limit, url_denylist, + collect_extensions, + collect_backups, + collect_words, + dont_collect, config: cfg, version: VERSION.to_string(), update_status: UpdateStatus::Unknown, @@ -550,6 +591,20 @@ by Ben "epi" Risher {} ver: {}"#, writeln!(&mut writer, "{}", self.extensions)?; } + if config.collect_extensions { + // dont-collect is active only when collect-extensions is used + writeln!(&mut writer, "{}", self.collect_extensions)?; + writeln!(&mut writer, "{}", self.dont_collect)?; + } + + if config.collect_backups { + writeln!(&mut writer, "{}", self.collect_backups)?; + } + + if 
config.collect_words { + writeln!(&mut writer, "{}", self.collect_words)?; + } + if !config.methods.is_empty() { writeln!(&mut writer, "{}", self.methods)?; } diff --git a/src/client.rs b/src/client.rs index 1e61306..ee0ecff 100644 --- a/src/client.rs +++ b/src/client.rs @@ -27,7 +27,8 @@ pub fn initialize( .user_agent(user_agent) .danger_accept_invalid_certs(insecure) .default_headers(header_map) - .redirect(policy); + .redirect(policy) + .http1_title_case_headers(); if let Some(some_proxy) = proxy { if !some_proxy.is_empty() { diff --git a/src/config/container.rs b/src/config/container.rs index 4c25057..c992e58 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -1,6 +1,6 @@ use super::utils::{ - depth, methods, report_and_exit, save_state, serialized_type, status_codes, threads, timeout, - user_agent, wordlist, OutputLevel, RequesterPolicy, + depth, ignored_extensions, methods, report_and_exit, save_state, serialized_type, status_codes, + threads, timeout, user_agent, wordlist, OutputLevel, RequesterPolicy, }; use crate::config::determine_output_level; use crate::config::utils::determine_requester_policy; @@ -26,7 +26,7 @@ macro_rules! 
update_config_if_present { match $matches.value_of_t($arg_name) { Ok(value) => *$conf_val = value, // Update value Err(err) => { - if !matches!(err.kind, clap::ErrorKind::ArgumentNotFound) { + if !matches!(err.kind(), clap::ErrorKind::ArgumentNotFound) { // Do nothing if argument not found err.exit() // Exit with error on any other parse error } @@ -246,8 +246,6 @@ pub struct Configuration { pub resume_from: String, /// Whether or not a scan's current state should be saved when user presses Ctrl+C - /// - /// Not configurable from CLI; can only be set from a config file #[serde(default = "save_state")] pub save_state: bool, @@ -264,8 +262,25 @@ pub struct Configuration { #[serde(default)] pub url_denylist: Vec, + /// URLs that should never be scanned/recursed into based on a regular expression #[serde(with = "serde_regex", default)] pub regex_denylist: Vec, + + /// Automatically discover extensions and add them to --extensions (unless they're in --dont-collect) + #[serde(default)] + pub collect_extensions: bool, + + /// don't collect any of these extensions when --collect-extensions is used + #[serde(default = "ignored_extensions")] + pub dont_collect: Vec, + + /// Automatically request likely backup extensions on "found" urls + #[serde(default)] + pub collect_backups: bool, + + /// Automatically discover important words from within responses and add them to the wordlist + #[serde(default)] + pub collect_words: bool, } impl Default for Configuration { @@ -310,6 +325,9 @@ impl Default for Configuration { no_recursion: false, extract_links: false, random_agent: false, + collect_extensions: false, + collect_backups: false, + collect_words: false, save_state: true, proxy: String::new(), config: String::new(), @@ -335,6 +353,7 @@ impl Default for Configuration { depth: depth(), threads: threads(), wordlist: wordlist(), + dont_collect: ignored_extensions(), } } } @@ -365,7 +384,11 @@ impl Configuration { /// - **random_agent**: `false` /// - **insecure**: `false` (don't 
be insecure, i.e. don't allow invalid certs) /// - **extensions**: `None` - /// - **methods**: [`DEFAULT_METHOD`] + /// - **collect_extensions**: `false` + /// - **collect_backups**: `false` + /// - **collect_words**: `false` + /// - **dont_collect**: [`DEFAULT_IGNORED_EXTENSIONS`](constant.DEFAULT_RESPONSE_CODES.html) + /// - **methods**: [`DEFAULT_METHOD`](constant.DEFAULT_METHOD.html) /// - **data**: `None` /// - **url_denylist**: `None` /// - **regex_denylist**: `None` @@ -566,6 +589,10 @@ impl Configuration { config.extensions = arg.map(|val| val.to_string()).collect(); } + if let Some(arg) = args.values_of("dont_collect") { + config.dont_collect = arg.map(|val| val.to_string()).collect(); + } + if let Some(arg) = args.values_of("methods") { config.methods = arg .map(|val| { @@ -686,7 +713,7 @@ impl Configuration { config.output_level = OutputLevel::Quiet; } - if args.is_present("auto_tune") { + if args.is_present("auto_tune") || args.is_present("smart") || args.is_present("thorough") { config.auto_tune = true; config.requester_policy = RequesterPolicy::AutoTune; } @@ -696,10 +723,32 @@ impl Configuration { config.requester_policy = RequesterPolicy::AutoBail; } + if args.is_present("no_state") { + config.save_state = false; + } + if args.is_present("dont_filter") { config.dont_filter = true; } + if args.is_present("collect_extensions") || args.is_present("thorough") { + config.collect_extensions = true; + } + + if args.is_present("collect_backups") + || args.is_present("smart") + || args.is_present("thorough") + { + config.collect_backups = true; + } + + if args.is_present("collect_words") + || args.is_present("smart") + || args.is_present("thorough") + { + config.collect_words = true; + } + if args.occurrences_of("verbosity") > 0 { // occurrences_of returns 0 if none are found; this is protected in // an if block for the same reason as the quiet option @@ -714,7 +763,10 @@ impl Configuration { config.add_slash = true; } - if args.is_present("extract_links") { 
+ if args.is_present("extract_links") + || args.is_present("smart") + || args.is_present("thorough") + { config.extract_links = true; } @@ -730,6 +782,14 @@ impl Configuration { update_config_if_present!(&mut config.user_agent, args, "user_agent"); update_config_if_present!(&mut config.timeout, args, "timeout"); + if args.is_present("burp") { + config.proxy = String::from("http://127.0.0.1:8080"); + } + + if args.is_present("burp_replay") { + config.replay_proxy = String::from("http://127.0.0.1:8080"); + } + if args.is_present("random_agent") { config.random_agent = true; } @@ -738,7 +798,8 @@ impl Configuration { config.redirects = true; } - if args.is_present("insecure") { + if args.is_present("insecure") || args.is_present("burp") || args.is_present("burp_replay") + { config.insecure = true; } @@ -872,6 +933,9 @@ impl Configuration { update_if_not_default!(&mut conf.quiet, new.quiet, false); update_if_not_default!(&mut conf.auto_bail, new.auto_bail, false); update_if_not_default!(&mut conf.auto_tune, new.auto_tune, false); + update_if_not_default!(&mut conf.collect_extensions, new.collect_extensions, false); + update_if_not_default!(&mut conf.collect_backups, new.collect_backups, false); + update_if_not_default!(&mut conf.collect_words, new.collect_words, false); // use updated quiet/silent values to determine output level; same for requester policy conf.output_level = determine_output_level(conf.quiet, conf.silent); conf.requester_policy = determine_requester_policy(conf.auto_tune, conf.auto_bail); @@ -941,6 +1005,11 @@ impl Configuration { // status_codes() is the default for replay_codes, if they're not provided update_if_not_default!(&mut conf.replay_codes, new.replay_codes, status_codes()); update_if_not_default!(&mut conf.save_state, new.save_state, save_state()); + update_if_not_default!( + &mut conf.dont_collect, + new.dont_collect, + ignored_extensions() + ); } /// If present, read in `DEFAULT_CONFIG_NAME` and deserialize the specified values diff --git 
a/src/config/tests.rs b/src/config/tests.rs index 634c200..2f1ff1e 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -30,7 +30,11 @@ fn setup_config_test() -> Configuration { resume_from = "/some/state/file" redirects = true insecure = true + collect_backups = true + collect_extensions = true + collect_words = true extensions = ["html", "php", "js"] + dont_collect = ["png", "gif", "jpg", "jpeg"] methods = ["GET", "PUT", "DELETE"] data = [31, 32, 33, 34] url_denylist = ["http://dont-scan.me", "https://also-not.me"] @@ -94,6 +98,9 @@ fn default_configuration() { assert!(!config.redirects); assert!(!config.extract_links); assert!(!config.insecure); + assert!(!config.collect_extensions); + assert!(!config.collect_backups); + assert!(!config.collect_words); assert!(config.regex_denylist.is_empty()); assert_eq!(config.queries, Vec::new()); assert_eq!(config.filter_size, Vec::::new()); @@ -101,6 +108,7 @@ fn default_configuration() { assert_eq!(config.methods, vec!["GET"]); assert_eq!(config.data, Vec::::new()); assert_eq!(config.url_denylist, Vec::::new()); + assert_eq!(config.dont_collect, ignored_extensions()); assert_eq!(config.filter_regex, Vec::::new()); assert_eq!(config.filter_similar, Vec::::new()); assert_eq!(config.filter_word_count, Vec::::new()); @@ -291,6 +299,27 @@ fn config_reads_extract_links() { assert!(config.extract_links); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_collect_extensions() { + let config = setup_config_test(); + assert!(config.collect_extensions); +} + +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_collect_backups() { + let config = setup_config_test(); + assert!(config.collect_backups); +} + +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_collect_words() { + let config = setup_config_test(); + assert!(config.collect_words); +} + #[test] /// parse the test config and see that the 
value parsed is correct fn config_reads_extensions() { @@ -298,6 +327,13 @@ fn config_reads_extensions() { assert_eq!(config.extensions, vec!["html", "php", "js"]); } +#[test] +/// parse the test config and see that the value parsed is correct +fn config_reads_dont_collect() { + let config = setup_config_test(); + assert_eq!(config.dont_collect, vec!["png", "gif", "jpg", "jpeg"]); +} + #[test] /// parse the test config and see that the value parsed is correct fn config_reads_methods() { diff --git a/src/config/utils.rs b/src/config/utils.rs index fb0926a..b31d646 100644 --- a/src/config/utils.rs +++ b/src/config/utils.rs @@ -1,6 +1,6 @@ use crate::{ utils::{module_colorizer, status_colorizer}, - DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION, + DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, DEFAULT_WORDLIST, VERSION, }; #[cfg(not(test))] use std::process::exit; @@ -57,6 +57,14 @@ pub(super) fn methods() -> Vec { vec![DEFAULT_METHOD.to_owned()] } +/// default extensions to ignore while auto-collecting +pub(super) fn ignored_extensions() -> Vec { + DEFAULT_IGNORED_EXTENSIONS + .iter() + .map(|s| s.to_string()) + .collect() +} + /// default wordlist pub(super) fn wordlist() -> String { String::from(DEFAULT_WORDLIST) diff --git a/src/event_handlers/command.rs b/src/event_handlers/command.rs index 58b7df9..5c91172 100644 --- a/src/event_handlers/command.rs +++ b/src/event_handlers/command.rs @@ -5,6 +5,7 @@ use tokio::sync::oneshot::Sender; use crate::response::FeroxResponse; use crate::{ + message::FeroxMessage, statistics::{StatError, StatField}, traits::FeroxFilter, }; @@ -66,6 +67,12 @@ pub enum Command { /// Just receive a sender and reply, used for slowing down the main thread Sync(Sender), + /// Notify event handler that a new extension has been seen + AddDiscoveredExtension(String), + + /// Write an arbitrary string to disk + WriteToDisk(Box), + /// Break out of the (infinite) mpsc receive loop Exit, } diff --git 
a/src/event_handlers/container.rs b/src/event_handlers/container.rs index d2ed9de..37cbd8e 100644 --- a/src/event_handlers/container.rs +++ b/src/event_handlers/container.rs @@ -6,6 +6,7 @@ use crate::Joiner; #[cfg(test)] use crate::{filters::FeroxFilters, statistics::Stats, Command}; use anyhow::{bail, Result}; +use std::collections::HashSet; use std::sync::{Arc, RwLock}; #[cfg(test)] use tokio::sync::mpsc::{self, UnboundedReceiver}; @@ -56,6 +57,9 @@ pub struct Handles { /// Handle for recursion pub scans: RwLock>, + + /// Pointer to the list of words generated from reading in the wordlist + pub wordlist: Arc>, } /// implementation of Handles @@ -66,6 +70,7 @@ impl Handles { filters: FiltersHandle, output: TermOutHandle, config: Arc, + wordlist: Arc>, ) -> Self { Self { stats, @@ -73,6 +78,7 @@ impl Handles { output, config, scans: RwLock::new(None), + wordlist, } } @@ -87,7 +93,14 @@ impl Handles { let terminal_handle = TermOutHandle::new(tx.clone(), tx.clone()); let stats_handle = StatsHandle::new(Arc::new(Stats::new(configuration.json)), tx.clone()); let filters_handle = FiltersHandle::new(Arc::new(FeroxFilters::default()), tx.clone()); - let handles = Self::new(stats_handle, filters_handle, terminal_handle, configuration); + let wordlist = Arc::new(vec![String::from("this_is_a_test")]); + let handles = Self::new( + stats_handle, + filters_handle, + terminal_handle, + configuration, + wordlist, + ); if let Some(sh) = scanned_urls { let scan_handle = ScanHandle::new(sh, tx); handles.set_scan_handle(scan_handle); @@ -116,6 +129,46 @@ impl Handles { bail!("Could not get underlying CommandSender object") } + /// wrapper to reach into `FeroxScans` and yank out the length of `collected_extensions` + pub fn num_collected_extensions(&self) -> usize { + if !self.config.collect_extensions { + // if --collect-extensions wasn't used, simply return 0 and forego unlocking + return 0; + } + + self.collected_extensions().len() + } + + /// wrapper to reach into `FeroxScans` 
and yank out the length of `collected_extensions` + pub fn collected_extensions(&self) -> HashSet { + if let Ok(scans) = self.ferox_scans() { + if let Ok(extensions) = scans.collected_extensions.read() { + return extensions.clone(); + } + } + + HashSet::new() + } + + /// number of words in the wordlist, multiplied by `expected_num_requests_multiplier` + pub fn expected_num_requests_per_dir(&self) -> usize { + let num_words = self.wordlist.len(); + let multiplier = self.expected_num_requests_multiplier(); + multiplier * num_words + } + + /// number of extensions plus the number of request method types plus any dynamically collected + /// extensions + pub fn expected_num_requests_multiplier(&self) -> usize { + let multiplier = self.config.extensions.len() + + self.config.methods.len() + + self.num_collected_extensions(); + + // methods should always have at least 1 member, likely making this .max call unneeded + // but leaving it for 'just in case' reasons + multiplier.max(1) + } + /// Helper to easily get the (locked) underlying FeroxScans object pub fn ferox_scans(&self) -> Result> { if let Ok(guard) = self.scans.read().as_ref() { diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index a855712..304cbaf 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -2,11 +2,14 @@ use super::Command::AddToUsizeField; use super::*; use anyhow::{Context, Result}; +use futures::future::{BoxFuture, FutureExt}; use tokio::sync::{mpsc, oneshot}; +use crate::statistics::StatField::TotalExpected; use crate::{ config::Configuration, progress::PROGRESS_PRINTER, + response::FeroxResponse, scanner::RESPONSES, send_command, skip_fail, statistics::StatField::ResourcesDiscovered, @@ -15,6 +18,17 @@ use crate::{ CommandReceiver, CommandSender, Joiner, }; use std::sync::Arc; +use url::Url; + +#[derive(Debug, Copy, Clone)] +/// Simple enum for semantic clarity around calling expectations for `process_response` +enum ProcessResponseCall { + 
/// call should allow recursion + Recursive, + + /// call should not allow recursion + NotRecursive, +} #[derive(Debug)] /// Container for terminal output transmitter @@ -90,6 +104,12 @@ impl FileOutHandler { Command::Report(response) => { skip_fail!(write_to(&*response, &mut file, self.config.json)); } + Command::WriteToDisk(message) => { + // todo consider making report accept dyn FeroxSerialize; would mean adding + // as_any/box_eq/PartialEq to the trait and then adding them to the + // implementing structs + skip_fail!(write_to(&*message, &mut file, self.config.json)); + } Command::Exit => { break; } @@ -185,56 +205,9 @@ impl TermOutHandler { while let Some(command) = self.receiver.recv().await { match command { - Command::Report(mut resp) => { - let contains_sentry = - self.config.status_codes.contains(&resp.status().as_u16()); - let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown - let should_process_response = contains_sentry && unknown_sentry; - - if should_process_response { - // print to stdout - ferox_print(&resp.as_str(), &PROGRESS_PRINTER); - - send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1)); - - if self.file_task.is_some() { - // -o used, need to send the report to be written out to disk - self.tx_file - .send(Command::Report(resp.clone())) - .with_context(|| { - fmt_err(&format!("Could not send {} to file handler", resp)) - })?; - } - } - log::trace!("report complete: {}", resp.url()); - - if self.config.replay_client.is_some() && should_process_response { - // replay proxy specified/client created and this response's status code is one that - // should be replayed; not using logged_request due to replay proxy client - make_request( - self.config.replay_client.as_ref().unwrap(), - resp.url(), - resp.method().as_str(), - None, - self.config.output_level, - &self.config, - tx_stats.clone(), - ) - .await - .with_context(|| "Could not replay request through replay proxy")?; - } - - if should_process_response { - // 
add response to RESPONSES for serialization in case of ctrl+c - // placed all by its lonesome like this so that RESPONSES can take ownership - // of the FeroxResponse - - // before ownership is transferred, there's no real reason to keep the body anymore - // so we can free that piece of data, reducing memory usage - resp.drop_text(); - - RESPONSES.insert(*resp); - } + Command::Report(resp) => { + self.process_response(tx_stats.clone(), resp, ProcessResponseCall::Recursive) + .await?; } Command::Sync(sender) => { sender.send(true).unwrap_or_default(); @@ -251,6 +224,175 @@ impl TermOutHandler { log::trace!("exit: start"); Ok(()) } + + /// upon receiving a `FeroxResponse` from the mpsc, handle printing, sending to the replay + /// proxy, checking for backups of the `FeroxResponse`'s url, and tracking the response. + fn process_response( + &self, + tx_stats: CommandSender, + mut resp: Box, + call_type: ProcessResponseCall, + ) -> BoxFuture<'_, Result<()>> { + log::trace!("enter: process_response({:?}, {:?})", resp, call_type); + + async move { + let contains_sentry = self.config.status_codes.contains(&resp.status().as_u16()); + let unknown_sentry = !RESPONSES.contains(&resp); // !contains == unknown + let should_process_response = contains_sentry && unknown_sentry; + + if should_process_response { + // print to stdout + ferox_print(&resp.as_str(), &PROGRESS_PRINTER); + + send_command!(tx_stats, AddToUsizeField(ResourcesDiscovered, 1)); + + if self.file_task.is_some() { + // -o used, need to send the report to be written out to disk + self.tx_file + .send(Command::Report(resp.clone())) + .with_context(|| { + fmt_err(&format!("Could not send {} to file handler", resp)) + })?; + } + } + log::trace!("report complete: {}", resp.url()); + + if self.config.replay_client.is_some() && should_process_response { + // replay proxy specified/client created and this response's status code is one that + // should be replayed; not using logged_request due to replay proxy client + 
let data = if self.config.data.is_empty() { + None + } else { + Some(self.config.data.as_slice()) + }; + + make_request( + self.config.replay_client.as_ref().unwrap(), + resp.url(), + resp.method().as_str(), + data, + self.config.output_level, + &self.config, + tx_stats.clone(), + ) + .await + .with_context(|| "Could not replay request through replay proxy")?; + } + + if self.config.collect_backups + && should_process_response + && matches!(call_type, ProcessResponseCall::Recursive) + { + // --collect-backups was used; the response is one we care about, and the function + // call came from the loop in `.start` (i.e. recursive was specified + let backup_urls = self.generate_backup_urls(&resp).await; + + // need to manually adjust stats + send_command!(tx_stats, AddToUsizeField(TotalExpected, backup_urls.len())); + + for backup_url in &backup_urls { + let backup_response = make_request( + &self.config.client, + backup_url, + resp.method().as_str(), + None, + self.config.output_level, + &self.config, + tx_stats.clone(), + ) + .await + .with_context(|| { + format!("Could not request backup of {}", resp.url().as_str()) + })?; + + let ferox_response = FeroxResponse::from( + backup_response, + resp.url().as_str(), + resp.method().as_str(), + resp.output_level, + ) + .await; + + self.process_response( + tx_stats.clone(), + Box::new(ferox_response), + ProcessResponseCall::NotRecursive, + ) + .await?; + } + } + + if should_process_response { + // add response to RESPONSES for serialization in case of ctrl+c + // placed all by its lonesome like this so that RESPONSES can take ownership + // of the FeroxResponse + + // before ownership is transferred, there's no real reason to keep the body anymore + // so we can free that piece of data, reducing memory usage + resp.drop_text(); + + RESPONSES.insert(*resp); + } + log::trace!("exit: process_response"); + Ok(()) + } + .boxed() + } + + /// internal helper to stay DRY + fn add_new_url_to_vec(&self, url: &Url, new_name: &str, urls: 
&mut Vec) { + let mut new_url = url.clone(); + new_url.set_path(new_name); + urls.push(new_url); + } + + /// given a `FeroxResponse`, generate either 6 or 7 urls that are likely backups of the + /// original. + /// + /// example: + /// original: LICENSE.txt + /// backups: + /// - LICENSE.txt~ + /// - LICENSE.txt.bak + /// - LICENSE.txt.bak2 + /// - LICENSE.txt.old + /// - LICENSE.txt.1 + /// - LICENSE.bak + /// - .LICENSE.txt.swp + async fn generate_backup_urls(&self, response: &FeroxResponse) -> Vec { + log::trace!("enter: generate_backup_urls({:?})", response); + + let mut urls = vec![]; + let url = response.url(); + + // confirmed safe: see src/response.rs for comments + let filename = url.path_segments().unwrap().last().unwrap(); + + if !filename.is_empty() { + // append rules + for suffix in ["~", ".bak", ".bak2", ".old", ".1"] { + self.add_new_url_to_vec(url, &format!("{}{}", filename, suffix), &mut urls); + } + + // vim swap rule + self.add_new_url_to_vec(url, &format!(".{}.swp", filename), &mut urls); + + // replace original extension rule + let parts: Vec<_> = filename + .split('.') + // keep things like /.bash_history out of results + .filter(|part| !part.is_empty()) + .collect(); + + if parts.len() > 1 { + // filename + at least one extension, i.e. 
whatever.js becomes ["whatever", "js"] + self.add_new_url_to_vec(url, &format!("{}.bak", parts.first().unwrap()), &mut urls); + } + } + + log::trace!("exit: generate_backup_urls -> {:?}", urls); + urls + } } #[cfg(test)] @@ -286,4 +428,89 @@ mod tests { println!("{:?}", toh); tx.send(Command::Exit).unwrap(); } + + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] + /// when the feroxresponse's url contains an extension, there should be 7 urls returned + async fn generate_backup_urls_creates_correct_urls_when_extension_present() { + let (tx, rx) = mpsc::unbounded_channel::(); + let (tx_file, _) = mpsc::unbounded_channel::(); + let config = Arc::new(Configuration::new().unwrap()); + + let toh = TermOutHandler { + config, + file_task: None, + receiver: rx, + tx_file, + }; + + let expected: Vec<_> = vec![ + "derp.php~", + "derp.php.bak", + "derp.php.bak2", + "derp.php.old", + "derp.php.1", + ".derp.php.swp", + "derp.bak", + ]; + + let mut fr = FeroxResponse::default(); + fr.set_url("http://localhost/derp.php"); + + let urls = toh.generate_backup_urls(&fr).await; + + let paths: Vec<_> = urls + .iter() + .map(|url| url.path_segments().unwrap().last().unwrap()) + .collect(); + + assert_eq!(urls.len(), 7); + + for path in paths { + assert!(expected.contains(&path)); + } + + tx.send(Command::Exit).unwrap(); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] + /// when the feroxresponse's url doesn't contain an extension, there should be 6 urls returned + async fn generate_backup_urls_creates_correct_urls_when_extension_not_present() { + let (tx, rx) = mpsc::unbounded_channel::(); + let (tx_file, _) = mpsc::unbounded_channel::(); + let config = Arc::new(Configuration::new().unwrap()); + + let toh = TermOutHandler { + config, + file_task: None, + receiver: rx, + tx_file, + }; + + let expected: Vec<_> = vec![ + "derp~", + "derp.bak", + "derp.bak2", + "derp.old", + "derp.1", + ".derp.swp", + ]; + + let mut fr = FeroxResponse::default(); + 
fr.set_url("http://localhost/derp"); + + let urls = toh.generate_backup_urls(&fr).await; + + let paths: Vec<_> = urls + .iter() + .map(|url| url.path_segments().unwrap().last().unwrap()) + .collect(); + + assert_eq!(urls.len(), 6); + + for path in paths { + assert!(expected.contains(&path)); + } + + tx.send(Command::Exit).unwrap(); + } } diff --git a/src/event_handlers/scans.rs b/src/event_handlers/scans.rs index 3eb9bf3..a1705c3 100644 --- a/src/event_handlers/scans.rs +++ b/src/event_handlers/scans.rs @@ -15,6 +15,7 @@ use crate::{ use super::command::Command::AddToUsizeField; use super::*; +use crate::statistics::StatField; use reqwest::Url; use tokio::time::Duration; @@ -176,6 +177,23 @@ impl ScanHandler { Command::Sync(sender) => { sender.send(true).unwrap_or_default(); } + Command::AddDiscoveredExtension(new_extension) => { + // if --collect-extensions was used, AND the new extension isn't in + // the --dont-collect list AND it's also not in the --extensions list, AND + // we actually added a new extension (i.e. wasn't previously known), add + // it to FeroxScans.collected_extensions + if self.handles.config.collect_extensions + && !self.handles.config.dont_collect.contains(&new_extension) + && !self.handles.config.extensions.contains(&new_extension) + && self.data.add_discovered_extension(new_extension) + { + self.update_all_bar_lengths()?; + self.handles + .stats + .send(Command::AddToUsizeField(StatField::ExtensionsCollected, 1)) + .unwrap_or_default(); + } + } _ => {} // no other commands needed for RecursionHandler } } @@ -184,6 +202,93 @@ impl ScanHandler { Ok(()) } + /// update all current and future bar lengths + /// + /// updating all bar lengths correctly requires a few different actions on our part. 
+ /// - get the current number of requests expected per scan (dynamic when --collect-extensions + /// is used) + /// - update the overall progress bar via the statistics handler (total expected) + /// - update the expected per scan value tracked in the statistics handler + /// - update progress bars on each FeroxScan (type::directory) that are running/not-started + /// - update progress bar length on FeroxScans (this is used when creating new a FeroxScan and + /// determines the new scan's progress bar length) + fn update_all_bar_lengths(&self) -> Result<()> { + log::trace!("enter: update_all_bar_lengths"); + + // current number of requests expected per scan + // ExpectedPerScan and TotalExpected are a += action, so we need the wordlist length to + // update them while the other updates use expected_num_requests_per_dir + let num_words = self.get_wordlist()?.len(); + let current_expectation = self.handles.expected_num_requests_per_dir() as u64; + + // used in the calculation of bar width down below, see explanation there + let divisor = self.handles.expected_num_requests_multiplier() as u64 - 1; + + // add another `wordlist.len` to the expected per scan tracker in the statistics handler + self.handles + .stats + .send(AddToUsizeField(StatField::ExpectedPerScan, num_words))?; + + // since we're adding extensions in the middle of scans (potentially), we need to take + // current number of requests into account, new_total will be used as an accumulator + // used to increment the overall progress bar + let mut new_total = 0; + + if let Ok(ferox_scans) = self.handles.ferox_scans() { + // update progress bar length on FeroxScans, which used when creating a new FeroxScan's + // progress bar and should mirror the expected_per_scan field on Statistics + ferox_scans.set_bar_length(current_expectation); + + if let Ok(scans_guard) = ferox_scans.scans.read() { + // update progress bars on each FeroxScan where its scan type is directory and + // scan status is either running or 
not-started + for scan in scans_guard.iter() { + if scan.is_active() { + // current number of words left in the 'to-scan' bin, for example: + // + // say we have a 2000 word wordlist, have `-x js` on the command line, and + // just found `php` as a new extension + // + // that puts our state at: + // - wordlist length: 2000 + // - total expected: 4000 (original length * 2 for -x js) + // + // let's assume the current scan has sent 3000 requests so far + // that means to get the number of `words` left to send, we need to take + // the difference of 4000 and 3000 and then divide that by the current + // multiplier (2 in the example) + // + // (4000 - 3000) / 2 => 500 words left to send + // + // the remaining 500 words will be sent as 3 variations (word, word.js, + // word.php). So, we would then need to increment the bar by 500 to + // reflect the dynamism of adding extensions mid-scan. + let bar = scan.progress_bar(); + + // (4000 - 3000) / 2 => 500 words left to send + let length = bar.length(); + let num_words_left = (length - bar.position()) / divisor; + + // accumulate each bar's increment value for incrementing the total bar + new_total += num_words_left; + + bar.inc_length(num_words_left); + } + } + } + + // add the total number of newly expected requests to the overall progress bar + // via the statistics handler + self.handles.stats.send(AddToUsizeField( + StatField::TotalExpected, + new_total as usize, + ))?; + } + + log::trace!("exit: update_all_bar_lengths"); + Ok(()) + } + /// Helper to easily get the (locked) underlying wordlist pub fn get_wordlist(&self) -> Result>> { if let Ok(guard) = self.wordlist.lock().as_ref() { diff --git a/src/event_handlers/statistics.rs b/src/event_handlers/statistics.rs index 5e6483b..2ca4426 100644 --- a/src/event_handlers/statistics.rs +++ b/src/event_handlers/statistics.rs @@ -103,7 +103,7 @@ impl StatsHandler { Command::AddToUsizeField(field, value) => { self.stats.update_usize_field(field, value); - if matches!(field, 
StatField::TotalScans) { + if matches!(field, StatField::TotalScans | StatField::TotalExpected) { self.bar.set_length(self.stats.total_expected() as u64); } } diff --git a/src/extractor/builder.rs b/src/extractor/builder.rs index d9c0d7d..6349607 100644 --- a/src/extractor/builder.rs +++ b/src/extractor/builder.rs @@ -16,14 +16,14 @@ pub(super) const ROBOTS_TXT_REGEX: &str = /// Which type of extraction should be performed #[derive(Debug, Copy, Clone)] pub enum ExtractionTarget { - /// Examine a response body and extract links + /// Examine a response body and extract javascript and html links (multiple tags) ResponseBody, /// Examine robots.txt (specifically) and extract links RobotsTxt, - // Parse HTML and extract links - ParseHtml, + /// Extract all tags from a page + DirectoryListing, } /// responsible for building an `Extractor` @@ -79,9 +79,9 @@ impl<'a> ExtractorBuilder<'a> { self } - /// finalize configuration of ExtratorBuilder and return an Extractor + /// finalize configuration of `ExtractorBuilder` and return an `Extractor` /// - /// requires either with_url or with_response to have been used in the build process + /// requires either `with_url` or `with_response` to have been used in the build process pub fn build(&self) -> Result> { if (self.url.is_empty() && self.response.is_none()) || self.handles.is_none() { bail!("Extractor requires a URL or a FeroxResponse be specified as well as a Handles object") diff --git a/src/extractor/container.rs b/src/extractor/container.rs index c7bbb08..91b4577 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -1,5 +1,4 @@ use super::*; -use crate::utils::should_deny_url; use crate::{ client, event_handlers::{ @@ -13,11 +12,11 @@ use crate::{ StatField::{LinksExtracted, TotalExpected}, }, url::FeroxUrl, - utils::{logged_request, make_request}, - DEFAULT_METHOD, + utils::{logged_request, make_request, should_deny_url}, + ExtractionResult, DEFAULT_METHOD, }; use anyhow::{bail, Context, Result}; 
-use reqwest::{StatusCode, Url}; +use reqwest::{Client, StatusCode, Url}; use scraper::{Html, Selector}; use std::collections::HashSet; use tokio::sync::oneshot; @@ -57,19 +56,77 @@ pub struct Extractor<'a> { /// Extractor implementation impl<'a> Extractor<'a> { /// perform extraction from the given target and return any links found - pub async fn extract(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: extract (this fn has associated trace exit msg)"); + pub async fn extract(&self) -> Result { + log::trace!( + "enter: extract({:?}) (this fn has no associated trace exit msg)", + self.target + ); match self.target { ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?), ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?), - ExtractionTarget::ParseHtml => Ok(self.parse_html().await?), + ExtractionTarget::DirectoryListing => Ok(self.extract_from_dir_listing().await?), } } + /// wrapper around logic that performs the following: + /// - parses `url_to_parse` + /// - bails if the parsed url doesn't belong to the original host/domain + /// - otherwise, calls `add_all_sub_paths` with the parsed result + fn parse_url_and_add_subpaths( + &self, + url_to_parse: &str, + original_url: &Url, + links: &mut HashSet, + ) -> Result<()> { + log::trace!("enter: parse_url_and_add_subpaths({:?})", links); + + match Url::parse(url_to_parse) { + Ok(absolute) => { + if absolute.domain() != original_url.domain() + || absolute.host() != original_url.host() + { + // domains/ips are not the same, don't scan things that aren't part of the original + // target url + bail!("parsed url does not belong to original domain/host"); + } + + if self.add_all_sub_paths(absolute.path(), links).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", absolute, links); + } + } + Err(e) => { + // this is the expected error that happens when we try to parse a url fragment + // ex: Url::parse("/login") -> Err("relative URL without a base") + // while this is 
technically an error, these are good results for us + if e.to_string().contains("relative URL without a base") { + if self.add_all_sub_paths(url_to_parse, links).is_err() { + log::warn!( + "could not add sub-paths from {} to {:?}", + url_to_parse, + links + ); + } + } else { + // unexpected error has occurred + log::warn!("Could not parse given url: {}", e); + self.handles.stats.send(AddError(Other)).unwrap_or_default(); + } + } + } + + log::trace!("exit: parse_url_and_add_subpaths"); + Ok(()) + } + /// given a set of links from a normal http body response, task the request handler to make /// the requests - pub async fn request_links(&self, links: HashSet) -> Result<()> { + pub async fn request_links(&mut self, links: HashSet) -> Result<()> { log::trace!("enter: request_links({:?})", links); + + if links.is_empty() { + return Ok(()); + } + let recursive = if self.handles.config.no_recursion { RecursionStatus::NotRecursive } else { @@ -77,6 +134,7 @@ impl<'a> Extractor<'a> { }; let scanned_urls = self.handles.ferox_scans()?; + self.update_stats(links.len())?; for link in links { let mut resp = match self.request_link(&link).await { @@ -100,6 +158,10 @@ impl<'a> Extractor<'a> { scanned_urls.add_file_scan(resp.url().as_str(), ScanOrder::Latest); + if self.handles.config.collect_extensions { + resp.parse_extension(self.handles.clone())?; + } + if let Err(e) = resp.send_report(self.handles.output.tx.clone()) { log::warn!("Could not send FeroxResponse to output handler: {}", e); } @@ -135,8 +197,26 @@ impl<'a> Extractor<'a> { Ok(()) } - /// Given a `reqwest::Response`, perform the following actions - /// - parse the response's text for links using the linkfinder regex + /// wrapper around link extraction via html attributes + fn extract_all_links_from_html_tags( + &self, + resp_url: &Url, + links: &mut HashSet, + html: &Html, + ) { + self.extract_links_by_attr(resp_url, links, html, "a", "href"); + self.extract_links_by_attr(resp_url, links, html, "img", "src"); + 
self.extract_links_by_attr(resp_url, links, html, "form", "action"); + self.extract_links_by_attr(resp_url, links, html, "script", "src"); + self.extract_links_by_attr(resp_url, links, html, "iframe", "src"); + self.extract_links_by_attr(resp_url, links, html, "div", "src"); + self.extract_links_by_attr(resp_url, links, html, "frame", "src"); + self.extract_links_by_attr(resp_url, links, html, "embed", "src"); + self.extract_links_by_attr(resp_url, links, html, "script", "src"); + } + + /// Given the body of a `reqwest::Response`, perform the following actions + /// - parse the body for links using the linkfinder regex /// - for every link found take its url path and parse each sub-path /// - example: Response contains a link fragment `homepage/assets/img/icons/handshake.svg` /// with a base url of http://localhost, the following urls would be returned: @@ -145,69 +225,32 @@ impl<'a> Extractor<'a> { /// - homepage/assets/img/ /// - homepage/assets/ /// - homepage/ - pub(super) async fn extract_from_body(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: extract_from_body"); + fn extract_all_links_from_javascript( + &self, + response_body: &str, + response_url: &Url, + links: &mut HashSet, + ) { + log::trace!( + "enter: extract_all_links_from_javascript(html body..., {}, {:?})", + response_url.as_str(), + links + ); - let mut links = HashSet::::new(); - let dirlist_flag = false; - - // Response - let response = self.response.unwrap(); - let resp_url = response.url(); - let body = response.text(); - let html = Html::parse_document(body); - - // Extract Links - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src"); - 
self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - - for capture in self.links_regex.captures_iter(body) { + for capture in self.links_regex.captures_iter(response_body) { // remove single & double quotes from both ends of the capture // capture[0] is the entire match, additional capture groups start at [1] let link = capture[0].trim_matches(|c| c == '\'' || c == '"'); - match Url::parse(link) { - Ok(absolute) => { - if absolute.domain() != self.response.unwrap().url().domain() - || absolute.host() != self.response.unwrap().url().host() - { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - continue; - } - - if self.add_all_sub_paths(absolute.path(), &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute, links); - } - } - Err(e) => { - // this is the expected error that happens when we try to parse a url fragment - // ex: Url::parse("/login") -> Err("relative URL without a base") - // while this is technically an error, these are good results for us - if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); - } - } else { - // unexpected error has occurred - log::warn!("Could not parse given url: {}", e); - self.handles.stats.send(AddError(Other)).unwrap_or_default(); - } - } + if self + .parse_url_and_add_subpaths(link, response_url, links) + .is_err() + { + // purposely not logging the error here, due to the frequency with which it gets hit } } - self.update_stats(links.len())?; - - log::trace!("exit: extract_from_body -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + log::trace!("exit: 
extract_all_links_from_javascript"); } /// take a url fragment like homepage/assets/img/icons/handshake.svg and @@ -227,6 +270,45 @@ impl<'a> Extractor<'a> { Ok(()) } + /// given a url path, trim whitespace, remove slashes, and queries/fragments; return the + /// normalized string + pub(super) fn normalize_url_path(&self, path: &str) -> String { + log::trace!("enter: normalize_url_path({})", path); + + // remove whitespace and leading '/' + let path_str: String = path + .trim() + .trim_start_matches('/') + .chars() + .filter(|char| !char.is_whitespace()) + .collect(); + + // snippets from rfc-3986: + // + // foo://example.com:8042/over/there?name=ferret#nose + // \_/ \______________/\_________/ \_________/ \__/ + // | | | | | + // scheme authority path query fragment + // + // The path component is terminated + // by the first question mark ("?") or number sign ("#") character, or + // by the end of the URI. + // + // The query component is indicated by the first question + // mark ("?") character and terminated by a number sign ("#") character + // or by the end of the URI. 
+ let (path_str, _discarded) = path_str + .split_once('?') + // if there isn't a '?', try to remove a fragment + .unwrap_or_else(|| { + // if there isn't a '#', return (original, empty) + path_str.split_once('#').unwrap_or((&path_str, "")) + }); + + log::trace!("exit: normalize_url_path -> {}", path_str); + path_str.into() + } + /// Iterate over a given path, return a list of every sub-path found /// /// example: `path` contains a link fragment `homepage/assets/img/icons/handshake.svg` @@ -240,8 +322,13 @@ impl<'a> Extractor<'a> { log::trace!("enter: get_sub_paths_from_path({})", path); let mut paths = vec![]; + let normalized_path = self.normalize_url_path(path); + // filter out any empty strings caused by .split - let mut parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + let mut parts: Vec<&str> = normalized_path + .split('/') + .filter(|s| !s.is_empty()) + .collect(); let length = parts.len(); @@ -274,7 +361,7 @@ impl<'a> Extractor<'a> { paths } - /// simple helper to stay DRY, trys to join a url + fragment and add it to the `links` HashSet + /// simple helper to stay DRY, tries to join a url + fragment and add it to the `links` HashSet pub(super) fn add_link_to_set_of_links( &self, link: &str, @@ -283,15 +370,15 @@ impl<'a> Extractor<'a> { log::trace!("enter: add_link_to_set_of_links({}, {:?})", link, links); let old_url = match self.target { - ExtractionTarget::ResponseBody => self.response.unwrap().url().clone(), - ExtractionTarget::ParseHtml | ExtractionTarget::RobotsTxt => { - match Url::parse(&self.url) { - Ok(u) => u, - Err(e) => { - bail!("Could not parse {}: {}", self.url, e); - } - } + ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => { + self.response.unwrap().url().clone() } + ExtractionTarget::RobotsTxt => match Url::parse(&self.url) { + Ok(u) => u, + Err(e) => { + bail!("Could not parse {}: {}", self.url, e); + } + }, }; let new_url = old_url @@ -346,7 +433,6 @@ impl<'a> Extractor<'a> { new_response, 
url, DEFAULT_METHOD, - true, self.handles.config.output_level, ) .await; @@ -364,11 +450,10 @@ impl<'a> Extractor<'a> { /// http://localhost/stuff/things /// this function requests: /// http://localhost/robots.txt - pub(super) async fn extract_from_robots(&self) -> Result<(HashSet, bool)> { + pub(super) async fn extract_from_robots(&self) -> Result { log::trace!("enter: extract_robots_txt"); - let mut links: HashSet = HashSet::new(); - let dirlist_flag = false; + let mut result: HashSet<_> = ExtractionResult::new(); // request let response = self.make_extract_request("/robots.txt").await?; @@ -377,73 +462,60 @@ impl<'a> Extractor<'a> { for capture in self.robots_regex.captures_iter(body) { if let Some(new_path) = capture.name("url_path") { let mut new_url = Url::parse(&self.url)?; + new_url.set_path(new_path.as_str()); - if self.add_all_sub_paths(new_url.path(), &mut links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", new_url, links); + + if self.add_all_sub_paths(new_url.path(), &mut result).is_err() { + log::warn!("could not add sub-paths from {} to {:?}", new_url, result); } } } - self.update_stats(links.len())?; - - log::trace!("exit: extract_robots_txt -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + log::trace!("exit: extract_robots_txt -> {:?}", result); + Ok(result) } - /// Entry point to parse html for links (i.e. webscraping, directory listings) - /// this function requests: - /// http://localhost/ - pub(super) async fn parse_html(&self) -> Result<(HashSet, bool)> { - log::trace!("enter: parse_html"); + /// outer-most wrapper for parsing html response bodies in search of additional content. 
+ /// performs the following high-level steps: + /// - requests the page, if necessary + /// - checks the page to see if directory listing is enabled and sucks up all the links, if so + /// - uses the linkfinder regex to grab links from embedded javascript/javascript files + /// - extracts many different types of link sources from the html itself + pub(super) async fn extract_from_body(&self) -> Result { + log::trace!("enter: extract_from_body"); - let mut links: HashSet = HashSet::new(); - let mut dirlist_flag = false; + let mut result = ExtractionResult::new(); - // Response - let url = Url::parse(&self.url)?; - let response = self.make_extract_request(url.path()).await?; + let response = self.response.unwrap(); let resp_url = response.url(); let body = response.text(); let html = Html::parse_document(body); - // Directory listing heuristic detection to not continue scanning - // Index of /: apache - // Directory Listing for /: tomcat, - // Directory Listing -- /: ASP.NET - // - /: iis, azure, skipping due to loose heuristic - let title_selector = Selector::parse("title").unwrap(); - for t in html.select(&title_selector) { - let title = t.inner_html().to_lowercase(); - if title.contains("directory listing for /") - || title.contains("index of /") - || title.contains("directory listing -- /") - { - log::debug!("Directory listing heuristic detection from \"{}\"", title); - dirlist_flag = true; + // extract links from html tags/attributes and embedded javascript + self.extract_all_links_from_html_tags(resp_url, &mut result, &html); + self.extract_all_links_from_javascript(body, resp_url, &mut result); - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.update_stats(links.len())?; + log::trace!("exit: extract_from_body -> {:?}", result); + Ok(result) + } - log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag); - return Ok((links, dirlist_flag)); - } - } + /// parses html response bodies in search of tags. 
+ /// + /// the assumption is that directory listing is turned on and this extraction target simply + /// scoops up all the links for the given directory. The test to detect a directory listing + /// is located in `HeuristicTests` + pub async fn extract_from_dir_listing(&self) -> Result { + log::trace!("enter: extract_from_dir_listing"); - // Extract Links - self.extract_links_by_attr(resp_url, &mut links, &html, "a", "href"); - self.extract_links_by_attr(resp_url, &mut links, &html, "img", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "form", "action"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "iframe", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "div", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "frame", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "embed", "src"); - self.extract_links_by_attr(resp_url, &mut links, &html, "script", "src"); + let mut result = ExtractionResult::new(); - self.update_stats(links.len())?; + let response = self.response.unwrap(); + let html = Html::parse_document(response.text()); - log::trace!("exit: parse_html -> {:?} {}", links, dirlist_flag); - Ok((links, dirlist_flag)) + self.extract_links_by_attr(response.url(), &mut result, &html, "a", "href"); + + log::trace!("exit: extract_from_dir_listing -> {:?}", result); + Ok(result) } /// simple helper to get html links by tag/attribute and add it to the `links` HashSet @@ -458,41 +530,20 @@ impl<'a> Extractor<'a> { log::trace!("enter: extract_links_by_attr"); let selector = Selector::parse(html_tag).unwrap(); + let tags = html .select(&selector) .filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr)); - for t in tags { - if let Some(link) = t.value().attr(html_attr) { + + for tag in tags { + if let Some(link) = tag.value().attr(html_attr) { log::debug!("Parsed link \"{}\" from {}", link, 
resp_url.as_str()); - match Url::parse(link) { - Ok(absolute) => { - if absolute.domain() != resp_url.domain() - || absolute.host() != resp_url.host() - { - // domains/ips are not the same, don't scan things that aren't part of the original - // target url - continue; - } - - if self.add_all_sub_paths(absolute.path(), links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", absolute, links); - } - } - Err(e) => { - // this is the expected error that happens when we try to parse a url fragment - // ex: Url::parse("/login") -> Err("relative URL without a base") - // while this is technically an error, these are good results for us - if e.to_string().contains("relative URL without a base") { - if self.add_all_sub_paths(link, links).is_err() { - log::warn!("could not add sub-paths from {} to {:?}", link, links); - } - } else { - // unexpected error has occurred - log::warn!("Could not parse given url: {}", e); - self.handles.stats.send(AddError(Other)).unwrap_or_default(); - } - } + if self + .parse_url_and_add_subpaths(link, resp_url, links) + .is_err() + { + log::debug!("link didn't belong to the target domain/host: {}", link); } } } @@ -507,33 +558,45 @@ impl<'a> Extractor<'a> { pub(super) async fn make_extract_request(&self, location: &str) -> Result { log::trace!("enter: make_extract_request"); - // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something - // similar; to account for that, create a client that will follow redirects, regardless of - // what the user specified for the scanning client. Other than redirects, it will respect - // all other user specified settings - let follow_redirects = true; + // need late binding here to avoid 'creates a temporary which is freed...' in the + // `let ... 
if` below to avoid cloning the client out of config + let mut client = Client::new(); - let proxy = if self.handles.config.proxy.is_empty() { - None + if location == "/robots.txt" { + // more often than not, domain/robots.txt will redirect to www.domain/robots.txt or something + // similar; to account for that, create a client that will follow redirects, regardless of + // what the user specified for the scanning client. Other than redirects, it will respect + // all other user specified settings + let follow_redirects = true; + + let proxy = if self.handles.config.proxy.is_empty() { + None + } else { + Some(self.handles.config.proxy.as_str()) + }; + + client = client::initialize( + self.handles.config.timeout, + &self.handles.config.user_agent, + follow_redirects, + self.handles.config.insecure, + &self.handles.config.headers, + proxy, + )?; + } + + let client = if location != "/robots.txt" { + &self.handles.config.client } else { - Some(self.handles.config.proxy.as_str()) + &client }; - let client = client::initialize( - self.handles.config.timeout, - &self.handles.config.user_agent, - follow_redirects, - self.handles.config.insecure, - &self.handles.config.headers, - proxy, - )?; - let mut url = Url::parse(&self.url)?; url.set_path(location); // overwrite existing path // purposefully not using logged_request here due to using the special client let response = make_request( - &client, + client, &url, DEFAULT_METHOD, None, @@ -547,10 +610,10 @@ impl<'a> Extractor<'a> { response, &self.url, DEFAULT_METHOD, - true, self.handles.config.output_level, ) .await; + // note: don't call parse_extension here. 
If we call it here, it gets called on robots.txt log::trace!("exit: make_extract_request -> {}", ferox_response); Ok(ferox_response) @@ -558,7 +621,7 @@ impl<'a> Extractor<'a> { /// update total number of links extracted and expected responses fn update_stats(&self, num_links: usize) -> Result<()> { - let multiplier = self.handles.config.extensions.len().max(1); + let multiplier = self.handles.expected_num_requests_multiplier(); self.handles .stats diff --git a/src/extractor/tests.rs b/src/extractor/tests.rs index 8d9c972..e8e9756 100644 --- a/src/extractor/tests.rs +++ b/src/extractor/tests.rs @@ -21,7 +21,7 @@ lazy_static! { static ref BODY_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ResponseBody, Arc::new(FeroxScans::default())); /// Extractor for testing paring html - static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::ParseHtml, Arc::new(FeroxScans::default())); + static ref PARSEHTML_EXT: Extractor<'static> = setup_extractor(ExtractionTarget::DirectoryListing, Arc::new(FeroxScans::default())); /// FeroxResponse for Extractor static ref RESPONSE: FeroxResponse = get_test_response(); @@ -45,9 +45,9 @@ fn setup_extractor(target: ExtractionTarget, scanned_urls: Arc) -> E ExtractionTarget::RobotsTxt => builder .url("http://localhost") .target(ExtractionTarget::RobotsTxt), - ExtractionTarget::ParseHtml => builder + ExtractionTarget::DirectoryListing => builder .url("http://localhost") - .target(ExtractionTarget::ParseHtml), + .target(ExtractionTarget::DirectoryListing), }; let config = Arc::new(Configuration::new().unwrap()); @@ -195,7 +195,6 @@ fn extractor_add_link_to_set_of_links_happy_path() { fn extractor_add_link_to_set_of_links_with_non_base_url() { let mut links = HashSet::::new(); let link = "\\\\\\\\"; - assert_eq!(links.len(), 0); assert!(ROBOTS_EXT .add_link_to_set_of_links(link, &mut links) @@ -206,6 +205,34 @@ fn extractor_add_link_to_set_of_links_with_non_base_url() { assert!(links.is_empty()); } +#[test] 
+/// test for filtering queries and fragments +fn normalize_url_path_filters_queries_and_fragments() { + let handles = Arc::new(Handles::for_testing(None, None).0); + let extractor = ExtractorBuilder::default() + .url("doesnt matter") + .target(ExtractionTarget::RobotsTxt) + .handles(handles) + .build() + .unwrap(); + + let test_strings = [ + "over/there?name=ferret#nose", + "over/there?name=ferret", + "over/there#nose", + "over/there", + "over/there?name#nose", + "over/there?name", + " over/there?name=ferret#nose ", + "over/there?name=ferret ", + " over/there#nose", + ]; + test_strings.iter().for_each(|&ts| { + let normed = extractor.normalize_url_path(ts); + assert_eq!(normed, "over/there"); + }); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 1)] /// use make_request to generate a Response, and use the Response to test get_links; /// the response will contain an absolute path to a domain that is not part of the scanned @@ -240,14 +267,8 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain() let (handles, _rx) = Handles::for_testing(None, None); let handles = Arc::new(handles); - let ferox_response = FeroxResponse::from( - response, - &srv.url(""), - DEFAULT_METHOD, - true, - OutputLevel::Default, - ) - .await; + let ferox_response = + FeroxResponse::from(response, &srv.url(""), DEFAULT_METHOD, OutputLevel::Default).await; let extractor = Extractor { links_regex: Regex::new(LINKFINDER_REGEX).unwrap(), @@ -258,7 +279,7 @@ async fn extractor_get_links_with_absolute_url_that_differs_from_target_domain() handles: handles.clone(), }; - let links = (extractor.extract_from_body().await?).0; + let links = extractor.extract_from_body().await?; assert!(links.is_empty()); assert_eq!(mock.hits(), 1); diff --git a/src/filters/init.rs b/src/filters/init.rs index 1d98615..604b552 100644 --- a/src/filters/init.rs +++ b/src/filters/init.rs @@ -75,15 +75,18 @@ pub async fn initialize(handles: Arc) -> Result<()> { let resp = 
skip_fail!(logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await); // if successful, create a filter based on the response's body - let fr = FeroxResponse::from( + let mut fr = FeroxResponse::from( resp, similarity_filter, DEFAULT_METHOD, - true, handles.config.output_level, ) .await; + if handles.config.collect_extensions { + fr.parse_extension(handles.clone())?; + } + // hash the response body and store the resulting hash in the filter object let hash = FuzzyHash::new(&fr.text()).to_string(); diff --git a/src/heuristics.rs b/src/heuristics.rs index 31e832f..044b9b2 100644 --- a/src/heuristics.rs +++ b/src/heuristics.rs @@ -2,8 +2,10 @@ use std::sync::Arc; use anyhow::{bail, Result}; use console::style; +use scraper::{Html, Selector}; use uuid::Uuid; +use crate::message::FeroxMessage; use crate::{ config::OutputLevel, event_handlers::{Command, Handles}, @@ -36,6 +38,36 @@ macro_rules! format_template { }; } +/// enum representing the different servers that `parse_html` can detect when directory listing is +/// enabled +#[derive(Copy, Debug, Clone)] +pub enum DirListingType { + /// apache server, detected by `Index of /` + Apache, + + /// tomcat/python server, detected by `Directory Listing for /` + TomCatOrPython, + + /// ASP.NET server, detected by `Directory Listing -- /` + AspDotNet, + + // /// IIS/Azure server, detected by `HOST_NAME - /` (not currently used) + // IIS_AZURE, + /// variant that represents the absence of directory listing + None, +} + +/// Wrapper around the results of running a directory listing detection against a target web page +#[derive(Debug, Clone)] +pub struct DirListingResult { + /// type of server where directory listing was detected + /// i.e. 
https://portswigger.net/kb/issues/00600100_directory-listing + pub dir_list_type: Option, + + /// the `FeroxResponse` generated during detection + pub response: FeroxResponse, +} + /// container for heuristics related info pub struct HeuristicTests { /// Handles object for event handler interaction @@ -198,11 +230,11 @@ impl HeuristicTests { .contains(&response.status().as_u16()) { // found a wildcard response + let mut ferox_response = FeroxResponse::from( response, &target.target, method, - true, self.handles.config.output_level, ) .await; @@ -282,6 +314,111 @@ impl HeuristicTests { log::trace!("exit: connectivity_test -> {:?}", good_urls); Ok(good_urls) } + + /// heuristic designed to detect when a server has directory listing enabled + pub async fn directory_listing(&self, target_url: &str) -> Result> { + log::trace!("enter: directory_listing({})", target_url); + + let tgt = if !target_url.ends_with('/') { + // if left unchanged, this function would be called against redirects that point to + // valid directories for most, if not all, directories beyond the initial urls. 
+ // so, instead of `directory_listing("http://localhost") -> None` we get + // `directory_listing("http://localhost/") -> Some(DirListingResult)` if there is + // directory listing beyond the redirect + format!("{}/", target_url) + } else { + target_url.to_string() + }; + + let url = FeroxUrl::from_string(&tgt, self.handles.clone()); + let request = url.format("", None)?; + + let result = logged_request(&request, DEFAULT_METHOD, None, self.handles.clone()).await?; + + let ferox_response = FeroxResponse::from( + result, + &url.target, + DEFAULT_METHOD, + self.handles.config.output_level, + ) + .await; + + let body = ferox_response.text(); + let html = Html::parse_document(body); + + let dirlist_type = self.detect_directory_listing(&html); + + if dirlist_type.is_some() { + // folks that run things and step away/rely on logs need to be notified of directory + // listing, since they won't see the message on the bar; bastardizing FeroxMessage + // for ease of implementation. This could use a bit of polish at some point. + let msg = format!( + "detected directory listing: {} ({:?})", + target_url, + dirlist_type.unwrap() + ); + let ferox_msg = FeroxMessage { + kind: "log".to_string(), + message: msg.clone(), + level: "MSG".to_string(), + time_offset: 0.0, + module: "feroxbuster::heuristics".to_string(), + }; + self.handles + .output + .tx_file + .send(Command::WriteToDisk(Box::new(ferox_msg))) + .unwrap_or_default(); + + log::info!("{}", msg); + + let result = DirListingResult { + dir_list_type: dirlist_type, + response: ferox_response, + }; + + log::trace!("exit: directory_listing -> {:?}", result); + return Ok(Some(result)); + } + + log::trace!("exit: directory_listing -> None"); + Ok(None) + } + + /// Directory listing heuristic detection, uses tag to make its determination. When + /// the inner html of <title> matches one of the following, a `DirListingType` is returned. 
+ /// - apache: `Index of /` + /// - tomcat/python: `Directory Listing for /` + /// - ASP.NET: `Directory Listing -- /` + /// - <host> - /: iis, azure, skipping due to loose heuristic + fn detect_directory_listing(&self, html: &Html) -> Option<DirListingType> { + log::trace!("enter: detect_directory_listing(html body...)"); + + let title_selector = Selector::parse("title").expect("couldn't parse title selector"); + + for t in html.select(&title_selector) { + let title = t.inner_html().to_lowercase(); + + let dirlist_type = if title.contains("directory listing for /") { + Some(DirListingType::TomCatOrPython) + } else if title.contains("index of /") { + Some(DirListingType::Apache) + } else if title.contains("directory listing -- /") { + Some(DirListingType::AspDotNet) + } else { + // IIS_AZURE purposely skipped for now + None + }; + + if dirlist_type.is_some() { + log::trace!("exit: detect_directory_listing -> {:?}", dirlist_type); + return dirlist_type; + } + } + + log::trace!("exit: detect_directory_listing -> None"); + None + } } #[cfg(test)] @@ -297,4 +434,51 @@ mod tests { assert_eq!(tester.unique_string(i).len(), i * 32); } } + + #[test] + /// `detect_directory_listing` correctly identifies tomcat/python instances + fn detect_directory_listing_finds_tomcat_python() { + let html = "<title>directory listing for /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(matches!( + dirlist_type.unwrap(), + DirListingType::TomCatOrPython + )); + } + + #[test] + /// `detect_directory_listing` correctly identifies apache instances + fn detect_directory_listing_finds_apache() { + let html = "index of /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = 
heuristics.detect_directory_listing(&parsed); + assert!(matches!(dirlist_type.unwrap(), DirListingType::Apache)); + } + + #[test] + /// `detect_directory_listing` correctly identifies ASP.NET instances + fn detect_directory_listing_finds_asp_dot_net() { + let html = "directory listing -- /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(matches!(dirlist_type.unwrap(), DirListingType::AspDotNet)); + } + + #[test] + /// `detect_directory_listing` returns None when heuristic doesn't match + fn detect_directory_listing_returns_none_as_default() { + let html = "derp listing -- /"; + let parsed = Html::parse_document(html); + let handles = Handles::for_testing(None, None); + let heuristics = HeuristicTests::new(Arc::new(handles.0)); + let dirlist_type = heuristics.detect_directory_listing(&parsed); + assert!(dirlist_type.is_none()); + } } diff --git a/src/lib.rs b/src/lib.rs index 0aa5613..72258d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,8 @@ +#![deny(clippy::all)] +#![allow(clippy::mutex_atomic)] use anyhow::Result; use reqwest::StatusCode; +use std::collections::HashSet; use tokio::{ sync::mpsc::{UnboundedReceiver, UnboundedSender}, task::JoinHandle, @@ -26,6 +29,7 @@ mod macros; mod url; mod response; mod message; +mod nlp; /// Alias for tokio::sync::mpsc::UnboundedSender pub(crate) type CommandSender = UnboundedSender; @@ -39,6 +43,9 @@ pub(crate) type Joiner = JoinHandle>; /// Generic mpsc::unbounded_channel type to tidy up some code pub(crate) type FeroxChannel = (UnboundedSender, UnboundedReceiver); +/// Wrapper around the results of performing any kind of extraction against a target web page +pub(crate) type ExtractionResult = HashSet; + /// Version pulled from Cargo.toml at compile time pub(crate) const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -48,6 +55,13 @@ pub 
const DEFAULT_OPEN_FILE_LIMIT: u64 = 8192; /// Default value used to determine near-duplicate web pages (equivalent to 95%) pub const SIMILARITY_THRESHOLD: u32 = 95; +/// Default set of extensions to Ignore when auto-collecting extensions during scans +pub(crate) const DEFAULT_IGNORED_EXTENSIONS: [&str; 38] = [ + "tif", "tiff", "ico", "cur", "bmp", "webp", "svg", "png", "jpg", "jpeg", "jfif", "gif", "avif", + "apng", "pjpeg", "pjp", "mov", "wav", "mpg", "mpeg", "mp3", "mp4", "m4a", "m4p", "m4v", "ogg", + "webm", "ogv", "oga", "flac", "aac", "3gp", "css", "zip", "xls", "xml", "gz", "tgz", +]; + /// Default wordlist to use when `-w|--wordlist` isn't specified and not `wordlist` isn't set /// in a [ferox-config.toml](constant.DEFAULT_CONFIG_NAME.html) config file. /// diff --git a/src/main.rs b/src/main.rs index d2feb29..7e28bb5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use std::io::stdin; use std::{ env::args, fs::{create_dir, remove_file, File}, @@ -16,6 +17,7 @@ use tokio::{ }; use tokio_util::codec::{FramedRead, LinesCodec}; +use feroxbuster::scan_manager::ScanType; use feroxbuster::{ banner::{Banner, UPDATE_URL}, config::{Configuration, OutputLevel}, @@ -48,7 +50,11 @@ fn get_unique_words_from_wordlist(path: &str) -> Result>> { let reader = BufReader::new(file); - let mut words = Vec::new(); + // this empty string ensures that we call Requester::request with the base url, i.e. + // `http://localhost/` instead of going straight into `http://localhost/WORD.EXT`. + // for vanilla scans, it doesn't matter all that much, but it can be a significant difference + // when `-e` is used, depending on the content at the base url. 
+ let mut words = vec![String::from("")]; for line in reader.lines() { line.map(|result| { @@ -70,21 +76,12 @@ fn get_unique_words_from_wordlist(path: &str) -> Result>> { /// Determine whether it's a single url scan or urls are coming from stdin, then scan as needed async fn scan(targets: Vec, handles: Arc) -> Result<()> { log::trace!("enter: scan({:?}, {:?})", targets, handles); - // cloning an Arc is cheap (it's basically a pointer into the heap) - // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans - // as well as additional directories found as part of recursion - - let words = get_unique_words_from_wordlist(&handles.config.wordlist)?; - - if words.len() == 0 { - bail!("Did not find any words in {}", handles.config.wordlist); - } let scanned_urls = handles.ferox_scans()?; - handles.send_scan_command(UpdateWordlist(words.clone()))?; + handles.send_scan_command(UpdateWordlist(handles.wordlist.clone()))?; - scanner::initialize(words.len(), handles.clone()).await?; + scanner::initialize(handles.wordlist.len(), handles.clone()).await?; // at this point, the stat thread's progress bar can be created; things that needed to happen // first: @@ -103,7 +100,7 @@ async fn scan(targets: Vec, handles: Arc) -> Result<()> { if handles.config.resumed { // display what has already been completed scanned_urls.print_known_responses(); - scanned_urls.print_completed_bars(words.len())?; + scanned_urls.print_completed_bars(handles.wordlist.len())?; } log::debug!("sending {:?} to be scanned as initial targets", targets); @@ -138,8 +135,8 @@ async fn get_targets(handles: Arc) -> Result> { for scan in scans.iter() { // ferox_scans gets deserialized scans added to it at program start if --resume-from // is used, so scans that aren't marked complete still need to be scanned - if scan.is_complete() { - // this one's already done, ignore it + if scan.is_complete() || matches!(scan.scan_type, ScanType::File) { + // this one's already done, or it's not 
a directory, ignore it continue; } @@ -193,6 +190,18 @@ async fn wrapped_main(config: Arc) -> Result<()> { PROGRESS_BAR.join().unwrap(); }); + // cloning an Arc is cheap (it's basically a pointer into the heap) + // so that will allow for cheap/safe sharing of a single wordlist across multi-target scans + // as well as additional directories found as part of recursion + let words = get_unique_words_from_wordlist(&config.wordlist)?; + + if words.len() <= 1 { + // the check is now <= 1 due to the initial empty string added in 2.6.0 + // 1 -> empty wordlist + // 0 -> error + bail!("Did not find any words in {}", config.wordlist); + } + // spawn all event handlers, expect back a JoinHandle and a *Handle to the specific event let (stats_task, stats_handle) = StatsHandler::initialize(config.clone()); let (filters_task, filters_handle) = FiltersHandler::initialize(); @@ -205,6 +214,7 @@ async fn wrapped_main(config: Arc) -> Result<()> { filters_handle, out_handle, config.clone(), + words, )); let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone()); @@ -494,9 +504,39 @@ fn main() -> Result<()> { .enable_all() .build() { - let future = wrapped_main(config); + let future = wrapped_main(config.clone()); if let Err(e) = runtime.block_on(future) { eprintln!("{}", e); + + // the code below is to facilitate testing tests/test_banner entries. Since it's an + // integration test, normal test detection (cfg!(test), etc...) won't work. So, in + // the tests themselves, we pass + // `--wordlist /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676` + // and look for that here to print the banner. + // + // this change became a necessity once we moved wordlist parsing out of `scan` and into + // `wrapped_main`. 
+ if e.to_string() + .contains("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + { + // support the handful of tests that use `--stdin` + let targets: Vec<_> = if config.stdin { + stdin().lock().lines().map(|tgt| tgt.unwrap()).collect() + } else { + vec!["http://localhost".to_string()] + }; + + // print the banner to stderr + let std_stderr = stderr(); // std::io::stderr + let banner = Banner::new(&targets, &config); + if !config.quiet && !config.silent { + banner.print_to(std_stderr, config).unwrap(); + } + } + + // if we've encountered an error before clean_up can be called (i.e. a wordlist error) + // we need to at least spin-down the progress bar + PROGRESS_PRINTER.finish(); }; } diff --git a/src/message.rs b/src/message.rs index a5186c4..14e71ce 100644 --- a/src/message.rs +++ b/src/message.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::traits::FeroxSerialize; use crate::utils::fmt_err; -#[derive(Serialize, Deserialize, Default)] +#[derive(Serialize, Deserialize, Default, Debug)] /// Representation of a log entry, can be represented as a human readable string or JSON pub struct FeroxMessage { #[serde(rename = "type")] @@ -38,7 +38,7 @@ impl FeroxSerialize for FeroxMessage { "DEBUG" => ("DBG", Color::Yellow), "TRACE" => ("TRC", Color::Magenta), "WILDCARD" => ("WLD", Color::Cyan), - _ => ("UNK", Color::White), + _ => ("MSG", Color::White), }; format!( @@ -143,6 +143,6 @@ mod tests { assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("WLD")); msg.level = "UNKNOWN".to_string(); - assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("UNK")); + assert!(console::strip_ansi_codes(&msg.as_str()).starts_with("MSG")); } } diff --git a/src/nlp/constants.rs b/src/nlp/constants.rs new file mode 100644 index 0000000..a276214 --- /dev/null +++ b/src/nlp/constants.rs @@ -0,0 +1,334 @@ +use lazy_static::lazy_static; +use regex::Regex; + +lazy_static! 
{ + /// regular expression to match on words with numbers, underscores, and hyphens + pub(super) static ref BOUNDED_WORD_REGEX: Regex = Regex::new(r"\b[a-zA-Z0-9_-]+\b").unwrap(); +} + +/// collection of stop words from spaCy with small modifications +pub(super) static STOP_WORDS: [&str; 323] = [ + "'d", + "'ll", + "'m", + "'re", + "'s", + "'ve", + "a", + "about", + "above", + "across", + "after", + "afterwards", + "again", + "against", + "almost", + "alone", + "along", + "already", + "also", + "although", + "always", + "am", + "among", + "amongst", + "amount", + "an", + "and", + "another", + "any", + "anyhow", + "anyone", + "anything", + "anyway", + "anywhere", + "are", + "around", + "as", + "at", + "back", + "be", + "became", + "because", + "become", + "becomes", + "becoming", + "been", + "before", + "beforehand", + "behind", + "being", + "below", + "beside", + "besides", + "between", + "beyond", + "both", + "bottom", + "but", + "by", + "ca", + "call", + "can", + "cannot", + "could", + "did", + "do", + "does", + "doing", + "done", + "down", + "due", + "during", + "each", + "eight", + "either", + "eleven", + "else", + "elsewhere", + "empty", + "enough", + "even", + "ever", + "every", + "everyone", + "everything", + "everywhere", + "except", + "few", + "fifteen", + "fifty", + "first", + "five", + "for", + "former", + "formerly", + "forty", + "four", + "from", + "front", + "full", + "further", + "get", + "got", + "give", + "go", + "had", + "has", + "have", + "he", + "hence", + "her", + "here", + "hereafter", + "hereby", + "herein", + "hereupon", + "hers", + "herself", + "him", + "himself", + "his", + "how", + "however", + "hundred", + "i", + "if", + "in", + "indeed", + "into", + "is", + "it", + "its", + "itself", + "just", + "keep", + "last", + "latter", + "latterly", + "least", + "less", + "made", + "make", + "many", + "may", + "me", + "meanwhile", + "might", + "mine", + "more", + "moreover", + "most", + "mostly", + "move", + "much", + "must", + "my", + "myself", + 
"n't", + "name", + "namely", + "neither", + "never", + "nevertheless", + "next", + "nine", + "no", + "nobody", + "none", + "noone", + "nor", + "not", + "nothing", + "now", + "nowhere", + "n\u{2018}t", + "n\u{2019}t", + "of", + "off", + "often", + "on", + "once", + "one", + "only", + "onto", + "or", + "other", + "others", + "otherwise", + "our", + "ours", + "ourselves", + "out", + "over", + "own", + "part", + "per", + "perhaps", + "please", + "put", + "quite", + "rather", + "re", + "really", + "regarding", + "same", + "say", + "see", + "seem", + "seemed", + "seeming", + "seems", + "serious", + "several", + "she", + "should", + "side", + "since", + "six", + "sixty", + "so", + "some", + "somehow", + "someone", + "something", + "sometime", + "sometimes", + "somewhere", + "still", + "such", + "take", + "ten", + "than", + "that", + "the", + "their", + "them", + "themselves", + "then", + "thence", + "there", + "thereafter", + "thereby", + "therefore", + "therein", + "thereupon", + "these", + "they", + "third", + "this", + "those", + "though", + "three", + "through", + "throughout", + "thru", + "thus", + "to", + "together", + "too", + "toward", + "towards", + "twelve", + "twenty", + "two", + "under", + "unless", + "until", + "up", + "upon", + "used", + "using", + "various", + "very", + "via", + "was", + "we", + "well", + "were", + "what", + "whatever", + "when", + "whence", + "whenever", + "where", + "whereafter", + "whereas", + "whereby", + "wherein", + "whereupon", + "wherever", + "whether", + "which", + "while", + "whither", + "who", + "whoever", + "whole", + "whom", + "whose", + "why", + "will", + "with", + "within", + "without", + "would", + "yet", + "you", + "your", + "yours", + "yourself", + "yourselves", + "\u{2018}d", + "\u{2018}ll", + "\u{2018}m", + "\u{2018}re", + "\u{2018}s", + "\u{2018}ve", + "\u{2019}d", + "\u{2019}ll", + "\u{2019}m", + "\u{2019}re", + "\u{2019}s", + "\u{2019}ve", +]; diff --git a/src/nlp/document.rs b/src/nlp/document.rs new file mode 100644 
index 0000000..0267575 --- /dev/null +++ b/src/nlp/document.rs @@ -0,0 +1,223 @@ +use super::term::{Term, TermMetaData}; +use super::utils::preprocess; +use scraper::{Html, Node, Selector}; +use std::collections::HashMap; + +/// data container representing a single document, in the nlp sense +#[derive(Debug, Default)] +pub(crate) struct Document { + /// collection of `Term`s and their associated metadata + terms: HashMap, + + /// number of terms contained within the document + number_of_terms: usize, +} + +impl Document { + /// create a new `Document` from the given string + pub(super) fn new(text: &str) -> Self { + let mut document = Self::default(); + + let processed = preprocess(text); + + document.number_of_terms += processed.len(); + + for normalized in processed { + if normalized.len() > 2 { + document.add_term(&normalized) + } + } + document + } + + /// add a `Term` to the document if it's not already tracked, otherwise increment the number + /// of times the term has been seen + fn add_term(&mut self, word: &str) { + let term = Term::new(word); + + let metadata = self.terms.entry(term).or_insert_with(TermMetaData::new); + *metadata.count_mut() += 1; + } + + /// create a new `Document` from the given HTML string + pub(crate) fn from_html(raw_html: &str) -> Self { + let selector = Selector::parse("body").unwrap(); + + let html = Html::parse_document(raw_html); + + let text = html + .select(&selector) + .next() + .unwrap() + .descendants() + .filter_map(|node| { + if !node.value().is_text() && !node.value().is_comment() { + return None; + } + + // have a Text||Comment node, trim whitespace to test for all whitespace stuff + let trimmed = if node.value().is_text() { + node.value().as_text().unwrap().text.trim() + } else { + node.value().as_comment().unwrap().comment.trim() + }; + + if trimmed.is_empty() { + return None; + } + + // found a non-empty Text||Comment node, need to check its parent to determine if + // it's a

got worse on Wednesday.

"; + let doc = Document::from_html(html); + let keys = doc.terms().keys().map(|key| key.raw()).collect::>(); + + let expected = ["worse", "wednesday"]; + + assert_eq!(doc.number_of_terms(), 2); + + for key in keys { + assert!(expected.contains(&key)); + } + } +} diff --git a/src/nlp/mod.rs b/src/nlp/mod.rs new file mode 100644 index 0000000..6fad768 --- /dev/null +++ b/src/nlp/mod.rs @@ -0,0 +1,10 @@ +//! small stand-alone tf-idf library, specifically designed for use in feroxbuster + +mod constants; +mod document; +mod model; +mod term; +mod utils; + +pub(crate) use self::document::Document; +pub(crate) use self::model::TfIdf; diff --git a/src/nlp/model.rs b/src/nlp/model.rs new file mode 100644 index 0000000..588d2a3 --- /dev/null +++ b/src/nlp/model.rs @@ -0,0 +1,185 @@ +use super::document::Document; +use super::term::{Term, TermMetaData}; +use super::utils::{inverse_document_frequency, tf_idf_score}; +use std::borrow::{Borrow, BorrowMut}; +use std::collections::HashMap; + +/// data container for the TF-IDF model +#[derive(Debug, Default)] +pub(crate) struct TfIdf { + /// collection of `Term`s and their associated metadata + terms: HashMap, + + /// number of documents processed by the model + num_documents: usize, +} + +impl TfIdf { + /// create an empty TF-IDF model; must be populated with `add_document` prior to use + pub(crate) fn new() -> Self { + Self::default() + } + + /// accessor method for the collection of `Term`s and `TermMetaData` + fn terms(&self) -> &HashMap { + self.terms.borrow() + } + + /// accessor method for the number of `Document`s the model has processed + pub(crate) fn num_documents(&self) -> usize { + self.num_documents + } + + /// add a `Document` to the model + pub(crate) fn add_document(&mut self, document: Document) { + // increment number of docs seen, since we don't preserve the document itself; this needs + // to happen before calls to `self.inverse_document_frequency`, as it relies on the count + // being up to date + 
self.num_documents += 1; + + for (term, doc_metadata) in document.terms().iter() { + // an incoming `Term` from a `Document` only has a valid `count` for that particular + // document; need to get the term frequency while both are known/valid + let term_frequency = document.term_frequency(term); + + let metadata = self + .terms + .entry(term.clone()) + .or_insert_with(|| doc_metadata.to_owned()); + + metadata.term_frequencies_mut().push(term_frequency); + } + } + + /// (re)-calculate tf-idf scores for all terms, given the current number of documents + /// + /// # Notes + /// + /// old tf-idf scores are removed during calculations to keep new `Term`s at the same relative + /// level as new ones WRT corpus size + pub(crate) fn calculate_tf_idf_scores(&mut self) { + for metadata in self.terms.borrow_mut().values_mut() { + let num_frequencies = metadata.term_frequencies().len(); + + let mut to_add = Vec::with_capacity(num_frequencies); + + for frequency in metadata.term_frequencies() { + let idf = inverse_document_frequency( + self.num_documents as f32, + metadata.document_frequency() as f32, + ); + + let score = tf_idf_score(*frequency, idf); + to_add.push(score); + } + + let average: f32 = to_add.iter().sum::() / to_add.len() as f32; + + *metadata.tf_idf_score_mut() = average; + } + } + + /// select all terms with a non-zero tf-idf score + pub(crate) fn all_words(&self) -> Vec { + self.terms() + .iter() + .filter(|(_, metadata)| metadata.tf_idf_score() > 0.0) + .map(|(term, _)| term.raw().to_owned()) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// helper for this test suite + fn get_score(word: &str, model: &TfIdf) -> f32 { + model.terms().get(&Term::new(word)).unwrap().tf_idf_score() + } + + #[test] + /// given the example data at https://remykarem.github.io/tfidf-demo/, ensure the model + /// produces the same results + fn model_generates_expected_tf_idf_scores() { + let one = "Air quality in the sunny island improved gradually throughout 
Wednesday."; + let two = + "Air quality in Singapore on Wednesday continued to get worse as haze hit the island."; + let three = "The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island"; + let four = "The air quality in Singapore got worse on Wednesday."; + + let docs = [one, two, three, four]; + let mut model = TfIdf::new(); + + for doc in docs.iter() { + let d = Document::new(doc); + model.add_document(d); + } + + assert_eq!(model.terms().len(), 19); + + model.calculate_tf_idf_scores(); + + assert_eq!(get_score("quality", &model), 0.0); + assert_eq!(get_score("air", &model), 0.0); + assert_eq!(get_score("wednesday", &model), 0.018906077); + assert_eq!(get_score("island", &model), 0.014047348); + assert_eq!(get_score("singapore", &model), 0.016427131); + assert_eq!(get_score("sunny", &model), 0.08600858); + assert_eq!(get_score("monitoring", &model), 0.05017167); + assert_eq!(get_score("stations", &model), 0.05017167); + assert_eq!(get_score("parts", &model), 0.05017167); + assert_eq!(get_score("haze", &model), 0.06689556); + assert_eq!(get_score("hit", &model), 0.06689556); + assert_eq!(get_score("worse", &model), 0.04682689); + } + + #[test] + /// given the example data at https://remykarem.github.io/tfidf-demo/, ensure the model + /// produces the same results + fn select_n_words_grabs_correct_words() { + let one = "Air quality in the sunny island improved gradually throughout Wednesday."; + let two = + "Air quality in Singapore on Wednesday continued to get worse as haze hit the island."; + let three = "The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island"; + let four = "The air quality in Singapore got worse on Wednesday."; + + let docs = [one, two, three, four]; + let mut model = TfIdf::new(); + + for doc in docs.iter() { + let d = Document::new(doc); + model.add_document(d); + } + + 
assert_eq!(model.num_documents(), 4); + + model.calculate_tf_idf_scores(); + + let non_zero_words = model.all_words(); + + [ + "gradually", + "network", + "hit", + "located", + "continued", + "island", + "worse", + "monitored", + "monitoring", + "haze", + "different", + "stations", + "sunny", + "singapore", + "improved", + "parts", + "wednesday", + ] + .iter() + .for_each(|word| { + assert!(non_zero_words.contains(&word.to_string())); + }); + } +} diff --git a/src/nlp/term.rs b/src/nlp/term.rs new file mode 100644 index 0000000..86b9591 --- /dev/null +++ b/src/nlp/term.rs @@ -0,0 +1,105 @@ +use std::borrow::BorrowMut; + +/// single word term for text processing +#[derive(Debug, Hash, Eq, PartialEq, Default, Clone)] +pub(crate) struct Term { + /// underlying string that the term represents + raw: String, +} + +impl Term { + /// given a word, create a new `Term` + pub(super) fn new(word: &str) -> Self { + Self { + raw: word.to_owned(), + } + } + + /// return a reference to the underlying string + pub(super) fn raw(&self) -> &str { + &self.raw + } +} + +/// metadata to be associated with a `Term` +#[derive(Debug, Clone, Default)] +pub(super) struct TermMetaData { + /// number of times the associated `Term` was seen in a single document + count: u32, + + /// collection of term frequencies for the associated `Term` + term_frequencies: Vec, + + /// tf-idf score for the associated `Term` + tf_idf_score: f32, +} + +impl TermMetaData { + /// create a new metadata container + pub(super) fn new() -> Self { + Self::default() + } + + /// number of times a `Term` has appeared in any `Document` within the corpus + pub(super) fn document_frequency(&self) -> usize { + self.term_frequencies().len() + } + + /// mutable reference to the collection of term frequencies + pub(super) fn term_frequencies_mut(&mut self) -> &mut Vec { + self.term_frequencies.borrow_mut() + } + + /// immutable reference to the collection of term frequencies + pub(super) fn term_frequencies(&self) -> &[f32] { 
+ &self.term_frequencies + } + + /// mutable reference to the number of times a `Term` was seen in a particular `Document` + pub(super) fn count_mut(&mut self) -> &mut u32 { + self.count.borrow_mut() + } + + /// number of times a `Term` was seen in a particular `Document` + pub(super) fn count(&self) -> u32 { + self.count + } + + /// mutable reference to the term's tf-idf score + pub(super) fn tf_idf_score_mut(&mut self) -> &mut f32 { + self.tf_idf_score.borrow_mut() + } + + /// immutable reference to the term's tf-idf score + pub(super) fn tf_idf_score(&self) -> f32 { + self.tf_idf_score + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// test accessors for correctness + fn nlp_term_accessor_test() { + let term = Term::new("stuff"); + assert_eq!(term.raw(), "stuff"); + } + + #[test] + /// test accessors for correctness + fn nlp_term_metadata_accessor_test() { + let mut metadata = TermMetaData::new(); + + *metadata.count_mut() += 1; + assert_eq!(metadata.count(), 1); + + metadata.term_frequencies_mut().push(1.0); + assert_eq!(metadata.document_frequency(), 1); + assert_eq!(metadata.term_frequencies().first().unwrap(), &1.0); + + *metadata.tf_idf_score_mut() = 1.0_f32; + assert_eq!(metadata.tf_idf_score(), 1.0); + } +} diff --git a/src/nlp/utils.rs b/src/nlp/utils.rs new file mode 100644 index 0000000..3309560 --- /dev/null +++ b/src/nlp/utils.rs @@ -0,0 +1,158 @@ +use super::constants::{BOUNDED_WORD_REGEX, STOP_WORDS}; +use regex::Captures; +use std::borrow::Cow; + +/// pre-processing pipeline wrapper that removes punctuation, normalizes word case (utf-8 included) +/// to lowercase, and remove stop words +pub(super) fn preprocess(text: &str) -> Vec { + let text = remove_punctuation(text); + let text = normalize_case(text); + let text = remove_stop_words(&text); + + text.split_whitespace() + .map(|word| word.to_string()) + .collect::>() +} + +/// optimized version of `str::to_lowercase` +fn normalize_case<'a, S: Into>>(input: S) -> Cow<'a, str> { + 
let input = input.into(); + + let first = input.find(char::is_uppercase); + + if let Some(first_idx) = first { + let mut output = String::from(&input[..first_idx]); + output.reserve(input.len() - first_idx); + + for c in input[first_idx..].chars() { + if c.is_uppercase() { + output.push(c.to_lowercase().next().unwrap()) + } else { + output.push(c) + } + } + + Cow::Owned(output) + } else { + input + } +} + +/// remove ascii and some utf-8 punctuation characters from the given string +fn remove_punctuation(text: &str) -> String { + // non-separator type chars can be replaced with an empty string, while separators are replaced + // with a space. This attempts to keep things like + // 'aboutblogfaqcontactpresstermslexicondisclosure' from happening + text.replace( + [ + '!', '\\', '"', '#', '$', '%', '&', '(', ')', '*', '+', ':', ';', '<', '=', '>', '?', + '@', '[', ']', '^', '{', '}', '|', '~', ',', '\'', '“', '”', '’', '‘', '’', '‘', + ], + "", + ) + .replace(['/', '–', '—', '.'], " ") +} + +/// remove stop words from the given string +fn remove_stop_words(text: &str) -> String { + BOUNDED_WORD_REGEX + .replace_all(text, |caps: &Captures| { + let word = &caps[0]; + if !STOP_WORDS.contains(&word) { + word.to_owned() + } else { + String::new() + } + }) + .into() +} + +/// calculate inverse document frequency +pub(super) fn inverse_document_frequency(num_docs: f32, doc_frequency: f32) -> f32 { + f32::log10(num_docs / doc_frequency) +} + +/// calculate term frequency-inverse document frequency (tf-idf) +pub(super) fn tf_idf_score(term_frequency: f32, idf: f32) -> f32 { + term_frequency * idf +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// ensure all expected punctuation characters are removed + fn test_remove_punctuation() { + let tester = "!\\\"#$%&()*+/:;<=>?@[]^{}|~,.'“”’‘–—\n‘’"; + // the `" \n"` is because of the things like / getting replaced with a space + assert_eq!(remove_punctuation(tester), " \n"); + } + + #[test] + /// ensure uppercase 
characters are swapped to lowercase + fn test_normalize_case() { + let tester = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + assert_eq!(normalize_case(tester), "abcdefghijklmnopqrstuvwxyz"); + } + + #[test] + /// ensure all stop words are removed from the list of stopwords ... intestuous + fn test_remove_stopwords() { + let all_words = STOP_WORDS + .iter() + .map(|&word| word.to_string()) + .collect::>() + .join(" "); + + let removed = remove_stop_words(&all_words).replace(' ', ""); + + // the remaining chars are from the contraction-based stop words + assert_eq!(removed, "'d'll'm''s'ven'tn‘tn’t‘d‘ll‘m‘‘s‘ve’d’ll’m’’s’ve"); + } + + #[test] + /// ensure preprocess + fn test_preprocess_results() { + let tester = "WHY are Y'all YELLing?"; + assert_eq!(&preprocess(tester), &["yall", "yelling"]); + } + + #[test] + /// ensure our calculations conform to the example provided at the link below + /// + /// https://www.kaggle.com/paulrohan2020/tf-idf-tutorial/notebook#TF-IDF-Model + /// + /// Consider a document containing 100 words wherein the word cat appears 3 times. + /// The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. Now, assume we have 10 + /// million documents and the word cat appears in one thousand of these. Then, the inverse + /// document frequency (i.e., idf) is calculated as log(10,000,000 / 1,000) = 4. Thus, the + /// Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12. + fn idf_returns_expected_value() { + let num_docs = 10_000_000_f32; + let num_occurrences = 1_000_f32; + let abs_diff = (inverse_document_frequency(num_docs, num_occurrences) - 4.0).abs(); + + assert!(abs_diff <= f32::EPSILON); + } + + #[test] + /// ensure our calculations conform to the example provided at the link below + /// + /// https://www.kaggle.com/paulrohan2020/tf-idf-tutorial/notebook#TF-IDF-Model + /// + /// Consider a document containing 100 words wherein the word cat appears 3 times. + /// The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. 
Now, assume we have 10 + /// million documents and the word cat appears in one thousand of these. Then, the inverse + /// document frequency (i.e., idf) is calculated as log(10,000,000 / 1,000) = 4. Thus, the + /// Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12. + fn tf_idf_returns_expected_value() { + let term_freq = 0.03_f32; + let num_docs = 10_000_000_f32; + let num_occurrences = 1_000_f32; + let idf = inverse_document_frequency(num_docs, num_occurrences); + let abs_diff = (tf_idf_score(term_freq, idf) - 0.12).abs(); + + assert!(abs_diff <= f32::EPSILON); + } +} diff --git a/src/parser.rs b/src/parser.rs index 3bab47b..41a90d1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,5 @@ use clap::{ - crate_authors, crate_description, crate_name, crate_version, App, Arg, ArgGroup, ValueHint, + crate_authors, crate_description, crate_name, crate_version, Arg, ArgGroup, Command, ValueHint, }; use lazy_static::lazy_static; use regex::Regex; @@ -25,8 +25,8 @@ lazy_static! { } /// Create and return an instance of [clap::App](https://docs.rs/clap/latest/clap/struct.App.html), i.e. 
the Command Line Interface's configuration -pub fn initialize() -> App<'static> { - let app = App::new(crate_name!()) +pub fn initialize() -> Command<'static> { + let app = Command::new(crate_name!()) .version(crate_version!()) .author(crate_authors!()) .about(crate_description!()); @@ -42,7 +42,7 @@ pub fn initialize() -> App<'static> { .required_unless_present_any(&["stdin", "resume_from"]) .help_heading("Target selection") .value_name("URL") - .use_delimiter(true) + .use_value_delimiter(true) .value_hint(ValueHint::Url) .help("The target URL (required, unless [--stdin || --resume-from] used)"), ) @@ -65,6 +65,36 @@ pub fn initialize() -> App<'static> { .takes_value(true), ); + ///////////////////////////////////////////////////////////////////// + // group - composite settings + ///////////////////////////////////////////////////////////////////// + let app = app + .arg( + Arg::new("burp") + .long("burp") + .help_heading("Composite settings") + .conflicts_with_all(&["proxy", "insecure", "burp_replay"]) + .help("Set --proxy to http://127.0.0.1:8080 and set --insecure to true"), + ) + .arg( + Arg::new("burp_replay") + .long("burp-replay") + .help_heading("Composite settings") + .conflicts_with_all(&["replay_proxy", "insecure"]) + .help("Set --replay-proxy to http://127.0.0.1:8080 and set --insecure to true"), + ) + .arg( + Arg::new("smart") + .long("smart") + .help_heading("Composite settings") + .help("Set --extract-links, --auto-tune, --collect-words, and --collect-backups to true"), + ).arg( + Arg::new("thorough") + .long("thorough") + .help_heading("Composite settings") + .help("Use the same settings as --smart and set --collect-extensions to true"), + ); + ///////////////////////////////////////////////////////////////////// // group - proxy settings ///////////////////////////////////////////////////////////////////// @@ -101,7 +131,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - 
.use_delimiter(true) + .use_value_delimiter(true) .requires("replay_proxy") .help_heading("Proxy settings") .help( @@ -138,7 +168,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "File extension(s) to search for (ex: -x php -x pdf js)", @@ -152,7 +182,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Which HTTP request method(s) should be sent (default: GET)", @@ -177,7 +207,7 @@ pub fn initialize() -> App<'static> { .help_heading("Request settings") .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help( "Specify HTTP headers to be used in each request (ex: -H Header:val -H 'stuff: things')", ), @@ -190,7 +220,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Specify HTTP cookies to be used in each request (ex: -b stuff=things)", @@ -204,7 +234,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request settings") .help( "Request's URL query parameters (ex: -Q token=stuff -Q secret=key)", @@ -229,7 +259,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Request filters") .help("URL(s) or Regex Pattern(s) to exclude from recursion/scans"), ); @@ -246,7 +276,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + 
.use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)", @@ -260,7 +290,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages via regular expression matching on the response's body (ex: -X '^ignore me$')", @@ -274,7 +304,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular word count (ex: -W 312 -W 91,82)", @@ -288,7 +318,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out messages of a particular line count (ex: -N 20 -N 31,30)", @@ -302,7 +332,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out status codes (deny list) (ex: -C 200 -C 401)", @@ -316,7 +346,7 @@ pub fn initialize() -> App<'static> { .multiple_values(true) .multiple_occurrences(true) .value_hint(ValueHint::Url) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Filter out pages that are similar to the given page (ex. 
--filter-similar-to http://site.xyz/soft404)", @@ -330,7 +360,7 @@ pub fn initialize() -> App<'static> { .takes_value(true) .multiple_values(true) .multiple_occurrences(true) - .use_delimiter(true) + .use_value_delimiter(true) .help_heading("Response filters") .help( "Status Codes to include (allow list) (default: 200 204 301 302 307 308 401 403 405)", @@ -470,6 +500,40 @@ pub fn initialize() -> App<'static> { .takes_value(false) .help_heading("Scan settings") .help("Don't auto-filter wildcard responses") + ).arg( + Arg::new("collect_extensions") + .short('E') + .long("collect-extensions") + .takes_value(false) + .help_heading("Dynamic collection settings") + .help("Automatically discover extensions and add them to --extensions (unless they're in --dont-collect)") + ).arg( + Arg::new("collect_backups") + .short('B') + .long("collect-backups") + .takes_value(false) + .help_heading("Dynamic collection settings") + .help("Automatically request likely backup extensions for \"found\" urls") + ).arg( + Arg::new("collect_words") + .short('g') + .long("collect-words") + .takes_value(false) + .help_heading("Dynamic collection settings") + .help("Automatically discover important words from within responses and add them to the wordlist") + ).arg( + Arg::new("dont_collect") + .short('I') + .long("dont-collect") + .value_name("FILE_EXTENSION") + .takes_value(true) + .multiple_values(true) + .multiple_occurrences(true) + .use_value_delimiter(true) + .help_heading("Dynamic collection settings") + .help( + "File extension(s) to Ignore while collecting extensions (only used with --collect-extensions)", + ), ); ///////////////////////////////////////////////////////////////////// @@ -527,6 +591,13 @@ pub fn initialize() -> App<'static> { .help_heading("Output settings") .help("Output file to write log entries (use w/ --json for JSON entries)") .takes_value(true), + ) + .arg( + Arg::new("no_state") + .long("no-state") + .takes_value(false) + .help_heading("Output settings") + 
.help("Disable state output file (*.state)") ); ///////////////////////////////////////////////////////////////////// diff --git a/src/response.rs b/src/response.rs index 2fb9c48..6b08b00 100644 --- a/src/response.rs +++ b/src/response.rs @@ -60,6 +60,9 @@ pub struct FeroxResponse { /// whether the user passed --quiet|--silent on the command line pub(crate) output_level: OutputLevel, + + /// Url's file extension, if one exists + pub(crate) extension: Option, } /// implement Default trait for FeroxResponse @@ -78,6 +81,7 @@ impl Default for FeroxResponse { headers: Default::default(), wildcard: false, output_level: Default::default(), + extension: None, } } } @@ -205,7 +209,6 @@ impl FeroxResponse { response: Response, original_url: &str, method: &str, - read_body: bool, output_level: OutputLevel, ) -> Self { let url = response.url().clone(); @@ -213,21 +216,12 @@ impl FeroxResponse { let headers = response.headers().clone(); let content_length = response.content_length().unwrap_or(0); - let text = if read_body { - // .text() consumes the response, must be called last - // additionally, --extract-links is currently the only place we use the body of the - // response, so we forego the processing if not performing extraction - match response.text().await { - // await the response's body - Ok(text) => text, - Err(e) => { - log::warn!("Could not parse body from response: {}", e); - String::new() - } - } - } else { - String::new() - }; + // .text() consumes the response, must be called last + let text = response + .text() + .await + .with_context(|| "Could not parse body from response") + .unwrap_or_default(); let line_count = text.lines().count(); let word_count = text.lines().map(|s| s.split_whitespace().count()).sum(); @@ -244,9 +238,65 @@ impl FeroxResponse { word_count, output_level, wildcard: false, + extension: None, } } + /// if --collect-extensions is used, examine the response's url and grab the file's extension + /// if one is available to be grabbed. 
If an extension is found, send it to the ScanHandler + /// for further processing + pub(crate) fn parse_extension(&mut self, handles: Arc) -> Result<()> { + log::trace!("enter: parse_extension"); + + if !handles.config.collect_extensions { + // early return, --collect-extensions not used + return Ok(()); + } + + // path_segments: + // Return None for cannot-be-a-base URLs. + // When Some is returned, the iterator always contains at least one string + // (which may be empty). + // + // meaning: the two unwraps here are fine, the worst outcome is an empty string + let filename = self.url.path_segments().unwrap().last().unwrap(); + + if !filename.is_empty() { + // non-empty string, try to get extension + let parts: Vec<_> = filename + .split('.') + // keep things like /.bash_history from becoming an extension + .filter(|part| !part.is_empty()) + .collect(); + + if parts.len() > 1 { + // filename + at least one extension, i.e. whatever.js becomes ["whatever", "js"] + self.extension = Some(parts.last().unwrap().to_string()) + } + } + + if let Some(extension) = &self.extension { + if handles + .config + .status_codes + .contains(&self.status().as_u16()) + { + // only add extensions to those responses that pass our checks; filtered out + // status codes are handled by should_filter, but we need to still check against + // the allow list for what we want to keep + #[cfg(test)] + handles + .send_scan_command(Command::AddDiscoveredExtension(extension.to_owned())) + .unwrap_or_default(); + #[cfg(not(test))] + handles.send_scan_command(Command::AddDiscoveredExtension(extension.to_owned()))?; + } + } + + log::trace!("exit: parse_extension"); + Ok(()) + } + /// Helper function that determines if the configured maximum recursion depth has been reached /// /// Essentially looks at the Url path and determines how many directories are present in the @@ -484,6 +534,10 @@ impl Serialize for FeroxResponse { state.serialize_field("line_count", &self.line_count)?; 
state.serialize_field("word_count", &self.word_count)?; state.serialize_field("headers", &headers)?; + state.serialize_field( + "extension", + self.extension.as_ref().unwrap_or(&String::new()), + )?; state.end() } @@ -508,6 +562,7 @@ impl<'de> Deserialize<'de> for FeroxResponse { output_level: Default::default(), line_count: 0, word_count: 0, + extension: None, }; let map: HashMap = HashMap::deserialize(deserializer)?; @@ -576,6 +631,11 @@ impl<'de> Deserialize<'de> for FeroxResponse { response.wildcard = result; } } + "extension" => { + if let Some(result) = value.as_str() { + response.extension = Some(result.to_string()); + } + } _ => {} } } @@ -587,6 +647,8 @@ impl<'de> Deserialize<'de> for FeroxResponse { #[cfg(test)] mod tests { use super::*; + use crate::config::Configuration; + use std::default::Default; #[test] /// call reached_max_depth with max depth of zero, which is infinite recursion, expect false @@ -595,16 +657,7 @@ mod tests { let url = Url::parse("http://localhost").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 0, handles); assert!(!result); @@ -618,16 +671,7 @@ mod tests { let url = Url::parse("http://localhost/one/two").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); @@ -641,16 +685,7 @@ mod tests { let url = Url::parse("http://localhost").unwrap(); let response = FeroxResponse { url, - 
original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); @@ -664,16 +699,7 @@ mod tests { let url = Url::parse("http://localhost/one/two").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(2, 2, handles); @@ -687,19 +713,71 @@ mod tests { let url = Url::parse("http://localhost/one/two/three").unwrap(); let response = FeroxResponse { url, - original_url: String::new(), - status: Default::default(), - method: Default::default(), - text: "".to_string(), - content_length: 0, - line_count: 0, - word_count: 0, - headers: Default::default(), - wildcard: false, - output_level: Default::default(), + ..Default::default() }; let result = response.reached_max_depth(0, 2, handles); assert!(result); } + + #[test] + /// simple case of a single extension gets parsed correctly and stored on the `FeroxResponse` + fn parse_extension_finds_simple_extension() { + let config = Configuration { + collect_extensions: true, + ..Default::default() + }; + + let (handles, _) = Handles::for_testing(None, Some(Arc::new(config))); + + let url = Url::parse("http://localhost/derp.js").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, Some(String::from("js"))); + } + + #[test] + /// hidden files shouldn't be parsed as extensions, i.e. 
`/.bash_history` + fn parse_extension_ignores_hidden_files() { + let config = Configuration { + collect_extensions: true, + ..Default::default() + }; + + let (handles, _) = Handles::for_testing(None, Some(Arc::new(config))); + + let url = Url::parse("http://localhost/.bash_history").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, None); + } + + #[test] + /// `parse_extension` should return immediately if `--collect-extensions` isn't used + fn parse_extension_early_returns_based_on_config() { + let (handles, _) = Handles::for_testing(None, None); + + let url = Url::parse("http://localhost/derp.js").unwrap(); + + let mut response = FeroxResponse { + url, + ..Default::default() + }; + + response.parse_extension(Arc::new(handles)).unwrap(); + + assert_eq!(response.extension, None); + } } diff --git a/src/scan_manager/scan.rs b/src/scan_manager/scan.rs index 42c06bc..e2ac1b9 100644 --- a/src/scan_manager/scan.rs +++ b/src/scan_manager/scan.rs @@ -33,7 +33,7 @@ pub struct FeroxScan { pub(super) url: String, /// The type of scan - pub(super) scan_type: ScanType, + pub scan_type: ScanType, /// The order in which the scan was received pub(crate) scan_order: ScanOrder, @@ -42,7 +42,7 @@ pub struct FeroxScan { pub(super) num_requests: u64, /// Status of this scan - pub(super) status: Mutex, + pub status: Mutex, /// The spawned tokio task performing this scan (uses tokio::sync::Mutex) pub(super) task: sync::Mutex>>, diff --git a/src/scan_manager/scan_container.rs b/src/scan_manager/scan_container.rs index ab1124e..317b940 100644 --- a/src/scan_manager/scan_container.rs +++ b/src/scan_manager/scan_container.rs @@ -13,6 +13,7 @@ use anyhow::Result; use reqwest::StatusCode; use serde::{ser::SerializeSeq, Serialize, Serializer}; use std::{ + collections::HashSet, convert::TryInto, fs::File, io::BufReader, @@ -47,6 +48,9 @@ pub struct FeroxScans { /// 
whether or not the user passed --silent|--quiet on the command line output_level: OutputLevel, + + /// vector of extensions discovered and collected during scans + pub(crate) collected_extensions: RwLock>, } /// Serialize implementation for FeroxScans @@ -58,17 +62,20 @@ impl Serialize for FeroxScans { where S: Serializer, { - if let Ok(scans) = self.scans.read() { - let mut seq = serializer.serialize_seq(Some(scans.len()))?; - for scan in scans.iter() { - seq.serialize_element(&*scan).unwrap_or_default(); - } + match self.scans.read() { + Ok(scans) => { + let mut seq = serializer.serialize_seq(Some(scans.len() + 1))?; - seq.end() - } else { - // if for some reason we can't unlock the RwLock, just write an empty list - let seq = serializer.serialize_seq(Some(0))?; - seq.end() + for scan in scans.iter() { + seq.serialize_element(&*scan).unwrap_or_default(); + } + seq.end() + } + Err(_) => { + // if for some reason we can't unlock the RwLock, just write an empty list + let seq = serializer.serialize_seq(Some(0))?; + seq.end() + } } } } @@ -109,7 +116,7 @@ impl FeroxScans { sentry } - /// load serialized FeroxScan(s) into this FeroxScans + /// load serialized FeroxScan(s) and any previously collected extensions into this FeroxScans pub fn add_serialized_scans(&self, filename: &str) -> Result<()> { log::trace!("enter: add_serialized_scans({})", filename); let file = File::open(filename)?; @@ -122,18 +129,31 @@ impl FeroxScans { for scan in arr_scans { let mut deser_scan: FeroxScan = serde_json::from_value(scan.clone()).unwrap_or_default(); + // FeroxScans gets -q value from config as usual; the FeroxScans themselves // rely on that value being passed in. 
If the user starts a scan without -q // and resumes the scan but adds -q, FeroxScan will not have the proper value // without the line below deser_scan.output_level = self.output_level; - log::debug!("added: {}", deser_scan); self.insert(Arc::new(deser_scan)); } } } + if let Some(extensions) = state.get("collected_extensions") { + if let Some(arr_exts) = extensions.as_array() { + if let Ok(mut guard) = self.collected_extensions.write() { + for ext in arr_exts { + let deser_ext: String = + serde_json::from_value(ext.clone()).unwrap_or_default(); + + guard.insert(deser_ext); + } + } + } + } + log::trace!("exit: add_serialized_scans"); Ok(()) } @@ -163,8 +183,8 @@ impl FeroxScans { None } - pub(super) fn get_base_scan_by_url(&self, url: &str) -> Option> { - log::trace!("enter: get_sub_paths_from_path({})", url); + pub fn get_base_scan_by_url(&self, url: &str) -> Option> { + log::trace!("enter: get_base_scan_by_url({})", url); // rmatch_indices returns tuples in index, match form, i.e. (10, "/") // with the furthest-right match in the first position in the vector @@ -188,14 +208,14 @@ impl FeroxScans { for scan in guard.iter() { let slice = url.index(0..*idx); if slice == scan.url || format!("{}/", slice).as_str() == scan.url { - log::trace!("enter: get_sub_paths_from_path -> {}", scan); + log::trace!("enter: get_base_scan_by_url -> {}", scan); return Some(scan.clone()); } } } } - log::trace!("enter: get_sub_paths_from_path -> None"); + log::trace!("enter: get_base_scan_by_url -> None"); None } /// add one to either 403 or 429 tracker in the scan related to the given url @@ -511,4 +531,67 @@ impl FeroxScans { } scans } + + /// given an extension, add it to `collected_extensions` if all constraints are met + /// returns `true` if an extension was added, `false` otherwise + pub fn add_discovered_extension(&self, extension: String) -> bool { + log::trace!("enter: add_discovered_extension({})", extension); + let mut extension_added = false; + + // note: the filter by 
--dont-collect happens in the event handler, since it has access + // to a Handles object form which it can check the config value. additionally, the check + // against --extensions is performed there for the same reason + + if let Ok(extensions) = self.collected_extensions.read() { + // quicker to allow most to read and return and then reopen for write if necessary + if extensions.contains(&extension) { + return extension_added; + } + } + + if let Ok(mut extensions) = self.collected_extensions.write() { + log::info!("discovered new extension: {}", extension); + extensions.insert(extension); + extension_added = true; + } + + log::trace!("exit: add_discovered_extension -> {}", extension_added); + extension_added + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// unknown extension should be added to collected_extensions + fn unknown_extension_is_added_to_collected_extensions() { + let scans = FeroxScans::new(OutputLevel::Default); + + assert_eq!(0, scans.collected_extensions.read().unwrap().len()); + + let added = scans.add_discovered_extension(String::from("js")); + + assert!(added); + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + } + + #[test] + /// known extension should not be added to collected_extensions + fn known_extension_is_added_to_collected_extensions() { + let scans = FeroxScans::new(OutputLevel::Default); + scans + .collected_extensions + .write() + .unwrap() + .insert(String::from("js")); + + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + + let added = scans.add_discovered_extension(String::from("js")); + + assert!(!added); + assert_eq!(1, scans.collected_extensions.read().unwrap().len()); + } } diff --git a/src/scan_manager/state.rs b/src/scan_manager/state.rs index ed883bd..28e7ca7 100644 --- a/src/scan_manager/state.rs +++ b/src/scan_manager/state.rs @@ -2,6 +2,7 @@ use super::*; use crate::{config::Configuration, statistics::Stats, traits::FeroxSerialize, utils::fmt_err}; use 
anyhow::{Context, Result}; use serde::Serialize; +use std::collections::HashSet; use std::sync::Arc; /// Data container for (de)?serialization of multiple items @@ -18,6 +19,9 @@ pub struct FeroxState { /// Gathered statistics statistics: Arc, + + /// collected extensions + collected_extensions: HashSet, } /// implementation of FeroxState @@ -29,11 +33,17 @@ impl FeroxState { responses: &'static FeroxResponses, statistics: Arc, ) -> Self { + let collected_extensions = match scans.collected_extensions.read() { + Ok(extensions) => extensions.clone(), + Err(_) => HashSet::new(), + }; + Self { scans, config, responses, statistics, + collected_extensions, } } } diff --git a/src/scan_manager/tests.rs b/src/scan_manager/tests.rs index c14b3cd..92c274e 100644 --- a/src/scan_manager/tests.rs +++ b/src/scan_manager/tests.rs @@ -303,7 +303,7 @@ fn ferox_scans_serialize() { #[test] /// given a FeroxResponses, test that it serializes into the proper JSON entry fn ferox_responses_serialize() { - let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); let responses = FeroxResponses::default(); @@ -321,7 +321,7 @@ fn ferox_responses_serialize() { /// given a FeroxResponse, test that it serializes into the proper JSON entry fn ferox_response_serialize_and_deserialize() { // deserialize - let json_response = 
r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","original_url":"https://nerdcore.com","path":"/css","wildcard":true,"status":301,"method":"GET","content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); assert_eq!(response.url().as_str(), "https://nerdcore.com/css"); @@ -351,33 +351,42 @@ fn feroxstates_feroxserialize_implementation() { ); let ferox_scans = FeroxScans::default(); let saved_id = ferox_scan.id.clone(); + ferox_scans.insert(ferox_scan); - let config = Configuration::new().unwrap(); + ferox_scans + .collected_extensions + .write() + .unwrap() + .insert(String::from("php")); + + let mut config = Configuration::new().unwrap(); + + config.collect_extensions = true; + let stats = Arc::new(Stats::new(config.json)); - let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"}}"#; + let json_response = r#"{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{"server":"nginx/1.16.1"},"extension":""}"#; let response: FeroxResponse = serde_json::from_str(json_response).unwrap(); RESPONSES.insert(response); - let ferox_state = FeroxState::new( - Arc::new(ferox_scans), - Arc::new(Configuration::new().unwrap()), - &RESPONSES, - stats, - ); + let ferox_state = FeroxState::new(Arc::new(ferox_scans), Arc::new(config), &RESPONSES, stats); let expected_strs = predicates::str::contains("scans: FeroxScans").and( 
predicate::str::contains("config: Configuration") .and(predicate::str::contains("responses: FeroxResponses")) .and(predicate::str::contains("nerdcore.com")) .and(predicate::str::contains("/css")) - .and(predicate::str::contains("https://spiritanimal.com")), + .and(predicate::str::contains("https://spiritanimal.com")) + .and(predicate::str::contains("php")), ); assert!(expected_strs.eval(&ferox_state.as_str())); let json_state = ferox_state.as_json().unwrap(); + + println!("echo '{}'|jq", json_state); // for debugging, if the test fails, can see what's going on + for expected in [ r#""scans""#, &format!(r#""id":"{}""#, saved_id), @@ -445,14 +454,17 @@ fn feroxstates_feroxserialize_implementation() { r#""word_count":16"#, r#""headers""#, r#""server":"nginx/1.16.1"#, + r#""collect_extensions":true"#, + r#""collect_backups":false"#, + r#""collect_words":false"#, + r#""collected_extensions":["php"]"#, + r#""dont_collect":["tif","tiff","ico","cur","bmp","webp","svg","png","jpg","jpeg","jfif","gif","avif","apng","pjpeg","pjp","mov","wav","mpg","mpeg","mp3","mp4","m4a","m4p","m4v","ogg","webm","ogv","oga","flac","aac","3gp","css","zip","xls","xml","gz","tgz"]"#, ] .iter() { assert!( - predicates::str::contains(*expected).eval(&json_state), - "{}", - expected - ) + predicates::str::contains(*expected).eval(&json_state) + ); } } diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index d7449ed..32790e6 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -1,19 +1,22 @@ +use std::sync::atomic::AtomicBool; use std::{ops::Deref, sync::atomic::Ordering, sync::Arc, time::Instant}; use anyhow::{bail, Result}; use console::style; use futures::{stream, StreamExt}; +use indicatif::ProgressBar; use lazy_static::lazy_static; use tokio::sync::Semaphore; use crate::{ event_handlers::{ - Command::{AddError, AddToF64Field, SubtractFromUsizeField}, + Command::{AddError, AddToF64Field, AddToUsizeField, SubtractFromUsizeField}, Handles, }, 
extractor::{ExtractionTarget, ExtractorBuilder}, heuristics, - scan_manager::{FeroxResponses, MenuCmdResult, ScanOrder, ScanStatus, PAUSE_SCAN}, + scan_manager::{FeroxResponses, FeroxScans, MenuCmdResult, ScanOrder, ScanStatus, PAUSE_SCAN}, + scanner::requester::TF_IDF, statistics::{ StatError::Other, StatField::{DirScanTimes, TotalExpected}, @@ -29,6 +32,43 @@ lazy_static! { pub static ref RESPONSES: FeroxResponses = FeroxResponses::default(); // todo consider removing this } + +/// check to see if `pause_flag` is set to true. when true; enter a busy loop that only exits +/// by setting PAUSE_SCAN back to false +async fn check_for_user_input( + pause_flag: &AtomicBool, + scanned_urls: Arc, + handles: Arc, +) { + log::trace!( + "enter: check_for_user_input({:?}, SCANNED_URLS, HANDLES)", + pause_flag + ); + + // todo write a test or two for this function at some point... + if pause_flag.load(Ordering::Acquire) { + match scanned_urls.pause(true).await { + Some(MenuCmdResult::Url(url)) => { + // user wants to add a new url to be scanned, need to send + // it over to the event handler for processing + handles + .send_scan_command(Command::ScanNewUrl(url)) + .unwrap_or_else(|e| log::warn!("Could not add scan to scan queue: {}", e)) + } + Some(MenuCmdResult::NumCancelled(num_canx)) => { + if num_canx > 0 { + handles + .stats + .send(SubtractFromUsizeField(TotalExpected, num_canx)) + .unwrap_or_else(|e| log::warn!("Could not update overall scan bar: {}", e)); + } + } + _ => {} + } + } + log::trace!("exit: check_for_user_input"); +} + /// handles the main muscle movement of scanning a url pub struct FeroxScanner { /// handles to handlers and config @@ -67,6 +107,57 @@ impl FeroxScanner { } } + /// produces and awaits tasks (mp of mpsc); responsible for making requests + async fn stream_requests( + &self, + looping_words: Arc>, + progress_bar: ProgressBar, + scanned_urls: Arc, + requester: Arc, + ) { + log::trace!("enter: stream_requests(params too verbose to print)"); + + 
let producers = stream::iter(looping_words.deref().to_owned()) + .map(|word| { + let pb = progress_bar.clone(); // progress bar is an Arc around internal state + let scanned_urls_clone = scanned_urls.clone(); + let requester_clone = requester.clone(); + let handles_clone = self.handles.clone(); + ( + tokio::spawn(async move { + // for every word in the wordlist, check to see if user has pressed enter + // in order to go into the interactive menu + check_for_user_input(&PAUSE_SCAN, scanned_urls_clone, handles_clone).await; + + // after checking for user input, send the request + requester_clone + .request(&word) + .await + .unwrap_or_else(|e| log::warn!("Requester encountered an error: {}", e)) + }), + pb, + ) + }) + .for_each_concurrent(self.handles.config.threads, |(resp, bar)| async move { + match resp.await { + Ok(_) => { + let increment_len = self.handles.expected_num_requests_multiplier() as u64; + bar.inc(increment_len); + } + Err(e) => { + log::warn!("error awaiting a response: {}", e); + self.handles.stats.send(AddError(Other)).unwrap_or_default(); + } + } + }); + + // await tx tasks + log::trace!("awaiting scan producers"); + producers.await; + log::trace!("done awaiting scan producers"); + log::trace!("exit: stream_requests"); + } + /// Scan a given url using a given wordlist /// /// This is the primary entrypoint for the scanner @@ -75,30 +166,17 @@ impl FeroxScanner { log::info!("Starting scan against: {}", self.target_url); let mut scan_timer = Instant::now(); - let mut dirlist_flag = false; - if self.handles.config.extract_links { - // parse html for links (i.e. 
web scraping) - let extractor = ExtractorBuilder::default() - .target(ExtractionTarget::ParseHtml) + if self.handles.config.extract_links && matches!(self.order, ScanOrder::Initial) { + // check for robots.txt (cannot be in sub-directories, so limited to Initial) + let mut extractor = ExtractorBuilder::default() + .target(ExtractionTarget::RobotsTxt) .url(&self.target_url) .handles(self.handles.clone()) .build()?; - let extract_out = extractor.extract().await?; - let links = extract_out.0; - dirlist_flag = extract_out.1; - extractor.request_links(links).await?; - if matches!(self.order, ScanOrder::Initial) { - // check for robots.txt (cannot be in subdirs) - let extractor = ExtractorBuilder::default() - .target(ExtractionTarget::RobotsTxt) - .url(&self.target_url) - .handles(self.handles.clone()) - .build()?; - let links = (extractor.extract().await?).0; - extractor.request_links(links).await?; - } + let result = extractor.extract().await?; + extractor.request_links(result).await?; } let scanned_urls = self.handles.ferox_scans()?; @@ -118,118 +196,115 @@ impl FeroxScanner { let progress_bar = ferox_scan.progress_bar(); - // Directory listing heuristic detection to not continue scanning - if dirlist_flag { - log::trace!("exit: scan_url -> Directory listing heuristic"); - - self.handles.stats.send(AddToF64Field( - DirScanTimes, - scan_timer.elapsed().as_secs_f64(), - ))?; - - self.handles.stats.send(SubtractFromUsizeField( - TotalExpected, - progress_bar.length() as usize, - ))?; - - progress_bar.reset_eta(); - progress_bar.finish_with_message(&format!( - "=> {}", - style("Directory listing").blue().bright() - )); - - ferox_scan.finish()?; - - return Ok(()); - } - // When acquire is called and the semaphore has remaining permits, the function immediately // returns a permit. However, if no remaining permits are available, acquire (asynchronously) // waits until an outstanding permit is dropped, at which point, the freed permit is assigned // to the caller. 
let _permit = self.scan_limiter.acquire().await; + if self.handles.config.scan_limit > 0 { scan_timer = Instant::now(); progress_bar.reset(); } - // Arc clones to be passed around to the various scans - let looping_words = self.wordlist.clone(); - { + // heuristics test block let test = heuristics::HeuristicTests::new(self.handles.clone()); + if let Ok(num_reqs) = test.wildcard(&self.target_url).await { progress_bar.inc(num_reqs); } + + if let Ok(dirlist_result) = test.directory_listing(&self.target_url).await { + if dirlist_result.is_some() { + let dirlist_result = dirlist_result.unwrap(); + // at this point, we have a DirListingType, and it's not the None variant + // which means we found directory listing based on the heuristic; now we need + // to process the links that are available if --extract-links was used + + if self.handles.config.extract_links { + let mut extractor = ExtractorBuilder::default() + .response(&dirlist_result.response) + .target(ExtractionTarget::DirectoryListing) + .url(&self.target_url) + .handles(self.handles.clone()) + .build()?; + + let result = extractor.extract_from_dir_listing().await?; + + extractor.request_links(result).await?; + + log::trace!("exit: scan_url -> Directory listing heuristic"); + + self.handles.stats.send(AddToF64Field( + DirScanTimes, + scan_timer.elapsed().as_secs_f64(), + ))?; + + self.handles.stats.send(SubtractFromUsizeField( + TotalExpected, + progress_bar.length() as usize, + ))?; + } + + let mut message = format!("=> {}", style("Directory listing").blue().bright()); + + if !self.handles.config.extract_links { + message + .push_str(&format!(" (add {} to scan)", style("-e").bright().yellow())) + } + + progress_bar.reset_eta(); + progress_bar.finish_with_message(&message); + + ferox_scan.finish()?; + + return Ok(()); + } + } } + // Arc clones to be passed around to the various scans + let looping_words = self.wordlist.clone(); + let requester = Arc::new(Requester::from(self, ferox_scan.clone())?); - let 
increment_len = - ((self.handles.config.extensions.len() + 1) * self.handles.config.methods.len()) as u64; - // producer tasks (mp of mpsc); responsible for making requests - let producers = stream::iter(looping_words.deref().to_owned()) - .map(|word| { - let pb = progress_bar.clone(); // progress bar is an Arc around internal state - let scanned_urls_clone = scanned_urls.clone(); - let requester_clone = requester.clone(); - let handles_clone = self.handles.clone(); - ( - tokio::spawn(async move { - if PAUSE_SCAN.load(Ordering::Acquire) { - // for every word in the wordlist, check to see if PAUSE_SCAN is set to true - // when true; enter a busy loop that only exits by setting PAUSE_SCAN back - // to false - match scanned_urls_clone.pause(true).await { - Some(MenuCmdResult::Url(url)) => { - // user wants to add a new url to be scanned, need to send - // it over to the event handler for processing - handles_clone - .send_scan_command(Command::ScanNewUrl(url)) - .unwrap_or_else(|e| { - log::warn!("Could not add scan to scan queue: {}", e) - }) - } - Some(MenuCmdResult::NumCancelled(num_canx)) => { - if num_canx > 0 { - handles_clone - .stats - .send(SubtractFromUsizeField(TotalExpected, num_canx)) - .unwrap_or_else(|e| { - log::warn!( - "Could not update overall scan bar: {}", - e - ) - }); - } - } - _ => {} - } - } - requester_clone - .request(&word) - .await - .unwrap_or_else(|e| log::warn!("Requester encountered an error: {}", e)) - }), - pb, - ) - }) - .for_each_concurrent(self.handles.config.threads, |(resp, bar)| async move { - match resp.await { - Ok(_) => { - bar.inc(increment_len); - } - Err(e) => { - log::warn!("error awaiting a response: {}", e); - self.handles.stats.send(AddError(Other)).unwrap_or_default(); - } - } - }); + self.stream_requests( + looping_words.clone(), + progress_bar.clone(), + scanned_urls.clone(), + requester.clone(), + ) + .await; - // await tx tasks - log::trace!("awaiting scan producers"); - producers.await; - log::trace!("done 
awaiting scan producers"); + if self.handles.config.collect_words { + let new_words = TF_IDF.read().unwrap().all_words(); + let new_words_len = new_words.len(); + + let cur_length = progress_bar.length(); + let new_length = cur_length + new_words_len as u64; + + progress_bar.set_length(new_length); + + self.handles + .stats + .send(AddToUsizeField(TotalExpected, new_words.len())) + .unwrap_or_default(); + + log::info!( + "requesting {} collected words: {:?}...", + new_words_len, + &new_words[..new_words_len.min(3) as usize] + ); + + self.stream_requests( + Arc::new(new_words), + progress_bar.clone(), + scanned_urls.clone(), + requester.clone(), + ) + .await; + } self.handles.stats.send(AddToF64Field( DirScanTimes, diff --git a/src/scanner/init.rs b/src/scanner/init.rs index b50a943..bee4c06 100644 --- a/src/scanner/init.rs +++ b/src/scanner/init.rs @@ -11,9 +11,7 @@ pub async fn initialize(num_words: usize, handles: Arc) -> Result<()> { log::trace!("enter: initialize({}, {:?})", num_words, handles); // number of requests only needs to be calculated once, and then can be reused - let num_reqs_expected: u64 = - (num_words * (handles.config.extensions.len() + 1) * (handles.config.methods.len())) - .try_into()?; + let num_reqs_expected: u64 = handles.expected_num_requests_per_dir().try_into()?; { // no real reason to keep the arc around beyond this call diff --git a/src/scanner/requester.rs b/src/scanner/requester.rs index f19767b..2d01d97 100644 --- a/src/scanner/requester.rs +++ b/src/scanner/requester.rs @@ -1,9 +1,11 @@ use std::{ cmp::max, - sync::{atomic::Ordering, Arc, Mutex}, + collections::HashSet, + sync::{self, atomic::Ordering, Arc, Mutex}, }; use anyhow::Result; +use lazy_static::lazy_static; use leaky_bucket::LeakyBucket; use tokio::{ sync::{oneshot, RwLock}, @@ -18,17 +20,21 @@ use crate::{ Handles, }, extractor::{ExtractionTarget, ExtractorBuilder}, + nlp::{Document, TfIdf}, response::FeroxResponse, scan_manager::{FeroxScan, ScanStatus}, 
statistics::{StatError::Other, StatField::TotalExpected}, url::FeroxUrl, - utils::logged_request, + utils::{logged_request, should_deny_url}, HIGH_ERROR_RATIO, }; use super::{policy_data::PolicyData, FeroxScanner, PolicyTrigger}; -use crate::utils::should_deny_url; -use std::collections::HashSet; + +lazy_static! { + /// make sure to note that this is a std rwlock and not tokio + pub(crate) static ref TF_IDF: Arc> = Arc::new(sync::RwLock::new(TfIdf::new())); +} /// Makes multiple requests based on the presence of extensions pub(super) struct Requester { @@ -303,8 +309,10 @@ impl Requester { pub async fn request(&self, word: &str) -> Result<()> { log::trace!("enter: request({})", word); - let urls = - FeroxUrl::from_string(&self.target_url, self.handles.clone()).formatted_urls(word)?; + let collected = self.handles.collected_extensions(); + + let urls = FeroxUrl::from_string(&self.target_url, self.handles.clone()) + .formatted_urls(word, collected)?; let should_test_deny = !self.handles.config.url_denylist.is_empty() || !self.handles.config.regex_denylist.is_empty(); @@ -331,13 +339,14 @@ impl Requester { continue; } - let response = logged_request( - &url, - method.as_str(), - Some(self.handles.config.data.as_slice()), - self.handles.clone(), - ) - .await?; + let data = if self.handles.config.data.is_empty() { + None + } else { + Some(self.handles.config.data.as_slice()) + }; + + let response = + logged_request(&url, method.as_str(), data, self.handles.clone()).await?; if (should_tune || self.handles.config.auto_bail) && !atomic_load!(self.policy_data.cooling_down, Ordering::SeqCst) @@ -361,11 +370,10 @@ impl Requester { } // response came back without error, convert it to FeroxResponse - let ferox_response = FeroxResponse::from( + let mut ferox_response = FeroxResponse::from( response, &self.target_url, method, - true, self.handles.config.output_level, ) .await; @@ -392,20 +400,38 @@ impl Requester { continue; } - if self.handles.config.extract_links && 
!ferox_response.status().is_redirection() { - let extractor = ExtractorBuilder::default() + if self.handles.config.collect_extensions { + ferox_response.parse_extension(self.handles.clone())?; + } + + if self.handles.config.collect_words { + if let Ok(mut guard) = TF_IDF.write() { + let doc = Document::from_html(ferox_response.text()); + guard.add_document(doc); + if guard.num_documents() % 12 == 0 + || (guard.num_documents() < 5 && guard.num_documents() % 2 == 0) + { + guard.calculate_tf_idf_scores(); + } + } + } + + if self.handles.config.extract_links { + let mut extractor = ExtractorBuilder::default() .target(ExtractionTarget::ResponseBody) .response(&ferox_response) .handles(self.handles.clone()) .build()?; + let new_links: HashSet<_>; - let extracted = (extractor.extract().await?).0; + + let result = extractor.extract().await?; { // gain and quickly drop the read lock on seen_links, using it while unlocked // to determine if there are any new links to process let read_links = self.seen_links.read().await; - new_links = extracted.difference(&read_links).cloned().collect(); + new_links = result.difference(&read_links).cloned().collect(); } if !new_links.is_empty() { @@ -417,7 +443,9 @@ impl Requester { } } - extractor.request_links(new_links).await?; + if !new_links.is_empty() { + extractor.request_links(new_links).await?; + } } // everything else should be reported @@ -458,12 +486,14 @@ mod tests { let (filters_task, filters_handle) = FiltersHandler::initialize(); let (out_task, out_handle) = TermOutHandler::initialize(configuration.clone(), stats_handle.tx.clone()); + let wordlist = Arc::new(vec![String::from("this_is_a_test")]); let handles = Arc::new(Handles::new( stats_handle, filters_handle, out_handle, configuration.clone(), + wordlist, )); let (scan_task, scan_handle) = ScanHandler::initialize(handles.clone()); @@ -587,10 +617,10 @@ mod tests { let requester = Requester { handles, + target_url: "http://localhost".to_string(), seen_links: 
RwLock::new(HashSet::::new()), tuning_lock: Mutex::new(0), ferox_scan: Arc::new(FeroxScan::default()), - target_url: "http://localhost".to_string(), rate_limiter: RwLock::new(None), policy_data: Default::default(), }; diff --git a/src/statistics/container.rs b/src/statistics/container.rs index 5ae3de0..780e517 100644 --- a/src/statistics/container.rs +++ b/src/statistics/container.rs @@ -69,6 +69,10 @@ pub struct Stats { /// response bodies and robots.txt as of v1.11.0 links_extracted: AtomicUsize, + /// tracker for number of extensions discovered when `--collect-extensions` is used; sources + /// are response bodies + extensions_collected: AtomicUsize, + /// tracker for overall number of 200s seen by the client status_200s: AtomicUsize, @@ -166,6 +170,10 @@ impl Serialize for Stats { state.serialize_field("total_scans", &atomic_load!(self.total_scans))?; state.serialize_field("initial_targets", &atomic_load!(self.initial_targets))?; state.serialize_field("links_extracted", &atomic_load!(self.links_extracted))?; + state.serialize_field( + "extensions_collected", + &atomic_load!(self.extensions_collected), + )?; state.serialize_field("status_200s", &atomic_load!(self.status_200s))?; state.serialize_field("status_301s", &atomic_load!(self.status_301s))?; state.serialize_field("status_302s", &atomic_load!(self.status_302s))?; @@ -290,6 +298,13 @@ impl<'a> Deserialize<'a> for Stats { } } } + "extensions_collected" => { + if let Some(num) = value.as_u64() { + if let Ok(parsed) = usize::try_from(num) { + atomic_increment!(stats.extensions_collected, parsed); + } + } + } "status_200s" => { if let Some(num) = value.as_u64() { if let Ok(parsed) = usize::try_from(num) { @@ -628,6 +643,9 @@ impl Stats { StatField::LinksExtracted => { atomic_increment!(self.links_extracted, value); } + StatField::ExtensionsCollected => { + atomic_increment!(self.extensions_collected, value); + } StatField::WildcardsFiltered => { atomic_increment!(self.wildcards_filtered, value); 
atomic_increment!(self.responses_filtered, value); @@ -664,6 +682,10 @@ impl Stats { atomic_increment!(self.client_errors, atomic_load!(d_stats.client_errors)); atomic_increment!(self.server_errors, atomic_load!(d_stats.server_errors)); atomic_increment!(self.links_extracted, atomic_load!(d_stats.links_extracted)); + atomic_increment!( + self.extensions_collected, + atomic_load!(d_stats.extensions_collected) + ); atomic_increment!(self.status_200s, atomic_load!(d_stats.status_200s)); atomic_increment!(self.status_301s, atomic_load!(d_stats.status_301s)); atomic_increment!(self.status_302s, atomic_load!(d_stats.status_302s)); @@ -834,7 +856,7 @@ mod tests { #[test] /// Stats::merge_from should properly increment expected fields and ignore others fn stats_merge_from_alters_correct_fields() { - let contents = r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#; + let contents = 
r#"{"statistics":{"type":"statistics","timeouts":1,"requests":9207,"expected_per_scan":707,"total_expected":9191,"errors":3,"successes":720,"redirects":13,"client_errors":8474,"server_errors":2,"total_scans":13,"initial_targets":1,"links_extracted":51,"extensions_collected":4,"status_403s":3,"status_200s":720,"status_301s":12,"status_302s":1,"status_401s":4,"status_429s":2,"status_500s":5,"status_503s":9,"status_504s":6,"status_508s":7,"wildcards_filtered":707,"responses_filtered":707,"resources_discovered":27,"directory_scan_times":[2.211973078,1.989015505,1.898675839,3.9714468910000003,4.938152838,5.256073528,6.021986595,6.065740734,6.42633762,7.095142125,7.336982137,5.319785619,4.843649778],"total_runtime":[11.556575456000001],"url_format_errors":17,"redirection_errors":12,"connection_errors":21,"request_errors":4}}"#; let config = Configuration::new().unwrap(); let stats = Stats::new(config.json); @@ -857,6 +879,7 @@ mod tests { assert_eq!(atomic_load!(stats.total_scans), 0); // not updated in merge_from assert_eq!(atomic_load!(stats.initial_targets), 0); // not updated in merge_from assert_eq!(atomic_load!(stats.links_extracted), 51); + assert_eq!(atomic_load!(stats.extensions_collected), 4); assert_eq!(atomic_load!(stats.status_200s), 720); assert_eq!(atomic_load!(stats.status_301s), 12); assert_eq!(atomic_load!(stats.status_302s), 1); diff --git a/src/statistics/field.rs b/src/statistics/field.rs index dab6954..80ff3b3 100644 --- a/src/statistics/field.rs +++ b/src/statistics/field.rs @@ -13,6 +13,9 @@ pub enum StatField { /// Translates to `links_extracted` LinksExtracted, + /// Translates to `extensions_collected` + ExtensionsCollected, + /// Translates to `total_expected` TotalExpected, diff --git a/src/url.rs b/src/url.rs index 9e522ab..7621ec5 100644 --- a/src/url.rs +++ b/src/url.rs @@ -1,6 +1,7 @@ use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError}; use anyhow::{anyhow, bail, Result}; use reqwest::Url; +use 
std::collections::HashSet; use std::{convert::TryInto, fmt, sync::Arc}; /// abstraction around target urls; collects all Url related shenanigans in one place @@ -37,7 +38,11 @@ impl FeroxUrl { /// /// If any extensions were passed to the program, each extension will add a /// (base_url + word + ext) Url to the vector - pub fn formatted_urls(&self, word: &str) -> Result> { + pub fn formatted_urls( + &self, + word: &str, + collected_extensions: HashSet, + ) -> Result> { log::trace!("enter: formatted_urls({})", word); let mut urls = vec![]; @@ -54,7 +59,13 @@ impl FeroxUrl { Err(_) => self.handles.stats.send(AddError(UrlFormat))?, } - for ext in self.handles.config.extensions.iter() { + for ext in self + .handles + .config + .extensions + .iter() + .chain(collected_extensions.iter()) + { match self.format(word, Some(ext)) { // any extensions passed in Ok(url) => urls.push(url), @@ -254,7 +265,7 @@ mod tests { fn formatted_urls_no_extension_returns_base_url_with_word() { let handles = Arc::new(Handles::for_testing(None, None).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!(urls, [Url::parse("http://localhost/turbo").unwrap()]) } @@ -268,7 +279,7 @@ mod tests { let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!( urls, @@ -315,7 +326,7 @@ mod tests { let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - let urls = url.formatted_urls("turbo").unwrap(); + let urls = url.formatted_urls("turbo", HashSet::new()).unwrap(); assert_eq!(urls, expected[i]); } } @@ -502,7 +513,7 @@ mod tests { }; let handles = 
Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0); let url = FeroxUrl::from_string("http://localhost", handles); - match url.formatted_urls("ferox") { + match url.formatted_urls("ferox", HashSet::new()) { Ok(urls) => { // 3 = One for the main word + slash and for the two extensions assert_eq!(urls.len(), 3); diff --git a/src/utils.rs b/src/utils.rs index aa97907..5e05997 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -131,7 +131,7 @@ pub async fn make_request( client: &Client, url: &Url, method: &str, - data: Option<&[u8]>, + mut data: Option<&[u8]>, output_level: OutputLevel, config: &Configuration, tx_stats: UnboundedSender, @@ -142,8 +142,30 @@ pub async fn make_request( output_level, tx_stats ); + let tmp_workaround: Option<&[u8]> = Some(&[0xd_u8, 0xa]); // \r\n let mut request = client.request(Method::from_bytes(method.as_bytes())?, url.to_owned()); + + if (!config.proxy.is_empty() || !config.replay_proxy.is_empty()) + && data.is_none() + && ["post", "put", "patch"].contains(&method.to_ascii_lowercase().as_str()) + { + // either --proxy or --replay-proxy was specified + // AND + // --data wasn't used + // AND + // the method is either post/put/patch (case insensitive) + // + // this combination of factors results in requests that are delayed for 10 seconds before + // being issued. 
The tracking issues are + // https://github.com/epi052/feroxbuster/issues/501 + // https://github.com/seanmonstar/reqwest/issues/1474 + // + // as a (hopefully temporary) workaround, we'll add \r\n to the body so that there's no + // delay + data = tmp_workaround; + } + if let Some(body_data) = data { request = request.body(body_data.to_vec()); } @@ -482,7 +504,7 @@ pub fn slugify_filename(url: &str, prefix: &str, suffix: &str) -> String { String::new() }; - let slug = url.replace("://", "_").replace("/", "_").replace(".", "_"); + let slug = url.replace("://", "_").replace('/', "_").replace('.', "_"); let filename = format!("{}{}-{}.{}", altered_prefix, slug, ts, suffix); diff --git a/tests/test_banner.rs b/tests/test_banner.rs index 980f346..b6e0df8 100644 --- a/tests/test_banner.rs +++ b/tests/test_banner.rs @@ -16,10 +16,10 @@ fn banner_prints_proxy() -> Result<(), Box> { Command::cargo_bin("feroxbuster") .unwrap() .arg("--stdin") - .arg("--wordlist") - .arg(file.as_os_str()) .arg("--proxy") .arg("http://127.0.0.1:8080") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .pipe_stdin(file) .unwrap() .assert() @@ -57,7 +57,7 @@ fn banner_prints_replay_proxy() -> Result<(), Box> { .unwrap() .arg("--stdin") .arg("--wordlist") - .arg(file.as_os_str()) + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .arg("--replay-proxy") .arg("http://127.0.0.1:8081") .pipe_stdin(file) @@ -95,6 +95,8 @@ fn banner_prints_headers() { .arg("stuff:things") .arg("-H") .arg("mostuff:mothings") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -126,6 +128,8 @@ fn banner_prints_denied_urls() { .arg("https://also-not.me") .arg("https:") .arg("/deny.*") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -156,6 +160,8 @@ fn banner_prints_random_agent() { .arg("--url") 
.arg("http://localhost") .arg("--random-agent") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -192,6 +198,8 @@ fn banner_prints_filter_sizes() { .arg("93") .arg("--filter-words") .arg("94") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -228,6 +236,8 @@ fn banner_prints_queries() { .arg("token=supersecret") .arg("--query") .arg("stuff=things") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -256,6 +266,8 @@ fn banner_prints_status_codes() { .arg("http://localhost") .arg("-s") .arg("201,301,401") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -284,6 +296,8 @@ fn banner_prints_replay_codes() { .arg("200,302") .arg("--replay-proxy") .arg("http://localhost:8081") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -312,6 +326,8 @@ fn banner_prints_output_file() { .arg("http://localhost") .arg("--output") .arg("/super/cool/path") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -326,7 +342,7 @@ fn banner_prints_output_file() { .and(predicate::str::contains("Output File")) .and(predicate::str::contains("/super/cool/path")) .and(predicate::str::contains( - "ERROR: Couldn't start /super/cool/path file handler", + "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676", )) .and(predicate::str::contains("─┴─")), ); @@ -341,6 +357,8 @@ fn banner_prints_insecure() { .arg("--url") .arg("http://localhost") .arg("-k") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -367,6 +385,8 @@ fn banner_prints_redirects() { 
.arg("--url") .arg("http://localhost") .arg("-r") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -396,6 +416,8 @@ fn banner_prints_extensions() { .arg("js") .arg("--extensions") .arg("pdf") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -422,6 +444,8 @@ fn banner_prints_dont_filter() { .arg("--url") .arg("http://localhost") .arg("--dont-filter") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -448,6 +472,8 @@ fn banner_prints_verbosity_one() { .arg("--url") .arg("http://localhost") .arg("-v") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -474,6 +500,8 @@ fn banner_prints_verbosity_two() { .arg("--url") .arg("http://localhost") .arg("-vv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -500,6 +528,8 @@ fn banner_prints_verbosity_three() { .arg("--url") .arg("http://localhost") .arg("-vvv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -526,6 +556,8 @@ fn banner_prints_verbosity_four() { .arg("--url") .arg("http://localhost") .arg("-vvvv") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -552,6 +584,8 @@ fn banner_prints_add_slash() { .arg("--url") .arg("http://localhost") .arg("-f") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -579,6 +613,8 @@ fn banner_prints_infinite_depth() { .arg("http://localhost") .arg("--depth") .arg("0") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() 
.stderr( @@ -606,6 +642,8 @@ fn banner_prints_recursion_depth() { .arg("http://localhost") .arg("--depth") .arg("343214") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -632,6 +670,8 @@ fn banner_prints_no_recursion() { .arg("--url") .arg("http://localhost") .arg("-n") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -658,10 +698,12 @@ fn banner_doesnt_print() { .arg("--url") .arg("http://localhost") .arg("-q") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr(predicate::str::contains( - "Could not connect to any target provided", + "Could not open /definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676", )); } @@ -674,6 +716,8 @@ fn banner_prints_extract_links() { .arg("--url") .arg("http://localhost") .arg("-e") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -701,6 +745,8 @@ fn banner_prints_scan_limit() { .arg("http://localhost") .arg("-L") .arg("4") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -728,6 +774,8 @@ fn banner_prints_filter_status() { .arg("http://localhost") .arg("-C") .arg("200") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -756,6 +804,8 @@ fn banner_prints_json() { .arg("--json") .arg("--output") .arg("/dev/null") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -783,6 +833,8 @@ fn banner_prints_debug_log() { .arg("http://localhost") .arg("--debug-log") .arg("/dev/null") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -810,6 
+862,8 @@ fn banner_prints_filter_regex() { .arg("http://localhost") .arg("--filter-regex") .arg("^ignore me$") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -837,6 +891,8 @@ fn banner_prints_time_limit() { .arg("http://localhost") .arg("--time-limit") .arg("10m") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -864,6 +920,8 @@ fn banner_prints_similarity_filter() { .arg("http://localhost") .arg("--filter-similar-to") .arg("https://somesite.com") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -891,6 +949,8 @@ fn banner_prints_rate_limit() { .arg("http://localhost") .arg("--rate-limit") .arg("6735") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -917,6 +977,8 @@ fn banner_prints_auto_tune() { .arg("--url") .arg("http://localhost") .arg("--auto-tune") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -943,6 +1005,8 @@ fn banner_prints_auto_bail() { .arg("--url") .arg("http://localhost") .arg("--auto-bail") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -969,6 +1033,8 @@ fn banner_doesnt_print_when_silent() { .arg("--url") .arg("http://localhost") .arg("--silent") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -993,6 +1059,8 @@ fn banner_doesnt_print_when_quiet() { .arg("--url") .arg("http://localhost") .arg("--quiet") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1017,18 +1085,19 @@ fn banner_prints_parallel() { .arg("--stdin") .arg("--parallel") 
.arg("4316") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( predicate::str::contains("─┬─") - .not() - .and(predicate::str::contains("Target Url").not()) - .and(predicate::str::contains("Parallel Scans").not()) - .and(predicate::str::contains("Threads").not()) - .and(predicate::str::contains("Wordlist").not()) - .and(predicate::str::contains("Status Codes").not()) - .and(predicate::str::contains("Timeout (secs)").not()) - .and(predicate::str::contains("User-Agent").not()), + .and(predicate::str::contains("Parallel Scans")) + .and(predicate::str::contains("4316")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")), ); } @@ -1044,6 +1113,8 @@ fn banner_prints_methods() { .arg("PUT") .arg("--methods") .arg("OPTIONS") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1075,6 +1146,8 @@ fn banner_prints_data() { .arg("POST") .arg("--data") .arg("some_data") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") .assert() .success() .stderr( @@ -1091,3 +1164,233 @@ fn banner_prints_data() { .and(predicate::str::contains("─┴─")), ); } + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + ignored extensions +fn banner_prints_collect_extensions_and_dont_collect_default() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--collect-extensions") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + 
.and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Ignored Extensions")) + .and(predicate::str::contains("Images, Movies, Audio, etc...")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect extensions +fn banner_prints_collect_extensions_and_dont_collect_with_input() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--collect-extensions") + .arg("--dont-collect") + .arg("pdf") + .arg("xps") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Ignored Extensions")) + .and(predicate::str::contains("[pdf, xps]")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect backups +fn banner_prints_collect_backups() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--collect-backups") + .arg("--wordlist") + 
.arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + collect words +fn banner_prints_collect_words() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--collect-words") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Words")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + every setting enabled by --smart +fn banner_prints_all_composite_settings_smart() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--smart") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + 
.and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Words")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("Extract Links")) + .and(predicate::str::contains("Auto Tune")) + .and(predicate::str::contains("─┴─")), + ); +} + +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + every setting enabled by --thorough +fn banner_prints_all_composite_settings_thorough() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--thorough") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Collect Words")) + .and(predicate::str::contains("Collect Extensions")) + .and(predicate::str::contains("Collect Backups")) + .and(predicate::str::contains("Extract Links")) + .and(predicate::str::contains("Auto Tune")) + .and(predicate::str::contains("─┴─")), + ); +} +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + every setting enabled by --burp +fn banner_prints_all_composite_settings_burp() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--burp") + .arg("--wordlist") + 
.arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Proxy")) + .and(predicate::str::contains("Insecure")) + .and(predicate::str::contains("─┴─")), + ); +} +#[test] +/// test allows non-existent wordlist to trigger the banner printing to stderr +/// expect to see all mandatory prints + every setting enabled by --burp-replay +fn banner_prints_all_composite_settings_burp_replay() { + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg("http://localhost") + .arg("--burp-replay") + .arg("--wordlist") + .arg("/definitely/doesnt/exist/0cd7fed0-47f4-4b18-a1b0-ac39708c1676") + .assert() + .success() + .stderr( + predicate::str::contains("─┬─") + .and(predicate::str::contains("Target Url")) + .and(predicate::str::contains("http://localhost")) + .and(predicate::str::contains("Threads")) + .and(predicate::str::contains("Wordlist")) + .and(predicate::str::contains("Status Codes")) + .and(predicate::str::contains("Timeout (secs)")) + .and(predicate::str::contains("User-Agent")) + .and(predicate::str::contains("Replay Proxy")) + .and(predicate::str::contains("Insecure")) + .and(predicate::str::contains("─┴─")), + ); +} diff --git a/tests/test_deny_list.rs b/tests/test_deny_list.rs index 7c23116..b634ea6 100644 --- a/tests/test_deny_list.rs +++ b/tests/test_deny_list.rs @@ -131,8 +131,8 @@ fn deny_list_works_during_recursion() { .not(), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); assert_eq!(js_dev_mock.hits(), 0); 
assert_eq!(js_dev_file_mock.hits(), 0); @@ -202,9 +202,9 @@ fn deny_list_works_during_recursion_with_inverted_parents() { .not(), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); + assert_eq!(js_dev_mock.hits(), 2); assert_eq!(js_dev_file_mock.hits(), 1); assert_eq!(api_mock.hits(), 0); diff --git a/tests/test_extractor.rs b/tests/test_extractor.rs index bea4685..a8029a1 100644 --- a/tests/test_extractor.rs +++ b/tests/test_extractor.rs @@ -284,11 +284,11 @@ fn extractor_finds_robots_txt_links_and_displays_files_or_scans_directories() { .and(predicate::str::contains("22c")) .and(predicate::str::contains("/misc/LICENSE")) .and(predicate::str::contains("29c")) - .and(predicate::str::contains("200").count(3)), + .and(predicate::str::contains("200").count(4)), ); assert_eq!(mock.hits(), 1); - assert_eq!(mock_dir.hits(), 2); + assert_eq!(mock_dir.hits(), 3); assert_eq!(mock_two.hits(), 1); assert_eq!(mock_file.hits(), 1); assert_eq!(mock_disallowed.hits(), 1); @@ -636,7 +636,7 @@ fn extractor_recurses_into_403_directories() -> Result<(), Box Result<(), Box> { .arg("-vvvv") .assert() .success() - .stderr(predicate::str::contains( - "Failed while scanning: Did not find any words in", - )); + .stderr(predicate::str::contains("Did not find any words in")); - assert_eq!(mock.hits(), 1); + assert_eq!(mock.hits(), 0); teardown_tmp_directory(tmp_dir); Ok(()) diff --git a/tests/test_scanner.rs b/tests/test_scanner.rs index b6fab74..feb7141 100644 --- a/tests/test_scanner.rs +++ b/tests/test_scanner.rs @@ -3,6 +3,8 @@ use assert_cmd::prelude::*; use httpmock::Method::GET; use httpmock::MockServer; use predicates::prelude::*; +use std::thread::sleep; +use std::time::Duration; use std::{process::Command, time}; use utils::{setup_tmp_directory, teardown_tmp_directory}; @@ -89,9 +91,9 @@ fn scanner_recursive_request_scan() -> Result<(), Box> { 
.and(predicate::str::is_match("200.*js/dev/file.js").unwrap()), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 2); + assert_eq!(js_prod_mock.hits(), 2); + assert_eq!(js_dev_mock.hits(), 2); assert_eq!(js_dev_file_mock.hits(), 1); teardown_tmp_directory(tmp_dir); @@ -153,9 +155,9 @@ fn scanner_recursive_request_scan_using_only_success_responses( .and(predicate::str::is_match("200.*js/dev/file.js").unwrap()), ); - assert_eq!(js_mock.hits(), 1); - assert_eq!(js_prod_mock.hits(), 1); - assert_eq!(js_dev_mock.hits(), 1); + assert_eq!(js_mock.hits(), 3); + assert_eq!(js_prod_mock.hits(), 3); + assert_eq!(js_dev_mock.hits(), 3); assert_eq!(js_dev_file_mock.hits(), 1); teardown_tmp_directory(tmp_dir); @@ -596,7 +598,7 @@ fn scanner_recursion_works_with_403_directories() { assert_eq!(mock.hits(), 1); assert_eq!(found_anyway.hits(), 1); - assert_eq!(forbidden_dir.hits(), 1); + assert_eq!(forbidden_dir.hits(), 3); teardown_tmp_directory(tmp_dir); } @@ -638,3 +640,216 @@ fn rate_limit_enforced_when_specified() { teardown_tmp_directory(tmp_dir); } + +#[test] +/// ensure that auto-discovered extensions are tracked in statistics and bar lengths are updated +fn add_discovered_extension_updates_bars_and_stats() { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory( + &["LICENSE".to_string(), "stuff.php".to_string()], + "wordlist", + ) + .unwrap(); + + srv.mock(|when, then| { + when.method(GET).path("/stuff.php"); + then.status(200).body("cool... 
coolcoolcool"); + }); + + let file_path = tmp_dir.path().join("debug-file.txt"); + + assert!(!file_path.exists()); + + Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--extract-links") + .arg("--collect-extensions") + .arg("-vvvv") + .arg("--debug-log") + .arg(file_path.as_os_str()) + .unwrap() + .assert() + .success(); + + let contents = std::fs::read_to_string(file_path).unwrap(); + println!("{}", contents); + assert!(contents.contains("discovered new extension: php")); + assert!(contents.contains("extensions_collected: 1")); + assert!(contents.contains("expected_per_scan: 6")); +} + +#[test] +/// send a request to a 200 file, expect pre-configured backup collection rules to be applied +/// and then requested +fn collect_backups_makes_appropriate_requests() { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory(&["LICENSE.txt".to_string()], "wordlist").unwrap(); + + let valid_paths = vec![ + "/LICENSE.txt", + "/LICENSE.txt~", + "/LICENSE.txt.bak", + "/LICENSE.txt.bak2", + "/LICENSE.txt.old", + "/LICENSE.txt.1", + "/LICENSE.bak", + "/.LICENSE.txt.swp", + ]; + + let valid_mocks: Vec<_> = valid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200).body("this is a valid test"); + }) + }) + .collect(); + + let invalid_paths: Vec<_> = vec![ + "/LICENSE.txt~~", + "/LICENSE.txt.bak.bak", + "/LICENSE.txt.bak2.bak2", + "/LICENSE.txt.old.old", + "/LICENSE.txt.1.1", + "/..LICENSE.txt.swp.swp", + ]; + + let invalid_mocks: Vec<_> = invalid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200).body("this is an invalid test"); + }) + }) + .collect(); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--collect-backups") + .arg("--wordlist") + .arg(file.as_os_str()) + .unwrap(); + + cmd.assert().success().stdout( + 
predicate::str::contains("/LICENSE.txt") + .and(predicate::str::contains("/LICENSE.txt~")) + .and(predicate::str::contains("/LICENSE.txt.bak")) + .and(predicate::str::contains("/LICENSE.txt.bak2")) + .and(predicate::str::contains("/LICENSE.txt.old")) + .and(predicate::str::contains("/LICENSE.txt.1")) + .and(predicate::str::contains("/LICENSE.bak")) + .and(predicate::str::contains("/.LICENSE.txt.swp")), + ); + + for valid_mock in valid_mocks { + assert_eq!(valid_mock.hits(), 1); + } + + for invalid_mock in invalid_mocks { + assert_eq!(invalid_mock.hits(), 0); + } + + teardown_tmp_directory(tmp_dir); +} + +#[test] +/// send a request to 4 200 files, expect non-zero tf-idf rated words to be requested as well +fn collect_words_makes_appropriate_requests() { + let srv = MockServer::start(); + + let wordlist: Vec<_> = [ + "doc1", "doc2", "doc3", "doc4", "blah", "blah2", "blah3", "blah4", + ] + .iter() + .map(|w| w.to_string()) + .collect(); + + let (tmp_dir, file) = setup_tmp_directory(&wordlist, "wordlist").unwrap(); + + srv.mock(|when, then| { + when.method(GET).path("/doc1"); + then.status(200) + .body("Air quality in the sunny island improved gradually throughout Wednesday."); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc2"); + then.status(200).body( + "Air quality in Singapore on Wednesday continued to get worse as haze hit the island.", + ); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc3"); + then.status(200).body("The air quality in Singapore is monitored through a network of air monitoring stations located in different parts of the island"); + }); + srv.mock(|when, then| { + when.method(GET).path("/doc4"); + then.status(200) + .body("The air quality in Singapore got worse on Wednesday."); + }); + + let valid_paths = vec![ + "/gradually", + "/network", + "/hit", + "/located", + "/continued", + "/island", + "/worse", + "/monitored", + "/monitoring", + "/haze", + "/different", + "/stations", + "/sunny", + "/singapore", + "/improved", 
+ "/parts", + "/wednesday", + ]; + + let valid_mocks: Vec<_> = valid_paths + .iter() + .map(|&p| { + srv.mock(|when, then| { + when.method(GET).path(p); + then.status(200); + }) + }) + .collect(); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("-vv") + .arg("--collect-words") + .arg("-t") + .arg("1") + .arg("--wordlist") + .arg(file.as_os_str()) + .unwrap(); + + print!("{}", std::str::from_utf8(&cmd.stdout).unwrap()); + + cmd.assert().success().stdout( + predicate::str::contains("/doc1") + .and(predicate::str::contains("/doc2")) + .and(predicate::str::contains("/doc3")) + .and(predicate::str::contains("/doc4")), + ); + sleep(Duration::new(2, 0)); + for valid_mock in valid_mocks { + println!("mock: {}", valid_paths[valid_mock.id - 4]); + assert_eq!(valid_mock.hits(), 1); + } + + teardown_tmp_directory(tmp_dir); +}