mirror of
https://github.com/epi052/feroxbuster.git
synced 2026-06-06 00:41:13 -03:00
Second Cut - All Directory Listing Items Obtained
This commit is contained in:
@@ -23,7 +23,7 @@ pub enum ExtractionTarget {
|
||||
RobotsTxt,
|
||||
|
||||
// Parse HTML and extract links
|
||||
ParseHTML,
|
||||
ParseHtml,
|
||||
}
|
||||
|
||||
/// responsible for building an `Extractor`
|
||||
|
||||
@@ -63,7 +63,7 @@ impl<'a> Extractor<'a> {
|
||||
match self.target {
|
||||
ExtractionTarget::ResponseBody => Ok(self.extract_from_body().await?),
|
||||
ExtractionTarget::RobotsTxt => Ok(self.extract_from_robots().await?),
|
||||
ExtractionTarget::ParseHTML => Ok(self.parse_html().await?),
|
||||
ExtractionTarget::ParseHtml => Ok(self.parse_html().await?),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,6 +153,29 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
let body = self.response.unwrap().text();
|
||||
|
||||
// Check for directory listing
|
||||
if body.contains("Directory listing") {
|
||||
log::debug!(" >> directory listing detected");
|
||||
}
|
||||
// Parse links [Note: Update both functions]
|
||||
let document = Document::from(body);
|
||||
let html_links = (document.find(Name("a")).filter_map(|n| n.attr("href")))
|
||||
.chain(document.find(Name("img")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("form")).filter_map(|n| n.attr("action")))
|
||||
.chain(document.find(Name("script")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("iframe")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("div")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("frame")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("embed")).filter_map(|n| n.attr("src")));
|
||||
for link in html_links {
|
||||
log::info!(" >> found link \"{}\"", link);
|
||||
let mut new_url = Url::parse(&self.url)?;
|
||||
new_url.set_path(link);
|
||||
if self.add_all_sub_paths(new_url.path(), &mut links).is_err() {
|
||||
log::warn!("could not add sub-paths from {} to {:?}", new_url, links);
|
||||
}
|
||||
}
|
||||
|
||||
for capture in self.links_regex.captures_iter(body) {
|
||||
// remove single & double quotes from both ends of the capture
|
||||
// capture[0] is the entire match, additional capture groups start at [1]
|
||||
@@ -276,7 +299,7 @@ impl<'a> Extractor<'a> {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
}
|
||||
},
|
||||
ExtractionTarget::ParseHTML => match Url::parse(&self.url) {
|
||||
ExtractionTarget::ParseHtml => match Url::parse(&self.url) {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
bail!("Could not parse {}: {}", self.url, e);
|
||||
@@ -365,9 +388,11 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
let mut links: HashSet<String> = HashSet::new();
|
||||
|
||||
// request
|
||||
let response = self.make_extract_request("/robots.txt").await?;
|
||||
let body = response.text();
|
||||
|
||||
for capture in self.robots_regex.captures_iter(response.text()) {
|
||||
for capture in self.robots_regex.captures_iter(body) {
|
||||
if let Some(new_path) = capture.name("url_path") {
|
||||
let mut new_url = Url::parse(&self.url)?;
|
||||
new_url.set_path(new_path.as_str());
|
||||
@@ -391,16 +416,17 @@ impl<'a> Extractor<'a> {
|
||||
|
||||
let mut links: HashSet<String> = HashSet::new();
|
||||
|
||||
let response = self.make_extract_request("/").await?;
|
||||
// Request
|
||||
let url = Url::parse(&self.url)?;
|
||||
let response = self.make_extract_request(url.path()).await?;
|
||||
let body = response.text();
|
||||
|
||||
// Check for directory listing
|
||||
if body.contains("Directory listing") {
|
||||
log::debug!(" >> directory listing detected");
|
||||
}
|
||||
// Parse links [Note: Update both functions]
|
||||
let document = Document::from(body);
|
||||
|
||||
// Parse links
|
||||
let html_links = (document.find(Name("a")).filter_map(|n| n.attr("href")))
|
||||
.chain(document.find(Name("img")).filter_map(|n| n.attr("src")))
|
||||
.chain(document.find(Name("form")).filter_map(|n| n.attr("action")))
|
||||
|
||||
@@ -280,7 +280,7 @@ async fn request_robots_txt_without_proxy() -> Result<()> {
|
||||
handles,
|
||||
};
|
||||
|
||||
let resp = extractor.request_robots_txt().await?;
|
||||
let resp = extractor.make_extract_request("/robots.txt").await?;
|
||||
|
||||
assert!(matches!(resp.status(), &StatusCode::OK));
|
||||
println!("{}", resp);
|
||||
@@ -313,7 +313,7 @@ async fn request_robots_txt_with_proxy() -> Result<()> {
|
||||
.handles(handles)
|
||||
.build()?;
|
||||
|
||||
let resp = extractor.request_robots_txt().await?;
|
||||
let resp = extractor.make_extract_request("/robots.txt").await?;
|
||||
|
||||
assert!(matches!(resp.status(), &StatusCode::OK));
|
||||
assert_eq!(resp.content_length(), 19);
|
||||
|
||||
@@ -75,25 +75,25 @@ impl FeroxScanner {
|
||||
|
||||
let scan_timer = Instant::now();
|
||||
|
||||
if matches!(self.order, ScanOrder::Initial) { // all fresh dirs will be passed to try_recursion
|
||||
if self.handles.config.extract_links {
|
||||
// parse html for links (i.e. web scraping)
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.target(ExtractionTarget::ParseHtml)
|
||||
.url(&self.target_url)
|
||||
.handles(self.handles.clone())
|
||||
.target(ExtractionTarget::ParseHTML)
|
||||
.build()?;
|
||||
let links = extractor.extract().await?;
|
||||
extractor.request_links(links).await?;
|
||||
|
||||
if self.handles.config.extract_links {
|
||||
// test robots.txt
|
||||
if matches!(self.order, ScanOrder::Initial) {
|
||||
// check for robots.txt (cannot be in subdirs)
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.url(&self.target_url)
|
||||
.handles(self.handles.clone())
|
||||
.target(ExtractionTarget::RobotsTxt)
|
||||
.build()?;
|
||||
let links = extractor.extract().await?;
|
||||
extractor.request_links(links).await?;
|
||||
.target(ExtractionTarget::RobotsTxt)
|
||||
.url(&self.target_url)
|
||||
.handles(self.handles.clone())
|
||||
.build()?;
|
||||
let links = extractor.extract().await?;
|
||||
extractor.request_links(links).await?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::{
|
||||
Command::{self, AddError, SubtractFromUsizeField},
|
||||
Handles,
|
||||
},
|
||||
extractor::{ExtractionTarget::ResponseBody, ExtractorBuilder},
|
||||
extractor::{ExtractionTarget, ExtractorBuilder},
|
||||
response::FeroxResponse,
|
||||
scan_manager::{FeroxScan, ScanStatus},
|
||||
statistics::{StatError::Other, StatField::TotalExpected},
|
||||
@@ -394,7 +394,7 @@ impl Requester {
|
||||
|
||||
if self.handles.config.extract_links && !ferox_response.status().is_redirection() {
|
||||
let extractor = ExtractorBuilder::default()
|
||||
.target(ResponseBody)
|
||||
.target(ExtractionTarget::ResponseBody)
|
||||
.response(&ferox_response)
|
||||
.handles(self.handles.clone())
|
||||
.build()?;
|
||||
|
||||
Reference in New Issue
Block a user