diff --git a/src/client.rs b/src/client.rs index 1e61306..ee0ecff 100644 --- a/src/client.rs +++ b/src/client.rs @@ -27,7 +27,8 @@ pub fn initialize( .user_agent(user_agent) .danger_accept_invalid_certs(insecure) .default_headers(header_map) - .redirect(policy); + .redirect(policy) + .http1_title_case_headers(); if let Some(some_proxy) = proxy { if !some_proxy.is_empty() { diff --git a/src/config/container.rs b/src/config/container.rs index 4c25057..abddfbd 100644 --- a/src/config/container.rs +++ b/src/config/container.rs @@ -246,8 +246,6 @@ pub struct Configuration { pub resume_from: String, /// Whether or not a scan's current state should be saved when user presses Ctrl+C - /// - /// Not configurable from CLI; can only be set from a config file #[serde(default = "save_state")] pub save_state: bool, @@ -696,6 +694,10 @@ impl Configuration { config.requester_policy = RequesterPolicy::AutoBail; } + if args.is_present("no_state") { + config.save_state = false; + } + if args.is_present("dont_filter") { config.dont_filter = true; } diff --git a/src/extractor/container.rs b/src/extractor/container.rs index c7bbb08..e969456 100644 --- a/src/extractor/container.rs +++ b/src/extractor/container.rs @@ -240,8 +240,18 @@ impl<'a> Extractor<'a> { log::trace!("enter: get_sub_paths_from_path({})", path); let mut paths = vec![]; + // trim whitespace, remove slashes, and queries/anchors (i.e. ?C=D;O=A) + let mut path_str = path.to_owned(); + path_str = path_str.trim().to_string(); + path_str.retain(|c| !c.is_whitespace()); + if path_str.starts_with("//") { + path_str = path_str.trim_start_matches('/').to_string(); + }; + let re = Regex::new(r"([#?].*)?").unwrap(); + path_str = re.replace_all(&path_str, "").to_string().trim().to_string(); + // filter out any empty strings caused by .split - let mut parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + let mut parts: Vec<&str> = path_str.split('/').filter(|s| !s.is_empty()).collect(); let length = parts.len(); @@ -297,7 +307,7 @@ impl<'a> Extractor<'a> { let new_url = old_url .join(link) .with_context(|| format!("Could not join {} with {}", old_url, link))?; - + log::debug!("Added link \"{}\"", new_url); links.insert(new_url.to_string()); log::trace!("exit: add_link_to_set_of_links"); @@ -463,7 +473,7 @@ impl<'a> Extractor<'a> { .filter(|a| a.value().attrs().any(|attr| attr.0 == html_attr)); for t in tags { if let Some(link) = t.value().attr(html_attr) { - log::debug!("Parsed link \"{}\" from {}", link, resp_url.as_str()); + log::debug!("Extracted link \"{}\" from {}", link, resp_url.as_str()); match Url::parse(link) { Ok(absolute) => { diff --git a/src/parser.rs b/src/parser.rs index 3bab47b..8bfcf21 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -527,6 +527,13 @@ pub fn initialize() -> App<'static> { .help_heading("Output settings") .help("Output file to write log entries (use w/ --json for JSON entries)") .takes_value(true), + ) + .arg( + Arg::new("no_state") + .long("no-state") + .takes_value(false) + .help_heading("Output settings") + .help("Disable state output file (*.state) for continuing scans") ); /////////////////////////////////////////////////////////////////////