diff --git a/README.md b/README.md index d5c9048..5e328f4 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,12 @@ Here are a few examples of how to use `paramspider`: paramspider -d example.com -p '">

reflection

' ``` +- Disable updating parameter values with a placeholder: + + ```sh + paramspider -d example.com -dp +``` + ## Contributing Contributions are welcome! If you'd like to contribute to `paramspider`, please follow these steps: diff --git a/paramspider/main.py b/paramspider/main.py index 38ff9f5..609db7b 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -55,13 +55,15 @@ def clean_url(url): return parsed_url.geturl() -def clean_urls(urls, extensions, placeholder): +def clean_urls(urls, extensions, placeholder, disable_placeholder): """ Clean a list of URLs by removing unnecessary parameters and query strings. Args: urls (list): List of URLs to clean. extensions (list): List of file extensions to check against. + placeholder (str): Default placeholder for parameter values. + disable_placeholder (bool): Flag to indicate whether to update parameter values with the default placeholder. Returns: list: List of cleaned URLs. @@ -72,13 +74,16 @@ def clean_urls(urls, extensions, placeholder): if not has_extension(cleaned_url, extensions): parsed_url = urlparse(cleaned_url) query_params = parse_qs(parsed_url.query) - cleaned_params = {key: placeholder for key in query_params} + if not disable_placeholder: + cleaned_params = {key: placeholder for key in query_params} + else: + cleaned_params = query_params cleaned_query = urlencode(cleaned_params, doseq=True) cleaned_url = parsed_url._replace(query=cleaned_query).geturl() cleaned_urls.add(cleaned_url) return list(cleaned_urls) -def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): +def fetch_and_clean_urls(domain, extensions, stream_output, proxy, placeholder, disable_placeholder): """ Fetch and clean URLs related to a specific domain from the Wayback Machine. @@ -86,18 +91,21 @@ def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): domain (str): The domain name to fetch URLs for. extensions (list): List of file extensions to check against. 
stream_output (bool): True to stream URLs to the terminal. + proxy (str): Proxy address for web requests. + placeholder (str): Default placeholder for parameter values. + disable_placeholder (bool): Flag to indicate whether to update parameter values with the default placeholder. Returns: None """ logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Fetching URLs for {Fore.CYAN + domain + Style.RESET_ALL}") wayback_uri = f"https://web.archive.org/cdx/search/cdx?url={domain}/*&output=txt&collapse=urlkey&fl=original&page=/" - response = client.fetch_url_content(wayback_uri,proxy) + response = client.fetch_url_content(wayback_uri, proxy) urls = response.text.split() logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(urls)) + Style.RESET_ALL} URLs for {Fore.CYAN + domain + Style.RESET_ALL}") - cleaned_urls = clean_urls(urls, extensions, placeholder) + cleaned_urls = clean_urls(urls, extensions, placeholder, disable_placeholder) logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Cleaning URLs for {Fore.CYAN + domain + Style.RESET_ALL}") logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(cleaned_urls)) + Style.RESET_ALL} URLs after cleaning") logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Extracting URLs with parameters") @@ -137,8 +145,9 @@ def main(): parser.add_argument("-d", "--domain", help="Domain name to fetch related URLs for.") parser.add_argument("-l", "--list", help="File containing a list of domain names.") parser.add_argument("-s", "--stream", action="store_true", help="Stream URLs on the terminal.") - parser.add_argument("--proxy", help="Set the proxy address for web requests.",default=None) - parser.add_argument("-p", "--placeholder", help="placeholder for parameter values", default="FUZZ") + parser.add_argument("--proxy", help="Set the proxy address for web requests.", default=None) + parser.add_argument("-p", "--placeholder", help="Placeholder for parameter values", default="FUZZ") + 
parser.add_argument("-dp", "--disable-placeholder", action="store_true", help="Disable updating parameter values with a placeholder.") args = parser.parse_args() if not args.domain and not args.list: @@ -147,6 +156,9 @@ def main(): if args.domain and args.list: parser.error("Please provide either the -d option or the -l option, not both.") + if args.disable_placeholder and args.placeholder != parser.get_default("placeholder"): + parser.error("Please provide either the --placeholder option or the --disable-placeholder option, not both.") + if args.list: with open(args.list, "r") as f: domains = [line.strip().lower().replace('https://', '').replace('http://', '') for line in f.readlines()] @@ -158,11 +170,11 @@ def main(): extensions = HARDCODED_EXTENSIONS if args.domain: - fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder) + fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder, args.disable_placeholder) if args.list: for domain in domains: - fetch_and_clean_urls(domain, extensions, args.stream,args.proxy, args.placeholder) + fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder, args.disable_placeholder) if __name__ == "__main__": - main() \ No newline at end of file + main()