-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdump-scripts.py
More file actions
executable file
·83 lines (69 loc) · 3.76 KB
/
Copy pathdump-scripts.py
File metadata and controls
executable file
·83 lines (69 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup as bs
import os
import hashlib
import argparse
"""
Simple Python utility to dump all scripts (external and inline) from a given page.
By: @SamuelAnttila
License: MIT
"""
def download_script(url, downloads_dir_path, headers=None, prettify=False):
    """Download the script at ``url`` into ``downloads_dir_path``.

    The local file name is the last component of the URL path with the query
    string dropped (``/asdf/file.js?123=123`` -> ``file.js``). Nothing is done
    to avoid name collisions: two URLs sharing the same final path component
    overwrite each other.

    Args:
        url: URL of the script to fetch.
        downloads_dir_path: Directory the file is written into.
        headers: Optional mapping of HTTP headers sent with the request.
        prettify: When true, the response text is passed through
            ``jsbeautifier.beautify`` before being written.

    Returns:
        The file name (not the full path) the script was saved under.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ImportError: If ``prettify`` is true and ``jsbeautifier`` is missing.
    """
    url_path = requests.compat.urlparse(url).path
    local_filename = os.path.basename(url_path)
    if not local_filename:
        # A path that is empty or ends in "/" has no basename; joining it
        # would make open() target the directory itself. Derive a stable
        # name from the URL instead.
        local_filename = hashlib.sha256(url.encode("utf-8")).hexdigest() + ".js"
    target_path = os.path.join(downloads_dir_path, local_filename)

    if prettify:
        # Imported here so the function does not depend on the caller having
        # put jsbeautifier into the module globals.
        import jsbeautifier

        # We can't stream if we want to prettify: the beautifier needs the
        # full file contents.
        res = requests.get(url, headers=headers)
        # Same policy as the streaming branch: never save an error page.
        res.raise_for_status()
        code = jsbeautifier.beautify(res.text)
        # Explicit encoding so non-ASCII scripts do not depend on the locale.
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(code)
    else:
        # Stream the download; there is no reason to hold the whole file in
        # memory when the bytes are written out unchanged.
        with requests.get(url, stream=True, headers=headers) as r:
            r.raise_for_status()
            with open(target_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    return local_filename
def ensure_dir(file_path):
    """Ensure the directory part of ``file_path`` exists, creating it if needed.

    Only ``os.path.dirname(file_path)`` is created, so a path that is itself
    the directory must end with a separator (e.g. ``"scripts/"``).

    Args:
        file_path: Path whose parent directory should exist.

    Raises:
        FileExistsError: If the directory path exists but is not a directory.
    """
    directory = os.path.dirname(file_path)
    if not directory:
        # A bare file name (or empty string) lives in the current directory;
        # os.makedirs("") would raise FileNotFoundError.
        return
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
if __name__ == "__main__":
    # Script entry point: fetch the page at URL, then save every <script> it
    # contains into a "scripts/" folder next to this file. External scripts
    # keep the file name from their URL; inline scripts are named after the
    # SHA-256 of their contents.
    import sys

    parser = argparse.ArgumentParser(description='Download all scripts from a website into a scripts/ folder underneath this script')
    parser.add_argument('url', metavar='URL', type=str, help='The url (including schema) from which to dump scripts')
    parser.add_argument('--useragent', dest='useragent', type=str, default="Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36", help='User agent to use when making requests')
    parser.add_argument('--prettify', dest='prettify', action="store_true", default=False, help='If enabled uses jsbeautify to prettify/deobfuscate all downloaded javascript. Will fail if "jsbeautifier" not installed with pip. Recommended if you\'re dealing with minified files.')
    args = parser.parse_args()

    if args.prettify:
        # Only necessary if we actually want to use this functionality. Kept
        # at module scope so the name is also visible to download_script.
        import jsbeautifier

    # Create the output folder only after the arguments parsed successfully,
    # so "--help" or a bad invocation leaves no empty directory behind.
    downloads_dirname = "scripts/"
    curr_path = os.path.dirname(os.path.realpath(__file__))
    downloads_path = os.path.join(curr_path, downloads_dirname)
    ensure_dir(downloads_path)

    request_headers = {"User-Agent": args.useragent}
    res = requests.get(args.url, headers=request_headers)
    if not res.ok:
        # Error pages can still carry scripts worth dumping, so warn rather
        # than abort.
        print(f"Warning: {args.url} answered with HTTP {res.status_code}; dumping scripts from the response anyway.", file=sys.stderr)

    soup = bs(res.text, features="html.parser")
    for script in soup.find_all("script"):
        if "src" in script.attrs:
            # Externally loaded script: resolve src against the page URL.
            download_url = requests.compat.urljoin(args.url, script.attrs["src"])
            saved_name = download_script(download_url, downloads_path, headers=request_headers, prettify=args.prettify)
            print(f'Downloaded {saved_name}')
        else:
            # Inline script.
            inline_source = script.text
            print(inline_source)
            # To give all inline scripts a unique name we take the hash of
            # their contents. Only identical scripts should collide.
            m = hashlib.sha256()
            m.update(inline_source.encode("utf32"))
            local_filename = m.hexdigest()
            # Explicit encoding so non-ASCII scripts do not depend on the locale.
            with open(os.path.join(downloads_path, local_filename) + ".js", 'w', encoding="utf-8") as f:
                if args.prettify:
                    f.write(jsbeautifier.beautify(inline_source))
                else:
                    f.write(inline_source)
    print("Done downloading scripts. They should be under the 'scripts/' folder.")