-
Notifications
You must be signed in to change notification settings - Fork 27
Expand file tree
/
Copy pathwaybackMachine.py
More file actions
19 lines (15 loc) · 790 Bytes
/
waybackMachine.py
File metadata and controls
19 lines (15 loc) · 790 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import json
import sys
from urllib.parse import quote

import requests
class waybackMachineClass():
    """Query the Wayback Machine CDX endpoint for URLs captured under a target.

    The request URL is built once in ``__init__``; ``getUrls`` performs the
    HTTP call and decodes the JSON reply.
    """

    def __init__(self, domain, timeout=60):
        """Build the CDX search URL and request headers for ``domain``.

        Args:
            domain: Host (optionally with a path) whose captures should be
                listed. The query uses ``matchType=prefix``.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                argument, so a stalled connection cannot block forever.
        """
        # Percent-encode the target so characters such as "&", "#", "=" or
        # spaces cannot spill into the rest of the query string.  "%" is kept
        # as-is so input that is already percent-encoded is not encoded twice.
        target = quote(domain, safe="%")
        # Query (decoded): prefix match, collapse on urlkey, JSON output,
        # fields original/mimetype/timestamp/endtimestamp/groupcount/uniqcount,
        # filter "!statuscode:[45].." and at most 100000 rows.
        self.waybackURL = (
            "https://web.archive.org/cdx/search?url=" + target
            + "%2F&matchType=prefix&collapse=urlkey&output=json"
            + "&fl=original%2Cmimetype%2Ctimestamp%2Cendtimestamp%2Cgroupcount%2Cuniqcount"
            + "&filter=!statuscode%3A%5B45%5D..&limit=100000&_=1547318148315"
        )
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'}
        self.timeout = timeout

    def getUrls(self):
        """Fetch the CDX listing and return the decoded JSON payload.

        Returns:
            The object produced by ``json.loads`` on the response body, or an
            empty list when the body is blank.
            NOTE(review): per the CDX server documentation, ``output=json``
            yields a list of rows whose first row is the field-name header;
            this method returns the rows unmodified.

        Raises:
            requests.RequestException: on connection failure, timeout, or an
                HTTP error status.
            ValueError: if a non-empty body is not valid JSON.
        """
        response = requests.get(
            self.waybackURL, headers=self.headers, timeout=self.timeout
        )
        # Surface HTTP errors directly instead of as a JSON decoding failure
        # on an error page.
        response.raise_for_status()
        body = response.text
        # A blank body is not valid JSON; treat it as "no rows".
        if not body.strip():
            return []
        return json.loads(body)
def main():
    """Print the archived URLs for the domain given as the first CLI argument.

    Exits with status 2 and a usage message on stderr when no domain is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: " + sys.argv[0] + " <domain>\n")
        sys.exit(2)

    wbm = waybackMachineClass(sys.argv[1])
    rows = wbm.getUrls()
    # With output=json the CDX server's first row is the field-name header
    # (it starts with "original"); skip it so it is not printed as a URL.
    if rows and rows[0][:1] == ["original"]:
        rows = rows[1:]
    for row in rows:
        # Column 0 is the "original" field, the first entry in the fl= list.
        print(row[0])


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()