-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebscraper.py
More file actions
36 lines (26 loc) · 1.09 KB
/
webscraper.py
File metadata and controls
36 lines (26 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from bs4 import BeautifulSoup as BS
import requests
def get_codes(url='http://orcz.com/Borderlands_Pre-Sequel:_Shift_Codes', cell=4, cellLenght=7, tableIndex=0, codeLenght=29, timeout=30):
    """Return the codes found in one column of an HTML table on a web page.

    The page at ``url`` is downloaded and parsed, the table at position
    ``tableIndex`` is selected, and every row with exactly ``cellLenght``
    ``<td>`` cells is inspected. The text of the cell at index ``cell`` is
    collected unless it is marked as expired (see below) or has the wrong
    length.

    A cell is treated as expired when it contains an element whose ``style``
    attribute is exactly ``color:red``; the text of that element is
    remembered, and any cell whose text equals a remembered value (in the
    same row or a later one) is skipped.

    Args:
        url: Address of the page to scrape.
        cell: Index of the ``<td>`` within a row that holds the code.
        cellLenght: Number of ``<td>`` cells a row must have to be
            considered (rows with any other count, such as header rows,
            are ignored). The spelling is kept for caller compatibility.
        tableIndex: Position of the table on the page.
        codeLenght: Required length of a code after whitespace is
            stripped. Pass ``0`` to disable the length check. The spelling
            is kept for caller compatibility.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. ``None`` waits indefinitely.

    Returns:
        A list of code strings with surrounding whitespace removed, in
        the order the rows appear in the table.

    Raises:
        IndexError: If the page has no table at ``tableIndex`` or a
            matching row has no cell at index ``cell``.
        requests.exceptions.RequestException: If the download fails or
            times out.
    """
    # Without a timeout an unresponsive server would block the caller forever.
    source = requests.get(url, timeout=timeout).text
    soup = BS(source, 'lxml')
    table = soup.find_all('table')[tableIndex]

    check_length = codeLenght != 0
    expired = set()  # texts of codes that were rendered in red
    codes = []

    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != cellLenght:
            continue

        target = cells[cell]
        # Strip both ends once and use the same value for every check, so
        # leading whitespace cannot defeat the length or expiry comparison.
        text = target.text.strip()

        red = target.find(style="color:red")
        if red is not None:
            expired.add(red.text.strip())

        if text in expired:
            continue
        if check_length and len(text) != codeLenght:
            continue
        codes.append(text)

    return codes