-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebScrapingPython.py
More file actions
65 lines (50 loc) · 2.58 KB
/
webScrapingPython.py
File metadata and controls
65 lines (50 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from bs4 import BeautifulSoup
import requests, openpyxl
# Scrape one of two IMDb listings chosen by the user and export the rows to an
# Excel workbook.
#
# NOTE(review): the block nesting (in particular, the final "Please select..."
# branch being the alternative to a valid menu choice) was reconstructed from
# the statement order - confirm against the intended control flow.

# Seconds to wait for IMDb before giving up; without an explicit timeout
# requests.get() may block indefinitely.
REQUEST_TIMEOUT = 30

TOP_250_URL = 'https://www.imdb.com/chart/top/'
ACTION_URL = 'https://www.imdb.com/search/title/?genres=action&sort=user_rating,desc&title_type=feature&num_votes=25000,'

# Hard-coded output locations (Windows-style paths).
TOP_250_OUTPUT = 'C:\\Users\\home\\Desktop\\Filename.xlsx'
ACTION_OUTPUT = 'C:\\Users\\home\\Action.xlsx'

# Menu shown to the user: choice number -> description.
category = {1: 'Top 250 Movies of all time', 2: 'Top Action Movies'}


def parse_choice(raw):
    """Return the menu input ``raw`` as an int, or None if it is not an integer."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        return None


def fetch_soup(url):
    """Download ``url`` and return the page parsed with ``html.parser``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an HTTP
            error status (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def find_listing(soup, tag):
    """Return the ``tag`` element with class ``lister-list`` from ``soup``.

    Raises:
        LookupError: if the page contains no such element.
    """
    listing = soup.find(tag, class_='lister-list')
    if listing is None:
        raise LookupError(
            "No <%s class='lister-list'> element found; "
            "the page layout may have changed" % tag
        )
    return listing


def append_rows(sheet, items, build_row):
    """Append ``build_row(item)`` to ``sheet`` for every item in ``items``.

    An item for which ``build_row`` raises AttributeError or IndexError (an
    expected element is missing) is reported and skipped, so one malformed
    entry does not discard the rest of the listing.

    Returns:
        The number of rows appended.
    """
    added = 0
    for position, item in enumerate(items, start=1):
        try:
            row = build_row(item)
        except (AttributeError, IndexError) as e:
            print('Skipping entry', position, '-', e)
            continue
        sheet.append(row)
        added += 1
    return added


def top_250_row(movie):
    """Build ``[name, year, rating]`` from one ``<tr>`` of the Top 250 table."""
    title_cell = movie.find('td', class_='titleColumn')
    rating_cell = movie.find('td', class_='ratingColumn imdbRating')
    return [title_cell.a.text, title_cell.span.text, rating_cell.strong.text]


def action_row(movie):
    """Build one spreadsheet row from a ``lister-item`` div of the action list.

    The row is ``[index, name, rating, description, director, gross]``.
    """
    heading = movie.find('h3')
    # The index text is split on '.' and only the part before it is kept.
    index = heading.find('span', class_='lister-item-index').text.split('.')[0]
    movie_name = heading.a.text
    rating = movie.find('div', class_='ratings-bar').find('strong').text
    # The description is the <p> that follows the first 'text-muted' <p>.
    description = movie.find('p', class_='text-muted').find_next('p').text
    # NOTE(review): presumably the credits paragraph, which carries no class;
    # only its first link is used - confirm against the page markup.
    director_name = movie.find('p', class_='').a.text
    # NOTE(review): takes the last <span> of the votes/gross paragraph;
    # presumably the gross figure - confirm for titles without a gross value.
    gross = movie.find('p', class_='sort-num_votes-visible').find_all('span')[-1].text
    return [index, movie_name, rating, description, director_name, gross]


def scrape_top_250(sheet):
    """Title ``sheet``, write its header, and fill it from the Top 250 chart."""
    sheet.title = 'Movies'
    sheet.append(['Movie Name', 'year', 'Rating'])
    movies = find_listing(fetch_soup(TOP_250_URL), 'tbody').find_all('tr')
    append_rows(sheet, movies, top_250_row)


def scrape_action(sheet):
    """Title ``sheet``, write its header, and fill it from the action search."""
    sheet.title = 'Action movie list'
    sheet.append(['index', 'Movie Name', 'Rating', 'Description', 'Director Name', 'Gross Volume'])
    listing = find_listing(fetch_soup(ACTION_URL), 'div')
    movies = listing.find_all('div', class_='lister-item')
    append_rows(sheet, movies, action_row)


# Menu choice -> (function that fills the sheet, path the workbook is saved to).
SCRAPERS = {
    1: (scrape_top_250, TOP_250_OUTPUT),
    2: (scrape_action, ACTION_OUTPUT),
}


def main():
    """Show the menu, run the selected scraper, and save the workbook."""
    print('Web Scraping in IMDB and exporting results to Excel\n')
    for key, value in category.items():
        print(key, '-', value)

    # Non-numeric and out-of-range input both end up as "no job".
    job = SCRAPERS.get(parse_choice(input('Select your choice: ')))
    if job is None:
        print('Please select only from the above given list')
        return

    scraper, output_path = job
    excel = openpyxl.Workbook()
    sheet = excel.active

    try:
        scraper(sheet)
    except Exception as e:
        # Top-level boundary: report the failure and still save whatever was
        # written so far (at least the header row), as the script always did.
        print(e)

    try:
        excel.save(output_path)
        print('File generated successfully!!')
    except OSError as ee:
        print(ee)


if __name__ == '__main__':
    main()