-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape_data_from_website.py
More file actions
44 lines (35 loc) · 1.28 KB
/
scrape_data_from_website.py
File metadata and controls
44 lines (35 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
from bs4 import BeautifulSoup
def fetch_page(url):
"""Fetch the content of the webpage."""
try:
response = requests.get(url)
response.raise_for_status() # Raise an HTTPError for bad responses
return response.text
except requests.RequestException as e:
print(f"Error fetching {url}: {e}")
return None
def parse_page(content):
"""Parse the webpage content and extract titles and links."""
soup = BeautifulSoup(content, 'html.parser')
results = []
for a_tag in soup.find_all('a', href=True):
title = a_tag.get_text().strip()
link = a_tag['href']
results.append((title, link))
return results
def scrape_website(url):
"""Scrape the given website for titles and links."""
print(f"Starting scrape for {url}")
page_content = fetch_page(url)
if not page_content:
print(f"Failed to retrieve content from {url}")
return
parsed_data = parse_page(page_content)
print(f"Scraping complete for {url}. Found {len(parsed_data)} links.")
for title, link in parsed_data:
print(f"Title: {title}, Link: {link}")
if __name__ == "__main__":
# Example usage
target_url = input("Please Enter URL of the Target Website: ")
scrape_website(target_url)