-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02_crawl.py
More file actions
28 lines (22 loc) · 824 Bytes
/
02_crawl.py
File metadata and controls
28 lines (22 loc) · 824 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python3
"""Example: Crawl a website and get all discovered pages.
This example crawls a website starting from a seed URL, following links
up to a configurable depth, and returns scraped content from each page.
"""
from quickcrawl import QuickCrawlClient
def main():
    """Crawl https://example.com and print a numbered list of the pages found.

    Opens a ``QuickCrawlClient`` as a context manager, runs ``crawl`` from
    the seed URL with ``max_depth=2`` and ``max_pages=5``, then prints how
    many results came back followed by each result's title and source URL.
    """
    with QuickCrawlClient() as client:
        pages = client.crawl(
            "https://example.com",
            max_depth=2,
            max_pages=5,
        )
        print(f"Crawled {len(pages)} pages:\n")

        for index, page in enumerate(pages, 1):
            # Title and URL both live under "metadata"; read it once and
            # fall back to placeholder text when a field is absent.
            metadata = page.get("metadata", {})
            title = metadata.get("title", "No title")
            url = metadata.get("sourceURL", "Unknown URL")
            # Numbered title, the URL on the next line, then a blank
            # separator line.
            print(f"{index}. {title}", f" {url}", "", sep="\n")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()