forked from E-P-T/Homework
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreader.py
More file actions
71 lines (63 loc) · 2.61 KB
/
reader.py
File metadata and controls
71 lines (63 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from loguru import logger
import requests
from bs4 import BeautifulSoup
import re
import sys
class Reader:
    """Download an XML feed (e.g. RSS) and expose its items as parallel lists.

    Constructing a ``Reader`` fetches ``source`` once, parses it and fills in:

    * ``version``            -- reader version string.
    * ``source``             -- the URL that was requested.
    * ``name``               -- text of the feed-level ``title`` element.
    * ``items``              -- every ``item`` element found in the feed.
    * ``limit``              -- how many items are exposed by the lists below.
    * ``title`` / ``pubDate`` / ``link`` / ``description`` -- per-item texts.
    * ``clear_description``  -- descriptions with markup stripped.

    If the URL cannot be fetched or does not contain a readable feed, a
    message is printed and the process exits via ``sys.exit``.
    """

    # Seconds to wait for the server. requests applies no timeout by default,
    # so an unresponsive host would otherwise block the program forever.
    REQUEST_TIMEOUT = 10

    # Placeholder used when an item has no description element.
    NO_DESCRIPTION = 'No description here'

    # Markup tags plus the "&rsaquo" entity. The optional ';' makes sure the
    # entity's terminator is removed too instead of being left behind as a
    # stray semicolon in the cleaned text.
    _MARKUP_RE = re.compile(r'<[^>]*>|&rsaquo;?')

    def __init__(self, source: str, limit=-1) -> None:
        """Fetch and parse the feed at ``source``.

        Args:
            source: URL of the feed.
            limit: maximum number of items to expose. ``-1`` (the default) or
                any value larger than the number of items means "all items".
        """
        self.version = '4.0'
        self.source = source
        # Fetch exactly once and unpack; calling get_acces() per attribute
        # would download and parse the whole feed twice.
        self.name, self.items = self.get_acces()
        logger.info("Acces is available (info)!")
        total = len(self.items)
        self.limit = total if limit == -1 or limit > total else limit
        self.title = self.get_title()
        logger.info("Title is available (info)!")
        self.pubDate = self.get_pubDate()
        logger.info("PubDate is available (info)!")
        self.link = self.get_link()
        logger.info("Link is available (info)!")
        self.clear_description = []
        self.description = self.get_description()
        logger.info("Description is available (info)!")

    def get_acces(self) -> tuple:
        """Download ``self.source`` and return ``(feed_name, items)``.

        Returns:
            A 2-tuple of the feed title text and the list of ``item`` elements.

        Raises:
            SystemExit: with a non-zero status when the request fails, the
                document cannot be parsed, or it contains no items.
        """
        logger.debug("Get access (debug)!")
        try:
            response = requests.get(self.source, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            logger.info(f"Invalid url.{self.source}(info)!")
            print('Could not fetch the URL. Input valid URL.')
            # Non-zero status so shells and scripts can detect the failure.
            sys.exit(1)
        try:
            soup = BeautifulSoup(response.content, 'xml')
            # soup.find() yields the first element of the document; a missing
            # root or title raises AttributeError and is handled below.
            name = soup.find().title.text
            items = soup.find_all('item')
            if not items:
                raise ValueError('feed contains no <item> elements')
        except Exception:
            # Deliberately broad: any parse problem means "not a usable feed".
            logger.info(f"Invalid url.{self.source}(info)!")
            print('Could not read feed. Input xml-format URL.')
            sys.exit(1)
        return name, items

    def _limited_items(self) -> list:
        """Return the first ``self.limit`` items (none if the limit is negative)."""
        # max(..., 0) keeps a negative limit from turning into a "drop the
        # last N" slice; such limits expose no items at all.
        return self.items[:max(self.limit, 0)]

    @classmethod
    def _strip_markup(cls, text: str) -> str:
        """Return ``text`` with markup tags and ``&rsaquo;`` entities removed."""
        return cls._MARKUP_RE.sub('', text)

    def get_title(self) -> list:
        """Return the title text of each exposed item."""
        logger.debug("Get title from xml (debug)!")
        return [item.title.text for item in self._limited_items()]

    def get_pubDate(self) -> list:
        """Return the pubDate text of each exposed item."""
        logger.debug("Get pubDate from xml (debug)!")
        return [item.pubDate.text for item in self._limited_items()]

    def get_link(self) -> list:
        """Return the link text of each exposed item."""
        logger.debug("Get link from xml (debug)!")
        return [item.link.text for item in self._limited_items()]

    def get_description(self) -> list:
        """Return the raw description text of each exposed item.

        As a side effect ``self.clear_description`` is rebuilt with the
        markup-free version of every description. Items without a description
        element get a placeholder in both lists.
        """
        logger.debug("Get description from xml (debug)!")
        raw = []
        cleaned = []
        for item in self._limited_items():
            tag = item.description
            if tag:
                raw.append(tag.text)
                cleaned.append(self._strip_markup(tag.text))
            else:
                raw.append(self.NO_DESCRIPTION)
                cleaned.append(self.NO_DESCRIPTION)
        # Replace rather than append so repeated calls do not accumulate
        # duplicate entries.
        self.clear_description = cleaned
        return raw