From c6558496f0442a66ed964a53affc2ca634f528d3 Mon Sep 17 00:00:00 2001 From: arslansD <55465158+arslansD@users.noreply.github.com> Date: Thu, 30 Jun 2022 22:05:07 +0600 Subject: [PATCH 1/7] Uploading Readme file --- README.md | 61 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index c86d1e65..a0d0837c 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,35 @@ -# How to create a PR with a homework task - -1. Create fork from the following repo: https://github.com/E-P-T/Homework. (Docs: https://docs.github.com/en/get-started/quickstart/fork-a-repo ) -2. Clone your forked repo in your local folder. -3. Create separate branches for each session.Example(`session_2`, `session_3` and so on) -4. Create folder with you First and Last name in you forked repo in the created session. -5. Add your task into created folder -6. Push finished session task in the appropriate branch in accordance with written above. - You should get the structure that looks something like that - -``` - Branch: Session_2 - DzmitryKolb - |___Task1.py - |___Task2.py - Branch: Session_3 - DzmitryKolb - |___Task1.py - |___Task2.py -``` - -7. When you finish your work on task you should create Pull request to the appropriate branch of the main repo https://github.com/E-P-T/Homework (Docs: https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). -Please use the following instructions to prepare good description of the pull request: - - Pull request header should be: `Session - `. - Example: `Session 2 - Dzmitry Kolb` - - Pull request body: You should write here what tasks were implemented. 
- Example: `Finished: Task 1.2, Task 1.3, Task 1.6` +## RSS Reader +### Requirements: +Install requirements using: `pip install -r .\requirements.txt` + +### Setup: +#### Virtual Environment (Optional) +Create Virtual Environment\ +Linux: `virtualenv venv` +Windows: `python -m venv ./venv`\ + +Activate Virtual Environment:\ +Linux: `source venv/bin/activate` +Windows: `./venv/Scripts/activate`\ + +#### Pip Usage: +Update pip:\ +`python -m pip install --upgrade pip` + +### Run Application: +Run `python ./rss_reader.py -h` to find available options + +### Cache +Application stores RSS Feed using the built-in pickle module, which is located in the root directory of the project. +Particularly, we are using it to convert Python objects into a byte stream to store it in our database. +For more information regarding its usage, refer to the [official documentation](https://docs.python.org/3/library/pickle.html). + +### Run Tests: +Tests for this project can mainly be found in the fourth version/iteration of the task. + +### Package distributive: +To install package distributive you can install sudo, to be able to accept it as the system-wide CLI. + +### Output format +The project supports HTML, which means you are able to export news to the HTML5 format. 
From 8e14dc2c42f55a5a703e2192b888282398808a16 Mon Sep 17 00:00:00 2001 From: arslansD <55465158+arslansD@users.noreply.github.com> Date: Thu, 30 Jun 2022 22:11:27 +0600 Subject: [PATCH 2/7] Added json structure description --- README.md | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a0d0837c..899f375d 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,22 @@ ## RSS Reader -### Requirements: -Install requirements using: `pip install -r .\requirements.txt` - ### Setup: -#### Virtual Environment (Optional) +#### Virtual Environment Create Virtual Environment\ -Linux: `virtualenv venv` +Linux: `virtualenv venv`\ Windows: `python -m venv ./venv`\ Activate Virtual Environment:\ -Linux: `source venv/bin/activate` +Linux: `source venv/bin/activate`\ Windows: `./venv/Scripts/activate`\ #### Pip Usage: Update pip:\ `python -m pip install --upgrade pip` +### Requirements: +Install requirements using: `pip install -r .\requirements.txt` + ### Run Application: Run `python ./rss_reader.py -h` to find available options @@ -33,3 +33,16 @@ To install package distributive you can install sudo, to be able able to accept ### Output format The project supports HTML, which means you are able to export news to the HTML5 format. 
+ +### Json structure +Json structure looks as follows: +``` +{ + "title": string, + "date": datetime, + "link": string, + "image": string, + "channel": string, + "source": string +} +``` From 78053b02e1818fb98e7c503c675d96203df5dc1c Mon Sep 17 00:00:00 2001 From: arslansD Date: Thu, 30 Jun 2022 22:27:19 +0600 Subject: [PATCH 3/7] First iteration added --- .idea/.gitignore | 3 + .idea/Final-Task.iml | 8 ++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 4 + .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 + version1/__init__.py | 0 version1/commands | 1 + version1/requiremets.txt | 2 + version1/rss_reader.py | 124 ++++++++++++++++++ 10 files changed, 162 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/Final-Task.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 version1/__init__.py create mode 100644 version1/commands create mode 100644 version1/requiremets.txt create mode 100644 version1/rss_reader.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/Final-Task.iml b/.idea/Final-Task.iml new file mode 100644 index 00000000..d0876a78 --- /dev/null +++ b/.idea/Final-Task.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..d56657ad --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml 
b/.idea/modules.xml new file mode 100644 index 00000000..f76fec37 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/version1/__init__.py b/version1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/version1/commands b/version1/commands new file mode 100644 index 00000000..e978bade --- /dev/null +++ b/version1/commands @@ -0,0 +1 @@ +pip install . -r requirements.txt \ No newline at end of file diff --git a/version1/requiremets.txt b/version1/requiremets.txt new file mode 100644 index 00000000..d5567799 --- /dev/null +++ b/version1/requiremets.txt @@ -0,0 +1,2 @@ +httpx==0.22.0 + diff --git a/version1/rss_reader.py b/version1/rss_reader.py new file mode 100644 index 00000000..b81dbbac --- /dev/null +++ b/version1/rss_reader.py @@ -0,0 +1,124 @@ +import argparse +import datetime +import json +import xml.etree.ElementTree as ET +from typing import Optional + +import httpx + +PARSER = argparse.ArgumentParser(description='Pure Python command-line RSS reader.', prog="RSS Reader") +PARSER.add_argument('source', type=str, help="RSS URL") +PARSER.add_argument('--limit', type=int, help="Limit news topics if this parameter provided") +PARSER.add_argument('--version', action='version', version='%(prog)s 1.1') +PARSER.add_argument('--json', action='count', default=0, help="Print result as JSON in stdout") +PARSER.add_argument('--verbose', action='count', default=0, help="Outputs verbose status messages") + +DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + + +class RSSNews: + + def __init__( + self, + title: str, + link: str, + pubDate: str, + source: str, + channel: str, + *args, **kwargs, + ): + self.pubDate = datetime.datetime.strptime(pubDate, DATETIME_FORMAT) + self.title = title + self.link = link + self.source = source + 
self.channel = channel + + def to_dict(self): + return { + "pubDate": self.pubDate.strftime(DATETIME_FORMAT), + "title": self.title, + "link": self.link, + "source": self.source, + "channel": self.channel, + } + + +class RSS20Parser: + + def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool): + self.xml_tree = xml_tree + self.limit = limit + self.verbose = verbose + + def parse(self) -> list[RSSNews]: + if self.verbose: + print("LOG: Parsing the data of the RSS 2.0 format") + news = [] + channel = self.xml_tree.find("channel/title").text + for i, item in enumerate(self.xml_tree.findall("./channel/item")): + if self.limit and i == self.limit: + return news + single_news = {} + for elem in item: + try: + single_news[elem.tag] = elem.text + except AttributeError: + pass + + news.append(RSSNews(**single_news, channel=channel)) + print("LOG: Finished parsing") + return news + + +def get_xml_response(url: str, verbose: bool): + if verbose: + print(f"LOG: Querying data from source: {url}") + response = httpx.get(url) + if verbose: + print(f"LOG: Queried data from source: {url}") + return response.text + + +def parse_xml(xml_data: str, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]: + root = ET.fromstring(xml_data) + if verbose: + print("LOG: Starting parser block") + match root.attrib['version']: + case '2.0': + parser = RSS20Parser(root, limit, verbose) + data = parser.parse() + return data + case _: + print("Not a valid or supported xml RSS feed!") + return [] + + +def format_console_text(news: list[RSSNews]): + result = "" + for i in news: + result += f"""\nFeed: {i.channel}\n\nTitle: {i.title}\nDate: {i.pubDate}\nLink: {i.link}\n""" + return result + + +def format_json(news: list[RSSNews]): + return json.dumps([i.to_dict() for i in news]) + + +def main(): + args = PARSER.parse_args() + verbose = args.verbose + json_output = args.json + result = get_xml_response(args.source, verbose) + news = parse_xml(xml_data=result, limit=args.limit, 
verbose=verbose) + if not json_output: + if verbose: + print("LOG: Preparing text formatted output") + print(format_console_text(news)) + else: + if verbose: + print("LOG: Preparing json formatted output") + print(format_json(news)) + + +if __name__ == '__main__': + main() From b298010189cc4eb1bdb6b17b95fe15412e9fb352 Mon Sep 17 00:00:00 2001 From: arslansD Date: Thu, 30 Jun 2022 22:27:58 +0600 Subject: [PATCH 4/7] Second iteration added --- version2/__init__.py | 0 version2/commands | 1 + version2/requirements.txt | 1 + version2/rss_reader.py | 122 ++++++++++++++++++++++++++++++++++++++ version2/setup.py | 20 +++++++ 5 files changed, 144 insertions(+) create mode 100644 version2/__init__.py create mode 100644 version2/commands create mode 100644 version2/requirements.txt create mode 100644 version2/rss_reader.py create mode 100644 version2/setup.py diff --git a/version2/__init__.py b/version2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/version2/commands b/version2/commands new file mode 100644 index 00000000..9bbd8d71 --- /dev/null +++ b/version2/commands @@ -0,0 +1 @@ +pip install . 
\ No newline at end of file diff --git a/version2/requirements.txt b/version2/requirements.txt new file mode 100644 index 00000000..be5be59e --- /dev/null +++ b/version2/requirements.txt @@ -0,0 +1 @@ +httpx==0.22.0 diff --git a/version2/rss_reader.py b/version2/rss_reader.py new file mode 100644 index 00000000..82824760 --- /dev/null +++ b/version2/rss_reader.py @@ -0,0 +1,122 @@ +import argparse +import datetime +import json +import xml.etree.ElementTree as ET +from typing import Optional + +import httpx + +PARSER = argparse.ArgumentParser(description='Pure Python command-line RSS reader.', prog="RSS Reader") +PARSER.add_argument('source', type=str, help="RSS URL") +PARSER.add_argument('--limit', type=int, help="Limit news topics if this parameter provided") +PARSER.add_argument('--version', action='version', version='%(prog)s 1.1') +PARSER.add_argument('--json', action='count', default=0, help="Print result as JSON in stdout") +PARSER.add_argument('--verbose', action='count', default=0, help="Outputs verbose status messages") + +DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + + +class RSSNews: + + def __init__( + self, + title: str, + link: str, + pubDate: str, + source: str, + channel: str, + *args, **kwargs, + ): + self.pubDate = datetime.datetime.strptime(pubDate, DATETIME_FORMAT) + self.title = title + self.link = link + self.source = source + self.channel = channel + + def to_dict(self): + return { + "pub_date": self.pubDate.strftime(DATETIME_FORMAT), + "title": self.title, + "link": self.link, + "source": self.source, + "channel": self.channel, + } + + +class RSS20Parser: + + def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool): + self.xml_tree = xml_tree + self.limit = limit + self.verbose = verbose + + def parse(self) -> list[RSSNews]: + if self.verbose: + print("LOG: Parsing the data of the RSS 2.0 format") + news = [] + channel = self.xml_tree.find("channel/title").text + for i, item in enumerate(self.xml_tree.findall("./channel/item")): + if 
self.limit and i == self.limit: + return news + single_news = {} + for elem in item: + try: + single_news[elem.tag] = elem.text.encode('utf8') + except AttributeError: + pass + + news.append(RSSNews(**single_news, channel=channel)) + print("LOG: Finished parsing") + return news + + +def get_xml_response(url: str, verbose: bool): + if verbose: + print(f"LOG: Querying data from source: {url}") + response = httpx.get(url) + if verbose: + print(f"LOG: Queried data from source: {url}") + return response.text + + +def parse_xml(xml_data: str | bytes, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]: + root = ET.fromstring(xml_data) + if verbose: + print("LOG: Starting parser block") + match root.attrib['version']: + case '2.0': + parser = RSS20Parser(root, limit, verbose) + data = parser.parse() + return data + case _: + print("Not a valid or supported xml RSS feed!") + + +def format_console_text(news: list[RSSNews]): + for i in news: + result = f"""\nFeed: {i.channel}\n\nTitle: {i.title}\nDate: {i.pubDate}\nLink: {i.link}\n""" + print(result) + + +def format_json(news: list[RSSNews]): + return json.dumps([i.to_dict() for i in news]) + + +def main(): + args = PARSER.parse_args() + verbose = args.verbose + json_output = args.json + result = get_xml_response(args.source, verbose) + news = parse_xml(xml_data=result, limit=args.limit, verbose=verbose) + if not json_output: + if verbose: + print("LOG: Preparing text formatted output") + format_console_text(news) + else: + if verbose: + print("LOG: Preparing json formatted output") + print(format_json(news)) + + +if __name__ == '__main__': + main() diff --git a/version2/setup.py b/version2/setup.py new file mode 100644 index 00000000..47c8944b --- /dev/null +++ b/version2/setup.py @@ -0,0 +1,20 @@ +import setuptools + +with open("requirements.txt", "r", encoding="utf-8") as fh: + requirements = fh.read() + +setuptools.setup( + include_package_data=True, + name="RSS Reader", + version="2.0.0", + description="Pure 
Python command-line RSS reader.", + author="arslan", + packages=setuptools.find_packages(), + install_requires=[requirements], + py_modules=["rss_reader"], + entry_points=''' + [console_scripts] + rss_reader=rss_reader:main + ''', + python_requires='>=3.9' +) From a925a251792c3f6f99a50cee6f9f455c9f44419b Mon Sep 17 00:00:00 2001 From: arslansD Date: Thu, 30 Jun 2022 22:28:30 +0600 Subject: [PATCH 5/7] Third iteration added --- version3/__init__.py | 0 version3/commands | 1 + version3/requirements.txt | 1 + version3/rss_reader.py | 202 ++++++++++++++++++++++++++++++++++++++ version3/setup.py | 20 ++++ 5 files changed, 224 insertions(+) create mode 100644 version3/__init__.py create mode 100644 version3/commands create mode 100644 version3/requirements.txt create mode 100644 version3/rss_reader.py create mode 100644 version3/setup.py diff --git a/version3/__init__.py b/version3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/version3/commands b/version3/commands new file mode 100644 index 00000000..9bbd8d71 --- /dev/null +++ b/version3/commands @@ -0,0 +1 @@ +pip install . 
\ No newline at end of file diff --git a/version3/requirements.txt b/version3/requirements.txt new file mode 100644 index 00000000..be5be59e --- /dev/null +++ b/version3/requirements.txt @@ -0,0 +1 @@ +httpx==0.22.0 diff --git a/version3/rss_reader.py b/version3/rss_reader.py new file mode 100644 index 00000000..3b34e335 --- /dev/null +++ b/version3/rss_reader.py @@ -0,0 +1,202 @@ +import argparse +import datetime +import json +import pickle +import xml.etree.ElementTree as ET +from os.path import exists +from typing import Optional + +import httpx + +DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +DATE_FORMAT = "%Y-%m-%d" + +PARSER = argparse.ArgumentParser(description='Pure Python command-line RSS reader.', prog="RSS Reader") +PARSER.add_argument('source', nargs='?', type=str, help="RSS URL") +PARSER.add_argument('--limit', type=int, help="Limit news topics if this parameter provided") +PARSER.add_argument('--version', action='version', version='%(prog)s 1.1') +PARSER.add_argument('--json', action='count', default=0, help="Print result as JSON in stdout") +PARSER.add_argument('--verbose', action='count', default=0, help="Outputs verbose status messages") +PARSER.add_argument( + '--date', type=datetime.date.fromisoformat, dest="date", + help=f"Get news from specific date from archives in format {DATE_FORMAT}" +) + +CACHE_OBJECT = "local_cache.pickle" + + +class RSSNews: + + def __init__( + self, + title: str, + link: str, + pubDate: str, + source: str, + channel: str, + *args, **kwargs, + ): + self.pubDate = datetime.datetime.strptime(pubDate, DATETIME_FORMAT) + self.title = title + self.link = link + self.source = source + self.channel = channel + + def to_dict(self) -> dict: + return { + "pubDate": self.pubDate.strftime(DATETIME_FORMAT), + "title": self.title, + "link": self.link, + "source": self.source, + "channel": self.channel, + } + + +class RSS20Parser: + + def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool): + self.xml_tree = xml_tree + self.limit 
= limit + self.verbose = verbose + + def parse(self) -> list[RSSNews]: + if self.verbose: + print("LOG: Parsing the data of the RSS 2.0 format") + news = [] + channel = self.xml_tree.find("channel/title").text + for i, item in enumerate(self.xml_tree.findall("./channel/item")): + if self.limit and i == self.limit - 1: + return news + single_news = {} + for elem in item: + try: + single_news[elem.tag] = elem.text + except AttributeError: + pass + + news.append(RSSNews(**single_news, channel=channel)) + if self.verbose: + print("LOG: Finished parsing") + + return news + + +def save_news_to_local_cache(news: list[RSSNews], verbose: bool): + file_exists = exists(CACHE_OBJECT) + if verbose: + print("LOG: Saving data to local cache") + if file_exists: + if verbose: + print("LOG: Local cache file exists, opening") + with open(CACHE_OBJECT, 'rb+') as cache_file: + try: + cache_data: dict = pickle.load(cache_file) + except EOFError: + cache_data = {} + data = append_to_existing_data(cache_data, news) + pickle.dump(data, cache_file) + else: + if verbose: + print("LOG: Local cache doesn't exists, creating") + with open(CACHE_OBJECT, 'xb+') as cache_file: + data = append_to_existing_data({}, news) + pickle.dump(data, cache_file) + if verbose: + print("LOG: Saved data to file") + + +def append_to_existing_data(data: dict, news: list[RSSNews]) -> dict[str, list]: + for i in news: + news_dict = i.to_dict() + pub_date = i.pubDate.strftime(DATE_FORMAT) + if pub_date in data: + data[pub_date].append(news_dict) + else: + data[pub_date] = [news_dict] + + return data + + +def get_xml_response(url: str, verbose: bool): + if verbose: + print(f"LOG: Querying data from source: {url}") + response = httpx.get(url) + if verbose: + print(f"LOG: Queried data from source: {url}") + return response.text + + +def parse_xml(xml_data: str | bytes, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]: + root = ET.fromstring(xml_data) + if verbose: + print("LOG: Starting parser block") + match 
root.attrib['version']: + case '2.0': + parser = RSS20Parser(root, limit, verbose) + data = parser.parse() + return data + case _: + print("Not a valid or supported xml RSS feed!") + + +def format_console_text(news: list[RSSNews]): + for i in news: + result = f"""\nFeed: {i.channel}\n\nTitle: {i.title}\nDate: {i.pubDate}\nLink: {i.link}\n""" + print(result) + + +def format_json(news: list[RSSNews]): + return json.dumps([i.to_dict() for i in news]) + + +def get_news_from_archive(date: datetime.date, limit: int, verbose: bool): + if verbose: + print("LOG: Loading data from local cache") + news = [] + try: + with open(CACHE_OBJECT, "rb") as cache_file: + local_data = pickle.load(cache_file) + date_str = date.strftime(DATE_FORMAT) + if date_str not in local_data: + print("No news for given date") + else: + for i, val in enumerate(local_data[date_str]): + if limit and i == limit - 1: + break + news.append(RSSNews(**val)) + except FileNotFoundError: + if verbose: + print("WARNING: Local cache file was not created, means no news been loaded") + print("No news in local cache") + return news + + +def main(): + args = PARSER.parse_args() + source = args.source + verbose = args.verbose + json_output = args.json + date = args.date + if not source and not date: + print("Source must be specified") + return + if date: + news = get_news_from_archive(date=date, limit=args.limit, verbose=bool(verbose)) + else: + result = get_xml_response(args.source, verbose) + news = parse_xml(xml_data=result, limit=args.limit, verbose=verbose) + save_news_to_local_cache(news=news, verbose=verbose) + if not news: + return + if not json_output: + if verbose: + print("LOG: Preparing text formatted output") + format_console_text(news) + else: + if verbose: + print("LOG: Preparing json formatted output") + print(format_json(news)) + + +if __name__ == '__main__': + main() diff --git a/version3/setup.py b/version3/setup.py new file mode 100644 index 00000000..47c8944b --- /dev/null +++ 
b/version3/setup.py @@ -0,0 +1,20 @@ +import setuptools + +with open("requirements.txt", "r", encoding="utf-8") as fh: + requirements = fh.read() + +setuptools.setup( + include_package_data=True, + name="RSS Reader", + version="2.0.0", + description="Pure Python command-line RSS reader.", + author="arslan", + packages=setuptools.find_packages(), + install_requires=[requirements], + py_modules=["rss_reader"], + entry_points=''' + [console_scripts] + rss_reader=rss_reader:main + ''', + python_requires='>=3.9' +) From d8d36285f95bb8f86851a657baf5abeb679dea0e Mon Sep 17 00:00:00 2001 From: arslansD Date: Thu, 30 Jun 2022 22:29:15 +0600 Subject: [PATCH 6/7] Fourth iteration and tests added --- version4/__init__.py | 0 version4/commands | 1 + version4/news_template.html | 22 ++++ version4/requirements.txt | 2 + version4/rss_reader.py | 235 ++++++++++++++++++++++++++++++++++++ version4/setup.py | 20 +++ version4/tests.py | 78 ++++++++++++ 7 files changed, 358 insertions(+) create mode 100644 version4/__init__.py create mode 100644 version4/commands create mode 100644 version4/news_template.html create mode 100644 version4/requirements.txt create mode 100644 version4/rss_reader.py create mode 100644 version4/setup.py create mode 100644 version4/tests.py diff --git a/version4/__init__.py b/version4/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/version4/commands b/version4/commands new file mode 100644 index 00000000..9bbd8d71 --- /dev/null +++ b/version4/commands @@ -0,0 +1 @@ +pip install . \ No newline at end of file diff --git a/version4/news_template.html b/version4/news_template.html new file mode 100644 index 00000000..6fc61ae4 --- /dev/null +++ b/version4/news_template.html @@ -0,0 +1,22 @@ + + + + + RSS Reader + + +
+
    + {% for obj in news %} +
  • +

    {{ obj.title }}

    + +
    Source: {{ obj.source }}
    +
    Channel: {{ obj.channel }}
    + Link to the article +
  • + {% endfor %} +
+
+ + \ No newline at end of file diff --git a/version4/requirements.txt b/version4/requirements.txt new file mode 100644 index 00000000..802568b4 --- /dev/null +++ b/version4/requirements.txt @@ -0,0 +1,2 @@ +httpx==0.22.0 +Jinja2==3.1.2 diff --git a/version4/rss_reader.py b/version4/rss_reader.py new file mode 100644 index 00000000..e613a34a --- /dev/null +++ b/version4/rss_reader.py @@ -0,0 +1,235 @@ +import argparse +import datetime +import json +import pickle +import xml.etree.ElementTree as ET +from os.path import exists +from pprint import pprint +from typing import Optional + +import httpx +from jinja2 import Environment, PackageLoader, select_autoescape + +JINJA_ENV = Environment( + loader=PackageLoader(package_path="./", package_name="rss_reader"), + autoescape=select_autoescape() +) + +DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +DATE_FORMAT = "%Y-%m-%d" + +PARSER = argparse.ArgumentParser(description='Pure Python command-line RSS reader.', prog="RSS Reader") +PARSER.add_argument('source', nargs='?', type=str, help="RSS URL") +PARSER.add_argument('--limit', type=int, help="Limit news topics if this parameter provided") +PARSER.add_argument('--version', action='version', version='%(prog)s 1.1') +PARSER.add_argument('--json', action='count', default=0, help="Print result as JSON in stdout") +PARSER.add_argument('--verbose', action='count', default=0, help="Outputs verbose status messages") +PARSER.add_argument( + '--date', type=datetime.date.fromisoformat, dest="date", + help=f"Get news from specific date from archives in format {DATE_FORMAT}" +) +PARSER.add_argument( + "--to-html", type=str, dest="to_html", default=None, help="Convert news format to html document!" 
+) + +CACHE_OBJECT = "local_cache.pickle" + + +class RSSNews: + + def __init__( + self, + title: str, + link: str, + pubDate: str, + source: str, + channel: str, + image: Optional[str] = None, + *args, **kwargs, + ): + self.pubDate = datetime.datetime.strptime(pubDate, DATETIME_FORMAT) + self.title = title + self.link = link + self.source = source + self.channel = channel + self.image = image + + def to_dict(self) -> dict: + return { + "pubDate": self.pubDate.strftime(DATETIME_FORMAT), + "title": self.title, + "link": self.link, + "source": self.source, + "channel": self.channel, + "image": self.image, + } + + +class RSS20Parser: + + def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool): + self.xml_tree = xml_tree + self.limit = limit + self.verbose = verbose + + def parse(self) -> list[RSSNews]: + if self.verbose: + print("LOG: Parsing the data of the RSS 2.0 format") + news = [] + channel = self.xml_tree.find("channel/title").text + for i, item in enumerate(self.xml_tree.findall("./channel/item")): + if self.limit and i == self.limit: + return news + single_news = {} + for elem in item: + try: + if "content" in elem.tag: + single_news["image"] = elem.attrib["url"] + else: + single_news[elem.tag] = elem.text + except AttributeError: + pass + + news.append(RSSNews(**single_news, channel=channel)) + if self.verbose: + print("LOG: Finished parsing") + + return news + + +def save_news_to_local_cache(news: list[RSSNews], verbose: bool): + file_exists = exists(CACHE_OBJECT) + if verbose: + print("LOG: Saving data to local cache") + if file_exists: + if verbose: + print("LOG: Local cache file exists, opening") + with open(CACHE_OBJECT, 'rb+') as cache_file: + try: + cache_data: dict = pickle.load(cache_file) + except EOFError: + cache_data = {} + data = append_to_existing_data(cache_data, news) + pickle.dump(data, cache_file) + else: + if verbose: + print("LOG: Local cache doesn't exists, creating") + with open(CACHE_OBJECT, 'xb+') as cache_file: + data = 
append_to_existing_data({}, news) + pickle.dump(data, cache_file) + if verbose: + print("LOG: Saved data to cache") + + +def append_to_existing_data(data: dict, news: list[RSSNews]) -> dict[str, list]: + for i in news: + news_dict = i.to_dict() + pub_date = i.pubDate.strftime(DATE_FORMAT) + if pub_date in data: + data[pub_date].append(news_dict) + else: + data[pub_date] = [news_dict] + + return data + + +def get_xml_response(url: str, verbose: bool): + if verbose: + print(f"LOG: Querying data from source: {url}") + response = httpx.get(url) + if verbose: + print(f"LOG: Queried data from source: {url}") + return response.text + + +def parse_xml(xml_data: str | bytes, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]: + root = ET.fromstring(xml_data) + if verbose: + print("LOG: Starting parser block") + match root.attrib['version']: + case '2.0': + parser = RSS20Parser(root, limit, verbose) + data = parser.parse() + return data + case _: + print("Not a valid or supported xml RSS feed!") + return [] + + +def format_console_text(news: list[RSSNews]): + result = "" + for i in news: + result += f"""\nFeed: {i.channel}\n\nTitle: {i.title}\nDate: {i.pubDate}\nLink: {i.link}\n""" + + return result + + +def format_json(news: list[RSSNews]): + return json.dumps([i.to_dict() for i in news]) + + +def get_news_from_archive(date: datetime.date, limit: int, verbose: bool): + if verbose: + print("LOG: Loading data from local cache") + news = [] + try: + with open(CACHE_OBJECT, "rb") as cache_file: + local_data = pickle.load(cache_file) + date_str = date.strftime(DATE_FORMAT) + if date_str not in local_data: + print("No news for given date") + else: + for i, val in enumerate(local_data[date_str]): + if limit and i == limit: + break + news.append(RSSNews(**val)) + except FileNotFoundError: + if verbose: + print("WARNING: Local cache file was not created, means no news been loaded") + print("No news in local cache") + return news + + +def create_html_template(news, path, 
verbose): + template = JINJA_ENV.get_template("news_template.html") + if verbose: + print("LOG: Loading html template from filesystem") + with open(path, 'w+') as html_file: + html_file.write(template.render(news=news)) + if verbose: + print("LOG: Wrote content to html file!") + + +def main(): + args = PARSER.parse_args() + source = args.source + verbose = args.verbose + json_output = args.json + date = args.date + html_path = args.to_html + if not source and not date: + print("Source must be specified") + return + if date: + news = get_news_from_archive(date=date, limit=args.limit, verbose=bool(verbose)) + else: + result = get_xml_response(args.source, verbose) + news = parse_xml(xml_data=result, limit=args.limit, verbose=verbose) + save_news_to_local_cache(news=news, verbose=verbose) + if not news: + return + if not json_output: + if verbose: + print("LOG: Preparing text formatted output") + print(format_console_text(news)) + else: + if verbose: + print("LOG: Preparing json formatted output") + pprint(format_json(news)) + if html_path: + create_html_template(news, html_path, verbose) + print(f"Successfully wrote news to the {html_path}") + + +if __name__ == '__main__': + main() diff --git a/version4/setup.py b/version4/setup.py new file mode 100644 index 00000000..47c8944b --- /dev/null +++ b/version4/setup.py @@ -0,0 +1,20 @@ +import setuptools + +with open("requirements.txt", "r", encoding="utf-8") as fh: + requirements = fh.read() + +setuptools.setup( + include_package_data=True, + name="RSS Reader", + version="2.0.0", + description="Pure Python command-line RSS reader.", + author="arslan", + packages=setuptools.find_packages(), + install_requires=[requirements], + py_modules=["rss_reader"], + entry_points=''' + [console_scripts] + rss_reader=rss_reader:main + ''', + python_requires='>=3.9' +) diff --git a/version4/tests.py b/version4/tests.py new file mode 100644 index 00000000..fdeceb1a --- /dev/null +++ b/version4/tests.py @@ -0,0 +1,78 @@ +import unittest 
+ +from rss_reader import RSSNews, format_console_text, parse_xml + + +def get_sample_news_object(): + return RSSNews( + title="test", + link="test", + pubDate="2022-06-28T22:01:51Z", + source="test", + channel="test" + ) + + +class RssReaderTestCase(unittest.TestCase): + + def test_to_dict_return_dict_of_the_class(self): + news = get_sample_news_object() + + self.assertEqual( + news.to_dict(), { + "title": "test", + "link": "test", + "pubDate": "2022-06-28T22:01:51Z", + "source": "test", + "channel": "test", + "image": None, + } + ) + + def test_formatted_output_is_correct(self): + news = get_sample_news_object() + + result = format_console_text([news]) + expected_string = f"\nFeed: {news.channel}\n\nTitle: {news.title}\nDate: {news.pubDate}\nLink: {news.link}\n" + + self.assertEqual(result, expected_string) + + def test_xml_parser_chooses_proper_rss_format(self): + xml_string = """ + + Yahoo News - Latest News Headlines + https://www.yahoo.com/news + + The latest news and headlines from Yahoo! News. Get breaking news stories and in-depth coverage with videos and photos. + + en-US + Copyright (c) 2022 Yahoo! Inc. 
All rights reserved + Thu, 30 Jun 2022 07:45:03 -0400 + 5 + + Yahoo News - Latest News Headlines + https://www.yahoo.com/news + + http://l.yimg.com/rz/d/yahoo_news_en-US_s_f_p_168x21_news.png + + + + Spit, 'disrespect' arrive at Wimbledon as tennis turns ugly + https://news.yahoo.com/spit-disrespect-arrive-wimbledon-tennis-220151441.html + 2022-06-28T22:01:51Z + Associated Press + + spit-disrespect-arrive-wimbledon-tennis-220151441.html + + + + + """ + news = parse_xml(xml_data=xml_string, verbose=False, limit=0) + + self.assertEqual(news[0].to_dict(), RSSNews(pubDate="2022-06-28T22:01:51Z", + title="Spit, 'disrespect' arrive at Wimbledon as tennis turns ugly", + link="https://news.yahoo.com/spit-disrespect-arrive-wimbledon-tennis-220151441.html", + source="Associated Press", + channel="Yahoo News - Latest News Headlines" + ).to_dict()) From 3d2356b522920aa7cd1591fa1ff217d98557d447 Mon Sep 17 00:00:00 2001 From: arslansD Date: Thu, 30 Jun 2022 22:39:09 +0600 Subject: [PATCH 7/7] Triggering workflow --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 899f375d..0bb451af 100644 --- a/README.md +++ b/README.md @@ -46,3 +46,4 @@ Json structure looks as follows: "source": string } ``` +