E-P-T · arslansD · Jun 30, 2022 · Jun 30, 2022 · Jun 30, 2022 · Jun 30, 2022
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/Final-Task.iml b/.idea/Final-Task.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -1,28 +1,49 @@
-# How to create a PR with a homework task
+## RSS Reader
 
-1. Create fork from the following repo: https://github.com/E-P-T/Homework. (Docs: https://docs.github.com/en/get-started/quickstart/fork-a-repo )
-2. Clone your forked repo in your local folder.
-3. Create separate branches for each session.Example(`session_2`, `session_3` and so on)
-4. Create folder with you First and Last name in you forked repo in the created session.
-5. Add your task into created folder
-6. Push finished session task in the appropriate branch in accordance with written above.
- You should get the structure that looks something like that
+### Setup:
+#### Virtual Environment
+Create Virtual Environment\
+Linux: `virtualenv venv`\
+Windows: `python -m venv ./venv`\
 
+Activate Virtual Environment:\
+Linux: `source venv/bin/activate`\
+Windows: `./venv/Scripts/activate`\
+
+#### Pip Usage:
+Update pip:\
+`python -m pip install --upgrade pip`
+
+### Requirements: 
+Install requirements using: `pip install -r .\requirements.txt`
+
+### Run Application:
+Run `python ./rss_reader.py -h` to find available options
+
+### Cache
+The application caches the RSS feed with Python's built-in pickle module; the cache is located in the root directory of the project.
+In particular, pickle is used to convert Python objects into a byte stream so that they can be stored in our database.
+For more information regarding its usage, refer to the [official documentation](https://docs.python.org/3/library/pickle.html). 
+
+### Run Tests:
+Tests for this project can mainly be found in the fourth version/iteration of the task.
+
+### Package distributive:
+To install the package distribution, you can install it with sudo so that it is available as a system-wide CLI.
+
+### Output format
+The project supports HTML, which means you are able to export news to the HTML5 format.
+
+### JSON structure
+The JSON structure looks as follows:
 ```
- Branch: Session_2
-         DzmitryKolb
-              |___Task1.py
-              |___Task2.py
- Branch: Session_3
-         DzmitryKolb
-              |___Task1.py
-              |___Task2.py
+{
+  "title": string,
+  "date": datetime,
+  "link": string,
+  "image": string,
+  "channel": string,
+  "source": string
+}
 ```
 
-7. When you finish your work on task you should create Pull request to the appropriate branch of the main repo https://github.com/E-P-T/Homework (Docs: https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
-Please use the following instructions to prepare good description of the pull request:
-    - Pull request header should be: `Session <Number of the session> - <FirstName> <LastName>`.
-      Example: `Session 2 - Dzmitry Kolb`
-    - Pull request body: You should write here what tasks were implemented.
-      Example: `Finished: Task 1.2, Task 1.3, Task 1.6`
-
diff --git a/version1/__init__.py b/version1/__init__.py
diff --git a/version1/commands b/version1/commands
@@ -0,0 +1 @@
+pip install . -r requirements.txt
diff --git a/version1/requiremets.txt b/version1/requiremets.txt
@@ -0,0 +1,2 @@
+httpx==0.22.0
+
diff --git a/version1/rss_reader.py b/version1/rss_reader.py
@@ -0,0 +1,124 @@
+import argparse
+import datetime
+import json
+import xml.etree.ElementTree as ET
+from typing import Optional
+
+import httpx
+
+# Command-line interface definition used by main().
+PARSER = argparse.ArgumentParser(
+    prog="RSS Reader",
+    description='Pure Python command-line RSS reader.',
+)
+PARSER.add_argument(
+    'source',
+    type=str,
+    help="RSS URL",
+)
+PARSER.add_argument(
+    '--limit',
+    type=int,
+    help="Limit news topics if this parameter provided",
+)
+PARSER.add_argument(
+    '--version',
+    action='version',
+    version='%(prog)s 1.1',
+)
+# --json and --verbose are counting flags: 0 when absent, >= 1 when supplied.
+PARSER.add_argument(
+    '--json',
+    action='count',
+    default=0,
+    help="Print result as JSON in stdout",
+)
+PARSER.add_argument(
+    '--verbose',
+    action='count',
+    default=0,
+    help="Outputs verbose status messages",
+)
+
+# Timestamp layout used to parse item dates and to serialise them again.
+DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
+
+
+class RSSNews:
+    """One news entry of an RSS feed.
+
+    The publication date is kept as a ``datetime`` object; all other fields
+    are stored exactly as they were passed in.
+    """
+
+    def __init__(
+            self,
+            title: str,
+            link: str,
+            pubDate: str,
+            source: str,
+            channel: str,
+            *args, **kwargs,
+    ):
+        """Store the entry fields.
+
+        ``pubDate`` must match ``DATETIME_FORMAT``; otherwise ``strptime``
+        raises ``ValueError``. Extra positional and keyword arguments are
+        accepted and ignored.
+        """
+        published = datetime.datetime.strptime(pubDate, DATETIME_FORMAT)
+        self.pubDate = published
+        self.title, self.link = title, link
+        self.source, self.channel = source, channel
+
+    def to_dict(self):
+        """Return the entry as a plain dict with the date formatted back to text."""
+        stamp = self.pubDate.strftime(DATETIME_FORMAT)
+        return dict(
+            pubDate=stamp,
+            title=self.title,
+            link=self.link,
+            source=self.source,
+            channel=self.channel,
+        )
+
+
+class RSS20Parser:
+    """Extract news entries from a parsed RSS 2.0 document.
+
+    Attributes:
+        xml_tree: Root element of the feed; items are looked up under ``channel/item``.
+        limit: Maximum number of items to return. A falsy or negative value means no limit.
+        verbose: When truthy, progress messages are printed to stdout.
+    """
+
+    def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool):
+        self.xml_tree = xml_tree
+        self.limit = limit
+        self.verbose = verbose
+
+    def parse(self) -> list[RSSNews]:
+        """Build one ``RSSNews`` per ``channel/item`` element, honouring ``limit``.
+
+        Every child element of an item is passed to ``RSSNews`` as a keyword
+        argument named after its tag, with the element text as the value.
+
+        Returns:
+            The parsed entries in document order.
+
+        Raises:
+            TypeError: If an item lacks a field that ``RSSNews`` requires.
+            ValueError: If an item's ``pubDate`` does not match the expected format.
+        """
+        if self.verbose:
+            print("LOG: Parsing the data of the RSS 2.0 format")
+
+        # A feed without <channel><title> is still usable; do not crash on `.text`.
+        title_elem = self.xml_tree.find("channel/title")
+        channel = title_elem.text if title_elem is not None else None
+
+        items = self.xml_tree.findall("./channel/item")
+        if self.limit and self.limit > 0:
+            items = items[:self.limit]
+
+        news = [
+            RSSNews(**{elem.tag: elem.text for elem in item}, channel=channel)
+            for item in items
+        ]
+
+        # Only log in verbose mode so that --json output stays machine-readable.
+        if self.verbose:
+            print("LOG: Finished parsing")
+        return news
+
+
+def get_xml_response(url: str, verbose: bool):
+    """Fetch ``url`` with an HTTP GET and return the response body as text.
+
+    When ``verbose`` is truthy, a log line is printed before and after the request.
+    """
+    if not verbose:
+        return httpx.get(url).text
+
+    print(f"LOG: Querying data from source: {url}")
+    reply = httpx.get(url)
+    print(f"LOG: Queried data from source: {url}")
+    return reply.text
+
+
+def parse_xml(xml_data: str, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]:
+    """Parse raw feed XML and return its news entries.
+
+    Args:
+        xml_data: The XML document as text.
+        verbose: When truthy, progress messages are printed to stdout.
+        limit: Optional maximum number of entries to return.
+
+    Returns:
+        The parsed entries, or an empty list when the document is not an
+        RSS 2.0 feed (a message is printed in that case).
+
+    Raises:
+        xml.etree.ElementTree.ParseError: If ``xml_data`` is not well-formed XML.
+    """
+    root = ET.fromstring(xml_data)
+    if verbose:
+        print("LOG: Starting parser block")
+    # .get() instead of indexing: a root without a "version" attribute
+    # (e.g. a non-RSS document) must reach the "unsupported" branch, not raise KeyError.
+    match root.attrib.get('version'):
+        case '2.0':
+            return RSS20Parser(root, limit, verbose).parse()
+        case _:
+            print("Not a valid or supported xml RSS feed!")
+            return []
+
+
+def format_console_text(news: list[RSSNews]):
+    """Render the entries as a single human-readable string.
+
+    Each entry contributes a block with its feed name, title, date and link;
+    an empty input yields an empty string.
+    """
+    blocks = [
+        f"\nFeed: {entry.channel}\n\nTitle: {entry.title}\nDate: {entry.pubDate}\nLink: {entry.link}\n"
+        for entry in news
+    ]
+    return "".join(blocks)
+
+
+def format_json(news: list[RSSNews]):
+    """Serialise the entries to a JSON array string via each entry's ``to_dict()``."""
+    serialisable = [entry.to_dict() for entry in news]
+    return json.dumps(serialisable)
+
+
+def main():
+    """Command-line entry point: fetch the feed, parse it and print the result.
+
+    Output is JSON when ``--json`` was given, plain text otherwise.
+    """
+    options = PARSER.parse_args()
+    chatty = options.verbose
+    xml_text = get_xml_response(options.source, chatty)
+    entries = parse_xml(xml_data=xml_text, limit=options.limit, verbose=chatty)
+
+    if options.json:
+        if chatty:
+            print("LOG: Preparing json formatted output")
+        print(format_json(entries))
+        return
+
+    if chatty:
+        print("LOG: Preparing text formatted output")
+    print(format_console_text(entries))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/version2/__init__.py b/version2/__init__.py
diff --git a/version2/commands b/version2/commands
@@ -0,0 +1 @@
+pip install .
diff --git a/version2/requirements.txt b/version2/requirements.txt
@@ -0,0 +1 @@
+httpx==0.22.0
diff --git a/version2/rss_reader.py b/version2/rss_reader.py
@@ -0,0 +1,122 @@
+import argparse
+import datetime
+import json
+import xml.etree.ElementTree as ET
+from typing import Optional
+
+import httpx
+
+# Command-line interface definition used by main().
+PARSER = argparse.ArgumentParser(
+    prog="RSS Reader",
+    description='Pure Python command-line RSS reader.',
+)
+PARSER.add_argument(
+    'source',
+    type=str,
+    help="RSS URL",
+)
+PARSER.add_argument(
+    '--limit',
+    type=int,
+    help="Limit news topics if this parameter provided",
+)
+PARSER.add_argument(
+    '--version',
+    action='version',
+    version='%(prog)s 1.1',
+)
+# --json and --verbose are counting flags: 0 when absent, >= 1 when supplied.
+PARSER.add_argument(
+    '--json',
+    action='count',
+    default=0,
+    help="Print result as JSON in stdout",
+)
+PARSER.add_argument(
+    '--verbose',
+    action='count',
+    default=0,
+    help="Outputs verbose status messages",
+)
+
+# Timestamp layout used to parse item dates and to serialise them again.
+DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
+
+
+class RSSNews:
+    """One news entry of an RSS feed.
+
+    The publication date is kept as a ``datetime`` object; all other fields
+    are stored exactly as they were passed in.
+    """
+
+    def __init__(
+            self,
+            title: str,
+            link: str,
+            pubDate: str,
+            source: str,
+            channel: str,
+            *args, **kwargs,
+    ):
+        """Store the entry fields.
+
+        ``pubDate`` must match ``DATETIME_FORMAT``; otherwise ``strptime``
+        raises ``ValueError``. Extra positional and keyword arguments are
+        accepted and ignored.
+        """
+        published = datetime.datetime.strptime(pubDate, DATETIME_FORMAT)
+        self.pubDate = published
+        self.title, self.link = title, link
+        self.source, self.channel = source, channel
+
+    def to_dict(self):
+        """Return the entry as a plain dict; the date is exported under ``pub_date``."""
+        stamp = self.pubDate.strftime(DATETIME_FORMAT)
+        return dict(
+            pub_date=stamp,
+            title=self.title,
+            link=self.link,
+            source=self.source,
+            channel=self.channel,
+        )
+
+
+class RSS20Parser:
+    """Extract news entries from a parsed RSS 2.0 document.
+
+    Attributes:
+        xml_tree: Root element of the feed; items are looked up under ``channel/item``.
+        limit: Maximum number of items to return. A falsy or negative value means no limit.
+        verbose: When truthy, progress messages are printed to stdout.
+    """
+
+    def __init__(self, xml_tree: ET.Element, limit: int, verbose: bool):
+        self.xml_tree = xml_tree
+        self.limit = limit
+        self.verbose = verbose
+
+    def parse(self) -> list[RSSNews]:
+        """Build one ``RSSNews`` per ``channel/item`` element, honouring ``limit``.
+
+        Every child element that has text is passed to ``RSSNews`` as a keyword
+        argument named after its tag. Children without text are skipped.
+
+        Returns:
+            The parsed entries in document order.
+
+        Raises:
+            TypeError: If an item lacks a field that ``RSSNews`` requires.
+            ValueError: If an item's ``pubDate`` does not match the expected format.
+        """
+        if self.verbose:
+            print("LOG: Parsing the data of the RSS 2.0 format")
+
+        # A feed without <channel><title> is still usable; do not crash on `.text`.
+        title_elem = self.xml_tree.find("channel/title")
+        channel = title_elem.text if title_elem is not None else None
+
+        items = self.xml_tree.findall("./channel/item")
+        if self.limit and self.limit > 0:
+            items = items[:self.limit]
+
+        news = []
+        for item in items:
+            # Keep values as str: RSSNews feeds pubDate to datetime.strptime and the
+            # fields to json.dumps, neither of which accepts bytes.
+            fields = {elem.tag: elem.text for elem in item if elem.text is not None}
+            news.append(RSSNews(**fields, channel=channel))
+
+        # Only log in verbose mode so that --json output stays machine-readable.
+        if self.verbose:
+            print("LOG: Finished parsing")
+        return news
+
+
+def get_xml_response(url: str, verbose: bool):
+    """Fetch ``url`` with an HTTP GET and return the response body as text.
+
+    When ``verbose`` is truthy, a log line is printed before and after the request.
+    """
+    if not verbose:
+        return httpx.get(url).text
+
+    print(f"LOG: Querying data from source: {url}")
+    reply = httpx.get(url)
+    print(f"LOG: Queried data from source: {url}")
+    return reply.text
+
+
+def parse_xml(xml_data: str | bytes, verbose: bool, limit: Optional[int] = None) -> list[RSSNews]:
+    """Parse raw feed XML and return its news entries.
+
+    Args:
+        xml_data: The XML document as text or bytes.
+        verbose: When truthy, progress messages are printed to stdout.
+        limit: Optional maximum number of entries to return.
+
+    Returns:
+        The parsed entries, or an empty list when the document is not an
+        RSS 2.0 feed (a message is printed in that case).
+
+    Raises:
+        xml.etree.ElementTree.ParseError: If ``xml_data`` is not well-formed XML.
+    """
+    root = ET.fromstring(xml_data)
+    if verbose:
+        print("LOG: Starting parser block")
+    # .get() instead of indexing: a root without a "version" attribute
+    # (e.g. a non-RSS document) must reach the "unsupported" branch, not raise KeyError.
+    match root.attrib.get('version'):
+        case '2.0':
+            return RSS20Parser(root, limit, verbose).parse()
+        case _:
+            print("Not a valid or supported xml RSS feed!")
+            # Return an empty list, not None, so callers can always iterate the result.
+            return []
+
+
+def format_console_text(news: list[RSSNews]):
+    """Print every entry to stdout as a human-readable block.
+
+    Nothing is returned; each entry is written with its own ``print`` call.
+    """
+    for entry in news:
+        print(f"""\nFeed: {entry.channel}\n\nTitle: {entry.title}\nDate: {entry.pubDate}\nLink: {entry.link}\n""")
+
+
+def format_json(news: list[RSSNews]):
+    """Serialise the entries to a JSON array string via each entry's ``to_dict()``."""
+    serialisable = [entry.to_dict() for entry in news]
+    return json.dumps(serialisable)
+
+
+def main():
+    """Command-line entry point: fetch the feed, parse it and print the result.
+
+    Output is JSON when ``--json`` was given; otherwise the entries are
+    printed as plain text by ``format_console_text``.
+    """
+    options = PARSER.parse_args()
+    chatty = options.verbose
+    xml_text = get_xml_response(options.source, chatty)
+    entries = parse_xml(xml_data=xml_text, limit=options.limit, verbose=chatty)
+
+    if options.json:
+        if chatty:
+            print("LOG: Preparing json formatted output")
+        print(format_json(entries))
+        return
+
+    if chatty:
+        print("LOG: Preparing text formatted output")
+    format_console_text(entries)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/version2/setup.py b/version2/setup.py
@@ -0,0 +1,20 @@
+import setuptools
+
+# install_requires expects one requirement per list entry, so split the file
+# into lines and drop blanks and comment lines.
+with open("requirements.txt", "r", encoding="utf-8") as fh:
+    requirements = [
+        line.strip()
+        for line in fh
+        if line.strip() and not line.lstrip().startswith("#")
+    ]
+
+setuptools.setup(
+    include_package_data=True,
+    name="RSS Reader",
+    version="2.0.0",
+    description="Pure Python command-line RSS reader.",
+    author="arslan",
+    packages=setuptools.find_packages(),
+    install_requires=requirements,
+    py_modules=["rss_reader"],
+    entry_points='''
+    [console_scripts]
+    rss_reader=rss_reader:main
+    ''',
+    # rss_reader.py uses the `match` statement and `str | bytes` annotations,
+    # both of which need Python 3.10 or newer.
+    python_requires='>=3.10'
+)
diff --git a/version3/__init__.py b/version3/__init__.py