From d62e05bc4e2c723f3dc2cab50bc98f6ff752ed4a Mon Sep 17 00:00:00 2001 From: Valery Litskevich Date: Tue, 24 May 2022 18:57:26 +0500 Subject: [PATCH 01/10] Add final task --- Final_Task.md | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++ RULES.md | 12 ++++ 2 files changed, 207 insertions(+) create mode 100644 Final_Task.md create mode 100644 RULES.md diff --git a/Final_Task.md b/Final_Task.md new file mode 100644 index 00000000..2e2e618a --- /dev/null +++ b/Final_Task.md @@ -0,0 +1,195 @@ +# Introduction to Python. Final task. +You are proposed to implement Python RSS-reader using **python 3.9**. + +The task consists of few iterations. Do not start new iteration if the previous one is not implemented yet. + +## Common requirements. +* It is mandatory to use `argparse` module. +* Codebase must be covered with unit tests with at least 50% coverage. It's a mandatory requirement. +* Yor script should **not** require installation of other services such as mysql server, +postgresql and etc. (except Iteration 6). If it does require such programs, +they should be installed automatically by your script, without user doing anything. +* In case of any mistakes utility should print human-readable. +error explanation. Exception tracebacks in stdout are prohibited in final version of application. +* Docstrings are mandatory for all methods, classes, functions and modules. +* Code must correspond to `pep8` (use `pycodestyle` utility for self-check). + * You can set line length up to 120 symbols. +* Commit messages should provide correct and helpful information about changes in commit. Messages like `Fix bug`, +`Tried to make workable`, `Temp commit` and `Finally works` are prohibited. +* All used third-party packages should be written in the `requirements.txt` file and in installation files (`setup.py`, `setup.cfg`, etc.). +* You have to write a file with documentation. 
Everything must be documented: how to run scripts, how to run tests, how to install the library and etc. + +## [Iteration 1] One-shot command-line RSS reader. +RSS reader should be a command-line utility which receives [RSS](wikipedia.org/wiki/RSS) URL and prints results in human-readable format. + +You are free to choose format of the news console output. The textbox below provides an example of how it can be implemented: + +```shell +$ rss_reader.py "https://news.yahoo.com/rss/" --limit 1 + +Feed: Yahoo News - Latest News & Headlines + +Title: Nestor heads into Georgia after tornados damage Florida +Date: Sun, 20 Oct 2019 04:21:44 +0300 +Link: https://news.yahoo.com/wet-weekend-tropical-storm-warnings-131131925.html + +[image 2: Nestor heads into Georgia after tornados damage Florida][2]Nestor raced across Georgia as a post-tropical cyclone late Saturday, hours after the former tropical storm spawned a tornado that damaged +homes and a school in central Florida while sparing areas of the Florida Panhandle devastated one year earlier by Hurricane Michael. The storm made landfall Saturday on St. Vincent Island, a nature preserve +off Florida's northern Gulf Coast in a lightly populated area of the state, the National Hurricane Center said. Nestor was expected to bring 1 to 3 inches of rain to drought-stricken inland areas on its +march across a swath of the U.S. Southeast. + + +Links: +[1]: https://news.yahoo.com/wet-weekend-tropical-storm-warnings-131131925.html (link) +[2]: http://l2.yimg.com/uu/api/res/1.2/Liyq2kH4HqlYHaS5BmZWpw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/ap.org/5ecc06358726cabef94585f99050f4f0 (image) + +``` + +Utility should provide the following interface: +```shell +usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT] + source + +Pure Python command-line RSS reader. 
+ +positional arguments: + source RSS URL + +optional arguments: + -h, --help show this help message and exit + --version Print version info + --json Print result as JSON in stdout + --verbose Outputs verbose status messages + --limit LIMIT Limit news topics if this parameter provided + +``` + +In case of using `--json` argument your utility should convert the news into [JSON](https://en.wikipedia.org/wiki/JSON) format. +You should come up with the JSON structure on your own and describe it in the README.md file for your repository or in a separate documentation file. + + + +With the argument `--verbose` your program should print all logs in stdout. + +### Task clarification (I) + +1) If `--version` option is specified app should _just print its version_ and stop. +2) User should be able to use `--version` option without specifying RSS URL. For example: +``` +> python rss_reader.py --version +"Version 1.4" +``` +3) The version is supposed to change with every iteration. +4) If `--limit` is not specified, then user should get _all_ available feed. +5) If `--limit` is larger than feed size then user should get _all_ available news. +6) `--verbose` should print logs _in the process_ of application running, _not after everything is done_. +7) Make sure that your app **has no encoding issues** (meaning symbols like `'` and etc) when printing news to _stdout_. +8) Make sure that your app **has no encoding issues** (meaning symbols like `'` and etc) when printing news to _stdout in JSON format_. +9) It is preferable to have different custom exceptions for different situations (if needed). +10) The `--limit` argument should also affect JSON generation. + + +## [Iteration 2] Distribution. + +* Utility should be wrapped into distribution package with `setuptools`. +* This package should export CLI utility named `rss-reader`. 
+ + +### Task clarification (II) + +1) User should be able to run your application _both_ with and without installation of CLI utility, +meaning that this should work: + +``` +> python rss_reader.py ... +``` + +as well as this: + +``` +> rss_reader ... +``` +2) Make sure your second iteration works on a clean machine with python 3.9. (!) +3) Keep in mind that installed CLI utility should have the same functionality, so do not forget to update dependencies and packages. + + +## [Iteration 3] News caching. +The RSS news should be stored in a local storage while reading. The way and format of this storage you can choose yourself. +Please describe it in a separate section of README.md or in the documentation. + +New optional argument `--date` must be added to your utility. It should take a date in `%Y%m%d` format. +For example: `--date 20191020` +Here date means actual *publishing date* not the date when you fetched the news. + +The cached news can be read with it. The news from the specified day will be printed out. +If the news are not found return an error. + +If the `--date` argument is not provided, the utility should work like in the previous iterations. + +### Task clarification (III) +1) Try to make your application crossplatform, meaning that it should work on both Linux and Windows. +For example when working with filesystem, try to use `os.path` lib instead of manually concatenating file paths. +2) `--date` should **not** require internet connection to fetch news from local cache. +3) User should be able to use `--date` without specifying RSS source. For example: +``` +> python rss_reader.py --date 20191206 +...... +``` +Or for second iteration (when installed using setuptools): +``` +> rss_reader --date 20191206 +...... +``` +4) If `--date` specified _together with RSS source_, then app should get news _for this date_ from local cache that _were fetched from specified source_. 
+5) `--date` should work correctly with both `--json`, `--limit`, `--verbose` and their different combinations. + +## [Iteration 4] Format converter. + +You should implement the conversion of news in at least two of the suggested formats: `.mobi`, `.epub`, `.fb2`, `.html`, `.pdf` + +New optional argument must be added to your utility. This argument receives the path where new file will be saved. The argument should indicate which format will be generated. + +For example: `--to-mobi` or `--to-fb2` or `--to-epub` + +You can choose yourself the way in which the news will be displayed, but the final text result should contain pictures and links, if they exist in the original article and if the format permits to store this type of data. + +### Task clarification (IV) + +Conversion options should work correctly together with all arguments that were implemented in Iterations 1-3. For example: +* Format conversion process should be influenced by `--limit`. +* If `--json` is specified together with conversion options, then JSON news should +be printed to stdout, and converted file should contain news in normal format. +* Logs from `--verbose` should be printed in stdout and not added to the resulting file. +* `--date` should also work correctly with format converter and not require internet access. + +## * [Iteration 5] Output colorization. +> Note: An optional iteration, it is not necessary to implement it. You can move on with it only if all the previous iterations (from 1 to 4) are completely implemented. + +You should add new optional argument `--colorize`, that will print the result of the utility in colorized mode. + +*If the argument is not provided, the utility should work like in the previous iterations.* + +> Note: Take a look at the [colorize](https://pypi.org/project/colorize/) library + +## * [Iteration 6] Web-server. +> Note: An optional iteration, it is not necessary to implement it. 
You can move on with it only if all the previous iterations (from 1 to 4) are completely implemented. Introduction to Python course does not cover the topics that are needed for the implementation of this part. + +There are several mandatory requirements in this iteration: +* `Docker` + `docker-compose` usage (at least 2 containers: one for web-application, one for DB) +* Web application should provide all the functionality implemented in the previous parts of the task, using the REST API: + * One-shot conversion from RSS to Human readable format + * Server-side news caching + * Conversion in epub, mobi, fb2 or other formats + +Feel free to choose the way of implementation, libraries and frameworks. (We suggest you `Django Rest Framework` + `PostgreSQL` combination) + +You can implement any functionality that you want. The only requirement is to add the description into README file or update project documentation, for example: +* authorization/authentication +* automatic scheduled news update +* adding new RSS sources using API + +--- +Implementations will be checked with the latest CPython interpreter of 3.9 branch. +--- + +> Always code as if the guy who ends up maintaining your code will be a violent psychopath who knows where you live. Code for readability. **John F. Woods** diff --git a/RULES.md b/RULES.md new file mode 100644 index 00000000..9a72034f --- /dev/null +++ b/RULES.md @@ -0,0 +1,12 @@ +# Final task +Final task (`FT`) for EPAM Python Training 2022.03 + +## Rules +* All work has to be implemented in the `master` branch in forked repository. If you think that `FT` is ready, please open a pull request (`PR`) to our repo. +* When a `PR` will be ready, please mark it with the `final_task` label. +* You have one month to finish `FT`. Commits committed after deadline will be ignored. +* At least the first 4 iterations must be done. +* `FT` you can find in the `Final_Task.md` file. + +### Good luck! 
+ From 4dae39b177b468db9bbe780ebe97c92ee1aaf902 Mon Sep 17 00:00:00 2001 From: 0iskak Date: Sun, 12 Jun 2022 01:08:13 +0600 Subject: [PATCH 02/10] Implemented Iteration 1 and 2 --- rss_reader/setup.py | 18 ++++++++++ rss_reader/src/__init__.py | 0 rss_reader/src/args/Arguments.py | 33 ++++++++++++++++++ rss_reader/src/args/__init__.py | 0 rss_reader/src/info.py | 3 ++ rss_reader/src/news/Item.py | 30 +++++++++++++++++ rss_reader/src/news/News.py | 22 ++++++++++++ rss_reader/src/news/__init__.py | 0 rss_reader/src/rss_reader.py | 39 ++++++++++++++++++++++ rss_reader/src/util/Color.py | 11 ++++++ rss_reader/src/util/Logger.py | 57 ++++++++++++++++++++++++++++++++ rss_reader/src/util/Util.py | 48 +++++++++++++++++++++++++++ rss_reader/src/util/__init__.py | 0 rss_reader/test/__init__.py | 0 rss_reader/test/test_Item.py | 27 +++++++++++++++ rss_reader/test/test_News.py | 34 +++++++++++++++++++ rss_reader/test/test_Util.py | 18 ++++++++++ 17 files changed, 340 insertions(+) create mode 100644 rss_reader/setup.py create mode 100644 rss_reader/src/__init__.py create mode 100644 rss_reader/src/args/Arguments.py create mode 100644 rss_reader/src/args/__init__.py create mode 100644 rss_reader/src/info.py create mode 100644 rss_reader/src/news/Item.py create mode 100644 rss_reader/src/news/News.py create mode 100644 rss_reader/src/news/__init__.py create mode 100644 rss_reader/src/rss_reader.py create mode 100644 rss_reader/src/util/Color.py create mode 100644 rss_reader/src/util/Logger.py create mode 100644 rss_reader/src/util/Util.py create mode 100644 rss_reader/src/util/__init__.py create mode 100644 rss_reader/test/__init__.py create mode 100644 rss_reader/test/test_Item.py create mode 100644 rss_reader/test/test_News.py create mode 100644 rss_reader/test/test_Util.py diff --git a/rss_reader/setup.py b/rss_reader/setup.py new file mode 100644 index 00000000..f57dda73 --- /dev/null +++ b/rss_reader/setup.py @@ -0,0 +1,18 @@ +from setuptools import setup, find_packages + 
+from src.info import shortname, version + +setup( + name=shortname, + version=str(version), + package_dir={'': 'src'}, + packages=find_packages('src').append(''), + install_requires=[ + 'python-dateutil', + ], + entry_points={ + 'console_scripts': [ + 'rss_reader = rss_reader:main', + ] + } +) diff --git a/rss_reader/src/__init__.py b/rss_reader/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rss_reader/src/args/Arguments.py b/rss_reader/src/args/Arguments.py new file mode 100644 index 00000000..2515e5c8 --- /dev/null +++ b/rss_reader/src/args/Arguments.py @@ -0,0 +1,33 @@ +from argparse import ArgumentParser + + +class Arguments: + """ + Class-wrapper for ArgumentParser + """ + + source: str + json: bool + verbose: bool + limit: int + + def __init__(self, name: str, version: float): + parser = ArgumentParser(description=name) + + source = 'source' + json = 'json' + verbose = 'verbose' + limit = 'limit' + + parser.add_argument(source, type=str, help='RSS URL') + parser.add_argument('--version', action='version', version=f'v{version}') + parser.add_argument(f'--{json}', action='store_true', help='show in JSON format') + parser.add_argument(f'--{verbose}', action='store_true', help='show detailed information') + parser.add_argument(f'--{limit}', type=int, help='limit the items') + + args = parser.parse_args().__dict__ + + self.source = args.get(source) + self.json = args.get(json) + self.verbose = args.get(verbose) + self.limit = args.get(limit) diff --git a/rss_reader/src/args/__init__.py b/rss_reader/src/args/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rss_reader/src/info.py b/rss_reader/src/info.py new file mode 100644 index 00000000..b995feef --- /dev/null +++ b/rss_reader/src/info.py @@ -0,0 +1,3 @@ +name: str = 'Pure Python command-line RSS reader' +shortname: str = 'RSS Reader' +version: float = 0.1 diff --git a/rss_reader/src/news/Item.py b/rss_reader/src/news/Item.py new file mode 100644 index 
00000000..fe24493f --- /dev/null +++ b/rss_reader/src/news/Item.py @@ -0,0 +1,30 @@ +from datetime import datetime +from xml.etree.ElementTree import Element + +from dateutil.parser import parse as parse_date + + +class Item: + """ + Class for a news item. + """ + title: str + date: datetime + link: str + images: list[str] + + @staticmethod + def parse(element: Element) -> 'Item': + """ + Parse an XML element into a news item. + """ + item = Item() + item.title = element.find('title').text + item.date = parse_date(element.find('pubDate').text) + item.link = element.find('link').text + item.images = [ + image.attrib['url'] for image in + element.findall('*[@width][@height]') + ] + + return item diff --git a/rss_reader/src/news/News.py b/rss_reader/src/news/News.py new file mode 100644 index 00000000..e1a016c1 --- /dev/null +++ b/rss_reader/src/news/News.py @@ -0,0 +1,22 @@ +from xml.etree.ElementTree import Element + +from .Item import Item + + +class News: + """ + News class + """ + feed: str + items: list[Item] + + @staticmethod + def parse(element: Element, limit: int = None) -> 'News': + """ + Parse XML element to News object + """ + news = News() + news.feed = element.find('title').text + news.items = [Item.parse(item) for item in element.findall('item')[:limit]] + + return news diff --git a/rss_reader/src/news/__init__.py b/rss_reader/src/news/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rss_reader/src/rss_reader.py b/rss_reader/src/rss_reader.py new file mode 100644 index 00000000..75224683 --- /dev/null +++ b/rss_reader/src/rss_reader.py @@ -0,0 +1,39 @@ +from urllib.error import URLError +from xml.etree.ElementTree import ParseError + +from args.Arguments import Arguments +from info import name, version +from news.News import News +from util.Logger import Logger +from util.Util import Util + + +def main() -> None: + """ + Main function of the program. 
+ """ + + args = Arguments(name, version) + + logger = Logger(args.verbose) + logger.info(f'Downloading from {args.source}') + try: + element = Util.url_to_element(args.source) + except (URLError, ValueError): + logger.error('Invalid URL') + return + except ParseError: + logger.error('Invalid XML') + return + + logger.info('Parsing XML') + news = News.parse(element, args.limit) + + print( + Util.to_json(news) if args.json + else Util.to_str(news) + ) + + +if __name__ == '__main__': + main() diff --git a/rss_reader/src/util/Color.py b/rss_reader/src/util/Color.py new file mode 100644 index 00000000..bfddc438 --- /dev/null +++ b/rss_reader/src/util/Color.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class Color(Enum): + """ + Enum for color. + """ + RED = '\033[91m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + END = '\033[0m' diff --git a/rss_reader/src/util/Logger.py b/rss_reader/src/util/Logger.py new file mode 100644 index 00000000..4757584c --- /dev/null +++ b/rss_reader/src/util/Logger.py @@ -0,0 +1,57 @@ +from enum import Enum + +from .Color import Color +from .Util import Util + + +class Logger: + """ + Logger class + """ + + class Level(Enum): + """ + Enum-class for logging levels + """ + INFO = Color.GREEN + WARNING = Color.YELLOW + ERROR = Color.RED + + def __init__(self, verbose: bool): + self.logger = print if verbose else lambda *args: None + self.force_logger = print + + def log(self, message: str, level: Level) -> None: + """ + Logs a message with a given level + logger is print if verbose is True + else is a lambda function that does nothing + """ + message = f'[{level.name}] {message}' + self.logger(Util.colorize(message, level.value)) + + def force_log(self, message: str, level: Level) -> None: + """ + Logs a message with a given level + but prints even if verbose is False + """ + message = f'[{level.name}] {message}' + self.force_logger(Util.colorize(message, level.value)) + + def info(self, message: str) -> None: + """ + Logs a message with INFO 
level + """ + self.log(message, Logger.Level.INFO) + + def warning(self, message: str) -> None: + """ + Logs a message with WARNING level + """ + self.log(message, Logger.Level.WARNING) + + def error(self, message: str) -> None: + """ + Logs a message with ERROR level + """ + self.force_log(message, Logger.Level.ERROR) diff --git a/rss_reader/src/util/Util.py b/rss_reader/src/util/Util.py new file mode 100644 index 00000000..8302b752 --- /dev/null +++ b/rss_reader/src/util/Util.py @@ -0,0 +1,48 @@ +import json +import re +from urllib.request import urlopen +from xml.etree import ElementTree as ET + +from .Color import Color + + +class Util: + indent = 2 + + @staticmethod + def url_to_element(source: str) -> ET.Element: + """ + Returns an ElementTree.Element from a URL. + """ + with urlopen(source) as file: + return ET.parse(file).getroot().find('channel') + + @staticmethod + def to_json(obj: object) -> str: + """ + Returns a JSON string from an object. + """ + return json.dumps( + obj, indent=Util.indent, default=lambda o: + o.__dict__ if hasattr(o, '__dict__') else str(o) + ) + + @staticmethod + def to_str(obj: object) -> str: + """ + Returns a string from an object. + """ + string = Util.to_json(obj) + string = re.sub(r'^\s*[{}],?\s?', '', string, flags=re.MULTILINE) + string = re.sub(r'^\s{' + str(Util.indent) + '}', '', string, flags=re.MULTILINE) + string = re.sub(r'"(\w+)"', lambda match: match.group(1).capitalize(), string, flags=re.MULTILINE) + string = re.sub(r'"(.+)",?', lambda match: match.group(1), string, flags=re.MULTILINE) + string = re.sub(r'[\[\]]', '', string, flags=re.MULTILINE) + return string.strip() + + @staticmethod + def colorize(message: str, color: Color) -> str: + """ + Returns a string with colorized text. 
+ """ + return f'{color.value}{message}{Color.END.value}' diff --git a/rss_reader/src/util/__init__.py b/rss_reader/src/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rss_reader/test/__init__.py b/rss_reader/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rss_reader/test/test_Item.py b/rss_reader/test/test_Item.py new file mode 100644 index 00000000..96c55e75 --- /dev/null +++ b/rss_reader/test/test_Item.py @@ -0,0 +1,27 @@ +from unittest import TestCase +from xml.etree.ElementTree import fromstring + +from dateutil.parser import parse as parse_date + +from rss_reader.src.news.Item import Item + + +class TestItem(TestCase): + def test_parse(self): + title = 'Title' + link = 'https://example.com' + date = 'Mon, 1 Jan 2000 12:00:00 GMT' + + rss = f''' + + {title} + {link} + {date} + + ''' + + item = Item.parse(fromstring(rss)) + + self.assertEqual(item.title, title) + self.assertEqual(item.link, link) + self.assertEqual(item.date, parse_date(date)) diff --git a/rss_reader/test/test_News.py b/rss_reader/test/test_News.py new file mode 100644 index 00000000..9ed4538b --- /dev/null +++ b/rss_reader/test/test_News.py @@ -0,0 +1,34 @@ +from unittest import TestCase +from xml.etree.ElementTree import fromstring + +from dateutil.parser import parse as parse_date + +from rss_reader.src.news.News import News + + +class TestNews(TestCase): + def test_parse(self): + feed = 'RSS Title' + title = 'Title' + link = 'https://example.com' + date = 'Mon, 1 Jan 2000 12:00:00 GMT' + + rss = f''' + + {feed} + + {title} + {link} + {date} + + + ''' + + news = News.parse(fromstring(rss)) + self.assertEqual(news.feed, feed) + self.assertEqual(len(news.items), 1) + + item = news.items[0] + self.assertEqual(item.title, title) + self.assertEqual(item.link, link) + self.assertEqual(item.date, parse_date(date)) diff --git a/rss_reader/test/test_Util.py b/rss_reader/test/test_Util.py new file mode 100644 index 00000000..581d8e6e --- 
/dev/null +++ b/rss_reader/test/test_Util.py @@ -0,0 +1,18 @@ +import json +from unittest import TestCase + +from rss_reader.src.util.Util import Util + + +class TestUtil(TestCase): + def test_to_json(self): + obj = { + 'title': 'Title', + 'link': 'Link', + 'description': 'Description' + } + + self.assertEqual( + obj, + json.loads(Util.to_json(obj)) + ) From 268897de3785ae92eba273df7a64e56115318f66 Mon Sep 17 00:00:00 2001 From: 0iskak Date: Sun, 12 Jun 2022 04:38:05 +0600 Subject: [PATCH 03/10] Implemented Iteration 3 --- rss_reader/src/args/Arguments.py | 18 ++++++- rss_reader/src/news/Item.py | 33 ++++++++++--- rss_reader/src/news/News.py | 29 +++++++++-- rss_reader/src/rss_reader.py | 83 ++++++++++++++++++++++++++++---- rss_reader/test/test_Item.py | 17 +++++++ rss_reader/test/test_News.py | 26 ++++++++++ 6 files changed, 184 insertions(+), 22 deletions(-) diff --git a/rss_reader/src/args/Arguments.py b/rss_reader/src/args/Arguments.py index 2515e5c8..0dacd7ee 100644 --- a/rss_reader/src/args/Arguments.py +++ b/rss_reader/src/args/Arguments.py @@ -1,4 +1,5 @@ from argparse import ArgumentParser +from datetime import datetime class Arguments: @@ -6,10 +7,11 @@ class Arguments: Class-wrapper for ArgumentParser """ - source: str + source: str = None json: bool verbose: bool limit: int + date: datetime = None def __init__(self, name: str, version: float): parser = ArgumentParser(description=name) @@ -18,12 +20,14 @@ def __init__(self, name: str, version: float): json = 'json' verbose = 'verbose' limit = 'limit' + date = 'date' - parser.add_argument(source, type=str, help='RSS URL') + parser.add_argument('source', type=str, help='RSS URL', nargs='?', default=None) parser.add_argument('--version', action='version', version=f'v{version}') parser.add_argument(f'--{json}', action='store_true', help='show in JSON format') parser.add_argument(f'--{verbose}', action='store_true', help='show detailed information') parser.add_argument(f'--{limit}', type=int, help='limit the 
items') + parser.add_argument(f'--{date}', type=str, help='show cached items from specified date') args = parser.parse_args().__dict__ @@ -31,3 +35,13 @@ def __init__(self, name: str, version: float): self.json = args.get(json) self.verbose = args.get(verbose) self.limit = args.get(limit) + + date_str = args.get(date) + + if self.source is None and date_str is None: + parser.error(f'the following arguments are required: {source} or {date}') + elif date_str is not None: + try: + self.date = datetime.strptime(date_str, '%Y%m%d') + except ValueError: + parser.error(f'date must be in %Y%m%d format') diff --git a/rss_reader/src/news/Item.py b/rss_reader/src/news/Item.py index fe24493f..645c8b78 100644 --- a/rss_reader/src/news/Item.py +++ b/rss_reader/src/news/Item.py @@ -13,18 +13,39 @@ class Item: link: str images: list[str] + def __init__(self, title: str, date: datetime, link: str, images: list[str]): + self.title = title + self.date = date + self.link = link + self.images = images + @staticmethod def parse(element: Element) -> 'Item': """ Parse an XML element into a news item. """ - item = Item() - item.title = element.find('title').text - item.date = parse_date(element.find('pubDate').text) - item.link = element.find('link').text - item.images = [ + title = element.find('title').text + date = parse_date(element.find('pubDate').text) + link = element.find('link').text + images = [ image.attrib['url'] for image in element.findall('*[@width][@height]') ] - return item + return Item(title, date, link, images) + + @staticmethod + def parse_dict(item_dict: dict) -> 'Item': + """ + Parse a dictionary into a news item. 
+ """ + + title = item_dict.get('title') + date = parse_date(item_dict.get('date')) + link = item_dict.get('link') + images = item_dict.get('images') + + return Item(title, date, link, images) + + def __eq__(self, o: object) -> bool: + return self.__dict__ == o.__dict__ diff --git a/rss_reader/src/news/News.py b/rss_reader/src/news/News.py index e1a016c1..c92be627 100644 --- a/rss_reader/src/news/News.py +++ b/rss_reader/src/news/News.py @@ -1,3 +1,4 @@ +from datetime import datetime from xml.etree.ElementTree import Element from .Item import Item @@ -10,13 +11,31 @@ class News: feed: str items: list[Item] + def __init__(self, feed: str, items: list[Item]): + self.feed = feed + self.items = items + @staticmethod - def parse(element: Element, limit: int = None) -> 'News': + def parse(element: Element, limit: int = None, date: datetime = None) -> 'News': """ Parse XML element to News object """ - news = News() - news.feed = element.find('title').text - news.items = [Item.parse(item) for item in element.findall('item')[:limit]] + feed = element.find('title').text + items = [Item.parse(item) for item in element.findall('item')] + if date is not None: + items = list(filter(lambda item: item.date.date() == date.date(), news.items)) + items = items[:limit] + + return News(feed, items) + + @staticmethod + def parse_dict(news_dict: dict) -> 'News': + """ + Parse a dictionary to News object + """ + feed = news_dict.get('feed') + + items = news_dict.get('items') + items = [Item.parse_dict(item) for item in items] - return news + return News(feed, items) diff --git a/rss_reader/src/rss_reader.py b/rss_reader/src/rss_reader.py index 75224683..081dcf23 100644 --- a/rss_reader/src/rss_reader.py +++ b/rss_reader/src/rss_reader.py @@ -1,3 +1,6 @@ +import json +import os +from pathlib import Path from urllib.error import URLError from xml.etree.ElementTree import ParseError @@ -8,14 +11,38 @@ from util.Util import Util -def main() -> None: +def get_cache(path: os.path) -> News: """ 
- Main function of the program. + Get the cache from the local file. """ + if os.path.exists(path): + with open(path, 'r') as file: + return News.parse_dict(json.loads(file.read())) + else: + news = News('Local cached feed', []) + return news - args = Arguments(name, version) - logger = Logger(args.verbose) +def save_cache(path: os.path, news: News, logger: Logger) -> None: + """ + Save the cache to the local file. + """ + logger.info('Saving to cache') + with open(path, 'w') as file: + file.write(Util.to_json(news)) + + +def print_news(news: News, args: Arguments) -> None: + print( + Util.to_json(news) if args.json + else Util.to_str(news) + ) + + +def from_url(logger: Logger, args: Arguments, path: os.path) -> None: + """ + Get news from a URL. + """ logger.info(f'Downloading from {args.source}') try: element = Util.url_to_element(args.source) @@ -27,12 +54,50 @@ def main() -> None: return logger.info('Parsing XML') - news = News.parse(element, args.limit) + news = News.parse(element, args.limit, args.date) - print( - Util.to_json(news) if args.json - else Util.to_str(news) - ) + print_news(news, args) + + cached = get_cache(path) + cached.items += news.items + + items = [] + for item in cached.items: + if item not in items: + items.append(item) + + cached.items = items + + save_cache(path, cached, logger) + + +def from_local(logger: Logger, args: Arguments, path: os.path) -> None: + """ + Get news from a local cache. + """ + logger.info('Getting items from local cache: no source specified') + news = get_cache(path) + items = [] + for item in news.items: + if item.date.date() == args.date.date(): + items.append(item) + items = items[:args.limit] + + print_news(News('Local cached feed', items), args) + + +def main() -> None: + """ + Main function of the program. 
+ """ + args = Arguments(name, version) + logger = Logger(args.verbose) + path = os.path.join(Path(__file__).parent.parent, 'news.json') + + if args.source is None: + from_local(logger, args, path) + else: + from_url(logger, args, path) if __name__ == '__main__': diff --git a/rss_reader/test/test_Item.py b/rss_reader/test/test_Item.py index 96c55e75..71dc5408 100644 --- a/rss_reader/test/test_Item.py +++ b/rss_reader/test/test_Item.py @@ -25,3 +25,20 @@ def test_parse(self): self.assertEqual(item.title, title) self.assertEqual(item.link, link) self.assertEqual(item.date, parse_date(date)) + + def test_parse_dict(self): + title = 'Title' + link = 'https://example.com' + date = 'Mon, 1 Jan 2000 12:00:00 GMT' + + dict = { + 'title': title, + 'link': link, + 'date': date + } + + item = Item.parse_dict(dict) + + self.assertEqual(item.title, title) + self.assertEqual(item.link, link) + self.assertEqual(item.date, parse_date(date)) diff --git a/rss_reader/test/test_News.py b/rss_reader/test/test_News.py index 9ed4538b..4d28bbc4 100644 --- a/rss_reader/test/test_News.py +++ b/rss_reader/test/test_News.py @@ -32,3 +32,29 @@ def test_parse(self): self.assertEqual(item.title, title) self.assertEqual(item.link, link) self.assertEqual(item.date, parse_date(date)) + + def test_parse_dict(self): + feed = 'RSS Title' + title = 'Title' + link = 'https://example.com' + date = 'Mon, 1 Jan 2000 12:00:00 GMT' + + dict = { + 'feed': feed, + 'items': [ + { + 'title': title, + 'link': link, + 'date': date + } + ] + } + + news = News.parse_dict(dict) + self.assertEqual(news.feed, feed) + self.assertEqual(len(news.items), 1) + + item = news.items[0] + self.assertEqual(item.title, title) + self.assertEqual(item.link, link) + self.assertEqual(item.date, parse_date(date)) From 2911f4452fcfee43c58136344f44a3cd9ad99ef8 Mon Sep 17 00:00:00 2001 From: 0iskak Date: Sun, 12 Jun 2022 19:30:56 +0600 Subject: [PATCH 04/10] Implemented Iteration 4 --- rss_reader/src/args/ArgumentParser.py | 68 
+++++++++++++++++++++++++++ rss_reader/src/args/Arguments.py | 47 ------------------ rss_reader/src/news/Item.py | 13 +++++ rss_reader/src/news/News.py | 25 +++++++++- rss_reader/src/rss_reader.py | 62 +++++++++++++++++------- rss_reader/src/util/Util.py | 12 +++++ 6 files changed, 161 insertions(+), 66 deletions(-) create mode 100644 rss_reader/src/args/ArgumentParser.py delete mode 100644 rss_reader/src/args/Arguments.py diff --git a/rss_reader/src/args/ArgumentParser.py b/rss_reader/src/args/ArgumentParser.py new file mode 100644 index 00000000..fb205c16 --- /dev/null +++ b/rss_reader/src/args/ArgumentParser.py @@ -0,0 +1,68 @@ +import argparse +import os +from datetime import datetime + + +class ArgumentParser(argparse.ArgumentParser): + """ + Class-wrapper for the argparse.ArgumentParser class + """ + + def __init__(self, name: str, version: float): + super().__init__(description=name) + + self.add_argument('source', type=str, help='RSS URL', nargs='?', default=None) + self.add_argument('--version', action='version', version=f'v{version}') + self.add_argument('--json', action='store_true', help='show in JSON format') + self.add_argument('--verbose', action='store_true', help='show detailed information') + self.add_argument('--limit', type=int, help='limit the items') + self.add_argument('--date', type=str, help='show cached items from specified date') + self.add_argument('--to-html', type=str, help='generate HTML file and save it to specified path') + self.add_argument('--to-pdf', type=str, help='generate PDF file and save it to specified path') + + class Arguments: + source: str + json: bool + verbose: bool + limit: int + date: datetime + html: str + pdf: str + + def __init__(self, source: str, json: bool, verbose: bool, limit: int, date: str, html: str, pdf: str): + if source is None and date is None: + raise ValueError('Either source or date must be specified') + + if date is not None: + try: + date = datetime.strptime(date, '%Y%m%d') + except ValueError: + 
raise ValueError(f'date must be in %Y%m%d format') + + self.assert_file(html, '.html') + self.assert_file(pdf, '.pdf') + + self.source = source + self.json = json + self.verbose = verbose + self.limit = limit + self.date = date + self.html = html + self.pdf = pdf + + @staticmethod + def assert_file(path: str, ext: str) -> None: + if path is not None: + if not os.path.isabs(path): + raise ValueError(f'{path} must be an absolute path') + if not path.endswith(ext): + raise ValueError(f'{path} must have {ext} extension') + + def parse(self) -> Arguments: + """ + Parses the arguments. + """ + try: + return self.Arguments(*self.parse_args().__dict__.values()) + except ValueError as e: + self.error(str(e)) diff --git a/rss_reader/src/args/Arguments.py b/rss_reader/src/args/Arguments.py deleted file mode 100644 index 0dacd7ee..00000000 --- a/rss_reader/src/args/Arguments.py +++ /dev/null @@ -1,47 +0,0 @@ -from argparse import ArgumentParser -from datetime import datetime - - -class Arguments: - """ - Class-wrapper for ArgumentParser - """ - - source: str = None - json: bool - verbose: bool - limit: int - date: datetime = None - - def __init__(self, name: str, version: float): - parser = ArgumentParser(description=name) - - source = 'source' - json = 'json' - verbose = 'verbose' - limit = 'limit' - date = 'date' - - parser.add_argument('source', type=str, help='RSS URL', nargs='?', default=None) - parser.add_argument('--version', action='version', version=f'v{version}') - parser.add_argument(f'--{json}', action='store_true', help='show in JSON format') - parser.add_argument(f'--{verbose}', action='store_true', help='show detailed information') - parser.add_argument(f'--{limit}', type=int, help='limit the items') - parser.add_argument(f'--{date}', type=str, help='show cached items from specified date') - - args = parser.parse_args().__dict__ - - self.source = args.get(source) - self.json = args.get(json) - self.verbose = args.get(verbose) - self.limit = args.get(limit) - - 
date_str = args.get(date) - - if self.source is None and date_str is None: - parser.error(f'the following arguments are required: {source} or {date}') - elif date_str is not None: - try: - self.date = datetime.strptime(date_str, '%Y%m%d') - except ValueError: - parser.error(f'date must be in %Y%m%d format') diff --git a/rss_reader/src/news/Item.py b/rss_reader/src/news/Item.py index 645c8b78..808a783b 100644 --- a/rss_reader/src/news/Item.py +++ b/rss_reader/src/news/Item.py @@ -49,3 +49,16 @@ def parse_dict(item_dict: dict) -> 'Item': def __eq__(self, o: object) -> bool: return self.__dict__ == o.__dict__ + + def to_html(self) -> str: + images = '\n'.join(list(map(lambda image: f'', self.images))) + + return f''' +
+
+
Title: {self.title}
+
Date: {self.date}
+
Link: {self.link}
+ {images} +
+ ''' diff --git a/rss_reader/src/news/News.py b/rss_reader/src/news/News.py index c92be627..b02d94dd 100644 --- a/rss_reader/src/news/News.py +++ b/rss_reader/src/news/News.py @@ -23,7 +23,7 @@ def parse(element: Element, limit: int = None, date: datetime = None) -> 'News': feed = element.find('title').text items = [Item.parse(item) for item in element.findall('item')] if date is not None: - items = list(filter(lambda item: item.date.date() == date.date(), news.items)) + items = list(filter(lambda item: item.date.date() == date.date(), items)) items = items[:limit] return News(feed, items) @@ -39,3 +39,26 @@ def parse_dict(news_dict: dict) -> 'News': items = [Item.parse_dict(item) for item in items] return News(feed, items) + + def to_html(self) -> str: + items = '\n'.join(list(map(Item.to_html, self.items))) + html = f''' + + + + + + News + + + +
+

Feed: {self.feed}

+ {items} +
+ + + ''' + + return html diff --git a/rss_reader/src/rss_reader.py b/rss_reader/src/rss_reader.py index 081dcf23..49279df3 100644 --- a/rss_reader/src/rss_reader.py +++ b/rss_reader/src/rss_reader.py @@ -4,7 +4,7 @@ from urllib.error import URLError from xml.etree.ElementTree import ParseError -from args.Arguments import Arguments +from args.ArgumentParser import ArgumentParser from info import name, version from news.News import News from util.Logger import Logger @@ -32,14 +32,7 @@ def save_cache(path: os.path, news: News, logger: Logger) -> None: file.write(Util.to_json(news)) -def print_news(news: News, args: Arguments) -> None: - print( - Util.to_json(news) if args.json - else Util.to_str(news) - ) - - -def from_url(logger: Logger, args: Arguments, path: os.path) -> None: +def from_url(logger: Logger, args: ArgumentParser.Arguments, path: os.path) -> News: """ Get news from a URL. """ @@ -48,16 +41,14 @@ def from_url(logger: Logger, args: Arguments, path: os.path) -> None: element = Util.url_to_element(args.source) except (URLError, ValueError): logger.error('Invalid URL') - return + exit(-1) except ParseError: logger.error('Invalid XML') - return + exit(-1) logger.info('Parsing XML') news = News.parse(element, args.limit, args.date) - print_news(news, args) - cached = get_cache(path) cached.items += news.items @@ -70,8 +61,10 @@ def from_url(logger: Logger, args: Arguments, path: os.path) -> None: save_cache(path, cached, logger) + return news + -def from_local(logger: Logger, args: Arguments, path: os.path) -> None: +def from_local(logger: Logger, args: ArgumentParser.Arguments, path: os.path) -> News: """ Get news from a local cache. 
""" @@ -83,21 +76,54 @@ def from_local(logger: Logger, args: Arguments, path: os.path) -> None: items.append(item) items = items[:args.limit] - print_news(News('Local cached feed', items), args) + return News('Local cached feed', items) + + +def to_html(path: str, news: News, logger: Logger) -> str: + """ + Generate HTML file and save it to specified path. + """ + logger.info(f'Generating HTML file and saving it to {path}') + news = news.to_html() + + with open(path, 'w+') as file: + file.write(news) + + return news + + +def to_pdf(path: str, news: News, html: str, logger: Logger) -> None: + """ + Generate PDF file and save it to specified path. + """ + logger.info(f'Generating PDF file and saving it to {path}') + if html is None: + html = news.to_html() + Util.html_to_pdf(html, path) def main() -> None: """ Main function of the program. """ - args = Arguments(name, version) + args = ArgumentParser(name, version).parse() logger = Logger(args.verbose) path = os.path.join(Path(__file__).parent.parent, 'news.json') if args.source is None: - from_local(logger, args, path) + news = from_local(logger, args, path) else: - from_url(logger, args, path) + news = from_url(logger, args, path) + + print( + Util.to_json(news) if args.json + else Util.to_str(news) + ) + + if args.html is not None: + html = to_html(args.html, news, logger) + if args.pdf is not None: + to_pdf(args.pdf, news, html, logger) if __name__ == '__main__': diff --git a/rss_reader/src/util/Util.py b/rss_reader/src/util/Util.py index 8302b752..3f9bd178 100644 --- a/rss_reader/src/util/Util.py +++ b/rss_reader/src/util/Util.py @@ -1,8 +1,11 @@ +import contextlib import json import re from urllib.request import urlopen from xml.etree import ElementTree as ET +from xhtml2pdf import pisa + from .Color import Color @@ -46,3 +49,12 @@ def colorize(message: str, color: Color) -> str: Returns a string with colorized text. 
""" return f'{color.value}{message}{Color.END.value}' + + @staticmethod + def html_to_pdf(html: str, path: str): + """ + Converts an HTML file to a PDF file. + """ + with open(path, 'w+b') as file: + with contextlib.redirect_stdout(None): + pisa.CreatePDF(html, file) From 2450256ef2824e4280ced1305853269fb43acd0d Mon Sep 17 00:00:00 2001 From: 0iskak Date: Fri, 24 Jun 2022 22:14:26 +0600 Subject: [PATCH 05/10] reset --- rss_reader/setup.py | 18 ---- rss_reader/src/__init__.py | 0 rss_reader/src/args/ArgumentParser.py | 68 -------------- rss_reader/src/args/__init__.py | 0 rss_reader/src/info.py | 3 - rss_reader/src/news/Item.py | 64 ------------- rss_reader/src/news/News.py | 64 ------------- rss_reader/src/news/__init__.py | 0 rss_reader/src/rss_reader.py | 130 -------------------------- rss_reader/src/util/Color.py | 11 --- rss_reader/src/util/Logger.py | 57 ----------- rss_reader/src/util/Util.py | 60 ------------ rss_reader/src/util/__init__.py | 0 rss_reader/test/__init__.py | 0 rss_reader/test/test_Item.py | 44 --------- rss_reader/test/test_News.py | 60 ------------ rss_reader/test/test_Util.py | 18 ---- 17 files changed, 597 deletions(-) delete mode 100644 rss_reader/setup.py delete mode 100644 rss_reader/src/__init__.py delete mode 100644 rss_reader/src/args/ArgumentParser.py delete mode 100644 rss_reader/src/args/__init__.py delete mode 100644 rss_reader/src/info.py delete mode 100644 rss_reader/src/news/Item.py delete mode 100644 rss_reader/src/news/News.py delete mode 100644 rss_reader/src/news/__init__.py delete mode 100644 rss_reader/src/rss_reader.py delete mode 100644 rss_reader/src/util/Color.py delete mode 100644 rss_reader/src/util/Logger.py delete mode 100644 rss_reader/src/util/Util.py delete mode 100644 rss_reader/src/util/__init__.py delete mode 100644 rss_reader/test/__init__.py delete mode 100644 rss_reader/test/test_Item.py delete mode 100644 rss_reader/test/test_News.py delete mode 100644 rss_reader/test/test_Util.py diff --git 
a/rss_reader/setup.py b/rss_reader/setup.py deleted file mode 100644 index f57dda73..00000000 --- a/rss_reader/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -from setuptools import setup, find_packages - -from src.info import shortname, version - -setup( - name=shortname, - version=str(version), - package_dir={'': 'src'}, - packages=find_packages('src').append(''), - install_requires=[ - 'python-dateutil', - ], - entry_points={ - 'console_scripts': [ - 'rss_reader = rss_reader:main', - ] - } -) diff --git a/rss_reader/src/__init__.py b/rss_reader/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rss_reader/src/args/ArgumentParser.py b/rss_reader/src/args/ArgumentParser.py deleted file mode 100644 index fb205c16..00000000 --- a/rss_reader/src/args/ArgumentParser.py +++ /dev/null @@ -1,68 +0,0 @@ -import argparse -import os -from datetime import datetime - - -class ArgumentParser(argparse.ArgumentParser): - """ - Class-wrapper for the argparse.ArgumentParser class - """ - - def __init__(self, name: str, version: float): - super().__init__(description=name) - - self.add_argument('source', type=str, help='RSS URL', nargs='?', default=None) - self.add_argument('--version', action='version', version=f'v{version}') - self.add_argument('--json', action='store_true', help='show in JSON format') - self.add_argument('--verbose', action='store_true', help='show detailed information') - self.add_argument('--limit', type=int, help='limit the items') - self.add_argument('--date', type=str, help='show cached items from specified date') - self.add_argument('--to-html', type=str, help='generate HTML file and save it to specified path') - self.add_argument('--to-pdf', type=str, help='generate PDF file and save it to specified path') - - class Arguments: - source: str - json: bool - verbose: bool - limit: int - date: datetime - html: str - pdf: str - - def __init__(self, source: str, json: bool, verbose: bool, limit: int, date: str, html: str, pdf: str): - if 
source is None and date is None: - raise ValueError('Either source or date must be specified') - - if date is not None: - try: - date = datetime.strptime(date, '%Y%m%d') - except ValueError: - raise ValueError(f'date must be in %Y%m%d format') - - self.assert_file(html, '.html') - self.assert_file(pdf, '.pdf') - - self.source = source - self.json = json - self.verbose = verbose - self.limit = limit - self.date = date - self.html = html - self.pdf = pdf - - @staticmethod - def assert_file(path: str, ext: str) -> None: - if path is not None: - if not os.path.isabs(path): - raise ValueError(f'{path} must be an absolute path') - if not path.endswith(ext): - raise ValueError(f'{path} must have {ext} extension') - - def parse(self) -> Arguments: - """ - Parses the arguments. - """ - try: - return self.Arguments(*self.parse_args().__dict__.values()) - except ValueError as e: - self.error(str(e)) diff --git a/rss_reader/src/args/__init__.py b/rss_reader/src/args/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rss_reader/src/info.py b/rss_reader/src/info.py deleted file mode 100644 index b995feef..00000000 --- a/rss_reader/src/info.py +++ /dev/null @@ -1,3 +0,0 @@ -name: str = 'Pure Python command-line RSS reader' -shortname: str = 'RSS Reader' -version: float = 0.1 diff --git a/rss_reader/src/news/Item.py b/rss_reader/src/news/Item.py deleted file mode 100644 index 808a783b..00000000 --- a/rss_reader/src/news/Item.py +++ /dev/null @@ -1,64 +0,0 @@ -from datetime import datetime -from xml.etree.ElementTree import Element - -from dateutil.parser import parse as parse_date - - -class Item: - """ - Class for a news item. 
- """ - title: str - date: datetime - link: str - images: list[str] - - def __init__(self, title: str, date: datetime, link: str, images: list[str]): - self.title = title - self.date = date - self.link = link - self.images = images - - @staticmethod - def parse(element: Element) -> 'Item': - """ - Parse an XML element into a news item. - """ - title = element.find('title').text - date = parse_date(element.find('pubDate').text) - link = element.find('link').text - images = [ - image.attrib['url'] for image in - element.findall('*[@width][@height]') - ] - - return Item(title, date, link, images) - - @staticmethod - def parse_dict(item_dict: dict) -> 'Item': - """ - Parse a dictionary into a news item. - """ - - title = item_dict.get('title') - date = parse_date(item_dict.get('date')) - link = item_dict.get('link') - images = item_dict.get('images') - - return Item(title, date, link, images) - - def __eq__(self, o: object) -> bool: - return self.__dict__ == o.__dict__ - - def to_html(self) -> str: - images = '\n'.join(list(map(lambda image: f'', self.images))) - - return f''' -
-
-
Title: {self.title}
-
Date: {self.date}
-
Link: {self.link}
- {images} -
- ''' diff --git a/rss_reader/src/news/News.py b/rss_reader/src/news/News.py deleted file mode 100644 index b02d94dd..00000000 --- a/rss_reader/src/news/News.py +++ /dev/null @@ -1,64 +0,0 @@ -from datetime import datetime -from xml.etree.ElementTree import Element - -from .Item import Item - - -class News: - """ - News class - """ - feed: str - items: list[Item] - - def __init__(self, feed: str, items: list[Item]): - self.feed = feed - self.items = items - - @staticmethod - def parse(element: Element, limit: int = None, date: datetime = None) -> 'News': - """ - Parse XML element to News object - """ - feed = element.find('title').text - items = [Item.parse(item) for item in element.findall('item')] - if date is not None: - items = list(filter(lambda item: item.date.date() == date.date(), items)) - items = items[:limit] - - return News(feed, items) - - @staticmethod - def parse_dict(news_dict: dict) -> 'News': - """ - Parse a dictionary to News object - """ - feed = news_dict.get('feed') - - items = news_dict.get('items') - items = [Item.parse_dict(item) for item in items] - - return News(feed, items) - - def to_html(self) -> str: - items = '\n'.join(list(map(Item.to_html, self.items))) - html = f''' - - - - - - News - - - -
-

Feed: {self.feed}

- {items} -
- - - ''' - - return html diff --git a/rss_reader/src/news/__init__.py b/rss_reader/src/news/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rss_reader/src/rss_reader.py b/rss_reader/src/rss_reader.py deleted file mode 100644 index 49279df3..00000000 --- a/rss_reader/src/rss_reader.py +++ /dev/null @@ -1,130 +0,0 @@ -import json -import os -from pathlib import Path -from urllib.error import URLError -from xml.etree.ElementTree import ParseError - -from args.ArgumentParser import ArgumentParser -from info import name, version -from news.News import News -from util.Logger import Logger -from util.Util import Util - - -def get_cache(path: os.path) -> News: - """ - Get the cache from the local file. - """ - if os.path.exists(path): - with open(path, 'r') as file: - return News.parse_dict(json.loads(file.read())) - else: - news = News('Local cached feed', []) - return news - - -def save_cache(path: os.path, news: News, logger: Logger) -> None: - """ - Save the cache to the local file. - """ - logger.info('Saving to cache') - with open(path, 'w') as file: - file.write(Util.to_json(news)) - - -def from_url(logger: Logger, args: ArgumentParser.Arguments, path: os.path) -> News: - """ - Get news from a URL. - """ - logger.info(f'Downloading from {args.source}') - try: - element = Util.url_to_element(args.source) - except (URLError, ValueError): - logger.error('Invalid URL') - exit(-1) - except ParseError: - logger.error('Invalid XML') - exit(-1) - - logger.info('Parsing XML') - news = News.parse(element, args.limit, args.date) - - cached = get_cache(path) - cached.items += news.items - - items = [] - for item in cached.items: - if item not in items: - items.append(item) - - cached.items = items - - save_cache(path, cached, logger) - - return news - - -def from_local(logger: Logger, args: ArgumentParser.Arguments, path: os.path) -> News: - """ - Get news from a local cache. 
- """ - logger.info('Getting items from local cache: no source specified') - news = get_cache(path) - items = [] - for item in news.items: - if item.date.date() == args.date.date(): - items.append(item) - items = items[:args.limit] - - return News('Local cached feed', items) - - -def to_html(path: str, news: News, logger: Logger) -> str: - """ - Generate HTML file and save it to specified path. - """ - logger.info(f'Generating HTML file and saving it to {path}') - news = news.to_html() - - with open(path, 'w+') as file: - file.write(news) - - return news - - -def to_pdf(path: str, news: News, html: str, logger: Logger) -> None: - """ - Generate PDF file and save it to specified path. - """ - logger.info(f'Generating PDF file and saving it to {path}') - if html is None: - html = news.to_html() - Util.html_to_pdf(html, path) - - -def main() -> None: - """ - Main function of the program. - """ - args = ArgumentParser(name, version).parse() - logger = Logger(args.verbose) - path = os.path.join(Path(__file__).parent.parent, 'news.json') - - if args.source is None: - news = from_local(logger, args, path) - else: - news = from_url(logger, args, path) - - print( - Util.to_json(news) if args.json - else Util.to_str(news) - ) - - if args.html is not None: - html = to_html(args.html, news, logger) - if args.pdf is not None: - to_pdf(args.pdf, news, html, logger) - - -if __name__ == '__main__': - main() diff --git a/rss_reader/src/util/Color.py b/rss_reader/src/util/Color.py deleted file mode 100644 index bfddc438..00000000 --- a/rss_reader/src/util/Color.py +++ /dev/null @@ -1,11 +0,0 @@ -from enum import Enum - - -class Color(Enum): - """ - Enum for color. 
- """ - RED = '\033[91m' - GREEN = '\033[92m' - YELLOW = '\033[93m' - END = '\033[0m' diff --git a/rss_reader/src/util/Logger.py b/rss_reader/src/util/Logger.py deleted file mode 100644 index 4757584c..00000000 --- a/rss_reader/src/util/Logger.py +++ /dev/null @@ -1,57 +0,0 @@ -from enum import Enum - -from .Color import Color -from .Util import Util - - -class Logger: - """ - Logger class - """ - - class Level(Enum): - """ - Enum-class for logging levels - """ - INFO = Color.GREEN - WARNING = Color.YELLOW - ERROR = Color.RED - - def __init__(self, verbose: bool): - self.logger = print if verbose else lambda *args: None - self.force_logger = print - - def log(self, message: str, level: Level) -> None: - """ - Logs a message with a given level - logger is print if verbose is True - else is a lambda function that does nothing - """ - message = f'[{level.name}] {message}' - self.logger(Util.colorize(message, level.value)) - - def force_log(self, message: str, level: Level) -> None: - """ - Logs a message with a given level - but prints even if verbose is False - """ - message = f'[{level.name}] {message}' - self.force_logger(Util.colorize(message, level.value)) - - def info(self, message: str) -> None: - """ - Logs a message with INFO level - """ - self.log(message, Logger.Level.INFO) - - def warning(self, message: str) -> None: - """ - Logs a message with WARNING level - """ - self.log(message, Logger.Level.WARNING) - - def error(self, message: str) -> None: - """ - Logs a message with ERROR level - """ - self.force_log(message, Logger.Level.ERROR) diff --git a/rss_reader/src/util/Util.py b/rss_reader/src/util/Util.py deleted file mode 100644 index 3f9bd178..00000000 --- a/rss_reader/src/util/Util.py +++ /dev/null @@ -1,60 +0,0 @@ -import contextlib -import json -import re -from urllib.request import urlopen -from xml.etree import ElementTree as ET - -from xhtml2pdf import pisa - -from .Color import Color - - -class Util: - indent = 2 - - @staticmethod - def 
url_to_element(source: str) -> ET.Element: - """ - Returns an ElementTree.Element from a URL. - """ - with urlopen(source) as file: - return ET.parse(file).getroot().find('channel') - - @staticmethod - def to_json(obj: object) -> str: - """ - Returns a JSON string from an object. - """ - return json.dumps( - obj, indent=Util.indent, default=lambda o: - o.__dict__ if hasattr(o, '__dict__') else str(o) - ) - - @staticmethod - def to_str(obj: object) -> str: - """ - Returns a string from an object. - """ - string = Util.to_json(obj) - string = re.sub(r'^\s*[{}],?\s?', '', string, flags=re.MULTILINE) - string = re.sub(r'^\s{' + str(Util.indent) + '}', '', string, flags=re.MULTILINE) - string = re.sub(r'"(\w+)"', lambda match: match.group(1).capitalize(), string, flags=re.MULTILINE) - string = re.sub(r'"(.+)",?', lambda match: match.group(1), string, flags=re.MULTILINE) - string = re.sub(r'[\[\]]', '', string, flags=re.MULTILINE) - return string.strip() - - @staticmethod - def colorize(message: str, color: Color) -> str: - """ - Returns a string with colorized text. - """ - return f'{color.value}{message}{Color.END.value}' - - @staticmethod - def html_to_pdf(html: str, path: str): - """ - Converts an HTML file to a PDF file. 
- """ - with open(path, 'w+b') as file: - with contextlib.redirect_stdout(None): - pisa.CreatePDF(html, file) diff --git a/rss_reader/src/util/__init__.py b/rss_reader/src/util/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rss_reader/test/__init__.py b/rss_reader/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rss_reader/test/test_Item.py b/rss_reader/test/test_Item.py deleted file mode 100644 index 71dc5408..00000000 --- a/rss_reader/test/test_Item.py +++ /dev/null @@ -1,44 +0,0 @@ -from unittest import TestCase -from xml.etree.ElementTree import fromstring - -from dateutil.parser import parse as parse_date - -from rss_reader.src.news.Item import Item - - -class TestItem(TestCase): - def test_parse(self): - title = 'Title' - link = 'https://example.com' - date = 'Mon, 1 Jan 2000 12:00:00 GMT' - - rss = f''' - - {title} - {link} - {date} - - ''' - - item = Item.parse(fromstring(rss)) - - self.assertEqual(item.title, title) - self.assertEqual(item.link, link) - self.assertEqual(item.date, parse_date(date)) - - def test_parse_dict(self): - title = 'Title' - link = 'https://example.com' - date = 'Mon, 1 Jan 2000 12:00:00 GMT' - - dict = { - 'title': title, - 'link': link, - 'date': date - } - - item = Item.parse_dict(dict) - - self.assertEqual(item.title, title) - self.assertEqual(item.link, link) - self.assertEqual(item.date, parse_date(date)) diff --git a/rss_reader/test/test_News.py b/rss_reader/test/test_News.py deleted file mode 100644 index 4d28bbc4..00000000 --- a/rss_reader/test/test_News.py +++ /dev/null @@ -1,60 +0,0 @@ -from unittest import TestCase -from xml.etree.ElementTree import fromstring - -from dateutil.parser import parse as parse_date - -from rss_reader.src.news.News import News - - -class TestNews(TestCase): - def test_parse(self): - feed = 'RSS Title' - title = 'Title' - link = 'https://example.com' - date = 'Mon, 1 Jan 2000 12:00:00 GMT' - - rss = f''' - - {feed} - - {title} - {link} 
- {date} - - - ''' - - news = News.parse(fromstring(rss)) - self.assertEqual(news.feed, feed) - self.assertEqual(len(news.items), 1) - - item = news.items[0] - self.assertEqual(item.title, title) - self.assertEqual(item.link, link) - self.assertEqual(item.date, parse_date(date)) - - def test_parse_dict(self): - feed = 'RSS Title' - title = 'Title' - link = 'https://example.com' - date = 'Mon, 1 Jan 2000 12:00:00 GMT' - - dict = { - 'feed': feed, - 'items': [ - { - 'title': title, - 'link': link, - 'date': date - } - ] - } - - news = News.parse_dict(dict) - self.assertEqual(news.feed, feed) - self.assertEqual(len(news.items), 1) - - item = news.items[0] - self.assertEqual(item.title, title) - self.assertEqual(item.link, link) - self.assertEqual(item.date, parse_date(date)) diff --git a/rss_reader/test/test_Util.py b/rss_reader/test/test_Util.py deleted file mode 100644 index 581d8e6e..00000000 --- a/rss_reader/test/test_Util.py +++ /dev/null @@ -1,18 +0,0 @@ -import json -from unittest import TestCase - -from rss_reader.src.util.Util import Util - - -class TestUtil(TestCase): - def test_to_json(self): - obj = { - 'title': 'Title', - 'link': 'Link', - 'description': 'Description' - } - - self.assertEqual( - obj, - json.loads(Util.to_json(obj)) - ) From 389c62b9a9bdef7818c6964e8f09b529c87a482e Mon Sep 17 00:00:00 2001 From: 0iskak Date: Sun, 26 Jun 2022 21:14:58 +0600 Subject: [PATCH 06/10] refactored and implemented Iteration 5 --- setup.py | 20 ++++++++ src/Arguments.py | 50 +++++++++++++++++++ src/Logger.py | 30 ++++++++++++ src/Util.py | 119 ++++++++++++++++++++++++++++++++++++++++++++++ src/info.py | 3 ++ src/main.py | 80 +++++++++++++++++++++++++++++++ src/news/Item.py | 90 +++++++++++++++++++++++++++++++++++ src/news/News.py | 93 ++++++++++++++++++++++++++++++++++++ src/rss_reader.py | 9 ++++ 9 files changed, 494 insertions(+) create mode 100644 setup.py create mode 100644 src/Arguments.py create mode 100644 src/Logger.py create mode 100644 src/Util.py create 
mode 100644 src/info.py create mode 100644 src/main.py create mode 100644 src/news/Item.py create mode 100644 src/news/News.py create mode 100644 src/rss_reader.py diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..83693558 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup, find_packages + +import src.info as info + +setup( + name=info.shortname, + version=info.version, + package_dir={'': 'src'}, + packages=find_packages().append(''), + install_requres=[ + 'colorama', + 'Pillow', + 'python_dateutil' + ], + entry_points={ + 'console_scripts': [ + 'rss_reader = rss_reader:main', + ] + } +) diff --git a/src/Arguments.py b/src/Arguments.py new file mode 100644 index 00000000..07aa4b7e --- /dev/null +++ b/src/Arguments.py @@ -0,0 +1,50 @@ +from argparse import ArgumentParser, Namespace +from datetime import datetime +from pathlib import Path + + +class Arguments(ArgumentParser): + class Exception(ValueError): + def __init__(self, message: str): + self.message = message + + def __init__(self, name: str, version: float): + super().__init__(description=name) + + self.add_argument('source', type=str, help='RSS URL', nargs='?') + self.add_argument('--version', action='version', version=f'v{version}') + self.add_argument('--json', action='store_true', help='show in JSON format') + self.add_argument('--verbose', action='store_true', help='show detailed information') + self.add_argument('--limit', type=int, help='limit the items') + self.add_argument('--date', type=str, help='show items from specified date') + self.add_argument('--to-fb2', type=str, help='save in fb2 format to specified path') + self.add_argument('--to-html', type=str, help='save in html format to specified path') + self.add_argument('--colorize', action='store_true', help='show items as colorized') + + def parse(self) -> Namespace: + try: + args = self.parse_args() + + if args.source is None and args.date is None: + raise self.Exception('Either source or date should 
be specified') + + if args.date is not None: + try: + args.date = datetime.strptime(args.date, '%Y%m%d') + except ValueError: + raise self.Exception('Date must be in %Y%m%d format') + + self.check_absolute(args.to_fb2, '.fb2') + self.check_absolute(args.to_html, '.html') + + return args + except self.Exception as e: + self.error(e.message) + + def check_absolute(self, path: str, ext: str) -> None: + if path is None: + return + if not Path(path).is_absolute(): + raise self.Exception(path + ': should be absolute') + if not path.endswith(ext): + raise self.Exception(path + ': extension should be ' + ext) diff --git a/src/Logger.py b/src/Logger.py new file mode 100644 index 00000000..2e662d61 --- /dev/null +++ b/src/Logger.py @@ -0,0 +1,30 @@ +from enum import Enum +from typing import Callable + +from colorama import Fore + +from Util import Util + + +class Level(Enum): + INFO: Fore = Fore.GREEN + WARNING: Fore = Fore.YELLOW + ERROR: Fore = Fore.RED + + +class Logger: + __logger: Callable + + @classmethod + def verbose(cls, is_verbose: bool): + cls.__logger = print if is_verbose else lambda *args: None + + @classmethod + def log(cls, level: Level, message: str) -> None: + message = Util.colorize(f'[{level.name}] {message}', level.value) + + if level is Level.ERROR: + print(message) + exit(-1) + + cls.__logger(message) diff --git a/src/Util.py b/src/Util.py new file mode 100644 index 00000000..8dcc80f2 --- /dev/null +++ b/src/Util.py @@ -0,0 +1,119 @@ +import base64 +import io +import json +import re +from pathlib import Path +from typing import Union +from urllib.request import urlopen +from xml.etree.ElementTree import Element, parse as parse_xml + +from PIL import Image +from colorama import Fore + + +class UtilException(Exception): + def __init__(self, message): + self.message = message + + +class Util: + indent = ' ' + cached_file = Path(__file__).parent.parent.joinpath('cached.json') + + @classmethod + def colorize(cls, message: str, color: str, back_color: str = 
None) -> str: + message = color + message + Fore.RESET + if back_color is not None: + message = Fore.RESET + message + back_color + return message + + @classmethod + def to_element(cls, url: str) -> Element: + with urlopen(url) as content: + return parse_xml(content).getroot().find('channel') + + @classmethod + def to_json(cls, obj: object) -> str: + string = json.dumps(obj, indent=Util.indent, default=cls.safe_vars) + return string + + @classmethod + def safe_vars(cls, obj: object) -> Union[dict, str]: + try: + return vars(obj) + except: + return str(obj) + + @classmethod + def vars_to_string(cls, obj: object): + items = [] + for key, value in vars(obj).items(): + if isinstance(value, list): + continue + items.append(f'{key.capitalize()}: {value}') + + return '\n'.join(items) + + @classmethod + def make_indent(cls, string: str): + return re.sub( + '^', cls.indent, string, + flags=re.MULTILINE + ) + + @classmethod + def get_cached(cls) -> str: + content: str + try: + with open(cls.cached_file, 'r') as file: + content = file.read() + except: + content = '' + + return content + + @classmethod + def save_cache(cls, content: str) -> None: + with open(cls.cached_file, 'w+') as file: + file.write(content) + + @classmethod + def unique(cls, items: list) -> list: + unique = [] + for item in items: + if item in unique: + continue + unique.append(item) + + return unique + + @classmethod + def save(cls, content: str, path: str): + with open(path, 'w+') as file: + file.write(content) + + @classmethod + def to_base64(cls, url: str) -> str: + with urlopen(url) as content: + with io.BytesIO(content.read()) as buf: + with io.BytesIO() as image_buf: + with Image.open(buf) as image: + image.thumbnail((500, 500), Image.ANTIALIAS) + image.save(image_buf, 'PNG') + + return base64.b64encode(image_buf.getvalue()).decode() + + @classmethod + def colorize_object(cls, string: str): + main = Fore.CYAN + key = Fore.YELLOW + curly = Fore.WHITE + square = Fore.WHITE + string = 
Util.regex(r'[\[\]]', cls.colorize(r'\g<0>', square, main), string) + string = Util.regex(r'[\{\}]', cls.colorize(r'\g<0>', curly, main), string) + string = Util.regex(r'^ *(\"|\w)+:', cls.colorize(r'\g<0>', key, main), string) + return string + Fore.RESET + + @classmethod + def regex(cls, pattern: str, replace: str, string: str) -> str: + return re.sub(pattern, replace, string, flags=re.MULTILINE) diff --git a/src/info.py b/src/info.py new file mode 100644 index 00000000..0ca87267 --- /dev/null +++ b/src/info.py @@ -0,0 +1,3 @@ +fullname = 'Pure Python command-line RSS reader' +shortname = 'RSS Reader' +version = 0.1 diff --git a/src/main.py b/src/main.py new file mode 100644 index 00000000..9bd320b0 --- /dev/null +++ b/src/main.py @@ -0,0 +1,80 @@ +import json +from datetime import datetime +from xml.etree.ElementTree import ParseError + +import info +from Arguments import Arguments +from Logger import Logger, Level +from Util import Util +from news.News import News + +args = Arguments(info.fullname, info.version).parse() +Logger.verbose(args.verbose) + + +def get_news(source: str, limit: int, date: datetime) -> News: + Logger.log(Level.INFO, 'Downloading and parsing ' + source) + try: + element = Util.to_element(source) + except ValueError: + Logger.log(Level.ERROR, 'Invalid URL') + except ParseError: + Logger.log(Level.ERROR, 'Invalid XML') + + return News.parse_element(element, limit, date) + + +def print_news(news_to_print: News, to_json: bool, to_colorize: bool) -> None: + string = Util.to_json(news_to_print) \ + if to_json else str(news_to_print) + string = '\n' + string + print( + Util.colorize_object(string) if to_colorize + else string + ) + + +def get_cached_news(cached: str) -> News: + Logger.log(Level.INFO, 'Getting cached items and parsing') + if cached: + news_cached = News.parse_json(json.loads(cached)) + else: + Logger.log(Level.WARNING, 'Cached items are empty') + news_cached = News() + news_cached.feed = 'Cached news' + news_cached.items = [] + 
+ return news_cached + + +def save_cached_news(news_to_cache: News) -> None: + Logger.log(Level.INFO, 'Caching news') + Util.save_cache(Util.to_json(news_to_cache)) + + +def save_to_files(to_save: News, html: str, fb2: str) -> None: + if html is not None: + Logger.log(Level.INFO, 'Saving as html') + Util.save(to_save.to_html(), html) + if fb2 is not None: + Logger.log(Level.INFO, 'Saving as fb2') + Util.save(to_save.to_fb2(), fb2) + + +cached_news = get_cached_news(Util.get_cached()) + +to_print: News + +if args.source: + news = get_news(args.source, args.limit, args.date) + cached_news.items += news.items + cached_news.items = Util.unique(cached_news.items) + save_cached_news(cached_news) + + to_print = news +else: + cached_news.items = News.date_filter(cached_news.items, args.date)[:args.limit] + to_print = cached_news + +save_to_files(to_print, args.to_html, args.to_fb2) +print_news(to_print, args.json, args.colorize) diff --git a/src/news/Item.py b/src/news/Item.py new file mode 100644 index 00000000..a34a0c94 --- /dev/null +++ b/src/news/Item.py @@ -0,0 +1,90 @@ +from datetime import datetime +from xml.etree.ElementTree import Element + +from dateutil.parser import parse as parse_date + +from Util import Util + + +class Item: + title: str + date: datetime + link: str + images: list[str] + + @classmethod + def parse_element(cls, element: Element) -> 'Item': + item = cls() + + item.title = element.find('title').text + item.date = parse_date(element.find('pubDate').text) + item.link = element.find('link').text + item.images = [ + image.attrib['url'] for image in + element.findall('*[@width][@height]') + ] + + return item + + @classmethod + def parse_json(cls, obj: dict) -> 'Item': + item = cls() + item.title = obj['title'] + item.date = parse_date(obj['date']) + item.link = obj['link'] + item.images = obj['images'] + + return item + + def __str__(self) -> str: + items = [Util.vars_to_string(self), 'Images:', + Util.make_indent('\n'.join(self.images))] + + return 
'\n'.join(items) + + def __eq__(self, o: object) -> bool: + return isinstance(o, Item) and \ + Util.vars_to_string(self) \ + == Util.vars_to_string(o) + + def to_fb2(self, images_id: str) -> str: + content = '
' \ + '' \ + f'<p>{self.title}</p>' \ + '' \ + f'

{self.date}

' \ + f'

{self.link}

' + for i in range(len(self.images)): + image_id = f'{images_id}{i}' + content += f'

' + content += '
' + + return content + + def to_fb2_images(self, images_id: str) -> str: + content = '' + i = 0 + for image in self.images: + image_id = f'{images_id}{i}' + content += f'' \ + f'{Util.to_base64(image)}' \ + '' + i += 1 + + return content + + def to_html(self) -> str: + content = '
' \ + '
' \ + f'

{self.title}

' \ + f'

{self.date}

' \ + '
' + for image in self.images: + content += '
' \ + f'' \ + '
' + content += '
' \ + '
' \ + '
' + + return content diff --git a/src/news/News.py b/src/news/News.py new file mode 100644 index 00000000..351958d4 --- /dev/null +++ b/src/news/News.py @@ -0,0 +1,93 @@ +from datetime import datetime +from xml.etree.ElementTree import Element + +from Util import Util +from news.Item import Item + + +class News: + feed: str + items: list[Item] + + @classmethod + def parse_element(cls, element: Element, limit: int, date: datetime) -> 'News': + news = cls() + news.feed = element.find('title').text + + items = [Item.parse_element(item) for item in element.findall('item')] + if date is not None: + items = cls.date_filter(items, date) + + news.items = items[:limit] + + return news + + @classmethod + def parse_json(cls, obj: dict) -> 'News': + news = cls() + news.feed = obj['feed'] + news.items = list(map( + lambda item: Item.parse_json(item), + obj['items'] + )) + + return news + + def __str__(self) -> str: + items = [Util.vars_to_string(self), 'Items:', + Util.make_indent('\n\n'.join(map(str, self.items)))] + return '\n'.join(items) + + @classmethod + def date_filter(cls, items: list[Item], date: datetime) -> list[Item]: + date_format = '%Y-%m-%d' + return list(filter( + lambda item: + item.date.strftime(date_format) == date.strftime(date_format), + items + )) + + def to_fb2(self) -> str: + content = '' \ + '' \ + '' \ + '' \ + f'{self.feed}' \ + '' \ + '' \ + '' + i = 0 + for item in self.items: + content += item.to_fb2(i) + i += 1 + content += '' + i = 0 + for item in self.items: + content += item.to_fb2_images(i) + i += 1 + content += '' + + return content + + def to_html(self) -> str: + content = '' \ + '' \ + '' \ + '' \ + '' \ + 'News' \ + '' \ + '' \ + '' \ + '
' \ + '
' \ + f'

{self.feed}

' + for item in self.items: + content += item.to_html() + content += '
' \ + '
' \ + '' \ + '' + + return content diff --git a/src/rss_reader.py b/src/rss_reader.py new file mode 100644 index 00000000..3f7582ba --- /dev/null +++ b/src/rss_reader.py @@ -0,0 +1,9 @@ +import runpy + + +def main(): + runpy.run_module('main') + + +if __name__ == '__main__': + main() From 7dfd251a6f3c9b75f4db22eba2d04a452a2fcbbe Mon Sep 17 00:00:00 2001 From: 0iskak Date: Mon, 27 Jun 2022 17:52:33 +0600 Subject: [PATCH 07/10] added requirements.txt --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..e7e9990e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +colorama +Pillow +python_dateutil +setuptools From 0ff81267a0acb2ec0b77b9103d0f4db16b693884 Mon Sep 17 00:00:00 2001 From: 0iskak Date: Mon, 27 Jun 2022 21:23:03 +0600 Subject: [PATCH 08/10] added unit tests --- test/test_Logger.py | 30 ++++++++++++++++++++++++++++++ test/test_News.py | 32 ++++++++++++++++++++++++++++++++ test/test_Util.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 test/test_Logger.py create mode 100644 test/test_News.py create mode 100644 test/test_Util.py diff --git a/test/test_Logger.py b/test/test_Logger.py new file mode 100644 index 00000000..4d14bfe9 --- /dev/null +++ b/test/test_Logger.py @@ -0,0 +1,30 @@ +import io +from contextlib import redirect_stdout +from unittest import TestCase +from src.Logger import Logger, Level + + +class TestLogger(TestCase): + def test_log(self): + Logger.verbose(True) + string = 'test' + + with io.StringIO() as buf1: + with redirect_stdout(buf1): + Logger.log(Level.INFO, string) + Logger.log(Level.WARNING, string) + self.assertRaises(SystemExit, Logger.log, Level.ERROR, string) + for line in buf1.getvalue().strip().split('\n'): + self.assertRegex(line, string) + + Logger.verbose(False) + with io.StringIO() as buf2: + with redirect_stdout(buf2): + Logger.log(Level.INFO, 
string) + Logger.log(Level.WARNING, string) + self.assertEqual(len(buf2.getvalue()), 0) + + with io.StringIO() as buf3: + with redirect_stdout(buf3): + self.assertRaises(SystemExit, Logger.log, Level.ERROR, string) + self.assertRegex(buf3.getvalue(), string) diff --git a/test/test_News.py b/test/test_News.py new file mode 100644 index 00000000..a47ff913 --- /dev/null +++ b/test/test_News.py @@ -0,0 +1,32 @@ +import datetime +from unittest import TestCase +from src.news.News import News + + +class TestNews(TestCase): + def test_parse_json(self): + feed = 'test_feed' + title = 'test_title' + date = datetime.datetime.today() + link = 'test_link' + images = ['test_image'] + obj = { + 'feed': feed, + 'items': [{ + 'title': title, + 'date': str(date), + 'link': link, + 'images': images + }] + } + + news = News.parse_json(obj) + self.assertEqual(news.feed, feed) + self.assertEqual(len(news.items), 1) + + item = news.items[0] + self.assertEqual(item.title, title) + self.assertEqual(item.date, date) + self.assertEqual(item.link, link) + self.assertEqual(item.images, images) + diff --git a/test/test_Util.py b/test/test_Util.py new file mode 100644 index 00000000..73d66f49 --- /dev/null +++ b/test/test_Util.py @@ -0,0 +1,36 @@ +import datetime +import json +from unittest import TestCase +from src.Util import Util +from colorama import Fore + + +class TestUtil(TestCase): + def test_colorize(self): + string = 'dsa' + self.assertRegex(Util.colorize(string, Fore.GREEN), string) + + def test_safe_vars(self): + for item in ['a', {'a': 'a'}, self, datetime.date.today()]: + try: + Util.safe_vars(item) + except: + self.fail() + + def test_to_json(self): + obj = {'a': 1, 'b': 2} + self.assertEqual(obj, json.loads(Util.to_json(obj))) + + def test_make_intend(self): + string = 'test' + self.assertEqual(Util.make_indent(string), Util.indent + string) + + def test_unique(self): + unique = [1, 2, 3, 4, 5] + self.assertEqual(unique, Util.unique(unique + [1, 2, 4])) + + def test_regex(self): 
+ string = 'test' + pattern = '^t' + replace = 'the t' + self.assertEqual(Util.regex(pattern, replace, string), 'the ' + string) From de807780780850ae98ff439244c8b57a39c5afe5 Mon Sep 17 00:00:00 2001 From: 0iskak Date: Mon, 27 Jun 2022 22:27:33 +0600 Subject: [PATCH 09/10] add README.md --- README.md | 80 +++++++++++++++++++++---------- src/Util.py | 2 +- {test => src/test}/test_Logger.py | 2 +- {test => src/test}/test_News.py | 2 +- {test => src/test}/test_Util.py | 2 +- src/test_all.py | 9 ++++ 6 files changed, 67 insertions(+), 30 deletions(-) rename {test => src/test}/test_Logger.py (96%) rename {test => src/test}/test_News.py (96%) rename {test => src/test}/test_Util.py (97%) create mode 100644 src/test_all.py diff --git a/README.md b/README.md index c86d1e65..41af19fd 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,56 @@ -# How to create a PR with a homework task - -1. Create fork from the following repo: https://github.com/E-P-T/Homework. (Docs: https://docs.github.com/en/get-started/quickstart/fork-a-repo ) -2. Clone your forked repo in your local folder. -3. Create separate branches for each session.Example(`session_2`, `session_3` and so on) -4. Create folder with you First and Last name in you forked repo in the created session. -5. Add your task into created folder -6. Push finished session task in the appropriate branch in accordance with written above. - You should get the structure that looks something like that - +## How to run +### Install requirements +```shell +pip install -r .\requirements.txt ``` - Branch: Session_2 - DzmitryKolb - |___Task1.py - |___Task2.py - Branch: Session_3 - DzmitryKolb - |___Task1.py - |___Task2.py +### Application +```shell +python .\src\rss_reader.py ``` - -7. 
When you finish your work on task you should create Pull request to the appropriate branch of the main repo https://github.com/E-P-T/Homework (Docs: https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). -Please use the following instructions to prepare good description of the pull request: - - Pull request header should be: `Session - `. - Example: `Session 2 - Dzmitry Kolb` - - Pull request body: You should write here what tasks were implemented. - Example: `Finished: Task 1.2, Task 1.3, Task 1.6` - +### Tests +```shell +python .\src\test_all.py +``` +--- +## Print format +### JSON +#### News: +```json +{ + "feed": string, + "items": array of Items +} +``` +#### Item: +```json +{ + "title": string, + "date": datetime, + "link": string, + "images": array of strings +} +``` +### String +Same as JSON but without brackets and quotes +#### News: +```yaml +feed: string +items: + array of Items +``` +#### Item: +```yaml +title: string +date: datetime +link: string +images: + array of strings +``` +--- +## Caching system +All items were fetched +will be stored in cached.json +which is located +(will be created if it doesn't exist) +in source folder and will be stored +in JSON format with checking for duplicates \ No newline at end of file diff --git a/src/Util.py b/src/Util.py index 8dcc80f2..ecc9a17e 100644 --- a/src/Util.py +++ b/src/Util.py @@ -18,7 +18,7 @@ def __init__(self, message): class Util: indent = ' ' - cached_file = Path(__file__).parent.parent.joinpath('cached.json') + cached_file = Path(__file__).parent.joinpath('cached.json') @classmethod def colorize(cls, message: str, color: str, back_color: str = None) -> str: diff --git a/test/test_Logger.py b/src/test/test_Logger.py similarity index 96% rename from test/test_Logger.py rename to src/test/test_Logger.py index 4d14bfe9..1ebba14a 100644 --- a/test/test_Logger.py +++ b/src/test/test_Logger.py @@ -1,7 +1,7 
@@ import io from contextlib import redirect_stdout from unittest import TestCase -from src.Logger import Logger, Level +from Logger import Logger, Level class TestLogger(TestCase): diff --git a/test/test_News.py b/src/test/test_News.py similarity index 96% rename from test/test_News.py rename to src/test/test_News.py index a47ff913..1b4654fe 100644 --- a/test/test_News.py +++ b/src/test/test_News.py @@ -1,6 +1,6 @@ import datetime from unittest import TestCase -from src.news.News import News +from news.News import News class TestNews(TestCase): diff --git a/test/test_Util.py b/src/test/test_Util.py similarity index 97% rename from test/test_Util.py rename to src/test/test_Util.py index 73d66f49..161b96df 100644 --- a/test/test_Util.py +++ b/src/test/test_Util.py @@ -1,7 +1,7 @@ import datetime import json from unittest import TestCase -from src.Util import Util +from Util import Util from colorama import Fore diff --git a/src/test_all.py b/src/test_all.py new file mode 100644 index 00000000..6e0bf49c --- /dev/null +++ b/src/test_all.py @@ -0,0 +1,9 @@ +import unittest +from pathlib import Path + +if __name__ == '__main__': + path = Path(__file__).parent.joinpath('test') + tests = unittest.TestLoader().discover(path) + unittest.TextTestRunner().run(tests) + + From dcfa8b78b13419fa6f79df09dfbd4f969e9d3d12 Mon Sep 17 00:00:00 2001 From: Iskak <57086286+0iskak@users.noreply.github.com> Date: Thu, 30 Jun 2022 17:12:15 +0600 Subject: [PATCH 10/10] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 41af19fd..80786371 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,14 @@ python .\src\test_all.py ## Print format ### JSON #### News: -```json +``` { "feed": string, "items": array of Items } ``` #### Item: -```json +``` { "title": string, "date": datetime, @@ -53,4 +53,4 @@ will be stored in cached.json which is located (will be created if it doesn't exist) in source folder and will be stored 
-in JSON format with checking for duplicates \ No newline at end of file +in JSON format with checking for duplicates