diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 00000000..b0b5d983 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,29 @@ +on: + pull_request: + types: [opened, edited] + +permissions: + actions: none + checks: none + contents: none + deployments: none + issues: none + packages: none + pull-requests: write + repository-projects: none + security-events: none + statuses: none + +name: "Mark Final Task" +jobs: + label_regex: + runs-on: ubuntu-latest + name: Add label + steps: + - name: "Assign label to Final Task" + uses: Bhacaz/label-regex@v1 + with: + field: title + regex: '(?(F|f)(I|i)(N|n)(A|a)(L|l)[ -]*(T|t)(A|a)(S|s)(K|k)).*' + lowercase: true + token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..5d40cf71 --- /dev/null +++ b/.gitignore @@ -0,0 +1,119 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# pycharm +.idea + +# vscode +.vscode +export_path.sh + +# files convertion +parsing_news.html +*.pdf +*.jpg + +# file format +*.pkl \ No newline at end of file diff --git a/DejaVuSans.ttf b/DejaVuSans.ttf new file mode 100644 index 00000000..e5f7eecc Binary files /dev/null and b/DejaVuSans.ttf differ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..cd5926a5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.8 + +RUN mkdir /code + +WORKDIR /code +ADD . /code +ADD requirements.txt code/requirements.txt + +RUN python3.8 -m pip install --upgrade -r requirements.txt +ENV PYTHONPATH "${PATHONPATH}:/rssreader" \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..540b7204 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index c86d1e65..6588f0a8 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,98 @@ -# How to create a PR with a homework task - -1. Create fork from the following repo: https://github.com/E-P-T/Homework. 
(Docs: https://docs.github.com/en/get-started/quickstart/fork-a-repo ) -2. Clone your forked repo in your local folder. -3. Create separate branches for each session.Example(`session_2`, `session_3` and so on) -4. Create folder with you First and Last name in you forked repo in the created session. -5. Add your task into created folder -6. Push finished session task in the appropriate branch in accordance with written above. - You should get the structure that looks something like that - -``` - Branch: Session_2 - DzmitryKolb - |___Task1.py - |___Task2.py - Branch: Session_3 - DzmitryKolb - |___Task1.py - |___Task2.py -``` - -7. When you finish your work on task you should create Pull request to the appropriate branch of the main repo https://github.com/E-P-T/Homework (Docs: https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). -Please use the following instructions to prepare good description of the pull request: - - Pull request header should be: `Session - `. - Example: `Session 2 - Dzmitry Kolb` - - Pull request body: You should write here what tasks were implemented. - Example: `Finished: Task 1.2, Task 1.3, Task 1.6` +# RSS reader by Qayumjon Ergashaliyev +An RSS reader is a command-line utility that retrieves the RSS URL and prints the results in a human-readable format. + +# RSS reader +RSS reader is a command-line utility that retrieves the RSS URL and prints the results in a human-readable format. + +## Distribution +The program is located in module rssreader. When unpacking, it is installed in the system. After installation, to start using the program, write rss-reader to the terminal. + +## Specification +The program works with command line arguments. 
Their description: + +**usage:** docker rm my_app |& docker-compose run --name my_app app python -m rssreader \[-h] \[--version] \[--json] \[--verbose] \[--limit *LIMIT*] \[--colorize] \[--to-pdf] \[--to-html] \[--date *DATE*] source + ++ Positional arguments: + + source => RSS URL + ++ Optional arguments: + + -h, --help => Show help message and exit. Can be used as a single argument. + + --version => Print version info. Can be used as a single argument. + + --json => Print result as JSON in stdout. + + --verbose => Outputs verbose status messages. Use with other arguments. + + --limit LIMIT => Limit the number of news topics to LIMIT if this parameter is provided. + + --date DATE => Return cached news from the publication day. Format is YYYYMMDD. **The source argument is still required: cached news is looked up by source URL and date**. + + --to-html => Convert news to html. Saves the file parsing_news.html + + --to-pdf => Convert news to pdf. Saves the file parsing_news.pdf + + --colorize => Print the result of the utility in colorized mode. + ++ Additional description: + + --limit => can be used with --json, --date, --to-pdf, --to-html. + + --json => the JSON schema of the output is described in the file **parsing_json_schema.json** + + --date => can be used with --json, --limit, --to-pdf, --to-html. + + --verbose => can be used with all arguments. + + --colorize => use this argument without --json, because JSON output is not colorized. + +For example, you can combine several arguments: +```bash + source --limit LIMIT --json --date Date --to-pdf --to-html +``` +## Get files from container + +```bash +docker cp my_app:/code/parsing_news.pdf parsing_news.pdf +docker cp my_app:/code/parsing_news.html parsing_news.html +``` + +## Database check + +Open the following address in your browser (only after the application has been started at least once): +```bash +http://0.0.0.0:8081/db/news/news_received +``` + +## Testing + +The tests are located in the `tests` folder at the repository root. + +Before running the tests, install the package: +```bash + pip install . 
+``` +To test the package enter the command: +```bash + nosetests --with-coverage --cover-erase --cover-package=rssreader +``` +You can test internal packages as well: +```bash + --cover-package=rssreader.parser +``` + +# RSS reader +RSS reader is a command-line utility that retrieves the RSS URL and prints the results in a human-readable format. + +## Guide +1. Create a Docker container +```bash +docker run -it -p 8080:8080 -v /var/run/docker.sock:/var/run/docker.sock python /bin/bash +``` +2. Clone the [repository](https://github.com/king9799/Homework) +```bash +git clone https://github.com/king9799/Homework.git && cd Rss-Reader +``` +3. Run the docker and docker-compose installation script. If you get an error, it is most likely a network problem; run the command again +```bash +chmod +x install.sh && . install.sh +``` +4. To start the application, run +```bash +docker rm my_app |& docker-compose run --name my_app app python -m rssreader +``` +Example: +```bash +docker rm my_app |& docker-compose run --name my_app app python -m rssreader "https://news.yahoo.com/rss" --limit 1 +``` +5. If you want to test the code, see **manual** +6. If you want to get the pdf or html file, see **manual** +7. All is ready. Read **manual**. \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..891295af --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,31 @@ +version: '3.1' + +services: + mongo: + image: mongo + container_name: mongodb + hostname: mongo + ports: + - 27017:27017 + app: + tty: true + container_name: rss-reader + stdin_open: true + depends_on: + - mongo + build: + context: . 
+ dockerfile: Dockerfile + ports: + - 3000:3000 + links: + - mongo:mongodb + environment: + - MONGO_URL=mongodb://mongo:27017/ + + mongo-express: + image: mongo-express + container_name: mongo-express + restart: always + ports: + - 8081:8081 \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100644 index 00000000..6a8d52af --- /dev/null +++ b/install.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +apt-get update && \ +apt-get -y install apt-transport-https \ + ca-certificates \ + curl \ + gnupg2 \ + software-properties-common && \ +curl -fsSL https://download.docker.com/linux/$(. /etc/os-release; echo "$ID")/gpg > /tmp/dkey; apt-key add /tmp/dkey && \ +add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/$(. /etc/os-release; echo "$ID") \ + $(lsb_release -cs) \ + stable" && \ +apt-get update && \ +apt-get -y install docker-ce + +sudo curl -L https://github.com/docker/compose/releases/download/1.24.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose +sudo chmod +x /usr/local/bin/docker-compose + +docker-compose up --build -d \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..9dc809c2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +feedparser==5.2.1 +bs4==0.0.1 +nose==1.3.7 +pymongo==3.9.0 +coverage==4.5.4 +fpdf==1.7.2 +requests==2.22.0 +colored==1.4.1 diff --git a/rssreader/DejaVuSans.ttf b/rssreader/DejaVuSans.ttf new file mode 100644 index 00000000..e5f7eecc Binary files /dev/null and b/rssreader/DejaVuSans.ttf differ diff --git a/rssreader/__init__.py b/rssreader/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rssreader/__main__.py b/rssreader/__main__.py new file mode 100644 index 00000000..93f208f3 --- /dev/null +++ b/rssreader/__main__.py @@ -0,0 +1,9 @@ +from rssreader.app import rss_reader + + +def main(): + rss_reader.main() + + +if __name__ == "__main__": + main() diff --git a/rssreader/app/DejaVuSans.ttf 
b/rssreader/app/DejaVuSans.ttf
new file mode 100644
index 00000000..e5f7eecc
Binary files /dev/null and b/rssreader/app/DejaVuSans.ttf differ
diff --git a/rssreader/app/__init__.py b/rssreader/app/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/rssreader/app/rss_reader.py b/rssreader/app/rss_reader.py
new file mode 100644
index 00000000..82c2305c
--- /dev/null
+++ b/rssreader/app/rss_reader.py
@@ -0,0 +1,110 @@
+"""Entry point module"""
+
+import logging
+from rssreader.arguments import all_args, arg_verbose
+from rssreader.parser import feed_parser
+from rssreader.format_conversion import conversion_json as cv_json
+from rssreader.format_conversion import conversion_html as cv_html
+from rssreader.format_conversion import conversion_pdf as cv_pdf
+from rssreader.db import news_database as db
+
+
+class RssReader(all_args.Arguments):
+    """Application object: parsed arguments plus the actions they trigger"""
+
+    def __export_files(self, news: list) -> None:
+        """Write the html and/or pdf file when requested and news exist"""
+        if not news:
+            return
+        if self.args.to_html:
+            feed_html = cv_html.HtmlConversion(news)
+            feed_html.save_html_news(feed_html.conversion_to_html())
+        if self.args.to_pdf:
+            cv_pdf.PdfConversion(news).conversion_to_pdf()
+
+    def __render(self, feed, feed_json, news: list) -> str:
+        """Return news as JSON, colorized text or plain text"""
+        if self.args.json:
+            return feed_json.convert_to_json(news)
+        if self.args.colorize:
+            return feed.make_pretty_rss_colorize(news)
+        return feed.make_pretty_rss(news)
+
+    def __reaction_to_arguments(self) -> str:
+        """
+        Method that performs various actions
+        depending on command line arguments
+
+        Returns the text to print: the rendered news or a short
+        message when there is nothing to show.
+        """
+        limit = self.args.limit
+        source = self.args.source
+        date = self.args.date
+        debug_string = "Start with arguments: " +\
+            f"--limit: {limit}, " +\
+            f"--json {self.args.json}, " +\
+            f"--verbose {self.args.verbose}, " +\
+            f"--date {date}, " +\
+            f"--to-html {self.args.to_html}, " +\
+            f"--to-pdf {self.args.to_pdf}, " +\
+            f"--colorize {self.args.colorize}"
+
+        logging.debug(debug_string)
+        logging.debug(f"URL: {source}")
+
+        # Reject a bad limit before any parser or database object is built.
+        if limit is not None and limit < 1:
+            msg = "Limit is less than one"
+            logging.info(f"Stop. {msg}")
+            return msg
+
+        feed = feed_parser.RssParser(source, limit)
+        feed_json = cv_json.JsonConversion(source, limit)
+        feed_db = db.NewsDatabase(source, date, limit)
+
+        if date:
+            # --date: serve cached news only, the feed is not downloaded.
+            news_from_db = feed_db.show_news()
+            if not news_from_db:
+                msg = f"No news with date {date} and url {source}"
+                logging.info(msg)
+                return msg
+            self.__export_files(news_from_db)
+            return self.__render(feed, feed_json, news_from_db)
+
+        news_parsing = feed.parse_news()
+        logging.info(f"Get rss from {source}")
+
+        if self.args.json:
+            logging.info(f"Convert rss from {source} to json")
+        else:
+            logging.info("Show result of parsing")
+        result = self.__render(feed, feed_json, news_parsing)
+
+        self.__export_files(news_parsing)
+        # Cache what was just downloaded so --date can find it later.
+        feed_db.update_news(news_parsing)
+        return result
+
+    def get_verbose(self) -> str:
+        """Return the collected log text when --verbose is set, else ''"""
+        return arg_verbose.AppLogging.show_logs() if self.args.verbose else ""
+
+    def run(self) -> str:
+        """Application launch; returns the text that main() prints"""
+        logging.info("Run app")
+        return self.__reaction_to_arguments()
+
+
+def main() -> None:
+    """Main application method"""
+    try:
+        arg_verbose.AppLogging.log_setup()
+        rss_app = RssReader()
+        print(rss_app.run())
+        print(rss_app.get_verbose())
+    except Exception as e:
+        # Top-level boundary: report any failure instead of a traceback.
+        logging.error(e)
+        print(e)
diff --git a/rssreader/arguments/__init__.py b/rssreader/arguments/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/rssreader/arguments/all_args.py b/rssreader/arguments/all_args.py
new file mode 100644
index 00000000..c1adcae6
--- /dev/null
+++ b/rssreader/arguments/all_args.py
@@ -0,0 +1,67 @@
+"""A module that sets command line arguments"""
+
+from rssreader import cnf
+from rssreader.exceptions.all_exceptions import ThrowingArgumentParser
+
+
+class Arguments:
+    """A class that sets command line arguments"""
+    def __init__(self) -> None:
+        """
+        Build the parser and store the parsed command line in self.args
+
+        Wrong arguments raise ArgumentParserError (see
+        ThrowingArgumentParser) instead of terminating the program.
+        """
+        parser = ThrowingArgumentParser(
+            prog=cnf.__package__,
+            description="Pure Python command-line RSS reader."
+        )
+        parser.add_argument(
+            "--json",
+            help="Print result as JSON in stdout",
+            action="store_true"
+        )
+        parser.add_argument(
+            "--limit",
+            help="Limit news topics if this parameter is provided",
+            type=int
+        )
+        parser.add_argument(
+            "--verbose",
+            help="Outputs verbose status messages",
+            action="store_true"
+        )
+        parser.add_argument(
+            "--version",
+            help="Print version info",
+            action="version",
+            version=cnf.__version__
+        )
+        parser.add_argument(
+            "--date",
+            help="Return cached news from the publication day. "
+                 "Format is YYYYMMDD",
+            type=int
+        )
+        parser.add_argument(
+            "--to-html",
+            help="Convert news to html",
+            action="store_true"
+        )
+        parser.add_argument(
+            "--to-pdf",
+            help="Convert news to pdf",
+            action="store_true"
+        )
+        parser.add_argument(
+            "--colorize",
+            help="Print the result of the utility in colorized mode",
+            action="store_true"
+        )
+        parser.add_argument(
+            "source",
+            help="RSS URL",
+            type=str
+        )
+        self.args = parser.parse_args()
diff --git a/rssreader/arguments/arg_verbose.py b/rssreader/arguments/arg_verbose.py
new file mode 100644
index 00000000..2d06418b
--- /dev/null
+++ b/rssreader/arguments/arg_verbose.py
@@ -0,0 +1,39 @@
+"""A module which is responsible for the logic of logs"""
+
+import os
+import logging
+
+
+class AppLogging:
+    """A class that contains methods responsible for logs"""
+    @staticmethod
+    def setup_logs(file_path) -> None:
+        """
+        Method that configures the log config
+        storage location, output format
+        """
+        logging.basicConfig(
+            filename=file_path,
+            filemode="a",
+
format="%(asctime)s - %(levelname)s - %(message)s",
+            datefmt='%Y-%m-%d %H:%M:%S',
+            level=logging.DEBUG
+        )
+
+    @staticmethod
+    def log_setup() -> None:
+        """
+        Method that empties the log file left by a previous
+        run and then configures logging to write to it
+        """
+        file_path = "app_logging.log"
+
+        with open(file_path, "w"):
+            AppLogging.setup_logs(file_path)
+
+    @staticmethod
+    def show_logs() -> str:
+        """Method that returns all the logs"""
+        with open("app_logging.log", "r") as rf:
+            logging.info("Show logs")
+            return "Logs:\n" + "".join([line for line in rf])
diff --git a/rssreader/cnf.py b/rssreader/cnf.py
new file mode 100644
index 00000000..b5d6268c
--- /dev/null
+++ b/rssreader/cnf.py
@@ -0,0 +1,2 @@
+__package__ = "rss-reader"
+__version__ = "5.0"
diff --git a/rssreader/db/__init__.py b/rssreader/db/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/rssreader/db/news_database.py b/rssreader/db/news_database.py
new file mode 100644
index 00000000..2b706058
--- /dev/null
+++ b/rssreader/db/news_database.py
@@ -0,0 +1,73 @@
+"""Database caching module"""
+
+import logging
+import os
+from pymongo import MongoClient
+from datetime import datetime
+
+
+class NewsDatabase():
+    """Class that caches parsed news in MongoDB"""
+    def __init__(self, url: str, date: int, limit: int = None) -> None:
+        """
+        Creates a database connection and a collection of news
+
+        The server address is taken from the MONGO_URL environment
+        variable; "mongodb://mongo:27017/" is used when it is not set.
+        """
+        mongo_url = os.environ.get("MONGO_URL", "mongodb://mongo:27017/")
+        self.client = MongoClient(mongo_url)
+        self.db = self.client.news
+        self.collection = self.db.news_received
+        self.url = url
+        self.date = date
+        self.limit = limit
+        logging.debug("Connected to the database")
+
+    def __template_news(self, date: int, current_news: dict) -> dict:
+        """Template for news in this format news is stored in the database"""
+        return {"source": self.url, "date": date, "news": current_news}
+
+    def __convert_date_format(self, date: str) -> str:
+        """Return the strptime format that matches the date string"""
+        format_time = "%a, %d %b %Y %H:%M:%S %Z"
+        if date.find("GMT") == -1:
+            format_time =
format_time.replace("Z", "z")
+
+        return format_time
+
+    def update_news(self, news: list) -> None:
+        """Store unseen news in the database, skipping unparseable dates"""
+        if news:
+            for current_news in news:
+                db_date = current_news["Date"]
+                format_time = self.__convert_date_format(db_date)
+                try:
+                    parsed = datetime.strptime(db_date, format_time)
+                except ValueError:
+                    # No date key can be built, so this item is not cached.
+                    logging.error("Date not in format UTC or GMT")
+                    continue
+
+                template = self.__template_news(
+                    int(str(parsed.date()).replace("-", "")), current_news)
+                if not self.collection.find_one(template):
+                    self.collection.insert_one(template)
+
+            logging.debug("Database news updated")
+
+    def __search_caching_news(self) -> list:
+        """Search for news in the database"""
+        template = {"source": self.url, "date": self.date}
+        find_news = self.collection.find(template, {"_id": 0})
+
+        logging.debug("Search news in database")
+        logging.info("Show caching news from database")
+        if self.limit is not None:
+            return find_news.limit(self.limit) if self.limit else []
+        else:
+            return find_news
+
+    def show_news(self):
+        """Getting news from the database"""
+        return[curr_news["news"] for curr_news in self.__search_caching_news()]
diff --git a/rssreader/exceptions/__init__.py b/rssreader/exceptions/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/rssreader/exceptions/all_exceptions.py b/rssreader/exceptions/all_exceptions.py
new file mode 100644
index 00000000..fb62871f
--- /dev/null
+++ b/rssreader/exceptions/all_exceptions.py
@@ -0,0 +1,49 @@
+"""A module with custom exceptions"""
+
+import argparse
+
+
+class ArgumentParserError(Exception):
+    """The exception class
+
+    The exception that is thrown when
+    the arguments are entered incorrectly
+
+    """
+    def __init__(self, message):
+        self.message = message
+
+
+class ThrowingArgumentParser(argparse.ArgumentParser):
+    """
+    The class that causes exclusion by overloading
+    method error() of class ArgumentParser
+    """
+    def error(self, message) -> None:
+        """
+        Method
that throws an exception in response + to a built-in error of the module argparse + """ + raise ArgumentParserError(message) + + +class InvalidLinkOrInternetConnectionError(Exception): + """The exception class + + The exception that is thrown when + the link are entered incorrectly + + """ + def __init__(self, message): + self.message = message + + +class ParsingNewsError(Exception): + """The exception class + + An exception that is thrown when it is not possible + to receive news from rss + + """ + def __init__(self, message): + self.message = message diff --git a/rssreader/format_conversion/DejaVuSans.ttf b/rssreader/format_conversion/DejaVuSans.ttf new file mode 100644 index 00000000..e5f7eecc Binary files /dev/null and b/rssreader/format_conversion/DejaVuSans.ttf differ diff --git a/rssreader/format_conversion/__init__.py b/rssreader/format_conversion/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rssreader/format_conversion/conversion_html.py b/rssreader/format_conversion/conversion_html.py new file mode 100644 index 00000000..f2ec07ea --- /dev/null +++ b/rssreader/format_conversion/conversion_html.py @@ -0,0 +1,53 @@ +"""Html conversion module""" + +import logging + + +class HtmlConversion(): + """A class that converts to HTML""" + def __init__(self, list_news_dict: list) -> None: + self.list_news_dict = list_news_dict + + def conversion_to_html(self) -> list: + """Method that converts a dictionary to html""" + result_news = [] + current_news = [] + template = '\ + \ + News feed\ + ' + result_news.append(template) + + for news in self.list_news_dict: + feed_html = f'' + current_news.append(feed_html) + current_news.append('') + result_news.append("".join(current_news)) + + current_news.clear() + result_news.append('

{news["Feed"]}

') + current_news.append( + f'\n

{news["Title"]}

') + current_news.append( + f'

\ + News Link

') + current_news.append( + f'

Date: {news["Date"]}

') + + for link in news["Links"]: + current_news.append( + f'

') + + current_news.append(f'

{news["Description"]}\n

') + current_news.append('
') + + logging.info("News converted to html") + + return "".join(result_news) + + def save_html_news(self, news) -> None: + """Method that saves html file""" + with open("parsing_news.html", "w") as wf: + wf.write(news) + logging.info("File html saved successfully") diff --git a/rssreader/format_conversion/conversion_json.py b/rssreader/format_conversion/conversion_json.py new file mode 100644 index 00000000..f7ecef88 --- /dev/null +++ b/rssreader/format_conversion/conversion_json.py @@ -0,0 +1,19 @@ +"""Conversion module to json""" + +import json +import logging + +from rssreader.parser import feed_parser + + +class JsonConversion(feed_parser.RssParser): + """Class that converts rss to json""" + def __init__(self, url: str, limit=None) -> None: + """Сalls the parser constructor to process the limit""" + super().__init__(url, limit) + + def convert_to_json(self, data_parsing) -> str: + """Method that converts rss to json""" + result = json.dumps(data_parsing, indent=4, ensure_ascii=False) + logging.info("Show result of json conversion") + return result if result != "[]" else "" diff --git a/rssreader/format_conversion/conversion_pdf.py b/rssreader/format_conversion/conversion_pdf.py new file mode 100644 index 00000000..a402d471 --- /dev/null +++ b/rssreader/format_conversion/conversion_pdf.py @@ -0,0 +1,75 @@ +"""Pdf conversion module""" +import os +import requests +import logging +from fpdf import FPDF + + +class PdfConversion(): + """A class that converts to Pdf""" + def __init__(self, list_news_dict: list) -> None: + self.list_news_dict = list_news_dict + self.pdf = FPDF() + self.pdf.add_font('DejaVu', '', 'DejaVuSans.ttf', uni=True) + self.pdf.add_page() + + def __pretty_text(self, txt: str, size: int, pos: str, len_l: int) -> None: + """ + Method that aligns text with respect + to its length considering word breaks + """ + self.pdf.set_font('DejaVu', '', size=size) + split_txt = txt.split(" ") + temp_len = 0 + line = [] + + for word in split_txt: + if temp_len 
+ len(word) < len_l: + line.append(word) + temp_len += len(word) + else: + temp_len = 0 + line.append(word) + self.pdf.cell(w=0, h=8, align=pos, txt=" ".join(line), ln=1) + line.clear() + + if line: + self.pdf.cell(w=0, h=8, align=pos, txt=" ".join(line), ln=1) + + def conversion_to_pdf(self): + """Method that receives a list of news and creates pdf from it""" + self.pdf.set_font('DejaVu', '', size=24) + for index, news in enumerate(self.list_news_dict): + self.__pretty_text(news["Feed"], 24, "C", 36) + self.pdf.cell(w=0, h=20, txt=" ", ln=1) + + self.__pretty_text(news["Title"], 18, "C", 36) + + self.pdf.set_text_color(0, 0, 255) + self.pdf.cell( + w=0, h=8, align="C", txt="News link", ln=1, link=news["Link"]) + + self.pdf.set_text_color(0, 0, 0) + self.pdf.cell(w=0, h=15, txt=" ", ln=1) + + self.__pretty_text("Date: " + news["Date"], 14, "C", 36) + + for img in news["Links"]: + req = requests.get(img) + with open(f"image{index}.jpg", "w+b") as wf: + wf.write(req.content) + self.pdf.set_x(70) + try: + self.pdf.image(f"image{index}.jpg", w=70, h=70) + except RuntimeError: + logging.debug("gif and mp4 fles are ignored") + + os.remove(f"image{index}.jpg") + self.pdf.set_x(10) + self.__pretty_text(news["Description"], 18, "C", 36) + + self.pdf.add_page() + + logging.info("News converted to pdf") + logging.info("File pdf saved successfully") + self.pdf.output("parsing_news.pdf") diff --git a/rssreader/parser/__init__.py b/rssreader/parser/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rssreader/parser/feed_parser.py b/rssreader/parser/feed_parser.py new file mode 100644 index 00000000..f14525c1 --- /dev/null +++ b/rssreader/parser/feed_parser.py @@ -0,0 +1,162 @@ +"""Module that get a rss from the news portal""" + +import feedparser +import logging +from bs4 import BeautifulSoup +from colored import fore, style + +from rssreader.exceptions import all_exceptions + + +class RssParser: + """The class that receives the news in rss format""" + def 
__init__(self, url: str, limit: int = None) -> None: + self.url = url + self.limit = limit + self.articles = [] + + @property + def limit(self) -> int: + """A property that returns the limit value""" + return self.__limit + + @limit.setter + def limit(self, value) -> None: + """A property that checks for a negative value""" + self.__limit = value if value is None or value > 0 else 0 + + def _check_limit(self, enrtries: list) -> int: + """Function that checks the maximum limit value""" + self.count_news = len(enrtries) + + if self.limit is not None: + return min(self.limit, self.count_news) + else: + return self.count_news + + def _get_rss_from_url(self) -> dict: + """ + Function that checks the possibility of receiving news and, + if successful, receives them + """ + html_news = feedparser.parse(self.url) + + if html_news.bozo == 1: + err_msg = "Check your link or internet connection" + raise all_exceptions.InvalidLinkOrInternetConnectionError(err_msg) + + return html_news + + def parse_news(self) -> list: + """ + Function that returns a list of news that was received by parsing rss + """ + self.html_news = self._get_rss_from_url() + self.limit = self._check_limit(self.html_news.entries) + + parsing_news = [] + try: + feed = self.html_news.feed.title + + for entry in self.html_news.entries[:self.limit]: + temp_dict = { + 'Feed': feed, + 'Title': entry.title.replace("'", "'"), + 'Date': entry.published, + 'Link': entry.link, + 'Description': BeautifulSoup( + entry.summary, features="html.parser" + ).text, + 'Links': [link.get("src") for link in BeautifulSoup( + entry.summary, features="html.parser" + ).find_all("img") if link.get("src")] + } + parsing_news.append(temp_dict) + + return parsing_news + except AttributeError as e: + logging.error(e) + err_msg = "Сan't get rss, because the news has an incorrect format" + raise all_exceptions.ParsingNewsError(err_msg) + + def __find_type_link(self, link) -> str: + """ + Function that determines the type of file that is 
located at the link + """ + return "(image)" if link.find(".mp4") == -1 else "(video)" + + def __make_pretty_links(self, link, links) -> str: + """ + The function that returns the string in which the numeric + representation of all links from the news is located + """ + links_list = [] + len_list = 0 + + if isinstance(link, list): + len_list = len(link) + for index, element in enumerate(link): + if element: + links_list.append(f"[{(index + 1)}]: {element}(link)") + elif link: + len_list = 1 + links_list.append(f"[1]: {link}(link)") + + if isinstance(links, list): + for index, element in enumerate(links): + if element: + type_link = self.__find_type_link(element) + links_list.append( + f"[{(index+1+len_list)}]: {element}{type_link}" + ) + elif links: + type_link = self.__find_type_link(links) + links_list.append(f"[{len_list + 1}]: {links}{type_link}") + + return "\n".join(links_list) + + def make_pretty_rss(self, news) -> str: + """ + Function that returns the final representation + of news that was obtained from the rss link + """ + pretty_string = [] + + for article in news: + links = self.__make_pretty_links( + article['Link'], + article['Links'] + ) + + pretty_string.append( + f"\nFeed: {article['Feed']}\n\nTitle: {article['Title']} \ + \nDate: {article['Date']}\nLink: {article['Link']} \ + \n\n{article['Description']}\n\nLinks:\n{links}\n\n" + ) + return "".join(pretty_string) + + def make_pretty_rss_colorize(self, news) -> str: + """ + Function that returns the final color presentation + of news that was obtained from the rss link + """ + pretty_string = [] + + for article in news: + links = self.__make_pretty_links( + article['Link'], + article['Links'] + ) + + pretty_string.append( + fore.RED + style.BOLD + + f"\nFeed: {article['Feed']}\n\nTitle: {article['Title']}" + + style.RESET + fore.GREEN + style.BOLD + + f"\nDate: {article['Date']}\nLink: {article['Link']}" + + style.RESET + fore.BLUE + style.BOLD + + f"\n\n{article['Description']}" + + style.RESET + 
fore.MAGENTA + style.BOLD + + f"\n\nLinks:\n{links}\n\n" + style.RESET + ) + logging.info("Text painted in RGBM") + return "".join(pretty_string) diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..8d747059 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup, find_packages +from rssreader import cnf + +setup( + name=cnf.__package__, + version=cnf.__version__, + description="command-line RSS reader", + long_description="All information in the Readme", + author="Qayumjon Ergashaliyev", + author_email="king97queen99@gmail.com", + packages=find_packages(), + python_requires=">=3.9.6", + install_requires=[ + "bs4", "feedparser", "nose", "pymongo", "coverage", + "fpdf", "requests", "colored"], + entry_points={ + "console_scripts": + [f"{cnf.__package__} = rssreader.__main__:main"] + } +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/__main__.py b/tests/__main__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/data/test_answer_links.txt b/tests/unit/data/test_answer_links.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/data/test_convert_to_json.txt b/tests/unit/data/test_convert_to_json.txt new file mode 100644 index 00000000..3ee132e0 --- /dev/null +++ b/tests/unit/data/test_convert_to_json.txt @@ -0,0 +1,10 @@ +[ + { + "Feed": "Liftoff News", + "Title": "Star City", + "Date": "Tue, 03 Jun 2003 09:39:21 GMT", + "Link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp", + "Description": "How do Americans get ready to work with Russians aboard the\n International Space Station? 
They take a crash course in culture, language\n and protocol at Russia's Star City.", + "Links": [] + } +] \ No newline at end of file diff --git a/tests/unit/data/test_correct.rss b/tests/unit/data/test_correct.rss new file mode 100644 index 00000000..63ce4840 --- /dev/null +++ b/tests/unit/data/test_correct.rss @@ -0,0 +1,55 @@ + + + + Liftoff News + http://liftoff.msfc.nasa.gov/ + Liftoff to Space Exploration. + en-us + Tue, 10 Jun 2003 04:00:00 GMT + + Tue, 10 Jun 2003 09:41:01 GMT + http://blogs.law.harvard.edu/tech/rss + Weblog Editor 2.0 + editor@example.com + webmaster@example.com + + + Star City + http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp + How do Americans get ready to work with Russians aboard the + International Space Station? They take a crash course in culture, language + and protocol at Russia's Star City. + Tue, 03 Jun 2003 09:39:21 GMT + http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 + + + + Space Exploration + http://liftoff.msfc.nasa.gov/ + Sky watchers in Europe, Asia, and parts of Alaska and Canada + will experience a partial eclipse of the Sun on Saturday, May 31st. + Fri, 30 May 2003 11:06:42 GMT + http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 + + + + The Engine That Does More + http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp + Before man travels to Mars, NASA hopes to design new engines + that will let us fly through the Solar System more quickly. The proposed + VASIMR engine would do that. + Tue, 27 May 2003 08:37:32 GMT + http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 + + + + Astronauts' Dirty Laundry + http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp + Compared to earlier spacecraft, the International Space + Station has many luxuries, but laundry facilities are not one of them. + Instead, astronauts have other options. 
+ Tue, 20 May 2003 08:56:02 GMT + http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 + + + \ No newline at end of file diff --git a/tests/unit/data/test_entries.txt b/tests/unit/data/test_entries.txt new file mode 100644 index 00000000..c8d466ba --- /dev/null +++ b/tests/unit/data/test_entries.txt @@ -0,0 +1,4 @@ +{'title': 'Star City', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Star City'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp', 'summary': "How do Americans get ready to work with Russians aboard the\n International Space Station? They take a crash course in culture, language\n and protocol at Russia's Star City.", 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': "How do Americans get ready to work with Russians aboard the\n International Space Station? They take a crash course in culture, language\n and protocol at Russia's Star City."}, 'published': 'Tue, 03 Jun 2003 09:39:21 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=6, tm_mday=3, tm_hour=9, tm_min=39, tm_sec=21, tm_wday=1, tm_yday=154, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573', 'guidislink': False}, +{'title': 'Space Exploration', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Space Exploration'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/'}], 'link': 'http://liftoff.msfc.nasa.gov/', 'summary': 'Sky watchers in Europe, Asia, and parts of Alaska and Canada\n will experience a partial eclipse of the Sun on Saturday, May 31st.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Sky watchers in Europe, Asia, and parts of Alaska and Canada\n will experience a partial eclipse of the Sun on Saturday, May 31st.'}, 'published': 'Fri, 30 May 2003 
11:06:42 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=30, tm_hour=11, tm_min=6, tm_sec=42, tm_wday=4, tm_yday=150, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/30.html#item572', 'guidislink': False}, +{'title': 'The Engine That Does More', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'The Engine That Does More'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp', 'summary': 'Before man travels to Mars, NASA hopes to design new engines\n that will let us fly through the Solar System more quickly. The proposed\n VASIMR engine would do that.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Before man travels to Mars, NASA hopes to design new engines\n that will let us fly through the Solar System more quickly. The proposed\n VASIMR engine would do that.'}, 'published': 'Tue, 27 May 2003 08:37:32 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=27, tm_hour=8, tm_min=37, tm_sec=32, tm_wday=1, tm_yday=147, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/27.html#item571', 'guidislink': False}, +{'title': "Astronauts' Dirty Laundry", 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': "Astronauts' Dirty Laundry"}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp', 'summary': 'Compared to earlier spacecraft, the International Space\n Station has many luxuries, but laundry facilities are not one of them.\n Instead, astronauts have other options.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Compared to earlier spacecraft, the International Space\n Station has many luxuries, but laundry facilities are not 
one of them.\n Instead, astronauts have other options.'}, 'published': 'Tue, 20 May 2003 08:56:02 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=20, tm_hour=8, tm_min=56, tm_sec=2, tm_wday=1, tm_yday=140, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/20.html#item570', 'guidislink': False} \ No newline at end of file diff --git a/tests/unit/data/test_html.html b/tests/unit/data/test_html.html new file mode 100644 index 00000000..f539421b --- /dev/null +++ b/tests/unit/data/test_html.html @@ -0,0 +1,5 @@ + News feed

Liftoff News

+

Star City

News Link

Date: Tue, 03 Jun 2003 09:39:21 GMT

How do Americans get ready to work with Russians aboard the + International Space Station? They take a crash course in culture, language + and protocol at Russia's Star City. +

\ No newline at end of file diff --git a/tests/unit/data/test_incorrect.rss b/tests/unit/data/test_incorrect.rss new file mode 100644 index 00000000..040a2d37 --- /dev/null +++ b/tests/unit/data/test_incorrect.rss @@ -0,0 +1,4 @@ + + + Liftoff News + \ No newline at end of file diff --git a/tests/unit/data/test_links.txt b/tests/unit/data/test_links.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/data/test_parsing_news.txt b/tests/unit/data/test_parsing_news.txt new file mode 100644 index 00000000..528ae42b --- /dev/null +++ b/tests/unit/data/test_parsing_news.txt @@ -0,0 +1 @@ +[{'Feed': 'Liftoff News', 'Title': 'Star City', 'Date': 'Tue, 03 Jun 2003 09:39:21 GMT', 'Link': 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp', 'Description': "How do Americans get ready to work with Russians aboard the\n International Space Station? They take a crash course in culture, language\n and protocol at Russia's Star City.", 'Links': []}] \ No newline at end of file diff --git a/tests/unit/data/test_pretty_rss.txt b/tests/unit/data/test_pretty_rss.txt new file mode 100644 index 00000000..9625d77a --- /dev/null +++ b/tests/unit/data/test_pretty_rss.txt @@ -0,0 +1,14 @@ + +Feed: Liftoff News + +Title: Star City +Date: Tue, 03 Jun 2003 09:39:21 GMT +Link: http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp + +How do Americans get ready to work with Russians aboard the + International Space Station? They take a crash course in culture, language + and protocol at Russia's Star City. 
+ +Links: +[1]: http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp(link) + diff --git a/tests/unit/data/test_result_of_parsing_rss.txt b/tests/unit/data/test_result_of_parsing_rss.txt new file mode 100644 index 00000000..f748dbc2 --- /dev/null +++ b/tests/unit/data/test_result_of_parsing_rss.txt @@ -0,0 +1 @@ +{'feed': {'title': 'Liftoff News', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Liftoff News'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/'}], 'link': 'http://liftoff.msfc.nasa.gov/', 'subtitle': 'Liftoff to Space Exploration.', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Liftoff to Space Exploration.'}, 'language': 'en-us', 'published': 'Tue, 10 Jun 2003 04:00:00 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=6, tm_mday=10, tm_hour=4, tm_min=0, tm_sec=0, tm_wday=1, tm_yday=161, tm_isdst=0), 'updated': 'Tue, 10 Jun 2003 09:41:01 GMT', 'updated_parsed': time.struct_time(tm_year=2003, tm_mon=6, tm_mday=10, tm_hour=9, tm_min=41, tm_sec=1, tm_wday=1, tm_yday=161, tm_isdst=0), 'docs': 'http://blogs.law.harvard.edu/tech/rss', 'generator_detail': {'name': 'Weblog Editor 2.0'}, 'generator': 'Weblog Editor 2.0', 'authors': [{'email': 'editor@example.com'}], 'author': 'editor@example.com', 'author_detail': {'email': 'editor@example.com'}, 'publisher': 'webmaster@example.com', 'publisher_detail': {'email': 'webmaster@example.com'}}, 'entries': [{'title': 'Star City', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Star City'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp', 'summary': "How do Americans get ready to work with Russians aboard the\n International Space Station? 
They take a crash course in culture, language\n and protocol at Russia's Star City.", 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': "How do Americans get ready to work with Russians aboard the\n International Space Station? They take a crash course in culture, language\n and protocol at Russia's Star City."}, 'published': 'Tue, 03 Jun 2003 09:39:21 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=6, tm_mday=3, tm_hour=9, tm_min=39, tm_sec=21, tm_wday=1, tm_yday=154, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573', 'guidislink': False}, {'title': 'Space Exploration', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Space Exploration'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/'}], 'link': 'http://liftoff.msfc.nasa.gov/', 'summary': 'Sky watchers in Europe, Asia, and parts of Alaska and Canada\n will experience a partial eclipse of the Sun on Saturday, May 31st.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Sky watchers in Europe, Asia, and parts of Alaska and Canada\n will experience a partial eclipse of the Sun on Saturday, May 31st.'}, 'published': 'Fri, 30 May 2003 11:06:42 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=30, tm_hour=11, tm_min=6, tm_sec=42, tm_wday=4, tm_yday=150, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/30.html#item572', 'guidislink': False}, {'title': 'The Engine That Does More', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'The Engine That Does More'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp', 'summary': 'Before man travels to Mars, NASA hopes to design new engines\n that will let us fly through the Solar System more quickly. 
The proposed\n VASIMR engine would do that.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Before man travels to Mars, NASA hopes to design new engines\n that will let us fly through the Solar System more quickly. The proposed\n VASIMR engine would do that.'}, 'published': 'Tue, 27 May 2003 08:37:32 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=27, tm_hour=8, tm_min=37, tm_sec=32, tm_wday=1, tm_yday=147, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/27.html#item571', 'guidislink': False}, {'title': "Astronauts' Dirty Laundry", 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': "Astronauts' Dirty Laundry"}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp'}], 'link': 'http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp', 'summary': 'Compared to earlier spacecraft, the International Space\n Station has many luxuries, but laundry facilities are not one of them.\n Instead, astronauts have other options.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': '', 'value': 'Compared to earlier spacecraft, the International Space\n Station has many luxuries, but laundry facilities are not one of them.\n Instead, astronauts have other options.'}, 'published': 'Tue, 20 May 2003 08:56:02 GMT', 'published_parsed': time.struct_time(tm_year=2003, tm_mon=5, tm_mday=20, tm_hour=8, tm_min=56, tm_sec=2, tm_wday=1, tm_yday=140, tm_isdst=0), 'id': 'http://liftoff.msfc.nasa.gov/2003/05/20.html#item570', 'guidislink': False}], 'bozo': 0, 'encoding': 'utf-8', 'version': 'rss20', 'namespaces': {}} \ No newline at end of file diff --git a/tests/unit/test_conversion_html.py b/tests/unit/test_conversion_html.py new file mode 100644 index 00000000..c2efc5ee --- /dev/null +++ b/tests/unit/test_conversion_html.py @@ -0,0 +1,32 @@ +"""Tests for rssreader.format_conversion.conversion_html module""" + +import 
unittest +from os import path + +from rssreader.parser import feed_parser +from rssreader.format_conversion import conversion_html as cv_html + + +class HtmlConversionTestCase(unittest.TestCase): + """Test cases for HtmlConversion class""" + def setUp(self): + with open( + path.join("tests", "unit", "data", "test_correct.rss"), + "r") as rf: + self.url = rf.read() + + self.test_feed = feed_parser.RssParser(self.url, 1) + self.test_news = self.test_feed.parse_news() + self.test_html = cv_html.HtmlConversion(self.test_news) + + def test_conversion_to_html(self): + """Function conversion_to_html test""" + with open( + path.join("tests", "unit", "data", "test_html.html"), + "r") as rf: + answer = rf.read() + self.assertEqual(self.test_html.conversion_to_html(), answer) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_conversion_json.py b/tests/unit/test_conversion_json.py new file mode 100644 index 00000000..f8cceb23 --- /dev/null +++ b/tests/unit/test_conversion_json.py @@ -0,0 +1,38 @@ +"""Tests for rssreader.format_conversion.conversion_json module""" + +import unittest +import feedparser +from os import path + +from rssreader.format_conversion import conversion_json +from rssreader.parser import feed_parser + + +class JsonConversionTestCase(unittest.TestCase): + """Test cases for JsonConversion class""" + def setUp(self): + with open( + path.join("tests", "unit", "data", "test_correct.rss"), + "r") as rf: + self.url = rf.read() + + self.test_feed_json = conversion_json.JsonConversion(self.url, 1) + self.test_feed = feed_parser.RssParser(self.url, 1) + + def test_convert_to_json(self): + """Function test_convert_to_json test""" + with open(path.join( + "tests", + "unit", + "data", + "test_convert_to_json.txt"), "r") as rf: + answer = rf.read() + + self.assertEqual( + self.test_feed_json.convert_to_json(self.test_feed.parse_news()), + answer + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_exceptions.py 
b/tests/unit/test_exceptions.py new file mode 100644 index 00000000..65b15c98 --- /dev/null +++ b/tests/unit/test_exceptions.py @@ -0,0 +1,23 @@ +"""Tests for rssreader.exceptions.all_exceptions module""" + +import unittest + + +from rssreader.exceptions import all_exceptions as all_exc + + +class ThrowingArgumentParserTestCase(unittest.TestCase): + """Test cases for ThrowingArgumentParser class""" + def setUp(self): + self.args_test = all_exc.ThrowingArgumentParser("test", "test") + + def test_error(self): + """Function test_error test""" + self.assertRaises( + all_exc.ArgumentParserError, + lambda: self.args_test.error("test") + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_feed_parser.py b/tests/unit/test_feed_parser.py new file mode 100644 index 00000000..2ce7beb9 --- /dev/null +++ b/tests/unit/test_feed_parser.py @@ -0,0 +1,163 @@ +"""Tests for rssreader.parser.feed_parser module""" + +import unittest +import feedparser +from unittest.mock import patch +from os import path + +from rssreader.parser import feed_parser +from rssreader.exceptions import all_exceptions + + +class RssParserTestCase(unittest.TestCase): + """Test cases for RssParser class""" + def setUp(self): + with open( + path.join("tests", "unit", "data", "test_correct.rss"), + "r") as rf: + self.url = rf.read() + + self.test_feed = feed_parser.RssParser(self.url, 1) + + with open(path.join( + "tests", + "unit", + "data", + "test_result_of_parsing_rss.txt"), "r") as rf: + self.test_result_rss = rf.read() + + def test__check_limit(self): + """Function _check_limit test""" + test_list = [1, 2, None, 9999999, -10] + answer_list = [1, 2, 4, 4, 0] + + with open(path.join( + "tests", + "unit", + "data", + "test_entries.txt"), "r") as rf: + enrtries = rf.readlines() + + for test_value, answer in zip(test_list, answer_list): + test_feed = feed_parser.RssParser(self.url, test_value) + self.assertEqual(test_feed._check_limit(enrtries), answer) + + def 
test__get_rss_from_url(self): + """Function _get_rss_from_url test""" + with open(path.join( + "tests", + "unit", + "data", + "test_incorrect.rss"), "r") as rf: + incorrect_rss = rf.read() + + self.assertEqual( + str(self.test_feed._get_rss_from_url()), + self.test_result_rss + ) + + self.assertRaises( + all_exceptions.InvalidLinkOrInternetConnectionError, + lambda: feed_parser.RssParser(incorrect_rss)._get_rss_from_url() + ) + + def test___find_type_link(self): + """Function __find_type_link test""" + with open(path.join( + "tests", + "unit", + "data", + "test_links.txt"), "r") as rf_links: + list_links = rf_links.readlines() + + list_answer = [ + "(image)", "(image)", "(image)", + "(image)", "(image)", "(image)", + "(video)", "(video)", "(video)" + ] + + for link, answer in zip(list_links, list_answer): + self.assertEqual( + self.test_feed._RssParser__find_type_link(link), + answer + ) + + @patch("rssreader.parser.feed_parser.RssParser._check_limit") + @patch("rssreader.parser.feed_parser.RssParser._get_rss_from_url") + def test_parse_news(self, rss_mock, limit_mock): + """Function test_parse_news test + + self.limit = 1 + self.html_news = data/test_result_of_parsing_rss.txt + + """ + rss_mock.return_value = feedparser.parse(self.url) + limit_mock.return_value = 1 + + with open(path.join( + "tests", + "unit", + "data", + "test_parsing_news.txt"), "r") as rf: + test_parsing_news = rf.read() + + self.assertEqual(str(self.test_feed.parse_news()), test_parsing_news) + + @patch("rssreader.parser.feed_parser.RssParser._RssParser__find_type_link") + def test___make_pretty_links(self, type_link_mock): + """Function __make_pretty_links test + + self.__find_type_link(element) = (test) + + """ + type_link_mock.return_value = "(test)" + link = "https://news.tut.by/world/662695.html" + + with open(path.join( + "tests", + "unit", + "data", + "test_links.txt"), "r") as rf_links: + links = rf_links.readlines() + links = [link.rstrip() for link in links] + + with open(path.join( 
+ "tests", + "unit", + "data", + "test_answer_links.txt"), "r") as rf_links: + answer = rf_links.read() + + self.assertEqual( + self.test_feed._RssParser__make_pretty_links(link, links), + answer + ) + + @patch( + "rssreader.parser.feed_parser.RssParser._RssParser__make_pretty_links") + def test_make_pretty_rss(self, pretty_links): + """Function test_make_pretty_rss test + + self.__make_pretty_links(link, links) = + [1]: http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp(link) + + """ + link_part_1 = "[1]: http://liftoff.msfc.nasa.gov" + link_part_2 = "/news/2003/news-starcity.asp(link)" + pretty_links.return_value = link_part_1 + link_part_2 + + with open(path.join( + "tests", + "unit", + "data", + "test_pretty_rss.txt"), "r") as rf: + answer = rf.read() + + self.assertEqual( + self.test_feed.make_pretty_rss(self.test_feed.parse_news()), + answer + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_news_database.py b/tests/unit/test_news_database.py new file mode 100644 index 00000000..212c404c --- /dev/null +++ b/tests/unit/test_news_database.py @@ -0,0 +1,52 @@ +"""Tests for rssreader.db.news_database module""" +import unittest +from unittest.mock import patch + +from rssreader.db import news_database as db + +NAME_FUNCTION_MOCK = "_NewsDatabase__search_caching_news" + + +class NewsDatabaseTestCase(unittest.TestCase): + """Test cases for NewsDatabase class""" + def setUp(self): + self.test_mongo = db.NewsDatabase("test_url", 1, 1) + + def test___template_news(self): + """Function __template_news test""" + self.assertEqual( + self.test_mongo._NewsDatabase__template_news(1, {1: 1}), + {"source": "test_url", "date": 1, "news": {1: 1}}) + + def test___convert_date_format(self): + """Function __convert_date_format test""" + list_tests = [ + "Wed, 27 Nov 2018 09:56:11 GMT", + "Wed, 7 Dec 2017 09:56:11 GMT", + "Wed, 5 Apr 2016 09:56:11 +0300", + "Wed, 7 Sep 2015 09:56:11 -0400", + "Wed, 10 Nov 2014 09:56:11 +0000" + ] + list_answers 
= [ + "%a, %d %b %Y %H:%M:%S %Z", + "%a, %d %b %Y %H:%M:%S %Z", + "%a, %d %b %Y %H:%M:%S %z", + "%a, %d %b %Y %H:%M:%S %z", + "%a, %d %b %Y %H:%M:%S %z" + ] + + for test, answer in zip(list_tests, list_answers): + self.assertEqual( + self.test_mongo._NewsDatabase__convert_date_format(test), + answer) + + @patch(f"rssreader.db.news_database.NewsDatabase.{NAME_FUNCTION_MOCK}") + def test_show_news(self, find_mock): + """Function show_news test""" + find_mock.return_value = [{"test": 1, "news": {"test": 777}}] + + self.assertEqual(self.test_mongo.show_news(), [{"test": 777}]) + + +if __name__ == "__main__": + unittest.main()