diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md index 7b9856c..819b41e 100644 --- a/.github/ISSUE_TEMPLATE/issue.md +++ b/.github/ISSUE_TEMPLATE/issue.md @@ -3,7 +3,7 @@ name: Issue about: Log bugs found while using Manga Tagger title: "[BUG]" labels: bug -assignees: Inpacchi +assignees: sanchoblaze --- diff --git a/.gitignore b/.gitignore index 58b7cbb..4e32a43 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,4 @@ dmypy.json library/ downloads/ wiki/ +manga_tagger.db diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..764bd0d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +FROM python:3.11.4-alpine3.18 +LABEL authors="sanchoblaze " + +RUN apk add gcc python3-dev python3-tkinter build-base linux-headers git +RUN python -m pip install --upgrade pip + +RUN mkdir /downloads +RUN mkdir /library +RUN mkdir /config +RUN mkdir /manga-tagger + +# COPY settings.json /manga-tagger/ +COPY MangaTaggerLib /manga-tagger/MangaTaggerLib +COPY MangaTagger.py /manga-tagger/ +COPY requirements.txt /manga-tagger/ + +VOLUME /downloads +VOLUME /library +VOLUME /config + +WORKDIR /manga-tagger/ + +RUN pip install --no-cache -r requirements.txt + +RUN groupadd -r -g 100 abc && useradd -r -g abc -u 99 abc +RUN chown -R abc /downloads +RUN chown -R abc /library +VOLUME chown -R abc /config + +USER abc:abc + +# RUN python MangaTagger.py +CMD [ "python", "./MangaTagger.py"] diff --git a/MangaTaggerLib/MangaTaggerLib.py b/MangaTaggerLib/MangaTaggerLib.py index f776ef0..08a722d 100644 --- a/MangaTaggerLib/MangaTaggerLib.py +++ b/MangaTaggerLib/MangaTaggerLib.py @@ -12,7 +12,7 @@ from MangaTaggerLib._version import __version__ from MangaTaggerLib.api import MTJikan, AniList -from MangaTaggerLib.database import MetadataTable, ProcFilesTable, ProcSeriesTable +from MangaTaggerLib.database import MangaTable, FilesTable from MangaTaggerLib.errors import FileAlreadyProcessedError, FileUpdateNotRequiredError, UnparsableFilenameError, \ 
MangaNotFoundError, MangaMatchedException from MangaTaggerLib.models import Metadata @@ -22,7 +22,7 @@ # Global Variable Declaration LOG = logging.getLogger('MangaTaggerLib.MangaTaggerLib') -CURRENTLY_PENDING_DB_SEARCH = set() +# CURRENTLY_PENDING_DB_SEARCH = set() CURRENTLY_PENDING_RENAME = set() @@ -42,6 +42,7 @@ def main(): def process_manga_chapter(file_path: Path, event_id): + file_id = None filename = file_path.name directory_path = file_path.parent directory_name = file_path.parent.name @@ -52,7 +53,7 @@ def process_manga_chapter(file_path: Path, event_id): "original_filename": filename } - LOG.info(f'Now processing "{file_path}"...', extra=logging_info) + LOG.info(f'Now processing "{file_path}"...') LOG.debug(f'filename: {filename}') LOG.debug(f'directory_path: {directory_path}') @@ -64,7 +65,7 @@ def process_manga_chapter(file_path: Path, event_id): new_filename = manga_details[0] LOG.debug(f'new_filename: {new_filename}') except TypeError: - LOG.warning(f'Manga Tagger was unable to process "{file_path}"', extra=logging_info) + LOG.warning(f'Manga Tagger was unable to process "{file_path}"') return None manga_library_dir = Path(AppSettings.library_dir, directory_name) @@ -77,60 +78,60 @@ def process_manga_chapter(file_path: Path, event_id): new_file_path = Path(manga_library_dir, new_filename) LOG.debug(f'new_file_path: {new_file_path}') - LOG.info(f'Checking for current and previously processed files with filename "{new_filename}"...', - extra=logging_info) + LOG.info(f'Checking for current and previously processed files with filename "{new_filename}"...') if AppSettings.mode_settings is None or AppSettings.mode_settings['rename_file']: try: # Multithreading Optimization if new_file_path in CURRENTLY_PENDING_RENAME: LOG.info(f'A file is currently being renamed under the filename "{new_filename}". 
Locking ' - f'{file_path} from further processing until this rename action is complete...', - extra=logging_info) + f'{file_path} from further processing until this rename action is complete...') while new_file_path in CURRENTLY_PENDING_RENAME: time.sleep(1) LOG.info(f'The file being renamed to "{new_file_path}" has been completed. Unlocking ' - f'"{new_filename}" for file rename processing.', extra=logging_info) + f'"{new_filename}" for file rename processing.') else: LOG.info(f'No files currently currently being processed under the filename ' - f'"{new_filename}". Locking new filename for processing...', extra=logging_info) + f'"{new_filename}". Locking new filename for processing...') CURRENTLY_PENDING_RENAME.add(new_file_path) - rename_action(file_path, new_file_path, directory_name, manga_details[1], logging_info) + file_id = rename_action(file_path, new_file_path, directory_name, manga_details[1], logging_info) except (FileExistsError, FileUpdateNotRequiredError, FileAlreadyProcessedError) as e: - LOG.exception(e, extra=logging_info) + LOG.exception(e) CURRENTLY_PENDING_RENAME.remove(new_file_path) return - # More Multithreading Optimization - if directory_name in ProcSeriesTable.processed_series: - LOG.info(f'"{directory_name}" has been processed as a searched series and will continue processing.', - extra=logging_info) - else: - if directory_name in CURRENTLY_PENDING_DB_SEARCH: - LOG.info(f'"{directory_name}" has not been processed as a searched series but is currently pending ' - f'a database search. 
Suspending further processing until database search has finished...', - extra=logging_info) + metadata_tagger(directory_name, manga_details[1], logging_info, file_id, new_file_path) + LOG.info(f'Processing on "{new_file_path}" has finished.') - while directory_name in CURRENTLY_PENDING_DB_SEARCH: - time.sleep(1) - LOG.info(f'"{directory_name}" has been processed as a searched series and will now be unlocked for ' - f'processing.', extra=logging_info) - else: - LOG.info(f'"{directory_name}" has not been processed as a searched series nor is it currently pending ' - f'a database search. Locking series from being processing until database has been searched...', - extra=logging_info) - CURRENTLY_PENDING_DB_SEARCH.add(directory_name) +def tag_manga_chapter(file_id): + result = FilesTable.get_by_id(file_id) + + filename = result['new_filename'] + directory_name = result['series_title'] + + logging_info = { + 'file_id': file_id, + 'manga_title': directory_name, + "filename": filename + } + + LOG.info(f'Adding meta data to file id: {file_id}.') + + manga_library_dir = Path(AppSettings.library_dir, directory_name) + LOG.debug(f'Manga Library Directory: {manga_library_dir}') + new_file_path = Path(manga_library_dir, filename) + LOG.debug(f'new_file_path: {new_file_path}') - metadata_tagger(directory_name, manga_details[1], logging_info, new_file_path) - LOG.info(f'Processing on "{new_file_path}" has finished.', extra=logging_info) + metadata_tagger(directory_name, result['chapter_number'], logging_info, file_id, new_file_path) + LOG.info(f'Processing on "{new_file_path}" has finished.') def file_renamer(filename, logging_info): - LOG.info(f'Attempting to rename "{filename}"...', extra=logging_info) + LOG.info(f'Attempting to rename "{filename}"...') # Parse the manga title and chapter name/number (this depends on where the manga is downloaded from) try: @@ -138,9 +139,9 @@ def file_renamer(filename, logging_info): raise UnparsableFilenameError(filename, '-.-') filename = 
filename.split(' -.- ') - LOG.info(f'Filename was successfully parsed as {filename}.', extra=logging_info) + LOG.info(f'Filename was successfully parsed as {filename}.') except UnparsableFilenameError as ufe: - LOG.exception(ufe, extra=logging_info) + LOG.exception(ufe) return None manga_title: str = filename[0] @@ -160,7 +161,10 @@ def file_renamer(filename, logging_info): chapter_title = chapter_title.replace(' ', '') - if 'chapter' in chapter_title: + if 'vol' in chapter_title: + delimiter = 'vol' + delimiter_index = 3 + elif 'chapter' in chapter_title: delimiter = 'chapter' delimiter_index = 7 elif 'ch.' in chapter_title: @@ -175,7 +179,7 @@ def file_renamer(filename, logging_info): else: raise UnparsableFilenameError(filename, 'ch/chapter') except UnparsableFilenameError as ufe: - LOG.exception(ufe, extra=logging_info) + LOG.exception(ufe) return None except MangaMatchedException: if 'chapter' in chapter_title: @@ -194,6 +198,8 @@ def file_renamer(filename, logging_info): LOG.debug(f'Length: {len(chapter_title)}') chapter_number = '' + chapter_title = chapter_title.replace(' ', '') + LOG.debug(f'chapter_title: {chapter_title}') while i < len(chapter_title): substring = chapter_title[i] LOG.debug(f'substring: {substring}') @@ -219,23 +225,24 @@ def file_renamer(filename, logging_info): logging_info['chapter_number'] = chapter_number logging_info['new_filename'] = filename - LOG.info(f'File will be renamed to "{filename}".', extra=logging_info) + LOG.info(f'File will be renamed to "{filename}".') return filename, chapter_number def rename_action(current_file_path: Path, new_file_path: Path, manga_title, chapter_number, logging_info): chapter_number = chapter_number.replace('.', '-') - results = ProcFilesTable.search(manga_title, chapter_number) + results = FilesTable.search(manga_title, chapter_number) LOG.debug(f'Results: {results}') # If the series OR the chapter has not been processed if results is None: LOG.info(f'"{manga_title}" chapter {chapter_number} has 
not been processed before. ' - f'Proceeding with file rename...', extra=logging_info) - ProcFilesTable.insert_record_and_rename(current_file_path, new_file_path, manga_title, chapter_number, - logging_info) + f'Proceeding with file rename...') + file_id = FilesTable.insert_record_and_rename(current_file_path, new_file_path, manga_title, chapter_number, + logging_info) else: + file_id = results['file_id'] versions = ['v2', 'v3', 'v4', 'v5'] existing_old_filename = results['old_filename'] @@ -243,39 +250,42 @@ def rename_action(current_file_path: Path, new_file_path: Path, manga_title, cha # If currently processing file has the same name as an existing file if existing_current_filename == new_file_path.name: - # If currently processing file has a version in it's filename - if any(version in current_file_path.name.lower() for version in versions): + # Check if file was processed successfully + if results['processed_date'] is None: + LOG.info(f'File previously failed to renamed, trying again.') + FilesTable.update_record_and_rename(results, current_file_path, new_file_path, logging_info) + # If currently processing file has a version in its filename + elif any(version in current_file_path.name.lower() for version in versions): # If the version is newer than the existing file if compare_versions(existing_old_filename, current_file_path.name): LOG.info(f'Newer version of "{manga_title}" chapter {chapter_number} has been found. Deleting ' - f'existing file and proceeding with file rename...', extra=logging_info) + f'existing file and proceeding with file rename...') new_file_path.unlink() - LOG.info(f'"{new_file_path.name}" has been deleted! Proceeding to rename new file...', - extra=logging_info) - ProcFilesTable.update_record_and_rename(results, current_file_path, new_file_path, logging_info) + LOG.info(f'"{new_file_path.name}" has been deleted! 
Proceeding to rename new file...') + FilesTable.update_record_and_rename(results, current_file_path, new_file_path, logging_info) else: LOG.warning(f'"{current_file_path.name}" was not renamed due being the exact same as the ' - f'existing chapter; file currently being processed will be deleted', - extra=logging_info) + f'existing chapter; file currently being processed will be deleted') current_file_path.unlink() raise FileUpdateNotRequiredError(current_file_path.name) # If the current file doesn't have a version in it's filename, but the existing file does elif any(version in existing_old_filename.lower() for version in versions): LOG.warning(f'"{current_file_path.name}" was not renamed due to not being an updated version ' - f'of the existing chapter; file currently being processed will be deleted', - extra=logging_info) + f'of the existing chapter; file currently being processed will be deleted') current_file_path.unlink() raise FileUpdateNotRequiredError(current_file_path.name) # If all else fails else: LOG.warning(f'No changes have been found for "{existing_current_filename}"; file currently being ' - f'processed will be deleted', extra=logging_info) + f'processed will be deleted') current_file_path.unlink() raise FileAlreadyProcessedError(current_file_path.name) - LOG.info(f'"{new_file_path.name}" will be unlocked for any pending processes.', extra=logging_info) + LOG.info(f'"{new_file_path.name}" will be unlocked for any pending processes.') CURRENTLY_PENDING_RENAME.remove(new_file_path) + return file_id + def compare_versions(old_filename: str, new_filename: str): old_version = 0 @@ -313,92 +323,75 @@ def compare_versions(old_filename: str, new_filename: str): return False -def metadata_tagger(manga_title, manga_chapter_number, logging_info, manga_file_path=None): - manga_search = None +def metadata_tagger(manga_title, manga_chapter_number, logging_info, file_id, manga_file_path=None): db_exists = True - retries = 0 - - LOG.info(f'Table search value is 
"{manga_title}"', extra=logging_info) - while manga_search is None: - if retries == 0: - LOG.info('Searching manga_metadata for manga title by search value...', extra=logging_info) - manga_search = MetadataTable.search_by_search_value(manga_title) - retries = 1 - elif retries == 1: - LOG.info('Searching manga_metadata for regular manga title...', extra=logging_info) - manga_search = MetadataTable.search_by_series_title(manga_title) - retries = 2 - elif retries == 2: - LOG.info('Searching manga_metadata for English manga title...', extra=logging_info) - manga_search = MetadataTable.search_by_series_title_eng(manga_title) - retries = 3 - else: # The manga is not in the database, so ping the API and create the database - LOG.info('Manga was not found in the database; resorting to Jikan API.', extra=logging_info) + LOG.info(f'Table search value is "{manga_title} and file id is "{file_id}"') + manga_search = MangaTable.search(manga_title) + if manga_search is None: + LOG.info('Manga was not found in the database; resorting to Jikan API.') + while manga_search is None: try: manga_search = MTJikan().search('manga', manga_title) except (APIException, ConnectionError) as e: - LOG.warning(e, extra=logging_info) + LOG.warning(e) LOG.warning('Manga Tagger has unintentionally breached the API limits on Jikan. 
Waiting 60s to clear ' 'all rate limiting limits...') time.sleep(60) manga_search = MTJikan().search('manga', manga_title) - db_exists = False + db_exists = False - if db_exists: - if manga_title in ProcSeriesTable.processed_series: - LOG.info(f'Found an entry in manga_metadata for "{manga_title}".', extra=logging_info) - else: - LOG.info(f'Found an entry in manga_metadata for "{manga_title}"; unlocking series for processing.', - extra=logging_info) - ProcSeriesTable.processed_series.add(manga_title) - CURRENTLY_PENDING_DB_SEARCH.remove(manga_title) + LOG.debug(f"API Results for {manga_title}: {manga_search}") + if db_exists: + manga_id = manga_search['manga_id'] manga_metadata = Metadata(manga_title, logging_info, details=manga_search) logging_info['metadata'] = manga_metadata.__dict__ else: manga_found = False try: - for result in manga_search['results']: - if result['type'].lower() == 'manga': - manga_id = result['mal_id'] - anilist_titles = construct_anilist_titles( - AniList.search_for_manga_title_by_mal_id(manga_id, logging_info)['title']) + for result in manga_search['data']: + if result['type'].lower() == 'manga' or result['type'].lower() == 'one-shot': + mal_id = result['mal_id'] + anilist_results = AniList.search_for_manga_title_by_mal_id(mal_id, logging_info) + if anilist_results is None: + continue + anilist_titles = construct_anilist_titles(anilist_results['title']) logging_info['anilist_titles'] = anilist_titles try: - jikan_details = MTJikan().manga(manga_id) + jikan_details = MTJikan().manga(mal_id) except (APIException, ConnectionError) as e: - LOG.warning(e, extra=logging_info) + LOG.warning(e) LOG.warning( 'Manga Tagger has unintentionally breached the API limits on Jikan. 
Waiting 60s to clear ' 'all rate limiting limits...') time.sleep(60) - jikan_details = MTJikan().manga(manga_id) + jikan_details = MTJikan().manga(mal_id) - jikan_titles = construct_jikan_titles(jikan_details) + jikan_titles = construct_jikan_titles(jikan_details['data']) logging_info['jikan_titles'] = jikan_titles - LOG.info(f'Comparing titles found for "{manga_title}"...', extra=logging_info) + LOG.info(f'Comparing titles found for "{manga_title}"...') comparison_values = compare_titles(manga_title, jikan_titles, anilist_titles, logging_info) if comparison_values is None: continue elif any(value > .8 for value in comparison_values): - LOG.info(f'Match found for {manga_title}', extra=logging_info) + LOG.info(f'Match found for {manga_title}') manga_found = True break elif any(value > .5 for value in comparison_values): - jikan_details = MTJikan().manga(result['mal_id']) + jikan_details = MTJikan().manga(mal_id) jikan_authors = jikan_details['authors'] - anilist_authors = AniList.search_staff_by_mal_id(result['mal_id'], + anilist_authors = AniList.search_staff_by_mal_id(mal_id, logging_info)['staff']['edges'] logging_info['jikan_authors'] = jikan_authors logging_info['anilist_authors'] = anilist_authors LOG.info(f'Match found for {manga_title} with 50% likelihood; now checking ' - f'authors for further veritifcation', extra=logging_info) + f'authors for further veritifcation') if compare_authors(jikan_authors, anilist_authors, logging_info): LOG.info(f'Authors matched up for {manga_title}; proceeding with processing') @@ -407,32 +400,34 @@ def metadata_tagger(manga_title, manga_chapter_number, logging_info, manga_file_ if not manga_found: raise MangaNotFoundError(manga_title) except MangaNotFoundError as mnfe: - LOG.exception(mnfe, extra=logging_info) + LOG.exception(mnfe) return - LOG.info(f'ID for "{manga_title}" found as "{manga_id}".', extra=logging_info) + LOG.info(f'ID for "{manga_title}" found as "{mal_id}".') - anilist_details = 
AniList.search_staff_by_mal_id(manga_id, logging_info) + anilist_details = AniList.search_staff_by_mal_id(mal_id, logging_info) LOG.debug(f'jikan_details: {jikan_details}') LOG.debug(f'anilist_details: {anilist_details}') - manga_metadata = Metadata(manga_title, logging_info, jikan_details, anilist_details) + manga_metadata = Metadata(manga_title, logging_info, jikan_details['data'], anilist_details) logging_info['metadata'] = manga_metadata.__dict__ if AppSettings.mode_settings is None or ('database_insert' in AppSettings.mode_settings.keys() and AppSettings.mode_settings['database_insert']): - MetadataTable.insert(manga_metadata, logging_info) + manga_id = MangaTable.insert(manga_metadata, logging_info) + manga_metadata.manga_id = manga_id + LOG.info(f'Retrieved metadata for "{manga_title}" from the Anilist and MyAnimeList APIs; ' - f'now unlocking series for processing!', extra=logging_info) - ProcSeriesTable.processed_series.add(manga_title) - CURRENTLY_PENDING_DB_SEARCH.remove(manga_title) + f'now unlocking series for processing!') if AppSettings.mode_settings is None or ('write_comicinfo' in AppSettings.mode_settings.keys() and AppSettings.mode_settings['write_comicinfo']): comicinfo_xml = construct_comicinfo_xml(manga_metadata, manga_chapter_number, logging_info) reconstruct_manga_chapter(comicinfo_xml, manga_file_path, logging_info) + FilesTable.add_manga_id(file_id, manga_id) + FilesTable.add_tagged_date(file_id) return manga_metadata @@ -481,7 +476,7 @@ def compare_titles(manga_title: str, jikan_titles: dict, anilist_titles: dict, l comparison_values.append(compare(manga_title, anilist_titles[anilist_key])) logging_info['pre_comparison_values'] = comparison_values - LOG.debug(f'pre_comparison_values: {comparison_values}', extra=logging_info) + LOG.debug(f'pre_comparison_values: {comparison_values}') if not any(value > .69 for value in comparison_values): return None @@ -493,7 +488,7 @@ def compare_titles(manga_title: str, jikan_titles: dict, 
anilist_titles: dict, l comparison_values.append(compare(jikan_titles[jikan_key], anilist_titles[anilist_key])) logging_info['post_comparison_values'] = comparison_values - LOG.debug(f'post_comparison_values: {comparison_values}', extra=logging_info) + LOG.debug(f'post_comparison_values: {comparison_values}') return comparison_values @@ -513,8 +508,7 @@ def compare_authors(jikan_authors, anilist_authors, logging_info): def construct_comicinfo_xml(metadata, chapter_number, logging_info): - LOG.info(f'Constructing comicinfo object for "{metadata.series_title}", chapter {chapter_number}...', - extra=logging_info) + LOG.info(f'Constructing comicinfo object for "{metadata.series_title}", chapter {chapter_number}...') comicinfo = Element('ComicInfo') @@ -584,8 +578,7 @@ def construct_comicinfo_xml(metadata, chapter_number, logging_info): comicinfo.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema') comicinfo.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance') - LOG.info(f'Finished creating ComicInfo object for "{metadata.series_title}", chapter {chapter_number}.', - extra=logging_info) + LOG.info(f'Finished creating ComicInfo object for "{metadata.series_title}", chapter {chapter_number}.') return parseString(tostring(comicinfo)).toprettyxml(indent=" ") @@ -594,9 +587,8 @@ def reconstruct_manga_chapter(comicinfo_xml, manga_file_path, logging_info): with ZipFile(manga_file_path, 'a') as zipfile: zipfile.writestr('ComicInfo.xml', comicinfo_xml) except Exception as e: - LOG.exception(e, extra=logging_info) - LOG.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.', - extra=logging_info) + LOG.exception(e) + LOG.warning('Manga Tagger is unfamiliar with this error. 
Please log an issue for investigation.') return - LOG.info(f'ComicInfo.xml has been created and appended to "{manga_file_path}".', extra=logging_info) + LOG.info(f'ComicInfo.xml has been created and appended to "{manga_file_path}".') diff --git a/MangaTaggerLib/_version.py b/MangaTaggerLib/_version.py index 6a16774..9e28e4d 100644 --- a/MangaTaggerLib/_version.py +++ b/MangaTaggerLib/_version.py @@ -1 +1 @@ -__version__ = '1.1.6-alpha' +__version__ = '2.0.3-alpha' diff --git a/MangaTaggerLib/api.py b/MangaTaggerLib/api.py index bbfa9f0..6b34ac9 100644 --- a/MangaTaggerLib/api.py +++ b/MangaTaggerLib/api.py @@ -18,11 +18,12 @@ def __init__( self.calls_minute = 0 self.last_api_call = datetime.now() - # Rate Limit: 2 requests/second + # Rate Limit: 1 requests/second def _check_rate_seconds(self): + time.sleep(5) last_api_call_delta = (datetime.now() - self.last_api_call).total_seconds() - if self.calls_second > 2 and last_api_call_delta < 1: + if self.calls_second > 1 > last_api_call_delta: time.sleep(1) elif last_api_call_delta > 1: self.calls_second = 0 @@ -32,7 +33,7 @@ def _check_rate_minutes(self): last_api_call_delta = (datetime.now() - self.last_api_call).total_seconds() if self.calls_minute > 30 and last_api_call_delta < 60: - time.sleep(61 - last_api_call_delta) + time.sleep(61) elif last_api_call_delta > 60: self.calls_minute = 0 @@ -49,6 +50,7 @@ def search( self.calls_second += 1 self.calls_minute += 1 self.last_api_call = datetime.now() + search_results = super(MTJikan, self).search(search_type, query, page, parameters) self.session.close() return search_results @@ -76,12 +78,14 @@ def initialize(cls): @classmethod def _post(cls, query, variables, logging_info): + time.sleep(5) try: response = requests.post('https://graphql.anilist.co', json={'query': query, 'variables': variables}) + if "errors" in response.json(): + raise Exception(f"Error: {response.json()['errors']}") except Exception as e: - cls._log.exception(e, extra=logging_info) - 
cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.', - extra=logging_info) + cls._log.exception(e) + cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.') return None cls._log.debug(f'Query: {query}') @@ -92,6 +96,7 @@ def _post(cls, query, variables, logging_info): @classmethod def search_staff_by_mal_id(cls, mal_id, logging_info): + time.sleep(5) query = ''' query search_staff_by_mal_id ($mal_id: Int) { Media (idMal: $mal_id, type: MANGA) { @@ -122,6 +127,7 @@ def search_staff_by_mal_id(cls, mal_id, logging_info): @classmethod def search_for_manga_title_by_mal_id(cls, mal_id, logging_info): + time.sleep(5) query = ''' query search_manga_by_mal_id ($mal_id: Int) { Media (idMal: $mal_id, type: MANGA) { diff --git a/MangaTaggerLib/database.py b/MangaTaggerLib/database.py index 0f4f193..bfc0705 100644 --- a/MangaTaggerLib/database.py +++ b/MangaTaggerLib/database.py @@ -1,278 +1,421 @@ -import logging -import sys -from datetime import datetime -from pathlib import Path -from queue import Queue - -from bson.errors import InvalidDocument -from pymongo import MongoClient -from pymongo.errors import ServerSelectionTimeoutError, DuplicateKeyError - - -class Database: - database_name = None - host_address = None - port = None - username = None - password = None - auth_source = None - server_selection_timeout_ms = None - - _client = None - _database = None - _log = None - - @classmethod - def initialize(cls): - cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') - - if cls.auth_source is None: - cls._client = MongoClient(cls.host_address, - cls.port, - username=cls.username, - password=cls.password, - serverSelectionTimeoutMS=cls.server_selection_timeout_ms) - else: - cls._client = MongoClient(cls.host_address, - cls.port, - username=cls.username, - password=cls.password, - authSource=cls.auth_source, - serverSelectionTimeoutMS=cls.server_selection_timeout_ms) - - 
try: - cls._log.info('Establishing database connection...') - cls._client.is_mongos - except ServerSelectionTimeoutError as sste: - cls._log.exception(sste) - cls._log.critical('Manga Tagger cannot run without a database connection. Please check the' - 'configuration in settings.json and try again.') - sys.exit(1) - - cls._database = cls._client[cls.database_name] - - MetadataTable.initialize() - ProcFilesTable.initialize() - ProcSeriesTable.initialize() - TaskQueueTable.initialize() - - cls._log.info('Database connection established!') - cls._log.debug(f'{cls.__name__} class has been initialized') - - @classmethod - def load_database_tables(cls): - ProcSeriesTable.load() - - @classmethod - def save_database_tables(cls): - ProcSeriesTable.save() - - @classmethod - def close_connection(cls): - cls._log.info('Closing database connection...') - cls._client.close() - - @classmethod - def print_debug_settings(cls): - cls._log.debug(f'Database Name: {Database.database_name}') - cls._log.debug(f'Host Address: {Database.host_address}') - cls._log.debug(f'Port: {Database.port}') - cls._log.debug(f'Username: {Database.username}') - cls._log.debug(f'Password: {Database.password}') - cls._log.debug(f'Authentication Source: {Database.auth_source}') - cls._log.debug(f'Server Selection Timeout (ms): {Database.server_selection_timeout_ms}') - - @classmethod - def insert(cls, data, logging_info=None): - try: - cls._log.info('Attempting to insert record into the database...', extra=logging_info) - - if type(data) is dict: - cls._database.insert_one(data) - else: - cls._database.insert_one(data.__dict__) - except (DuplicateKeyError, InvalidDocument) as e: - cls._log.exception(e, extra=logging_info) - return - except Exception as e: - cls._log.exception(e, extra=logging_info) - cls._log.warning('Manga Tagger is unfamiliar with this error. 
Please log an issue for investigation.', - extra=logging_info) - return - - cls._log.info('Insertion was successful!', extra=logging_info) - - @classmethod - def update(cls, search_filter, data, logging_info): - try: - cls._log.info('Attempting to update record in the database...', extra=logging_info) - cls._database.update_one(search_filter, data) - except Exception as e: - cls._log.exception(e, extra=logging_info) - cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.', - extra=logging_info) - return - - cls._log.info('Update was successful!', extra=logging_info) - - @classmethod - def delete_all(cls, logging_info): - try: - cls._log.info('Attempting to delete all records in the database...', extra=logging_info) - cls._database.delete_many({}) - except Exception as e: - cls._log.exception(e, extra=logging_info) - cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.', - extra=logging_info) - return - - cls._log.info('Deletion was successful!', extra=logging_info) - - -class MetadataTable(Database): - @classmethod - def initialize(cls): - cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') - cls._database = super()._database['manga_metadata'] - cls._log.debug(f'{cls.__name__} class has been initialized') - - @classmethod - def search_by_search_value(cls, manga_title): - cls._log.debug(f'Searching manga_metadata cls by key "search_value" using value "{manga_title}"') - return cls._database.find_one({ - 'search_value': manga_title - }) - - @classmethod - def search_by_series_title_eng(cls, manga_title): - cls._log.debug( - f'Searching manga_metadata cls by key "series_title_eng" using value "{manga_title}"') - return cls._database.find_one({ - 'series_title_eng': manga_title - }) - - @classmethod - def search_by_series_title(cls, manga_title): - cls._log.debug(f'Searching manga_metadata cls by key "series_title" using value "{manga_title}"') - return 
cls._database.find_one({ - 'series_title': manga_title - }) - - -class ProcFilesTable(Database): - @classmethod - def initialize(cls): - cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') - cls._database = super()._database['processed_files'] - cls._log.debug(f'{cls.__name__} class has been initialized') - - @classmethod - def search(cls, manga_title, chapter_number): - cls._log.debug(f'Searching processed_files cls by keys "series_title" and "chapter_number" ' - f'using values "{manga_title}" and {chapter_number}') - return cls._database.find_one({ - 'series_title': manga_title, - 'chapter_number': chapter_number - }) - - @classmethod - def insert_record_and_rename(cls, old_file_path: Path, new_file_path: Path, manga_title, chapter, logging_info): - old_file_path.rename(new_file_path) - cls._log.info(f'"{new_file_path.name.strip(".cbz")}" has been renamed.', extra=logging_info) - - record = { - "series_title": manga_title, - "chapter_number": chapter, - "old_filename": old_file_path.name, - "new_filename": new_file_path.name, - "process_date": datetime.now().date().strftime('%Y-%m-%d @ %I:%M:%S %p') - } - - cls._log.debug(f'Record: {record}') - - logging_info['inserted_processed_record'] = record - cls._database.insert(record, logging_info) - - @classmethod - def update_record_and_rename(cls, results, old_file_path: Path, new_file_path: Path, logging_info): - old_file_path.rename(new_file_path) - cls._log.info(f'"{new_file_path.name.strip(".cbz")}" has been renamed.', extra=logging_info) - - record = { - "$set": { - "old_filename": old_file_path.name, - "update_date": datetime.now().date().strftime('%Y-%m-%d @ %I:%M:%S %p') - } - } - cls._log.debug(f'Record: {record}') - - logging_info['updated_processed_record'] = record - cls._database.update(results, record, logging_info) - - -class ProcSeriesTable(Database): - processed_series = set() - - @classmethod - def initialize(cls): - cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') - 
cls._database = super()._database['processed_series'] - cls._id = None - cls._last_save_time = None - cls._log.debug(f'{cls.__name__} class has been initialized') - - @classmethod - def save(cls): - cls._log.info('Saving processed series...') - cls._database.delete_one({ - '_id': cls._id - }) - super(ProcSeriesTable, cls).insert(dict.fromkeys(cls.processed_series, True)) - - @classmethod - def load(cls): - cls._log.info('Loading processed series...') - results = cls._database.find_one() - if results is not None: - cls._id = results.pop('_id') - cls.processed_series = set(results.keys()) - - @classmethod - def save_while_running(cls): - if cls._last_save_time is not None: - last_save_delta = (datetime.now() - cls._last_save_time).total_seconds() - - # Save every hour - if last_save_delta > 3600: - cls._last_save_time = datetime.now() - cls.save() - - -class TaskQueueTable(Database): - @classmethod - def initialize(cls): - cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') - cls._database = super()._database['task_queue'] - cls.queue = Queue() - cls._log.debug(f'{cls.__name__} class has been initialized') - - @classmethod - def load(cls, task_list: dict): - cls._log.info('Loading task queue...') - results = cls._database.find() - - if results is not None: - for result in results: - task_list[result['manga_chapter']] = result - - @classmethod - def save(cls, queue): - if not queue.empty(): - cls._log.info('Saving task queue...') - while not queue.empty(): - event = queue.get() - super(TaskQueueTable, cls).insert(event.dictionary()) - - @classmethod - def delete_all(cls): - super(TaskQueueTable, cls).delete_all(None) +import json +import logging +import shutil +import sqlite3 +import sys +import threading +from datetime import datetime +from pathlib import Path +from queue import Queue +from sqlite3 import Error + +# Define the lock globally +lock = threading.Lock() + + +class Database: + database_name = None + + sql_create_manga_table = """CREATE TABLE 
IF NOT EXISTS manga ( + manga_id integer PRIMARY KEY, + mal_id integer, + series_title text, + series_title_eng text, + series_title_jap text, + status text, + type text, + description text, + mal_url text, + anilist_url text, + genres text, + staff text, + serializations text, + scrape_date text, + publish_date text + );""" + + sql_create_files_table = """CREATE TABLE IF NOT EXISTS files ( + file_id integer PRIMARY KEY, + chapter_number text, + new_filename text, + old_filename text, + series_title text, + processed_date text, + tagged_date text, + manga_id integer, + FOREIGN KEY (manga_id) REFERENCES manga (manga_id) + );""" + + sql_create_task_queue_table = """CREATE TABLE IF NOT EXISTS task_queue ( + task_id integer PRIMARY KEY, + event_type text, + manga_chapter text, + src_path text + );""" + + _client = None + _database = None + _table = None + _log = None + + def dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + + @classmethod + def get_sqlite3_thread_safety(cls): + + # Map value from SQLite's THREADSAFE to Python's DBAPI 2.0 + # threadsafety attribute. 
+ sqlite_threadsafe2python_dbapi = {0: 0, 2: 1, 1: 3} + conn = sqlite3.connect(cls.database_name) + threadsafety = conn.execute( + """ + select * from pragma_compile_options + where compile_options like 'THREADSAFE=%' + """ + ).fetchone()[0] + conn.close() + + threadsafety_value = int(threadsafety.split("=")[1]) + + return sqlite_threadsafe2python_dbapi[threadsafety_value] + + @classmethod + def initialize(cls): + cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') + + try: + cls._log.info('Establishing database connection...') + + if cls.get_sqlite3_thread_safety() == 3: + check_same_thread = False + else: + check_same_thread = True + + cls._client = sqlite3.connect(cls.database_name, check_same_thread=check_same_thread) + cls._client.row_factory = cls.dict_factory + cls._database = cls._client.cursor() + try: + lock.acquire(True) + cls._database.execute(cls.sql_create_manga_table) + finally: + lock.release() + + try: + lock.acquire(True) + cls._database.execute(cls.sql_create_files_table) + finally: + lock.release() + + try: + lock.acquire(True) + cls._database.execute(cls.sql_create_task_queue_table) + finally: + lock.release() + + except Error as e: + cls._log.exception(e) + cls._log.critical('Manga Tagger cannot run without a database connection. 
Please check the' + 'configuration in settings.json and try again.') + sys.exit(1) + # finally: + # if cls._client: + # cls._client.close() + + # cls._database = cls._client[cls.database_name] + + MangaTable.initialize() + FilesTable.initialize() + TaskQueueTable.initialize() + + cls._log.info('Database connection established!') + cls._log.debug(f'{cls.__name__} class has been initialized') + + @classmethod + def print_debug_settings(cls): + cls._log.debug(f'Database Name: {Database.database_name}') + + @classmethod + def delete_all(cls, table, logging_info): + try: + cls._log.info(f'Attempting to delete all records in table {table}...') + try: + lock.acquire(True) + cls._database.execute(f'DELETE FROM {table}') + finally: + lock.release() + except Exception as e: + cls._log.exception(e) + cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for investigation.') + return + + cls._log.info('Deletion was successful!') + + @classmethod + def close_connection(cls): + cls._client.close() + + +class MangaTable(Database): + @classmethod + def initialize(cls): + cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') + cls._table = 'manga' + cls._log.debug(f'{cls.__name__} class has been initialized') + + @classmethod + def search(cls, manga_title): + cls._log.debug(f'Searching manage for "{manga_title}"') + try: + lock.acquire(True) + results = cls._database.execute( + 'SELECT * FROM manga WHERE series_title_eng = ? 
OR series_title = ?', + (manga_title, manga_title,)) + result = results.fetchone() + finally: + lock.release() + return result + + @classmethod + def insert(cls, data, logging_info=None): + params = ( + data.mal_id, + data.series_title, + data.series_title_eng, + data.series_title_jap, + data.status, + data.type, + data.description, + data.mal_url, + data.anilist_url, + json.dumps(data.genres), + json.dumps(data.staff), + json.dumps(data.serializations), + data.publish_date, + data.scrape_date + ) + + cls._log.info('Inserting record into the database...') + try: + lock.acquire(True) + cls._database.execute( + 'INSERT INTO manga (mal_id, series_title, series_title_eng, series_title_jap, status, type, ' + "description, mal_url, anilist_url, genres, staff, serializations, publish_date, scrape_date) VALUES " + "(?, ?, ?, strftime('%Y-%m-%d', ?), ?, strftime('%Y-%m-%d %H:%M:%S[+-]HH:MM', ?), ?, ?, ?, ?, ?, ?, ?," + " ?)", params) + cls._client.commit() + + manga_id = cls._database.lastrowid + finally: + lock.release() + cls._log.info(f'Insertion was successful! Manga ID: {manga_id}') + + return manga_id + + +# class ProcSeriesTable(Database): +# processed_series = set() +# +# @classmethod +# def initialize(cls): +# cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') +# cls._table = 'processed_series' +# cls._id = None +# cls._last_save_time = None +# cls._log.debug(f'{cls.__name__} class has been initialized') + + +class FilesTable(Database): + @classmethod + def initialize(cls): + cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') + cls._table = 'files' + cls._log.debug(f'{cls.__name__} class has been initialized') + + @classmethod + def search(cls, manga_title, chapter_number): + cls._log.debug(f'Searching files cls by keys "series_title" and "chapter_number" ' + f'using values "{manga_title}" and {chapter_number}') + try: + lock.acquire(True) + results = cls._database.execute( + 'SELECT * FROM files WHERE series_title = ? 
AND chapter_number = ?', + (manga_title, + chapter_number,)) + result = results.fetchone() + finally: + lock.release() + return result + + @classmethod + def get_by_id(cls, file_id): + cls._log.debug(f'Getting details for file id: {file_id}') + try: + lock.acquire(True) + results = cls._database.execute( + 'SELECT * FROM files WHERE file_id = ?', + (file_id,)) + result = results.fetchone() + finally: + lock.release() + return result + + @classmethod + def untagged(cls): + cls._log.debug('Getting untagged files.') + try: + lock.acquire(True) + results = cls._database.execute('SELECT file_id FROM files WHERE tagged_date is null') + finally: + lock.release() + return results + + @classmethod + def insert_record_and_rename(cls, old_file_path: Path, new_file_path: Path, manga_title, chapter, logging_info): + + params = ( + manga_title, + chapter, + old_file_path.name, + new_file_path.name + ) + + cls._log.debug(f'Params: {params}') + + logging_info['record_params'] = params + + try: + #old_file_path.rename(new_file_path) + shutil.move(old_file_path.as_posix(), new_file_path.as_posix()) + except FileNotFoundError as e: + cls._log.exception(f'{old_file_path.as_posix()} not found.') + + try: + lock.acquire(True) + + if new_file_path.is_file(): + cls._log.info(f'"{new_file_path.name.strip(".cbz")}" has been renamed.') + + cls._database.execute( + 'INSERT INTO files (series_title, chapter_number, old_filename,new_filename, processed_date) ' + 'VALUES (?, ?, ?, ?, datetime()) ', params) + else: + cls._log.info(f'"{new_file_path.name.strip(".cbz")}" rename failed.') + cls._database.execute( + 'INSERT INTO files (series_title, chapter_number, old_filename, new_filename) VALUES (?, ?, ?, ?)', + params) + + cls._client.commit() + + file_id = cls._database.lastrowid + + finally: + lock.release() + + cls._log.debug(f'File record added. 
File ID: {file_id} Params: {params}') + return file_id + + @classmethod + def update_record_and_rename(cls, results, old_file_path: Path, new_file_path: Path, logging_info): + + logging_info['updated_processed_record'] = results + + try: + # old_file_path.rename(new_file_path) + shutil.move(old_file_path.resolve().name, new_file_path.resolve().name) + except FileNotFoundError as e: + cls._log.exception(f'{old_file_path.name} not found.') + + if new_file_path.is_file(): + cls._log.info(f'"{new_file_path.name.strip(".cbz")}" has been renamed.') + try: + lock.acquire(True) + cls._database.execute( + 'UPDATE files SET processed_date = datetime() WHERE file_id = ?', (results['file_id'],)) + cls._client.commit() + finally: + lock.release() + else: + cls._log.info(f'"{new_file_path.name.strip(".cbz")}" rename failed.') + + cls._log.debug(f'File record updated: {results["file_id"]}') + + @classmethod + def add_manga_id(cls, file_id, manga_id): + cls._log.info(f'Adding Manga ID: {manga_id} to File ID: {file_id}') + + params = ( + manga_id, + file_id + ) + + try: + lock.acquire(True) + cls._database.execute('UPDATE files SET manga_id = ? 
WHERE file_id = ?', params) + cls._client.commit() + finally: + lock.release() + cls._log.debug(f'File record updated: {file_id}') + + @classmethod + def add_tagged_date(cls, file_id): + cls._log.info(f'Adding tagged date to File ID: {file_id}') + try: + lock.acquire(True) + cls._database.execute('UPDATE files SET tagged_date = datetime() WHERE file_id = ?', (file_id,)) + cls._client.commit() + finally: + lock.release() + cls._log.debug(f'File record updated: {file_id}') + + +class TaskQueueTable(Database): + @classmethod + def initialize(cls): + cls._log = logging.getLogger(f'{cls.__module__}.{cls.__name__}') + cls._table = 'task_queue' + cls.queue = Queue() + cls._log.debug(f'{cls.__name__} class has been initialized') + + @classmethod + def load(cls, task_list: dict): + cls._log.info('Loading task queue...') + try: + lock.acquire(True) + results = cls._database.execute('SELECT * FROM task_queue') + + if results is not None: + for result in results.fetchall(): + cls._log.info(f'Adding task: {result}') + result['src_path'] = result['src_path'].replace('\\', '/') + task_list[result['manga_chapter']] = result + finally: + lock.release() + + @classmethod + def save(cls, queue): + if not queue.empty(): + cls._log.info('Saving task queue...') + while not queue.empty(): + event = queue.get() + cls._log.debug(f'Event: {event}') + params = ( + event['event_type'], + event['manga_chapter'], + event['src_path'], + ) + try: + lock.acquire(True) + cls._database.execute( + 'INSERT INTO task_queue (event_type, manga_chapter, src_path) VALUES (?, ?, ?)', + params) + cls._client.commit() + finally: + lock.release() + + @classmethod + def delete_all(cls): + super(TaskQueueTable, cls).delete_all(cls._table, None) diff --git a/MangaTaggerLib/models.py b/MangaTaggerLib/models.py index 0cdc22c..9c02c5a 100644 --- a/MangaTaggerLib/models.py +++ b/MangaTaggerLib/models.py @@ -1,3 +1,4 @@ +import json import logging from datetime import datetime from pytz import timezone @@ -17,21 +18,22 
@@ def __init__(self, manga_title, logging_info, jikan_details=None, anilist_detail Metadata._log = logging.getLogger(self.fully_qualified_class_name()) self.search_value = manga_title - Metadata._log.info(f'Creating Metadata model for series "{manga_title}"...', extra=logging_info) + Metadata._log.info(f'Creating Metadata model for series "{manga_title}"...') if jikan_details and anilist_details: # If details are grabbed from Jikan and Anilist APIs self._construct_api_metadata(jikan_details, anilist_details, logging_info) elif details: # If details were stored in the database self._construct_database_metadata(details) else: - Metadata._log.exception(MetadataNotCompleteError, extra=logging_info) + Metadata._log.exception(MetadataNotCompleteError) Metadata._log.debug(f'{self.search_value} Metadata Model: {self.__dict__.__str__()}') logging_info['metadata'] = self.__dict__ - Metadata._log.info('Successfully created Metadata model.', extra=logging_info) + Metadata._log.info('Successfully created Metadata model.') def _construct_api_metadata(self, jikan_details, anilist_details, logging_info): self._id = jikan_details['mal_id'] + self.mal_id = jikan_details['mal_id'] self.series_title = jikan_details['title'] if jikan_details['title_english'] == 'None' or jikan_details['title_english'] is None: @@ -49,21 +51,19 @@ def _construct_api_metadata(self, jikan_details, anilist_details, logging_info): self.description = jikan_details['synopsis'] self.mal_url = jikan_details['url'] self.anilist_url = anilist_details['siteUrl'] - self.publish_date = None self.genres = [] self.staff = {} self.serializations = {} - - self._construct_publish_date(jikan_details['published']['from']) self._parse_genres(jikan_details['genres'], logging_info) self._parse_staff(anilist_details['staff']['edges'], jikan_details['authors'], logging_info) self._parse_serializations(jikan_details['serializations'], logging_info) - + self._construct_publish_date(jikan_details['published']['from']) # 
self.scrape_date = datetime.now().date().strftime('%Y-%m-%d %I:%M %p') self.scrape_date = timezone(AppSettings.timezone).localize(datetime.now()).strftime('%Y-%m-%d %I:%M %p %Z') def _construct_database_metadata(self, details): - self._id = details['_id'] + self._id = details['mal_id'] + self.mal_id = details['mal_id'] self.series_title = details['series_title'] self.series_title_eng = details['series_title_eng'] self.series_title_jap = details['series_title_jap'] @@ -72,9 +72,8 @@ def _construct_database_metadata(self, details): self.description = details['description'] self.mal_url = details['mal_url'] self.anilist_url = details['anilist_url'] - self.publish_date = details['publish_date'] - self.genres = details['genres'] - self.staff = details['staff'] + self.genres = json.loads(details['genres']) + self.staff = json.loads(details['staff']) self.serializations = details['serializations'] self.publish_date = details['publish_date'] self.scrape_date = details['scrape_date'] @@ -85,13 +84,13 @@ def _construct_publish_date(self, date): Metadata._log.debug(f'Publish date constructed: {self.publish_date}') def _parse_genres(self, genres, logging_info): - Metadata._log.info('Parsing genres...', extra=logging_info) + Metadata._log.info('Parsing genres...') for genre in genres: Metadata._log.debug(f'Genre found: {genre}') self.genres.append(genre['name']) def _parse_staff(self, anilist_staff, jikan_staff, logging_info): - Metadata._log.info('Parsing staff roles...', extra=logging_info) + Metadata._log.info('Parsing staff roles...') roles = [] @@ -138,7 +137,7 @@ def _parse_staff(self, anilist_staff, jikan_staff, logging_info): break else: Metadata._log.warning(f'Expected role not found for staff member "{a_name}"; instead' - f' found "{role}"', extra=logging_info) + f' found "{role}"') break # Validate expected roles for staff members @@ -146,7 +145,7 @@ def _parse_staff(self, anilist_staff, jikan_staff, logging_info): if set(roles) != set(role_set): 
Metadata._log.warning(f'Not all expected roles are present for series "{self.search_value}"; ' - f'double check ID "{self._id}"', extra=logging_info) + f'double check ID "{self._id}"') def _add_staff_member(self, role, a_staff, j_staff): self.staff[role][a_staff['node']['name']['full']] = { @@ -158,7 +157,7 @@ def _add_staff_member(self, role, a_staff, j_staff): } def _parse_serializations(self, serializations, logging_info): - Metadata._log.info('Parsing serializations...', extra=logging_info) + Metadata._log.info('Parsing serializations...') for serialization in serializations: Metadata._log.debug(serialization) self.serializations[serialization['name'].strip('.')] = { diff --git a/MangaTaggerLib/task_queue.py b/MangaTaggerLib/task_queue.py index 3b4a406..b3ffbd3 100644 --- a/MangaTaggerLib/task_queue.py +++ b/MangaTaggerLib/task_queue.py @@ -19,6 +19,7 @@ class QueueEventOrigin(Enum): WATCHDOG = 1 FROM_DB = 2 SCAN = 3 + METADATA = 4 class QueueEvent: @@ -40,12 +41,17 @@ def __init__(self, event, origin=QueueEventOrigin.WATCHDOG): elif origin == QueueEventOrigin.SCAN: self.event_type = 'existing' self.src_path = event + elif origin == QueueEventOrigin.METADATA: + self.event_type = 'metadata' + self.file_id = event def __str__(self): if self.event_type in ('created', 'existing'): return f'File {self.event_type} event at {self.src_path.absolute()}' elif self.event_type == 'modified': return f'File {self.event_type} event at {self.dest_path.absolute()}' + elif self.event_type == 'metadata': + return f'File {self.event_type} event for file id: {self.file_id}' def dictionary(self): ret_dict = { @@ -84,10 +90,11 @@ def initialize(cls): cls._running = True for i in range(cls.threads): - if not cls._debug_mode: - worker = Thread(target=cls.process, name=f'MTT-{i}', daemon=True) - else: - worker = Thread(target=cls.dummy_process, name=f'MTT-{i}', daemon=True) + worker = Thread(target=cls.process, name=f'MTT-{i}', daemon=True) + #if not cls._debug_mode: + # worker = 
Thread(target=cls.process, name=f'MTT-{i}', daemon=True) + #else: + # worker = Thread(target=cls.dummy_process, name=f'Dummy MTT-{i}', daemon=True) cls._log.debug(f'Worker thread {worker.name} has been initialized') cls._worker_list.append(worker) @@ -121,6 +128,12 @@ def add_to_task_queue(cls, manga_chapter): cls._log.info(f'{event} has been added to the task queue') cls._queue.put(event) + @classmethod + def add_to_metadata_task_queue(cls, file_id): + event = QueueEvent(file_id, QueueEventOrigin.METADATA) + cls._log.info(f'{event} has been added to the task queue') + cls._queue.put(event) + @classmethod def exit(cls): # Stop worker threads from picking new items from the queue in process() @@ -151,6 +164,7 @@ def run(cls): cls._log.info(f'Watching "{cls.download_dir}" for new downloads') while cls._running: + QueueWorker.process() time.sleep(1) @classmethod @@ -160,36 +174,41 @@ def dummy_process(cls): @classmethod def process(cls): while cls._running: - if not cls._queue.empty(): + if cls._queue.qsize() != 0: event = cls._queue.get() - if event.event_type in ('created', 'existing'): - cls._log.info(f'Pulling "file {event.event_type}" event from the queue for "{event.src_path}"') - path = Path(event.src_path) - elif event.event_type == 'moved': - cls._log.info(f'Pulling "file {event.event_type}" event from the queue for "{event.dest_path}"') - path = Path(event.dest_path) + if event.event_type == 'metadata': + cls._log.info(f'Pulling "file {event.event_type}" event from the queue for file id: "{event.file_id}"') + MangaTaggerLib.tag_manga_chapter(event.file_id) else: - cls._log.error('Event was passed, but Manga Tagger does not know how to handle it. 
Please open an ' - 'issue for further investigation.') - cls._queue.task_done() - return - - current_size = -1 - try: - destination_size = path.stat().st_size - while current_size != destination_size: - current_size = destination_size - time.sleep(1) - except FileNotFoundError as fnfe: - cls._log.exception(fnfe) - - try: - MangaTaggerLib.process_manga_chapter(path, uuid.uuid1()) - except Exception as e: - cls._log.exception(e) - cls._log.warning('Manga Tagger is unfamiliar with this error. Please log an issue for ' - 'investigation.') + if event.event_type in ('created', 'existing'): + cls._log.info(f'Pulling "file {event.event_type}" event from the queue for "{event.src_path}"') + path = Path(event.src_path) + elif event.event_type == 'moved': + cls._log.info(f'Pulling "file {event.event_type}" event from the queue for "{event.dest_path}"') + path = Path(event.dest_path) + else: + cls._log.error('Event was passed, but Manga Tagger does not know how to handle it. Please ' + 'open an' + 'issue for further investigation.') + cls._queue.task_done() + return + + current_size = -1 + try: + destination_size = path.stat().st_size + while current_size != destination_size: + current_size = destination_size + time.sleep(1) + except FileNotFoundError as fnfe: + cls._log.exception(fnfe) + + try: + MangaTaggerLib.process_manga_chapter(path, uuid.uuid1()) + except Exception as e: + cls._log.exception(e) + cls._log.warning('Manga Tagger is unfamiliar with this error. 
Please log an issue for ' + 'investigation.') cls._queue.task_done() diff --git a/MangaTaggerLib/utils.py b/MangaTaggerLib/utils.py index 0f0c7be..4d482dd 100644 --- a/MangaTaggerLib/utils.py +++ b/MangaTaggerLib/utils.py @@ -1,17 +1,13 @@ import atexit import json import logging -import subprocess import sys +import os from logging.handlers import RotatingFileHandler, SocketHandler from pathlib import Path -from tkinter import filedialog, messagebox, Tk - import numpy -import psutil from pythonjsonlogger import jsonlogger - -from MangaTaggerLib.database import Database +from MangaTaggerLib.database import Database, FilesTable from MangaTaggerLib.task_queue import QueueWorker from MangaTaggerLib.api import AniList @@ -30,7 +26,7 @@ class AppSettings: @classmethod def load(cls): - settings_location = Path(Path.cwd(), 'settings.json') + settings_location = Path(Path.cwd(), '/config/settings.json') if Path(settings_location).exists(): with open(settings_location, 'r') as settings_json: settings = json.load(settings_json) @@ -45,35 +41,32 @@ def load(cls): # Database Configuration cls._log.debug('Now setting database configuration...') - Database.database_name = settings['database']['database_name'] - Database.host_address = settings['database']['host_address'] - Database.port = settings['database']['port'] - Database.username = settings['database']['username'] - Database.password = settings['database']['password'] - Database.auth_source = settings['database']['auth_source'] - Database.server_selection_timeout_ms = settings['database']['server_selection_timeout_ms'] + Database.database_name = f"/config/{settings['application']['database_name']}" cls._log.debug('Database settings configured!') Database.initialize() Database.print_debug_settings() - # Free Manga Downloader Configuration - cls._initialize_fmd_settings(settings['fmd']['fmd_dir'], settings['fmd']['download_dir']) + download_dir = Path(settings['application']['download_dir']) + + if not 
download_dir.is_absolute(): + cls._log.warning(f'"{download_dir}" is not a valid path. The download directory must be an ' + f'absolute path, such as "C:\\Downloads". Please select a new download path.') + + QueueWorker.download_dir = download_dir + cls._log.info(f'Download directory has been set as "{QueueWorker.download_dir}"') # Set Application Timezone cls.timezone = settings['application']['timezone'] cls._log.debug(f'Timezone: {cls.timezone}') - # Dry Run Mode Configuration - # No logging here due to being handled at the INFO level in MangaTaggerLib - if settings['application']['dry_run']['enabled']: - cls.mode_settings = {'database_insert': settings['application']['dry_run']['database_insert'], - 'rename_file': settings['application']['dry_run']['rename_file'], - 'write_comicinfo': settings['application']['dry_run']['write_comicinfo']} - # Multithreading Configuration if settings['application']['multithreading']['threads'] <= 0: - QueueWorker.threads = 1 + cpus = os.cpu_count() / 2 + if cpus: + QueueWorker.threads = cpus + else: + QueueWorker.threads = 1 else: QueueWorker.threads = settings['application']['multithreading']['threads'] @@ -93,11 +86,11 @@ def load(cls): cls._log.debug(f'Debug Mode: {QueueWorker._debug_mode}') # Manga Library Configuration - if settings['application']['library']['dir'] is not None: - cls.library_dir = settings['application']['library']['dir'] + if settings['application']['library_dir'] is not None: + cls.library_dir = settings['application']['library_dir'].replace('\\', '/') cls._log.debug(f'Library Directory: {cls.library_dir}') - cls.is_network_path = settings['application']['library']['is_network_path'] + # cls.is_network_path = settings['application']['library']['is_network_path'] if not Path(cls.library_dir).exists(): cls._log.info(f'Library directory "{AppSettings.library_dir}" does not exist; creating now.') @@ -108,14 +101,20 @@ def load(cls): sys.exit(1) # Load necessary database tables - 
Database.load_database_tables() + # Database.load_database_tables() # Initialize QueueWorker and load task queue QueueWorker.initialize() QueueWorker.load_task_queue() + # Scan the database for files that haven't had metadata added. + cls._scan_untagged_files() + # Scan download directory for downloads not already in database upon loading - cls._scan_download_dir() + try: + cls._scan_download_dir() + except AttributeError: + cls._log.info(f'No files in download directory.') # Initialize API AniList.initialize() @@ -124,87 +123,6 @@ def load(cls): atexit.register(cls._exit_handler) cls._log.debug(f'{cls.__name__} class has been initialized') - @classmethod - def _initialize_fmd_settings(cls, fmd_dir, download_dir): - cls._log.info('Now setting Free Manga Downloader configuration settings...') - - fmd_settings_path = Path(fmd_dir, 'userdata', 'settings.json') - - # If FMD is running, stop it - for process in psutil.process_iter(): - if 'fmd.exe' == process.name(): - cls._log.info('Free Manga Downloader is currently running and must be closed for Manga Tagger to ' - 'initialize the FMD settings properly.') - process.terminate() - - # If FMD settings has not been initialized, start and stop FMD to generate the settings.json file, so that we - # can then set the download path - if not fmd_settings_path.exists(): - cls._log.info('The settings.json for Free Manga Downloader (FMD) does not exist, meaning that FMD has ' - 'not been opened before. Opening the application to generate the settings.json...') - - Tk().withdraw() - messagebox.showinfo('Manga Tagger', 'For Manga Tagger to continue, the settings.json for Free Manga ' - 'Downloader (FMD) must first be generated. After clicking "OK", FMD ' - 'will open. 
Please click "No" to any module update pop-ups and close ' - 'FMD using the "X" in the upper right-hand corner.') - - subprocess.run(str(Path(fmd_dir, 'fmd.exe'))) - - if download_dir is None: - cls._log.info('Download directory has not been set; a file dialog window will be opened to input ' - 'the destination download directory.') - Tk().withdraw() - download_dir = Path(filedialog.askdirectory(title='Select the folder where you want your manga to be ' - 'downloaded to')) - - # Load settings - with open(fmd_settings_path, 'r') as fmd_settings: - settings_json = json.load(fmd_settings) - changes_made = False - - # GenerateMangaFolder MUST BE TRUE in order to properly parse the download directory - if settings_json['saveto']['GenerateMangaFolder'] is False: - settings_json['saveto']['GenerateMangaFolder'] = True - settings_json['saveto']['MangaCustomRename'] = '%MANGA%' - changes_made = True - cls._log.info('Setting "Generate Manga Folder" should be enabled with "Manga Custom Rename" ' - f'configured as "%MANGA%"; this configuration has been applied') - - # ChapterCustomRename MUST FOLLOW this format to be properly parsed - if settings_json['saveto']['ChapterCustomRename'].find('-.-') == -1 \ - or settings_json['saveto']['ChapterCustomRename'] != '%MANGA% -.- %CHAPTER%': - settings_json['saveto']['ChapterCustomRename'] = '%MANGA% -.- %CHAPTER%' - changes_made = True - cls._log.info('Setting "Chapter Custom Rename" should be configured as "%MANGA% -.- ' - f'%CHAPTER%" for parsing by Manga Tagger; this configuration has been applied') - - # Set the download format to CBZ - if settings_json['saveto']['Compress'] != 2: - settings_json['saveto']['Compress'] = 2 - changes_made = True - cls._log.info('Setting "Compress" should be set to 2, which corresponds to the CBZ file format.') - - # Set the download directory - if download_dir is None: - download_dir = Path(settings_json['saveto']['SaveTo']) - - if not download_dir.is_absolute(): - cls._log.warning(f'"{download_dir}" 
is not a valid path. The download directory must be an ' - f'absolute path, such as "C:\\Downloads". Please select a new download path.') - - Tk().withdraw() - download_dir = Path(filedialog.askdirectory(title='Select the folder where you want your manga to be ' - 'downloaded to')) - - QueueWorker.download_dir = download_dir - cls._log.info(f'Download directory has been set as "{QueueWorker.download_dir}"') - - if changes_made: - with open(Path(fmd_dir, 'userdata', 'settings.json'), 'w') as fmd_settings: - json.dump(settings_json, fmd_settings, indent=4) - cls._log.debug(f'Changes to the "settings.json" for Free Manga Downloader have been saved') - @classmethod def _initialize_logger(cls, settings): logger = logging.getLogger('MangaTaggerLib') @@ -278,9 +196,6 @@ def _exit_handler(cls): # Stop worker threads QueueWorker.exit() - # Save necessary database tables - Database.save_database_tables() - # Close MongoDB connection Database.close_connection() @@ -288,37 +203,18 @@ def _exit_handler(cls): @classmethod def _create_settings(cls): - Tk().withdraw() - fmd_dir = filedialog.askdirectory(title='Select the folder that Free Manga Downloader is installed in') - return { "application": { "debug_mode": False, - "timezone": "America/New_York", - "library": { - "dir": "C:\\Library", - "is_network_path": False - }, - "dry_run": { - "enabled": False, - "rename_file": False, - "database_insert": False, - "write_comicinfo": False - }, + "timezone": "Europe/London", + "database_name": "manga_tagger", + "library_dir": "/library", + "download_dir": "/downloads", "multithreading": { "threads": 8, "max_queue_size": 0 } }, - "database": { - "database_name": "manga_tagger", - "host_address": "localhost", - "port": 27017, - "username": "manga_tagger", - "password": "Manga4LYFE", - "auth_source": "admin", - "server_selection_timeout_ms": 1 - }, "logger": { "logging_level": "info", "log_dir": "logs", @@ -333,7 +229,7 @@ def _create_settings(cls): "log_format": "%(asctime)s | 
%(threadName)s %(thread)d | %(name)s | %(levelname)s - %(message)s" }, "json": { - "enabled": True, + "enabled": False, "log_format": "%(threadName)s %(thread)d %(asctime)s %(name)s %(levelname)s %(message)s" }, "tcp": { @@ -348,20 +244,28 @@ def _create_settings(cls): "port": 1798, "log_format": "%(threadName)s %(thread)d %(asctime)s %(name)s %(levelname)s %(message)s" } - }, - "fmd": { - "fmd_dir": fmd_dir, - "download_dir": None } } @classmethod def _scan_download_dir(cls): - for directory in QueueWorker.download_dir.iterdir(): + cls._log.debug(f'download_dir: {QueueWorker.download_dir}') + if isinstance(QueueWorker.download_dir, Path): + _path = QueueWorker.download_dir + else: + _path = Path(QueueWorker.download_dir) + for directory in _path.iterdir(): for manga_chapter in directory.glob('*.cbz'): if manga_chapter.name.strip('.cbz') not in QueueWorker.task_list.keys(): QueueWorker.add_to_task_queue(manga_chapter) + @classmethod + def _scan_untagged_files(cls): + results = FilesTable.untagged() + if results is not None: + for result in results: + QueueWorker.add_to_metadata_task_queue(result['file_id']) + def compare(s1, s2): s1 = s1.lower().strip('/[^a-zA-Z ]/g", ') diff --git a/README.md b/README.md index 52a5dab..339dc40 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,64 @@ -![GitHub all releases](https://img.shields.io/github/downloads/Inpacchi/Manga-Tagger/total) -![GitHub issues](https://img.shields.io/github/issues/Inpacchi/Manga-Tagger) +![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/SanchoBlaze/Manga-Tagger?label=latest) ![GitHub issues](https://img.shields.io/github/issues/sanchoblaze/Manga-Tagger) +![Docker Cloud Build Status](https://img.shields.io/docker/cloud/build/sanchoblaze/manga-tagger) + ![Docker Pulls](https://img.shields.io/docker/pulls/sanchoblaze/manga-tagger) ![GitHub all releases](https://img.shields.io/github/downloads/SanchoBlaze/Manga-Tagger/total) -![Manga Tagger Logo](images/manga_tagger_logo_cropped.png) 
+![Manga Tagger Logo](https://raw.githubusercontent.com/SanchoBlaze/Manga-Tagger/main/images/manga_tagger_logo_cropped.png) -A tool to rename and write metadata to digital manga chapters - -## [New alpha build v1.1.5-alpha out now! A LOT of QOL (quality-of-life) changes were made and even though it's an alpha version, you should definitely be using it.](https://github.com/Inpacchi/Manga-Tagger/releases/tag/v1.1.5-alpha) - -## Background and Inspiration -Where do I even start...well, I **really** enjoy Japanese culture, specifically anime and manga. While there is a lot of support for American comics in a digital format, the same cannot be said about manga. One day, I stumbled across Free Manga Downloader, which allowed me to start my digital manga library. However, the one pitfall of the application is the lack of capability for grabbing metadata. - -Being an American comic fan, I regularly use ComicRack and [Comic Tagger](https://github.com/comictagger/comictagger). While Comic Tagger works with manga, it wasn't **made** for manga and so it's implementation in that regard is lackluster. And thus, this project was born... +A tool to rename and write metadata to digital manga chapters, forked from [Inpacchi/Manga-Tagger](https://github.com/Inpacchi/Manga-Tagger). ## Features -* Direct integration with [Free Manga Downloader 2](https://github.com/dazedcat19/FMD2) -* Scrapes metadata from [Anilist](https://anilist.co/) and [MyAnimeList](https://myanimelist.net/) (using [Jikan](https://jikan.moe/)) -* Fully automated batch processing -* Extremely easy integration with [DataDog](https://www.datadoghq.com/) for log monitoring +* Converted to Docker to so it can be run anywhere. +* Switched to SQLite instead of MongoDB to increase portability. +* Point the container at your download and library folder and let it take care of the rest. +* Scrapes metadata from [Anilist](https://anilist.co/) and [MyAnimeList](https://myanimelist.net/) (using [Jikan](https://jikan.moe/)). 
+* Fully automated batch processing. * Multithreaded for handling multiple files at a time * Writes metadata in the ComicRack format (using comicinfo.xml) * Full support for strictly **CBZ** files -## [Installation & Configuration](https://github.com/Inpacchi/Manga-Tagger/wiki/Installation-&-Configuration) +## Installation +**docker-compose:** + + services: + manga-tagger: + image: sanchoblaze/manga-tagger + container_name: manga-tagger + volumes: + - /path/to/library:/library + - /path/to/downloads:/downloads + - /path/to/config:/config + +**docker cli:** + + docker run -d \ + --name=manga-tagger \ + -v /path/to/library:/library \ + -v /path/to/downloads:/downloads \ + -v /path/to/config:/config \ + --restart unless-stopped \ + sanchoblaze/manga-tagger:latest + +## File Naming + +Files to be processed should be named in the format: + +> MANGA -.- CHAPTER + +For example: +> Akira -.- Chapter 001.cbz + +This will be renamed to +> Akira 001.cbz -## [Settings](https://github.com/Inpacchi/Manga-Tagger/wiki/Setting-Configuration) ## Support -Log issues via [GitHub](https://github.com/ivtechboyinpa/Manga-Tagger/issues) +Log issues via [GitHub](https://github.com/sanchoblaze/Manga-Tagger/issues) ## Contributing Pull requests are always welcome. For major changes, please open an issue first to discuss what you would like to change. -If you have any questions, please feel free to reach out on our [GitHub Discussions](https://github.com/Inpacchi/Manga-Tagger/discussions). +If you have any questions, please feel free to reach out on our [GitHub Discussions](https://github.com/sanchoblaze/Manga-Tagger/discussions).
## License [MIT](https://choosealicense.com/licenses/mit/) diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..3bc231e --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,8 @@ +services: + manga-tagger: + image: sanchoblaze/manga-tagger + container_name: manga-tagger + volumes: + - /path/to/library:/library + - /path/to/downloads:/downloads + - /path/to/config:/config \ No newline at end of file diff --git a/install.bat b/install.bat deleted file mode 100644 index 57acb5c..0000000 --- a/install.bat +++ /dev/null @@ -1,9 +0,0 @@ -pip install -r requirements.txt -nssm install manga_tagger "C:\Manga Tagger\run.bat" -nssm set manga_tagger AppDirectory "C:\Manga Tagger" -nssm set manga_tagger AppExit Default Restart -nssm set manga_tagger Description "A tool to rename and tag downloaded manga chapters with scraped metadata" -nssm set manga_tagger DisplayName "Manga Tagger" -nssm set manga_tagger ObjectName LocalSystem -nssm set manga_tagger Start SERVICE_AUTO_START -nssm set manga_tagger Type SERVICE_WIN32_OWN_PROCESS diff --git a/requirements.txt b/requirements.txt index 727a213..19ba975 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -pymongo==3.11.0 -watchdog==0.10.4 -numpy==1.19.3 -jikanpy==4.2.2 -requests==2.24.0 -python_json_logger==2.0.1 -pytz==2020.1 +pep517==0.13.0 +watchdog==3.0.0 +numpy==1.24.3 +jikanpy-v4==1.0.2 +requests==2.31.0 +python_json_logger==2.0.7 +pytz==2023.3 \ No newline at end of file diff --git a/run.bat b/run.bat deleted file mode 100644 index 44ade3e..0000000 --- a/run.bat +++ /dev/null @@ -1 +0,0 @@ -python MangaTagger.py \ No newline at end of file diff --git a/settings.json b/settings.json index f9ff91f..f5fde6a 100644 --- a/settings.json +++ b/settings.json @@ -1,38 +1,22 @@ { "application": { - "debug_mode": true, - "timezone": "America/New_York", - "library": { - "dir": "C:\\Library", - "is_network_path": false - }, - "dry_run": { - "enabled": false, - "rename_file": false, - 
"database_insert": false, - "write_comicinfo": false - }, + "debug_mode": false, + "timezone": "Europe/London", + "database_name": "manga_tagger.db", + "library_dir": "/library", + "download_dir": "/downloads", "multithreading": { "threads": 8, "max_queue_size": 0 } }, - "database": { - "database_name": "manga_tagger", - "host_address": "localhost", - "port": 27017, - "username": "manga_tagger", - "password": "Manga4LYFE", - "auth_source": "admin", - "server_selection_timeout_ms": 1 - }, "logger": { - "logging_level": "info", + "logging_level": "debug", "log_dir": "logs", "max_size": 10485760, "backup_count": 5, "console": { - "enabled": false, + "enabled": true, "log_format": "%(asctime)s | %(threadName)s %(thread)d | %(name)s | %(levelname)s - %(message)s" }, "file": { @@ -55,9 +39,5 @@ "port": 1798, "log_format": "%(threadName)s %(thread)d %(asctime)s %(name)s %(levelname)s %(message)s" } - }, - "fmd": { - "fmd_dir": "C:\\Free Manga Downloader", - "download_dir": null } } \ No newline at end of file