From 0cd67c315503cd2b9df631bd6738fb4167b137d8 Mon Sep 17 00:00:00 2001 From: Bartyboy <46422805+Bartyboy@users.noreply.github.com> Date: Sat, 27 Jun 2026 11:53:47 +0200 Subject: [PATCH 1/2] Add date-numbered episode search support for DL (#400) --- quasarr/providers/utils.py | 32 +- quasarr/providers/version.py | 4 +- quasarr/search/__init__.py | 34 +- quasarr/search/sources/dl.py | 431 +++++++++++++++--- .../search/sources/helpers/search_source.py | 7 + tests/test_dl_jahresthema.py | 134 ++++++ tests/test_utils_release_matching.py | 31 +- uv.lock | 42 +- 8 files changed, 637 insertions(+), 78 deletions(-) diff --git a/quasarr/providers/utils.py b/quasarr/providers/utils.py index 4e0b7f0e..dd36f428 100644 --- a/quasarr/providers/utils.py +++ b/quasarr/providers/utils.py @@ -1053,6 +1053,9 @@ def is_valid_release( search_string: str, season: int = None, episode: int = None, + episode_year: int = None, + episode_month: int = None, + episode_day: int = None, ) -> bool: """ Return True if the given release title is valid for the given search parameters. @@ -1073,7 +1076,8 @@ def is_valid_release( if not is_docs_search and not is_imdb_id(search_string): if not search_string_in_sanitized_title(search_string, title): trace( - "Skipping {title!r} as it doesn't match sanitized search string: {search_string!r}", + "Skipping {title!r} as it doesn't match sanitized " + "search string: {search_string!r}", title=title, search_string=search_string, ) @@ -1090,8 +1094,31 @@ def is_valid_release( return False return True + date_pattern = None + if ( + episode_year is not None + and episode_month is not None + and episode_day is not None + ): + date_pattern = re.compile( + rf"(?S{season}" if episode: stype += f"{'' if season else ' '}E{episode}" + if episode_year: + stype += ( + f" {episode_year}-{episode_month}-{episode_day}" + ) if base_search_category in [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS]: args = (shared_state, start_time, behavior_search_category) - kwargs = {"search_string": imdb_id, "season": season, "episode": episode} for source in sources.values(): source_logger = get_source_logger(source.initials) @@ -137,6 +150,25 @@ def get_search_results( source_logger.trace("Search with absolute EP number unsupported") continue + kwargs = { + "search_string": imdb_id, + "season": season, + "episode": episode, + } + + if episode_year: + if not source.supports_date_numbering: + source_logger.trace("Search with date unsupported") + continue + + kwargs.update( + { + "episode_year": episode_year, + "episode_month": episode_month, + "episode_day": episode_day, + } + ) + search_executor.add( source, args, diff --git a/quasarr/search/sources/dl.py b/quasarr/search/sources/dl.py index 84dcc750..44573116 100644 --- a/quasarr/search/sources/dl.py +++ b/quasarr/search/sources/dl.py @@ -4,7 +4,7 @@ import re import time -from datetime import datetime +from datetime import datetime, timedelta from html import unescape from urllib.parse import urlsplit, urlunsplit @@ -55,6 +55,7 @@ class Source(AbstractSearchSource): SEARCH_CAT_BOOKS, ] requires_login = True + supports_date_numbering = True def feed( self, shared_state: shared_state, start_time: float, search_category: str @@ -190,6 +191,9 @@ def _search_single_page( search_category, season, episode, + episode_year, + episode_month, + episode_day, ): """ Search a single page. This method is called sequentially for each page. @@ -258,11 +262,20 @@ def _search_single_page( title = re.sub(r"\s+", " ", title) title = unescape(title) title_normalized = _normalize_title_for_arr(title) + is_date_thread_candidate = ( + episode_year + and _should_check_thread_for_date_release( + title_normalized, + search_string, + episode_year, + ) + ) # Filter: Skip if no resolution or codec info (unless Magazarr/Lidarr) if base_search_category not in [SEARCH_CAT_BOOKS, SEARCH_CAT_MUSIC]: if not ( - RESOLUTION_REGEX.search(title_normalized) + is_date_thread_candidate + or RESOLUTION_REGEX.search(title_normalized) or CODEC_REGEX.search(title_normalized) ): continue @@ -278,27 +291,49 @@ def _search_single_page( if thread_url.startswith("/"): thread_url = f"https://www.{host}{thread_url}" - if not is_valid_release( + date_release = {} + is_release_valid = is_valid_release( title_normalized, search_category, search_string, season, episode, - ): - continue + episode_year, + episode_month, + episode_day, + ) + if not is_release_valid: + if is_date_thread_candidate: + date_release = _date_release_from_thread( + shared_state, + thread_url, + search_string, + episode_year, + episode_month, + episode_day, + ) + if not date_release: + continue + title_normalized = date_release["title"] + elif episode_year: + title_normalized = _date_release_title_for_arr( + title_normalized, + search_string, + ) # Extract date and convert to RFC 2822 format date_elem = item.select_one("time.u-dt") iso_date = date_elem.get("datetime", "") if date_elem else "" published = _convert_to_rss_date(iso_date) - mb = 0 + mb = date_release.get("mb", 0) password = "" + source_url = date_release.get("source", thread_url) link = generate_download_link( shared_state, title_normalized, - thread_url, + source_url, mb, password, imdb_id or "", @@ -314,7 +349,7 @@ def _search_single_page( "link": link, "size": mb * 1024 * 1024, "date": published, - "source": thread_url, + "source": source_url, }, "type": "protected", } @@ -355,6 +390,9 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_year: int = None, + episode_month: int = None, + episode_day: int = None, ) -> list[SearchRelease]: """ Search with sequential pagination to find best quality releases. @@ -370,15 +408,29 @@ def search( info(f"no title for IMDb {imdb_id}") return releases search_string = title + if episode_year: + search_string = _date_search_alias(search_string) if not season: if year := get_year(imdb_id): search_string += f" {year}" search_string = unescape(search_string) - max_search_duration = 7 + search_strings = ( + _date_search_strings( + search_string, + episode_year, + episode_month, + episode_day, + ) + if episode_year + else [search_string] + ) + max_search_duration = 15 if episode_year else 7 trace( - f"Starting sequential paginated search for '{search_string}' (Season: {season}, Episode: {episode}) - max {max_search_duration}s" + f"Starting sequential paginated search for '{search_string}' " + f"(Season: {season}, Episode: {episode}) - " + f"max {max_search_duration}s" ) try: @@ -387,52 +439,66 @@ def search( warn(f"Could not retrieve valid session for {host}") return releases - search_id = None - page_num = 0 search_start_time = time.time() - release_titles_per_page = set() + seen_release_titles = set() - # Sequential search through pages until timeout or no results - while (time.time() - search_start_time) < max_search_duration: - page_num += 1 + for current_search_string in search_strings: + search_id = None + page_num = 0 + release_titles_per_page = set() - page_releases, extracted_search_id = self._search_single_page( - shared_state, - host, - search_string, - search_id, - page_num, - imdb_id, - search_category, - season, - episode, - ) + # Sequential search through pages until timeout or no results + while (time.time() - search_start_time) < max_search_duration: + page_num += 1 - page_release_titles = tuple( - pr["details"]["title"] for pr in page_releases - ) - if page_release_titles in release_titles_per_page: - trace(f"[Page {page_num}] duplicate page detected, stopping") - break - release_titles_per_page.add(page_release_titles) - - # Update search_id from first page - if page_num == 1: - search_id = extracted_search_id - if not search_id: - trace("Could not extract search ID, stopping pagination") + page_releases, extracted_search_id = self._search_single_page( + shared_state, + host, + current_search_string, + search_id, + page_num, + imdb_id, + search_category, + season, + episode, + episode_year, + episode_month, + episode_day, + ) + + page_release_titles = tuple( + pr["details"]["title"] for pr in page_releases + ) + if page_release_titles in release_titles_per_page: + trace(f"[Page {page_num}] duplicate page detected, stopping") break + release_titles_per_page.add(page_release_titles) + + # Update search_id from first page + if page_num == 1: + search_id = extracted_search_id + if not search_id: + trace("Could not extract search ID, stopping pagination") + break + + for release in page_releases: + release_title = release["details"]["title"] + dedupe_key = release_title.strip().casefold() + if dedupe_key in seen_release_titles: + continue + seen_release_titles.add(dedupe_key) + releases.append(release) - # Add releases from this page - releases.extend(page_releases) - trace( - f"[Page {page_num}] completed with {len(page_releases)} valid releases" - ) + trace( + f"[Page {page_num}] completed with {len(page_releases)} valid releases" + ) - # Stop if this page returned 0 results - if len(page_releases) == 0: - trace(f"[Page {page_num}] returned 0 results, stopping pagination") - break + # Stop if this page returned 0 results + if len(page_releases) == 0: + trace( + f"[Page {page_num}] returned 0 results, stopping pagination" + ) + break except Exception as e: info(f"search error: {e}") @@ -482,6 +548,79 @@ def _normalize_title_for_arr(title): return title +def _date_search_alias(search_string): + normalized = replace_umlauts(unescape(str(search_string or ""))).lower() + normalized = re.sub(r"[^a-z0-9]+", " ", normalized) + normalized = re.sub(r"\s+", " ", normalized).strip() + + aliases = { + "wwe monday night raw": "WWE RAW", + "wwe friday night smackdown": "WWE SmackDown", + } + alias = aliases.get(normalized, search_string) + return alias + + +def _date_search_strings(search_string, episode_year, episode_month, episode_day): + try: + episode_date = datetime( + int(episode_year), + int(episode_month), + int(episode_day), + ) + except (TypeError, ValueError): + return [search_string] + + candidates = [ + episode_date, + episode_date - timedelta(days=1), + episode_date + timedelta(days=1), + ] + search_strings = [search_string] + search_variants = [search_string] + if re.search(r"(?i)\bsmackdown\b", search_string): + smackdown_variant = re.sub( + r"(?i)\bsmackdown\b", + "Smackdown", + search_string, + count=1, + ) + if smackdown_variant not in search_variants: + search_variants.append(smackdown_variant) + + for candidate in candidates: + date_variants = ( + f"{candidate:%Y %m %d}", + f"{candidate:%Y-%m-%d}", + f"{candidate:%Y.%m.%d}", + ) + for search_variant in search_variants: + for date_variant in date_variants: + value = f"{search_variant} {date_variant}" + if value not in search_strings: + search_strings.append(value) + + return search_strings + + +def _should_check_thread_for_date_release(title, search_string=None, episode_year=None): + normalized = replace_umlauts(unescape(str(title or ""))).lower() + normalized = re.sub(r"[^a-z0-9]+", " ", normalized) + tokens = set(normalized.split()) + + if episode_year and str(episode_year) not in tokens: + return False + + search_tokens = _title_match_tokens(search_string or "") + if not search_tokens: + return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) + + if not search_tokens.issubset(tokens): + return False + + return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) + + def _is_current_year_jahresthema_thread(title, search_string, base_search_category): if base_search_category != SEARCH_CAT_BOOKS: return False @@ -510,7 +649,7 @@ def _magazine_title_matches(search_string, title): def _magazine_match_tokens(text): text = replace_umlauts(unescape(str(text or ""))).lower() - text = re.sub(r"\bc\s*['`´’]?\s*t\b", "ct", text) + text = re.sub(r"\bc\s*['`\u00b4\u2019]?\s*t\b", "ct", text) text = re.sub(r"[^a-z0-9]+", " ", text) ignored = { @@ -612,6 +751,196 @@ def _fetch_thread_page(shared_state, page_url): return response +def _date_release_from_thread( + shared_state, + thread_url, + search_string, + episode_year, + episode_month, + episode_day, +): + if not (episode_year and episode_month and episode_day): + return {} + + first_page = _fetch_thread_page(shared_state, thread_url) + if first_page is None: + return {} + + last_page = _extract_last_thread_page(first_page.text) + start_page = max(1, last_page - 4) + page_numbers = [1, *range(start_page, last_page + 1)] + page_numbers = list(dict.fromkeys(page_numbers)) + + for page_num in page_numbers: + page_url = ( + thread_url + if page_num == 1 + else _thread_page_url( + thread_url, + page_num, + ) + ) + response = ( + first_page + if page_num == 1 + else _fetch_thread_page( + shared_state, + page_url, + ) + ) + if response is None: + continue + + soup = BeautifulSoup(response.text, "html.parser") + for post in soup.select("article.message--post"): + title = _date_release_title_from_post(post) + if not title: + continue + if _date_release_title_matches_search( + title, + search_string, + episode_year, + episode_month, + episode_day, + ): + arr_title = _date_release_title_for_arr(title, search_string) + source = thread_url + if _post_contains_supported_download(post): + source = _post_url(page_url, post) + return { + "title": arr_title, + "mb": _date_release_size_mb_from_post(post), + "source": source, + } + + return {} + + +def _date_release_title_from_post(post): + content = _own_message_content(post) + text = content.get_text("\n", strip=True) + lines = text.splitlines() + + for index, line in enumerate(lines): + stripped = line.strip() + match = re.match(r"(?i)^(?:title|titel)\s*:\s*(.+)$", stripped) + if match: + return _normalize_title_for_arr(_clean_issue_title(match.group(1))) + + if stripped.lower() in {"title:", "titel:"} and index + 1 < len(lines): + return _normalize_title_for_arr(_clean_issue_title(lines[index + 1])) + + text_flat = " ".join(lines) + match = re.search( + r"(?i)\b([A-Z0-9][A-Z0-9.\s'&-]{1,120}?" + r"[.\s]+(?:19|20)\d{2}[.\s]+\d{2}[.\s]+\d{2}" + r".{0,120}?)\b", + text_flat, + ) + if match: + return _normalize_title_for_arr(_clean_issue_title(match.group(1))) + + return "" + + +def _date_release_size_mb_from_post(post): + content = _own_message_content(post) + text = content.get_text("\n", strip=True) + match = re.search( + r"(?i)\b(?:size|gr\u00f6\u00dfe|groesse|grosse)\s*:\s*" + r"(\d+(?:[.,]\d+)?)\s*([kmgt]i?b|[kmgt]b)\b", + text, + ) + if not match: + return 0 + + size = float(match.group(1).replace(",", ".")) + unit = match.group(2).lower() + if unit.startswith("k"): + return round(size / 1024) + if unit.startswith("m"): + return round(size) + if unit.startswith("g"): + return round(size * 1024) + if unit.startswith("t"): + return round(size * 1024 * 1024) + return 0 + + +def _date_release_title_for_arr(title, search_string): + normalized_search = replace_umlauts(unescape(str(search_string or ""))).lower() + normalized_search = re.sub(r"[^a-z0-9]+", " ", normalized_search) + normalized_search = re.sub(r"\b\d+\b", " ", normalized_search) + normalized_search = re.sub(r"\s+", " ", normalized_search).strip() + + canonical_prefixes = { + "wwe raw": "WWE.Monday.Night.RAW", + "wwe smackdown": "WWE.Friday.Night.SmackDown", + } + canonical_prefix = canonical_prefixes.get(normalized_search) + if not canonical_prefix: + return title + + compact_prefix = canonical_prefix.replace(".", r"[\s.]+") + raw_prefix = re.sub( + r"^(wwe)[\s.]+(?:monday[\s.]+night[\s.]+)?raw", + "wwe raw", + normalized_search, + ) + raw_prefix = re.escape(raw_prefix).replace(r"\ ", r"[\s.]+") + if re.match(rf"(?i)^{raw_prefix}[\s.]+", title): + return re.sub(rf"(?i)^{raw_prefix}", canonical_prefix, title, count=1) + if re.match(rf"(?i)^{compact_prefix}[\s.]+", title): + return re.sub(rf"(?i)^{compact_prefix}", canonical_prefix, title, count=1) + + return title + + +def _date_release_title_matches_search( + title, + search_string, + episode_year, + episode_month, + episode_day, +): + date_pattern = re.compile( + rf"(? bool: def supports_absolute_numbering(self) -> bool: return False + @property + def supports_date_numbering(self) -> bool: + return False + @property @abstractmethod def supported_categories(self) -> list[int]: @@ -71,6 +75,9 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_year: int = None, + episode_month: int = None, + episode_day: int = None, ) -> list[SearchRelease]: pass diff --git a/tests/test_dl_jahresthema.py b/tests/test_dl_jahresthema.py index 0310cbe1..678555b6 100644 --- a/tests/test_dl_jahresthema.py +++ b/tests/test_dl_jahresthema.py @@ -12,10 +12,12 @@ Source as SearchSource, ) from quasarr.search.sources.dl import ( + _date_release_from_thread, _expand_jahresthema_thread_releases, _is_current_year_jahresthema_thread, _post_contains_supported_download, _release_from_jahresthema_post, + _should_check_thread_for_date_release, ) @@ -45,6 +47,138 @@ def config(self, section): class DlJahresthemaSearchTests(unittest.TestCase): + def test_date_thread_candidate_uses_search_tokens_without_release_group_lock(self): + self.assertTrue( + _should_check_thread_for_date_release( + "Sample Show 2026 Collection", + "Sample Show", + 2026, + ) + ) + + def test_date_thread_candidate_rejects_unrelated_series(self): + self.assertFalse( + _should_check_thread_for_date_release( + "Other Show 2026 Collection", + "Sample Show", + 2026, + ) + ) + + def test_date_release_from_thread_uses_post_url_only_for_downloadable_post(self): + html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

Metadata only.

+
+
+
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse(html, "https://www.source.invalid/thread.1/"), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + 2026, + 6, + 19, + ) + + self.assertEqual( + "https://www.source.invalid/thread.1/", + release["source"], + ) + + def test_date_release_from_thread_pins_downloadable_post(self): + html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse(html, "https://www.source.invalid/thread.1/"), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + 2026, + 6, + 19, + ) + + self.assertEqual( + "https://www.source.invalid/thread.1/#post-2", + release["source"], + ) + + def test_date_release_from_thread_scans_recent_thread_pages(self): + first_page_html = """ + + 2 +
+
+

Title: Sample.Show.2026.06.12.1080p.WEB.h264-GRP

+

https://ddownload.com/old-example

+
+
+ + """ + second_page_html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + fetched_thread_urls = [] + + def fake_fetch(_shared_state, page_url): + fetched_thread_urls.append(page_url) + if page_url.endswith("/thread.1/"): + return FakeResponse(first_page_html, page_url) + if page_url.endswith("/thread.1/page-2"): + return FakeResponse(second_page_html, page_url) + raise AssertionError(f"unexpected fetch: {page_url}") + + with patch("quasarr.search.sources.dl._fetch_thread_page", fake_fetch): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + 2026, + 6, + 19, + ) + + self.assertEqual( + [ + "https://www.source.invalid/thread.1/", + "https://www.source.invalid/thread.1/page-2", + ], + fetched_thread_urls, + ) + self.assertEqual( + "https://www.source.invalid/thread.1/page-2#post-2", + release["source"], + ) + def test_matches_compact_ct_style_spelling(self): current_year = datetime.now().year diff --git a/tests/test_utils_release_matching.py b/tests/test_utils_release_matching.py index edfef14b..fc358a71 100644 --- a/tests/test_utils_release_matching.py +++ b/tests/test_utils_release_matching.py @@ -2,7 +2,8 @@ import unittest -from quasarr.providers.utils import normalize_optional_int +from quasarr.constants import SEARCH_CAT_SHOWS +from quasarr.providers.utils import is_valid_release, normalize_optional_int class ReleaseMatchingUtilsTests(unittest.TestCase): @@ -12,6 +13,34 @@ def test_normalize_optional_int_returns_none_for_empty_string(self): def test_normalize_optional_int_parses_numbers(self): self.assertEqual(4, normalize_optional_int("4")) + def test_date_numbered_tv_release_matches_date_components(self): + self.assertTrue( + is_valid_release( + "Sample.Show.2026.06.19.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "Sample Show", + season=2026, + episode="06/19", + episode_year=2026, + episode_month=6, + episode_day=19, + ) + ) + + def test_date_numbered_tv_release_rejects_wrong_date(self): + self.assertFalse( + is_valid_release( + "Sample.Show.2026.06.18.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "Sample Show", + season=2026, + episode="06/19", + episode_year=2026, + episode_month=6, + episode_day=19, + ) + ) + if __name__ == "__main__": unittest.main() diff --git a/uv.lock b/uv.lock index af16e639..0f5ed06b 100644 --- a/uv.lock +++ b/uv.lock @@ -653,27 +653,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.19" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d5/e6/15800dfde183a1a106594016c912b4c12d050a301989d1aca6cb63759fe8/ruff-0.15.19.tar.gz", hash = "sha256:edc27f7172a93b32b102687009d6a588508815072141543ae603a8b9b0823063", size = 4772071, upload-time = "2026-06-24T01:10:46.942Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/4c/9ded7626c39a0440c575bf69e2bf500d443388272c842662c59852ee7fcd/ruff-0.15.19-py3-none-linux_armv6l.whl", hash = "sha256:922d1eb283161564759bd49f507e91dc6112c15da8bd5b84ed714e086243cf86", size = 10950859, upload-time = "2026-06-24T01:10:38.491Z" }, - { url = "https://files.pythonhosted.org/packages/fb/ef/c211505ece1d00ef493d58e54e3b6383c946a21e9874774eb531f2512cf3/ruff-0.15.19-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4d190d8f62a0b94aba8f721116538a9ee29b1e74d26650846ba9b99f0ae21c40", size = 11294529, upload-time = "2026-06-24T01:10:36.481Z" }, - { url = "https://files.pythonhosted.org/packages/fe/93/78d462e7d39968e58094dc57be7d09ffb14ce37da5b68ed70338a35a1f21/ruff-0.15.19-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5a2c86ba6870dd415a9d9eb8be94d7924ebec6a26ffc7958ec7ca29d4bff967d", size = 10641416, upload-time = "2026-06-24T01:10:48.923Z" }, - { url = "https://files.pythonhosted.org/packages/76/c4/5cb66cfd1f865d5cca908b86c93ac785e7f572193d3c7426079ca6643e24/ruff-0.15.19-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82b432bc087264aea70fd25ac198918b70bd9e2aa0db4297b0bb91bbfbbc63ce", size = 11015582, upload-time = "2026-06-24T01:10:30.089Z" }, - { url = "https://files.pythonhosted.org/packages/51/9f/8ecfaec10cf5eecd28fbc00ff4fb867db90a1be54bf3d39ebf93f893cd52/ruff-0.15.19-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8530a09d03b3a8c994f8b559a7dcdabc690bcd3f78ef276c38c83166798ebf56", size = 10744059, upload-time = "2026-06-24T01:10:32.48Z" }, - { url = "https://files.pythonhosted.org/packages/35/6b/983249d04562bc2d590edd75f32455cdb473affb3ba4bc8d883e939c697d/ruff-0.15.19-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87bf21fb3875fe69f0eacc825411657e2e85589cce633c35c0adf1113649c62b", size = 11568461, upload-time = "2026-06-24T01:10:17.435Z" }, - { url = "https://files.pythonhosted.org/packages/eb/39/bc7794f127b18f492a3b4ee82bba5a900c985ff13b72b46f46e3c171ba34/ruff-0.15.19-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9b229cb3ef56ecc2c1c8ebeca64b7a7740ccaef40a9eb097e78dde5a8560b83", size = 12429690, upload-time = "2026-06-24T01:10:40.638Z" }, - { url = "https://files.pythonhosted.org/packages/0a/3b/0de6859e698ed11c8a49e765196c8d333599b6a546c0715df39b6ba1aa2e/ruff-0.15.19-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c754515be7b76afe6e7e62df7776709571bcfc1631183828afcf3bafa869e3", size = 11693067, upload-time = "2026-06-24T01:10:25.681Z" }, - { url = "https://files.pythonhosted.org/packages/89/3d/0b1f30f84bee9ae6ae8d349c2ba8b6f4b040966744efdd3acc804ae7c024/ruff-0.15.19-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a498f82e0f4d8904c4e0aea5139cdfac1f39d19a3c51d491292f63a36e83b2e", size = 11616911, upload-time = "2026-06-24T01:10:44.809Z" }, - { url = "https://files.pythonhosted.org/packages/4d/eb/c90bd3dfc12eed9032c2c1bfe05105b93a1b2c8bce555db6308315b853ce/ruff-0.15.19-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:d48caa34488fb521fd0ef4aea2b0e8fe758298df044138f0d67b687a6a0d07ed", size = 11649343, upload-time = "2026-06-24T01:10:23.472Z" }, - { url = "https://files.pythonhosted.org/packages/82/91/01caa13602a2f12fae5edbe8caf78b3c1e6db1293132aee6959eecce095c/ruff-0.15.19-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4171b6613effa9363cd46dd4f75bd1827b6d1b946b5e278ed0c600d305379445", size = 10977610, upload-time = "2026-06-24T01:10:50.892Z" }, - { url = "https://files.pythonhosted.org/packages/3c/51/acb817922feab9ecbb3201377d4dbe7a25f1395e46545820061973f03468/ruff-0.15.19-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:27c15b2a241dd4d995557949a094fe78b8ad99122a38ccae1595849bcc947b3f", size = 10744900, upload-time = "2026-06-24T01:10:42.726Z" }, - { url = "https://files.pythonhosted.org/packages/84/bc/5c8ca46b8a7a3f2b16cfbec88721d772b1c93912904e8f8c2e49470fea63/ruff-0.15.19-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ed03b7862d68f0a8771d50ee129980cbf1b113f96e250b73954bc292f689e0bb", size = 11293560, upload-time = "2026-06-24T01:10:21.262Z" }, - { url = "https://files.pythonhosted.org/packages/81/e0/4a888cbe4d5523b3f77a2b1fa043f46cfeba1b32eac35dcfadee0578fa8a/ruff-0.15.19-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:08143f0685ae278b30727ea72e90c61e5bd9c31b91aac4f5bb989538f73d24b8", size = 11696533, upload-time = "2026-06-24T01:10:53.046Z" }, - { url = "https://files.pythonhosted.org/packages/98/43/c34b2fcd79262a85161764a97aaca89c3e4f574340ab61430cefa2bdd2c1/ruff-0.15.19-py3-none-win32.whl", hash = "sha256:8f47f0f92952af2557212bb10cf3e695cd4cf28b2c6e42cdb18ec6c9ebfa19da", size = 10986299, upload-time = "2026-06-24T01:10:55.185Z" }, - { url = "https://files.pythonhosted.org/packages/22/e8/15fd23e02b2442b56b2026b455977bc3057aa34b26e6323d1e99e8531a9f/ruff-0.15.19-py3-none-win_amd64.whl", hash = "sha256:efeca47ee3f9d4a7162655a3b8e6ee4a878646044233978d4d2c1ff8cdd914f0", size = 12123473, upload-time = "2026-06-24T01:10:27.74Z" }, - { url = "https://files.pythonhosted.org/packages/30/66/9a73695e31eaee04f35d8475998bf8ab354465f9c638936d76111603dcc5/ruff-0.15.19-py3-none-win_arm64.whl", hash = "sha256:6c6b607466e47349332eb1d9be52fb1467423fc07c217341af41cd0f3f0573be", size = 11376779, upload-time = "2026-06-24T01:10:34.465Z" }, +version = "0.15.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/dc/35b341fc554ba02f217fc10da57d1a75168cfbcf75b0ef2202176d4c4f2d/ruff-0.15.20.tar.gz", hash = "sha256:1416eb04349192646b54de98f146c4f59afe37d0decfc02c3cbbf396f3a28566", size = 4755489, upload-time = "2026-06-25T17:20:37.578Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/d9/2d5014f0253ba541d2061d9fa7193f48e941c8b21bb88a7ff9bbe0bd0596/ruff-0.15.20-py3-none-linux_armv6l.whl", hash = "sha256:00e188c53e499c3c1637f73c91dcf2fb56d576cab76ce1be50a27c4e80e37078", size = 10839665, upload-time = "2026-06-25T17:19:44.702Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d3/ac1798ba64f670698867fcfc591d50e7e421bef137db564858f619a30fcf/ruff-0.15.20-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9ebd1fd9b9c95fc0bd7b2761aebec1f030013d2e193a2901b224af68fe47251b", size = 11208649, upload-time = "2026-06-25T17:19:48.787Z" }, + { url = "https://files.pythonhosted.org/packages/47/47/d3ac899991202095dfcf3d5176be4272642be3cf981a2f1a30f72a2afb95/ruff-0.15.20-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c5b16cdd67ca108185cd36dce98c576350c03b1660a751de725fb049193a0632", size = 10622638, upload-time = "2026-06-25T17:19:51.354Z" }, + { url = "https://files.pythonhosted.org/packages/33/13/4e043fe30aa94d4ff5213a9881fc296d12960f5971b234a5263fdc225312/ruff-0.15.20-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3413bb3c3d2ca6a8208f1f4809cd2dca3c6de6d0b491c0e70847672bde6e6efd", size = 10984227, upload-time = "2026-06-25T17:19:54.044Z" }, + { url = "https://files.pythonhosted.org/packages/76/e6/92e7bf40388bc5800073b96564f56264f7e48bfd1a498f5ced6ae6d5a769/ruff-0.15.20-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd7ec42b3bb3da066488db093308a69c4ac5ee6d2af333a86ba6e2eb2e7dd44b", size = 10622882, upload-time = "2026-06-25T17:19:57.037Z" }, + { url = "https://files.pythonhosted.org/packages/13/7a/43460be3f24495a3aa46d4b16873e2c4941b3b5f0b00cf88c03b7b94b339/ruff-0.15.20-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1a36ad0eb77fba9aabfb69ede54de6f376d04ac18ebea022847046d340a8267", size = 11474808, upload-time = "2026-06-25T17:20:00.357Z" }, + { url = "https://files.pythonhosted.org/packages/27/a0/f37077884873221c6b33b4ab49eb18f9f88e54a16a25a5bca59bef46dd66/ruff-0.15.20-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b6df3b1e4610432f0386dba04d853b5f08cbbc903410c6fcc02f620f05aff53c", size = 12293094, upload-time = "2026-06-25T17:20:03.446Z" }, + { url = "https://files.pythonhosted.org/packages/a6/74/165545b60256a9704c21ac0ec4a0d07933b320812f9584836c9f4aca4292/ruff-0.15.20-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e89f198a1ea6ef0d727c1cf16088bc91a6cb0ab947dedc966715691647186eae", size = 11526176, upload-time = "2026-06-25T17:20:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/86/b1/a976a136d40ade83ce743578399865f57001003a409acadc0ecbb3051082/ruff-0.15.20-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309809086c2acb67624950a3c8133e80f32d0d3e27106c0cd60ff26657c9f24b", size = 11520767, upload-time = "2026-06-25T17:20:09.191Z" }, + { url = "https://files.pythonhosted.org/packages/19/0f/f032696cb01c9b54c0263fa393474d7758f1cdc021a01b04e3cbc2500999/ruff-0.15.20-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:2d2374caa2f2c2f9e2b7da0a50802cfb8b79f55a9b5e49379f564544fbf56487", size = 11500132, upload-time = "2026-06-25T17:20:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f4/51b1a14bc69e8c224b15dab9cce8e99b425e0455d462caa2b3c9be2b6a8e/ruff-0.15.20-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a1ed17b65293e0c2f22fc387bc13198a5de94bf4429589b0ff6946b0feaf21a3", size = 10943828, upload-time = "2026-06-25T17:20:16.635Z" }, + { url = "https://files.pythonhosted.org/packages/71/4b/fe267640783cd02bf6c5cc290b1df1051be2ec294c678b5c15fe19e52343/ruff-0.15.20-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f701305e66b38ea6c91882490eb73459796808e4c6362a1b765255e0cdcd4053", size = 10645418, upload-time = "2026-06-25T17:20:19.4Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c0/a65aa4ec2f5e87a1df32dc3ec1fede434fe3dfd5cbcf3b503cafc676ab54/ruff-0.15.20-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b9c0c367ad8e5d0d5b5b8537864c469a0a0e55417aadfbeca41fa61333be9f4", size = 11211770, upload-time = "2026-06-25T17:20:22.033Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a4/0caa331d954ae2723d729d351c989cb4ca8b6077d5c6c2cb6de75e98c041/ruff-0.15.20-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:01cc00dd58f0df339d0e902219dd53990ea99996a0344e5d9cc8d45d5307e460", size = 11618698, upload-time = "2026-06-25T17:20:25.259Z" }, + { url = "https://files.pythonhosted.org/packages/10/9b/5f14927848d2fd4aa891fd88d883788c5a7baba561c7874732364045708c/ruff-0.15.20-py3-none-win32.whl", hash = "sha256:ed65ef510e43a137207e0f01cfcf998aeddb1aeeda5c9d35023e910284d7cf21", size = 10857322, upload-time = "2026-06-25T17:20:28.612Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f0/fe47c501f9dea92a26d788ff98bb5d92ed4cb4c88792c5c88af6b697dc8e/ruff-0.15.20-py3-none-win_amd64.whl", hash = "sha256:a525c81c70fb0380344dd1d8745d8cc1c890b7fc94a58d5a07bd8eb9557b8415", size = 11993274, upload-time = "2026-06-25T17:20:31.871Z" }, + { url = "https://files.pythonhosted.org/packages/d7/2b/9555445e1201d92b3195f45cdb153a0b68f24e0a4273f6e3d5ab46e212bb/ruff-0.15.20-py3-none-win_arm64.whl", hash = "sha256:2f5b2a6d614e8700388806a14996c40fab2c47b819ef57d790a34878858ed9ca", size = 11343498, upload-time = "2026-06-25T17:20:35.03Z" }, ] [[package]] From 06acc668dc2670ed8d36990cc433f66e8d429424 Mon Sep 17 00:00:00 2001 From: RiX Date: Sat, 27 Jun 2026 12:20:01 +0200 Subject: [PATCH 2/2] Generalize date-numbered episode search (#403) Co-authored-by: RiX <9930448+rix1337@users.noreply.github.com> --- quasarr/providers/AGENTS.md | 2 +- quasarr/providers/utils.py | 184 ++++++++++++++-- quasarr/search/AGENTS.md | 3 +- quasarr/search/__init__.py | 27 +-- quasarr/search/sources/AGENTS.md | 7 +- quasarr/search/sources/al.py | 1 + quasarr/search/sources/at.py | 1 + quasarr/search/sources/by.py | 10 +- quasarr/search/sources/dd.py | 10 +- quasarr/search/sources/dj.py | 8 +- quasarr/search/sources/dl.py | 203 +++--------------- quasarr/search/sources/dt.py | 8 +- quasarr/search/sources/dw.py | 8 +- quasarr/search/sources/ff.py | 4 +- quasarr/search/sources/fx.py | 8 +- quasarr/search/sources/he.py | 10 +- .../search/sources/helpers/search_source.py | 6 +- quasarr/search/sources/hs.py | 17 +- quasarr/search/sources/mb.py | 10 +- quasarr/search/sources/mx.py | 1 + quasarr/search/sources/nk.py | 10 +- quasarr/search/sources/nx.py | 8 +- quasarr/search/sources/rm.py | 15 ++ quasarr/search/sources/sf.py | 10 +- quasarr/search/sources/sj.py | 8 +- quasarr/search/sources/sl.py | 8 +- quasarr/search/sources/wd.py | 10 +- quasarr/search/sources/wx.py | 9 +- tests/AGENTS.md | 1 + tests/test_dl_jahresthema.py | 58 +++-- tests/test_hostname_capabilities.py | 44 ++++ tests/test_utils_release_matching.py | 89 +++++++- 32 files changed, 525 insertions(+), 273 deletions(-) diff --git a/quasarr/providers/AGENTS.md b/quasarr/providers/AGENTS.md index 4ede2647..91523298 100644 --- a/quasarr/providers/AGENTS.md +++ b/quasarr/providers/AGENTS.md @@ -20,7 +20,7 @@ The shared-services layer consumed by every other subsystem: cross-process state - `cloudflare.py` — challenge detection, `ensure_session_cf_bypassed`, FlareSolverr get/post/session helpers - `html_templates.py` / `html_images.py` — UI page shell, base64 image constants, and language-flag emoji/SVG fallback assets for setup UI - `hostname_issues.py` — DB-backed source health tracker (`mark_/clear_/get_hostname_issue`) -- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching, online-status checks, `download_package` (the JD linkgrabber submission) +- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching (including shared date-numbering parsing/query/match/canonicalization), online-status checks, `download_package` (the JD linkgrabber submission) - `sessions/` and `notifications/` — see Child DOX Index ## Local Contracts diff --git a/quasarr/providers/utils.py b/quasarr/providers/utils.py index dd36f428..0696d60a 100644 --- a/quasarr/providers/utils.py +++ b/quasarr/providers/utils.py @@ -1047,15 +1047,160 @@ def match_in_title(title: str, season: int = None, episode: int = None) -> bool: return False +_DATE_NUMBERING_IGNORED_TITLE_WORDS = { + "a", + "an", + "and", + "das", + "der", + "die", + "friday", + "monday", + "night", + "saturday", + "sunday", + "the", + "thursday", + "tuesday", + "wednesday", +} +_DATE_NUMBERING_SCHEDULE_WORDS = { + "friday", + "monday", + "night", + "saturday", + "sunday", + "thursday", + "tuesday", + "wednesday", +} + + +def parse_episode_date(season, episode): + """Return a validated date for Sonarr's year + MM/DD numbering shape.""" + parts = str(episode or "").split("/") + if len(parts) != 2: + return None + + try: + return date(int(season), int(parts[0]), int(parts[1])) + except (TypeError, ValueError): + return None + + +def date_numbering_title_tokens(value): + normalized = replace_umlauts(html.unescape(str(value or ""))).lower() + normalized = re.sub(r"[^a-z0-9]+", " ", normalized) + return { + token + for token in normalized.split() + if token not in _DATE_NUMBERING_IGNORED_TITLE_WORDS + and not re.fullmatch(r"\d+", token) + } + + +def date_numbering_title_matches(title, search_string): + search_tokens = date_numbering_title_tokens(search_string) + if not search_tokens: + return False + return search_tokens.issubset(date_numbering_title_tokens(title)) + + +def date_numbering_release_matches(title, search_string, episode_date): + if episode_date is None: + return False + + date_pattern = re.compile( + rf"(?= 2 and compact_words != words: + title_variants.append(" ".join(compact_words)) + + for value in list(title_variants): + case_variant = _date_numbering_case_variant(value) + if case_variant and case_variant not in title_variants: + title_variants.append(case_variant) + + search_strings = list(title_variants) + for candidate in ( + episode_date, + episode_date - timedelta(days=1), + episode_date + timedelta(days=1), + ): + for title_variant in title_variants: + for date_variant in ( + f"{candidate:%Y %m %d}", + f"{candidate:%Y-%m-%d}", + f"{candidate:%Y.%m.%d}", + ): + value = f"{title_variant} {date_variant}" + if value not in search_strings: + search_strings.append(value) + + return search_strings + + +def canonicalize_date_numbered_title(title, search_string, episode_date): + if is_imdb_id(search_string) or not date_numbering_release_matches( + title, search_string, episode_date + ): + return title + + date_match = re.search( + rf"(? bool: """ Return True if the given release title is valid for the given search parameters. @@ -1064,6 +1209,7 @@ def is_valid_release( - search_string: the original search phrase (could be an IMDb id or plain text) - season: desired season number (or None) - episode: desired episode number (or None) + - episode_date: validated date for a date-numbered TV episode (or None) """ try: is_movie_search = search_category // 1000 * 1000 == SEARCH_CAT_MOVIES @@ -1072,9 +1218,14 @@ def is_valid_release( is_music_search = search_category // 1000 * 1000 == SEARCH_CAT_MUSIC is_xxx_search = search_category // 1000 * 1000 == SEARCH_CAT_XXX - # if search string is NOT an imdb id check search_string_in_sanitized_title - if not match, it is not valid + # if search string is NOT an imdb id, require a title match if not is_docs_search and not is_imdb_id(search_string): - if not search_string_in_sanitized_title(search_string, title): + title_matches = ( + date_numbering_title_matches(title, search_string) + if is_tv_search and episode_date is not None + else search_string_in_sanitized_title(search_string, title) + ) + if not title_matches: trace( "Skipping {title!r} as it doesn't match sanitized " "search string: {search_string!r}", @@ -1094,27 +1245,16 @@ def is_valid_release( return False return True - date_pattern = None - if ( - episode_year is not None - and episode_month is not None - and episode_day is not None - ): - date_pattern = re.compile( - rf"(?S{season}" if episode: stype += f"{'' if season else ' '}E{episode}" - if episode_year: - stype += ( - f" {episode_year}-{episode_month}-{episode_day}" - ) + if episode_date: + stype += f" {episode_date:%Y}-{episode_date:%m}-{episode_date:%d}" if base_search_category in [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS]: args = (shared_state, start_time, behavior_search_category) @@ -156,18 +147,12 @@ def get_search_results( "episode": episode, } - if episode_year: + if episode_date: if not source.supports_date_numbering: source_logger.trace("Search with date unsupported") continue - kwargs.update( - { - "episode_year": episode_year, - "episode_month": episode_month, - "episode_day": episode_day, - } - ) + kwargs["episode_date"] = episode_date search_executor.add( source, diff --git a/quasarr/search/sources/AGENTS.md b/quasarr/search/sources/AGENTS.md index 3896cfa3..6a4d91fd 100644 --- a/quasarr/search/sources/AGENTS.md +++ b/quasarr/search/sources/AGENTS.md @@ -11,10 +11,10 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra ## Local Contracts - Registration is by file existence alone: drop `.py` into this folder exposing `class Source(AbstractSearchSource)`. The module filename, `Source.initials`, and the `Config("Hostnames")` key all use the same two-letter key; a same-key download twin exists only when release links need source-specific extraction (FX has none). Adding/renaming a module file changes the Hostnames config key space automatically. -- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller. +- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None, episode_date=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller. `episode_date` is a validated `datetime.date` for Sonarr's year + `MM/DD` numbering shape. - `SearchRelease`: `{"details": {"title", "hostname" (= initials), "imdb_id" (str or None), "link", "size" (bytes), "date" (RFC822 preferred), "source" (original page URL)}, "type": "protected"}` — every emit site uses type `"protected"`. - `details.link` must come from `quasarr.providers.utils.generate_download_link(...)`; the payload is pipe-delimited (`title|url|size_mb|password|imdb_id|source_key`), so field values must not contain `|`. -- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`. +- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `supports_date_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. Date numbering defaults to supported; AL and AT opt out because their anime-specific numbering rewrites require numeric episodes, MX opts out because its API requires numeric season+episode, and movie-only FF is never dispatched for TV dates. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`. - `is_valid_release(...)` is the default validation for each candidate title in `search()` (not `feed()`); AT and AL intentionally deviate with bespoke matching suited to absolute-numbered anime. IMDb convention: if the searched ID and a release-page ID both exist and differ → skip; if the release lacks one → inherit the searched ID. - Call `mark_hostname_issue(self.initials, "feed"|"search", msg)` on fetch/parse errors and `clear_hostname_issue(self.initials)` when releases were produced. - `Source.__init__` must be cheap and never fail — a failure is logged as an error and the source is dropped from the registry. @@ -26,6 +26,7 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra - Timeouts from `constants.FEED_/SEARCH_REQUEST_TIMEOUT_SECONDS`; User-Agent from shared state. Sources without native IMDb search resolve a localized title via `get_localized_title(shared_state, imdb_id, language)` — pass `"de"`, `"en"`, or `"fr"` matching the source site's content language. - Module-private parsing helpers are underscore-prefixed at the module bottom; `size` flows as MB int into `generate_download_link` and as bytes in `details.size`; size 0 is the accepted fallback. - Do not infer payloads or response shapes — the root `Third-Party Source Work` rules require real traffic captures or direct curl confirmation first. +- Date-numbering parsing, title matching, query variants, and canonical title rewriting belong in `quasarr.providers.utils`; source modules only pass `episode_date` through existing verified request/result paths. Keep production logic series-agnostic and add series-specific compatibility cases only as synthetic tests. - A new source adds its entry to the Per-Source Notes below and, when it has a download module, to the notes in `quasarr/downloads/sources/AGENTS.md` — in the same change. ### Per-Source Notes (search side) @@ -37,7 +38,7 @@ Capability flags (`supports_*`, `requires_*`) and categories are class attribute - **BY** — no login. Book/magazine titles run through Magazarr-compatible date/issue normalization; search drops releases without valid resolution/codec (feed keeps the original metadata); per-category fetches use category-ID constants inside the module. - **DD** — login (`providers/sessions/dd`, which applies a fixed quality-profile filter to API responses — new resolutions must be added there). IMDb mismatch between request and API response discards the result; a suspected fake release (the API's `fake` flag) invalidates the cached session. - **DJ** — login (shares the `JUNKIES` credentials section with SJ). IMDb-only; series discovered by HTML scrape to locate a media id, releases then fetched via JSON and aggregated per season block. -- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants. +- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants. Date-numbered thread discovery/pagination is DL-specific, while query variants, title/date matching, and canonicalization use shared generic helpers. - **DT** — no login. Article date parsing assumes a fixed timezone offset; IMDb id parsed from article HTML and propagated; search drops candidates not matching requested resolution/codec (feed keeps them). - **DW** — no login. German month names mapped in a local table (new variants go there); IMDb id read from article HTML validates the result still matches the request. - **FF** — no login, movie-only. Search uses the public title lookup, then opens each movie page to extract IMDb id and the movie-token release API; releases are emitted from API `div.entry` blocks and use the release page URL as the download payload source. Feed reads recent update rows, then cross-references each movie page/API to fill size and IMDb data for the release anchors; cross-reference stops when the source's global feed budget reaches `FEED_REQUEST_TIMEOUT_SECONDS`. diff --git a/quasarr/search/sources/al.py b/quasarr/search/sources/al.py index d6d6a7cc..e4ff3b30 100644 --- a/quasarr/search/sources/al.py +++ b/quasarr/search/sources/al.py @@ -45,6 +45,7 @@ class Source(AbstractSearchSource): requires_flaresolverr = True supports_imdb = True supports_phrase = False + supports_date_numbering = False supports_absolute_numbering = True supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS, SEARCH_CAT_SHOWS_ANIME] requires_login = True diff --git a/quasarr/search/sources/at.py b/quasarr/search/sources/at.py index 01d3da51..38f38bc0 100644 --- a/quasarr/search/sources/at.py +++ b/quasarr/search/sources/at.py @@ -69,6 +69,7 @@ class Source(AbstractSearchSource): language = "en" supports_imdb = True supports_phrase = True + supports_date_numbering = False supports_absolute_numbering = True supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS, SEARCH_CAT_SHOWS_ANIME] diff --git a/quasarr/search/sources/by.py b/quasarr/search/sources/by.py index 2bbe15b4..78a73509 100644 --- a/quasarr/search/sources/by.py +++ b/quasarr/search/sources/by.py @@ -103,6 +103,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: by = shared_state.values["config"]("Hostnames").get(self.initials) password = by @@ -140,6 +141,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: error(f"Error loading search: {e}") @@ -164,6 +166,7 @@ def _parse_posts( search_string=None, season=None, episode=None, + episode_date=None, ): releases = [] @@ -281,7 +284,12 @@ def _parse_posts( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue if XXX_REGEX.search(title) and "xxx" not in search_string.lower(): diff --git a/quasarr/search/sources/dd.py b/quasarr/search/sources/dd.py index 35827c13..1ca663e1 100644 --- a/quasarr/search/sources/dd.py +++ b/quasarr/search/sources/dd.py @@ -54,6 +54,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] dd = shared_state.values["config"]("Hostnames").get(self.initials) @@ -76,7 +77,7 @@ def search( info(f"Could not extract title from IMDb-ID {imdb_id}") return releases search_string = html.unescape(search_string) - if season: + if season and episode_date is None: search_string += f" S{int(season):02d}" if episode: search_string += f"E{int(episode):02d}" @@ -130,7 +131,12 @@ def search( title = release.get("release") if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dj.py b/quasarr/search/sources/dj.py index feca6b66..3824c514 100644 --- a/quasarr/search/sources/dj.py +++ b/quasarr/search/sources/dj.py @@ -131,6 +131,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -220,7 +221,12 @@ def search( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dl.py b/quasarr/search/sources/dl.py index 44573116..3d935a02 100644 --- a/quasarr/search/sources/dl.py +++ b/quasarr/search/sources/dl.py @@ -4,7 +4,7 @@ import re import time -from datetime import datetime, timedelta +from datetime import datetime from html import unescape from urllib.parse import urlsplit, urlunsplit @@ -32,6 +32,10 @@ retrieve_and_validate_session, ) from quasarr.providers.utils import ( + canonicalize_date_numbered_title, + date_numbering_release_matches, + date_numbering_search_strings, + date_numbering_title_matches, generate_download_link, get_base_search_category_id, is_imdb_id, @@ -191,9 +195,7 @@ def _search_single_page( search_category, season, episode, - episode_year, - episode_month, - episode_day, + episode_date, ): """ Search a single page. This method is called sequentially for each page. @@ -263,11 +265,11 @@ def _search_single_page( title = unescape(title) title_normalized = _normalize_title_for_arr(title) is_date_thread_candidate = ( - episode_year + episode_date and _should_check_thread_for_date_release( title_normalized, search_string, - episode_year, + episode_date, ) ) @@ -298,9 +300,7 @@ def _search_single_page( search_string, season, episode, - episode_year, - episode_month, - episode_day, + episode_date, ) if not is_release_valid: if is_date_thread_candidate: @@ -308,17 +308,16 @@ def _search_single_page( shared_state, thread_url, search_string, - episode_year, - episode_month, - episode_day, + episode_date, ) if not date_release: continue title_normalized = date_release["title"] - elif episode_year: - title_normalized = _date_release_title_for_arr( + elif episode_date: + title_normalized = canonicalize_date_numbered_title( title_normalized, search_string, + episode_date, ) # Extract date and convert to RFC 2822 format @@ -390,9 +389,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, - episode_year: int = None, - episode_month: int = None, - episode_day: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search with sequential pagination to find best quality releases. @@ -408,24 +405,20 @@ def search( info(f"no title for IMDb {imdb_id}") return releases search_string = title - if episode_year: - search_string = _date_search_alias(search_string) if not season: if year := get_year(imdb_id): search_string += f" {year}" search_string = unescape(search_string) search_strings = ( - _date_search_strings( + date_numbering_search_strings( search_string, - episode_year, - episode_month, - episode_day, + episode_date, ) - if episode_year + if episode_date else [search_string] ) - max_search_duration = 15 if episode_year else 7 + max_search_duration = 15 if episode_date else 7 trace( f"Starting sequential paginated search for '{search_string}' " @@ -461,9 +454,7 @@ def search( search_category, season, episode, - episode_year, - episode_month, - episode_day, + episode_date, ) page_release_titles = tuple( @@ -548,74 +539,18 @@ def _normalize_title_for_arr(title): return title -def _date_search_alias(search_string): - normalized = replace_umlauts(unescape(str(search_string or ""))).lower() - normalized = re.sub(r"[^a-z0-9]+", " ", normalized) - normalized = re.sub(r"\s+", " ", normalized).strip() - - aliases = { - "wwe monday night raw": "WWE RAW", - "wwe friday night smackdown": "WWE SmackDown", - } - alias = aliases.get(normalized, search_string) - return alias - - -def _date_search_strings(search_string, episode_year, episode_month, episode_day): - try: - episode_date = datetime( - int(episode_year), - int(episode_month), - int(episode_day), - ) - except (TypeError, ValueError): - return [search_string] - - candidates = [ - episode_date, - episode_date - timedelta(days=1), - episode_date + timedelta(days=1), - ] - search_strings = [search_string] - search_variants = [search_string] - if re.search(r"(?i)\bsmackdown\b", search_string): - smackdown_variant = re.sub( - r"(?i)\bsmackdown\b", - "Smackdown", - search_string, - count=1, - ) - if smackdown_variant not in search_variants: - search_variants.append(smackdown_variant) - - for candidate in candidates: - date_variants = ( - f"{candidate:%Y %m %d}", - f"{candidate:%Y-%m-%d}", - f"{candidate:%Y.%m.%d}", - ) - for search_variant in search_variants: - for date_variant in date_variants: - value = f"{search_variant} {date_variant}" - if value not in search_strings: - search_strings.append(value) - - return search_strings - - -def _should_check_thread_for_date_release(title, search_string=None, episode_year=None): +def _should_check_thread_for_date_release(title, search_string=None, episode_date=None): normalized = replace_umlauts(unescape(str(title or ""))).lower() normalized = re.sub(r"[^a-z0-9]+", " ", normalized) tokens = set(normalized.split()) - if episode_year and str(episode_year) not in tokens: + if episode_date and str(episode_date.year) not in tokens: return False - search_tokens = _title_match_tokens(search_string or "") - if not search_tokens: + if not search_string: return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) - if not search_tokens.issubset(tokens): + if not date_numbering_title_matches(title, search_string): return False return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) @@ -755,11 +690,9 @@ def _date_release_from_thread( shared_state, thread_url, search_string, - episode_year, - episode_month, - episode_day, + episode_date, ): - if not (episode_year and episode_month and episode_day): + if episode_date is None: return {} first_page = _fetch_thread_page(shared_state, thread_url) @@ -796,14 +729,10 @@ def _date_release_from_thread( title = _date_release_title_from_post(post) if not title: continue - if _date_release_title_matches_search( - title, - search_string, - episode_year, - episode_month, - episode_day, - ): - arr_title = _date_release_title_for_arr(title, search_string) + if date_numbering_release_matches(title, search_string, episode_date): + arr_title = canonicalize_date_numbered_title( + title, search_string, episode_date + ) source = thread_url if _post_contains_supported_download(post): source = _post_url(page_url, post) @@ -867,80 +796,6 @@ def _date_release_size_mb_from_post(post): return 0 -def _date_release_title_for_arr(title, search_string): - normalized_search = replace_umlauts(unescape(str(search_string or ""))).lower() - normalized_search = re.sub(r"[^a-z0-9]+", " ", normalized_search) - normalized_search = re.sub(r"\b\d+\b", " ", normalized_search) - normalized_search = re.sub(r"\s+", " ", normalized_search).strip() - - canonical_prefixes = { - "wwe raw": "WWE.Monday.Night.RAW", - "wwe smackdown": "WWE.Friday.Night.SmackDown", - } - canonical_prefix = canonical_prefixes.get(normalized_search) - if not canonical_prefix: - return title - - compact_prefix = canonical_prefix.replace(".", r"[\s.]+") - raw_prefix = re.sub( - r"^(wwe)[\s.]+(?:monday[\s.]+night[\s.]+)?raw", - "wwe raw", - normalized_search, - ) - raw_prefix = re.escape(raw_prefix).replace(r"\ ", r"[\s.]+") - if re.match(rf"(?i)^{raw_prefix}[\s.]+", title): - return re.sub(rf"(?i)^{raw_prefix}", canonical_prefix, title, count=1) - if re.match(rf"(?i)^{compact_prefix}[\s.]+", title): - return re.sub(rf"(?i)^{compact_prefix}", canonical_prefix, title, count=1) - - return title - - -def _date_release_title_matches_search( - title, - search_string, - episode_year, - episode_month, - episode_day, -): - date_pattern = re.compile( - rf"(? list[SearchRelease]: releases = [] dt = shared_state.values["config"]("Hostnames").get(self.initials) @@ -250,7 +251,12 @@ def search( ) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dw.py b/quasarr/search/sources/dw.py index 89ec40a8..1c2f3cde 100644 --- a/quasarr/search/sources/dw.py +++ b/quasarr/search/sources/dw.py @@ -139,6 +139,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] dw = shared_state.values["config"]("Hostnames").get(self.initials) @@ -184,7 +185,12 @@ def search( title = result.a.text.strip() if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/ff.py b/quasarr/search/sources/ff.py index d63a7965..d68fbab8 100644 --- a/quasarr/search/sources/ff.py +++ b/quasarr/search/sources/ff.py @@ -174,9 +174,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, - episode_year: int = None, - episode_month: int = None, - episode_day: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) diff --git a/quasarr/search/sources/fx.py b/quasarr/search/sources/fx.py index 68f153ca..5ca028c5 100644 --- a/quasarr/search/sources/fx.py +++ b/quasarr/search/sources/fx.py @@ -159,6 +159,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] fx = shared_state.values["config"]("Hostnames").get(self.initials) @@ -228,7 +229,12 @@ def search( title = sanitize_title(title.text) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/he.py b/quasarr/search/sources/he.py index 23b1c4fc..6ab90a8f 100644 --- a/quasarr/search/sources/he.py +++ b/quasarr/search/sources/he.py @@ -52,6 +52,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) @@ -92,7 +93,7 @@ def search( search_type = "search" timeout = SEARCH_REQUEST_TIMEOUT_SECONDS - if season: + if season and episode_date is None: source_search += f" S{int(season):02d}" if episode: @@ -142,7 +143,12 @@ def search( title = head_split[0].strip() if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): trace("invalid release {}", title) continue diff --git a/quasarr/search/sources/helpers/search_source.py b/quasarr/search/sources/helpers/search_source.py index 36a88618..34df7c19 100644 --- a/quasarr/search/sources/helpers/search_source.py +++ b/quasarr/search/sources/helpers/search_source.py @@ -32,7 +32,7 @@ def supports_absolute_numbering(self) -> bool: @property def supports_date_numbering(self) -> bool: - return False + return True @property @abstractmethod @@ -75,9 +75,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, - episode_year: int = None, - episode_month: int = None, - episode_day: int = None, + episode_date=None, ) -> list[SearchRelease]: pass diff --git a/quasarr/search/sources/hs.py b/quasarr/search/sources/hs.py index f93c81cb..42f5949c 100644 --- a/quasarr/search/sources/hs.py +++ b/quasarr/search/sources/hs.py @@ -149,6 +149,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """Search HS for releases by IMDb ID""" releases = [] @@ -183,6 +184,7 @@ def search( search_string, season, episode, + episode_date, ) except Exception as e: @@ -207,6 +209,7 @@ def _parse_search_results( search_string, season, episode, + episode_date, ): """Parse search results page and extract releases with filecrypt links. @@ -290,7 +293,12 @@ def _parse_search_results( for title in unique_episodes: # Validate release against search criteria if not is_valid_release( - title, base_search_category, search_string, season, episode + title, + base_search_category, + search_string, + season, + episode, + episode_date, ): continue @@ -328,7 +336,12 @@ def _parse_search_results( # Also add the main title (season pack) with full size - if not duplicate if main_title.lower() not in seen: if is_valid_release( - main_title, base_search_category, search_string, season, episode + main_title, + base_search_category, + search_string, + season, + episode, + episode_date, ): link = generate_download_link( shared_state, diff --git a/quasarr/search/sources/mb.py b/quasarr/search/sources/mb.py index b8a99a1e..cc00c7b7 100644 --- a/quasarr/search/sources/mb.py +++ b/quasarr/search/sources/mb.py @@ -52,6 +52,7 @@ def _parse_posts( search_string=None, season=None, episode=None, + episode_date=None, ): releases = [] one_hour_ago = (datetime.now() - timedelta(hours=1)).strftime( @@ -85,7 +86,12 @@ def _parse_posts( if is_search: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue @@ -198,6 +204,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: mb = shared_state.values["config"]("Hostnames").get(self.initials) @@ -226,6 +233,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: warn(f"Error loading search: {e}") diff --git a/quasarr/search/sources/mx.py b/quasarr/search/sources/mx.py index 62bde7ba..e94faae7 100644 --- a/quasarr/search/sources/mx.py +++ b/quasarr/search/sources/mx.py @@ -51,6 +51,7 @@ class Source(AbstractSearchSource): language = "fr" supports_imdb = True supports_phrase = False + supports_date_numbering = False supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS] # The movie feed reads Radarr and the show feed reads Sonarr (ID search # needs neither). Setup prompts remain source-wide, but feed() degrades diff --git a/quasarr/search/sources/nk.py b/quasarr/search/sources/nk.py index f03709cf..15b27664 100644 --- a/quasarr/search/sources/nk.py +++ b/quasarr/search/sources/nk.py @@ -51,6 +51,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) @@ -81,7 +82,7 @@ def search( search_type = "search" timeout = SEARCH_REQUEST_TIMEOUT_SECONDS - if season: + if season and episode_date is None: source_search += f" S{int(season):02d}" if episode: @@ -137,7 +138,12 @@ def search( release_imdb_id = imdb_id if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/nx.py b/quasarr/search/sources/nx.py index 4615e478..7eb0678e 100644 --- a/quasarr/search/sources/nx.py +++ b/quasarr/search/sources/nx.py @@ -155,6 +155,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search using internal API. @@ -212,7 +213,12 @@ def search( title = item["name"] if title: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/rm.py b/quasarr/search/sources/rm.py index 9d0568de..92b7af25 100644 --- a/quasarr/search/sources/rm.py +++ b/quasarr/search/sources/rm.py @@ -98,6 +98,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] match_search_string = search_string @@ -141,6 +142,7 @@ def search( imdb_id=imdb_id, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: warn(f"Error loading search: {e}") @@ -296,8 +298,19 @@ def _matches_requested_release( search_string, season=None, episode=None, + episode_date=None, ): base_search_category = get_base_search_category_id(search_category) + if episode_date is not None: + return is_valid_release( + title, + search_category, + search_string, + season, + episode, + episode_date, + ) + if base_search_category != SEARCH_CAT_SHOWS: return is_valid_release(title, search_category, search_string, season, episode) @@ -345,6 +358,7 @@ def _build_search_results( imdb_id, season=None, episode=None, + episode_date=None, is_feed=False, ): base_url = _get_base_url(shared_state) @@ -370,6 +384,7 @@ def _build_search_results( search_string, season, episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sf.py b/quasarr/search/sources/sf.py index d97a2c3b..3965afeb 100644 --- a/quasarr/search/sources/sf.py +++ b/quasarr/search/sources/sf.py @@ -147,6 +147,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] sf = shared_state.values["config"]("Hostnames").get(self.initials) @@ -310,7 +311,7 @@ def search( debug(f"Error extracting size for {title}: {e}") mb = 0 - if episode: + if episode and episode_date is None: try: if not re.search(r"S\d{1,3}E\d{1,3}", title): episodes_in_release = len(mirrors["episodes"]) @@ -354,7 +355,12 @@ def search( # check down here on purpose, because the title may be modified at episode stage if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sj.py b/quasarr/search/sources/sj.py index d8380da0..bb288730 100644 --- a/quasarr/search/sources/sj.py +++ b/quasarr/search/sources/sj.py @@ -132,6 +132,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -222,7 +223,12 @@ def search( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sl.py b/quasarr/search/sources/sl.py index 2958f24f..1f1df8ee 100644 --- a/quasarr/search/sources/sl.py +++ b/quasarr/search/sources/sl.py @@ -177,6 +177,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -271,7 +272,12 @@ def fetch(url): title = a.get_text(strip=True) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/wd.py b/quasarr/search/sources/wd.py index 00b977ca..012cfb5d 100644 --- a/quasarr/search/sources/wd.py +++ b/quasarr/search/sources/wd.py @@ -132,6 +132,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] wd = shared_state.values["config"]("Hostnames").get(self.initials) @@ -196,6 +197,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, imdb_id=imdb_id, ) except Exception as e: @@ -220,6 +222,7 @@ def _parse_rows( search_string=None, season=None, episode=None, + episode_date=None, imdb_id=None, ): """ @@ -261,7 +264,12 @@ def _parse_rows( # search context contains non-video releases (ebooks, games, etc.) if is_search: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/wx.py b/quasarr/search/sources/wx.py index 007f2ad2..900faa10 100644 --- a/quasarr/search/sources/wx.py +++ b/quasarr/search/sources/wx.py @@ -168,6 +168,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search using internal API. @@ -301,7 +302,12 @@ def search( title = title.replace(" ", ".") if is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): # Skip if we've already seen this exact title if title in seen_titles: @@ -364,6 +370,7 @@ def search( search_string, season, episode, + episode_date, ): continue diff --git a/tests/AGENTS.md b/tests/AGENTS.md index 35d70975..2ba5900b 100644 --- a/tests/AGENTS.md +++ b/tests/AGENTS.md @@ -14,6 +14,7 @@ Hermetic unit tests for Quasarr, built exclusively on the standard-library `unit - Full-suite command: `uv run python -X utf8 -m unittest discover -s tests` (the `-X utf8` flag avoids Windows console encoding noise in log output). - Tests must not perform network I/O or touch JDownloader. Patch in the consuming module's namespace (e.g. `quasarr.downloads.sources..requests.Session`), not the `requests` library globally. Only `test_sqlite_database.py` touches disk, via `tempfile.TemporaryDirectory`. - Synthetic-data rule (security-critical): source hostnames in tests are fake domains on the reserved `.invalid` TLD; use synthetic release titles (never paste real ones). Real public hoster/crypter domains are permitted only where the production matching logic keys on those literal domains — they are hoster/crypter services, not protected sources. +- Date-numbering regression tests may name a motivating real series to prove title-alias compatibility, but every complete release string, date, quality/group suffix, hostname, and URL remains synthetic. - `shared_state` is always faked (MagicMock with a `.values` dict, SimpleNamespace, or a small local class whose `values["config"]` is a callable returning dicts) — except `test_sqlite_database.py`, which mutates the real module in `setUp`. - There is no fixtures directory and no shared test-helpers module: each file defines its own `FakeResponse`/`FakeSession`/fake shared_state inline. - Run the full suite after touching shared providers, download flow, search behavior, or notification logic. Per root change discipline, tests change only when the intended behavior in the covered area changed or the existing test is incorrect. diff --git a/tests/test_dl_jahresthema.py b/tests/test_dl_jahresthema.py index 678555b6..09bf5737 100644 --- a/tests/test_dl_jahresthema.py +++ b/tests/test_dl_jahresthema.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import unittest -from datetime import datetime +from datetime import date, datetime from unittest.mock import patch from bs4 import BeautifulSoup @@ -52,7 +52,7 @@ def test_date_thread_candidate_uses_search_tokens_without_release_group_lock(sel _should_check_thread_for_date_release( "Sample Show 2026 Collection", "Sample Show", - 2026, + date(2026, 6, 19), ) ) @@ -61,7 +61,7 @@ def test_date_thread_candidate_rejects_unrelated_series(self): _should_check_thread_for_date_release( "Other Show 2026 Collection", "Sample Show", - 2026, + date(2026, 6, 19), ) ) @@ -89,9 +89,7 @@ def test_date_release_from_thread_uses_post_url_only_for_downloadable_post(self) FakeSharedState(), "https://www.source.invalid/thread.1/", "Sample Show", - 2026, - 6, - 19, + date(2026, 6, 19), ) self.assertEqual( @@ -117,9 +115,7 @@ def test_date_release_from_thread_pins_downloadable_post(self): FakeSharedState(), "https://www.source.invalid/thread.1/", "Sample Show", - 2026, - 6, - 19, + date(2026, 6, 19), ) self.assertEqual( @@ -162,9 +158,7 @@ def fake_fetch(_shared_state, page_url): FakeSharedState(), "https://www.source.invalid/thread.1/", "Sample Show", - 2026, - 6, - 19, + date(2026, 6, 19), ) self.assertEqual( @@ -179,6 +173,46 @@ def fake_fetch(_shared_state, page_url): release["source"], ) + def test_date_release_from_thread_finds_wwe_scheduled_series_generically(self): + episode_date = date(2031, 2, 3) + cases = ( + ( + "WWE Monday Night RAW", + "WWE.RAW.2031.02.03.1080p.WEB.h264-GRP", + "WWE.Monday.Night.RAW.2031.02.03.1080p.WEB.h264-GRP", + ), + ( + "WWE Friday Night SmackDown", + "WWE.SmackDown.2031.02.03.1080p.WEB.h264-GRP", + "WWE.Friday.Night.SmackDown.2031.02.03.1080p.WEB.h264-GRP", + ), + ) + + for search_string, posted_title, expected_title in cases: + with self.subTest(search_string=search_string): + html = f""" +
+
+

Title: {posted_title}

+

https://ddownload.com/example

+
+
+ """ + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse( + html, "https://www.source.invalid/thread.1/" + ), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + search_string, + episode_date, + ) + + self.assertEqual(expected_title, release["title"]) + def test_matches_compact_ct_style_spelling(self): current_year = datetime.now().year diff --git a/tests/test_hostname_capabilities.py b/tests/test_hostname_capabilities.py index ec8c2186..4e7b24d0 100644 --- a/tests/test_hostname_capabilities.py +++ b/tests/test_hostname_capabilities.py @@ -1,5 +1,7 @@ +import inspect import unittest +from quasarr.constants import SEARCH_CAT_SHOWS from quasarr.providers.html_images import FLAG_SVGS, LANGUAGE_FLAG_EMOJI from quasarr.search.sources import get_sources from quasarr.search.sources.helpers import get_source_metadata @@ -30,6 +32,48 @@ def test_flag_assets_cover_every_used_language(self): self.assertIn(language, LANGUAGE_FLAG_EMOJI) self.assertIn(language, FLAG_SVGS) + def test_date_numbering_sources_accept_shared_date_context(self): + for key, source in get_sources().items(): + if ( + SEARCH_CAT_SHOWS not in source.supported_categories + or not source.supports_date_numbering + ): + continue + with self.subTest(source=key): + self.assertIn( + "episode_date", + inspect.signature(source.search).parameters, + ) + + def test_date_numbering_enabled_for_all_compatible_tv_sources(self): + expected = { + "by", + "dd", + "dj", + "dl", + "dt", + "dw", + "fx", + "he", + "hs", + "mb", + "nk", + "nx", + "rm", + "sf", + "sj", + "sl", + "wd", + "wx", + } + actual = { + key + for key, source in get_sources().items() + if SEARCH_CAT_SHOWS in source.supported_categories + and source.supports_date_numbering + } + self.assertEqual(expected, actual) + class SourceMetadataTests(unittest.TestCase): def test_metadata_exposes_expected_keys_for_every_source(self): diff --git a/tests/test_utils_release_matching.py b/tests/test_utils_release_matching.py index fc358a71..f104d0ff 100644 --- a/tests/test_utils_release_matching.py +++ b/tests/test_utils_release_matching.py @@ -1,9 +1,16 @@ # -*- coding: utf-8 -*- import unittest +from datetime import date from quasarr.constants import SEARCH_CAT_SHOWS -from quasarr.providers.utils import is_valid_release, normalize_optional_int +from quasarr.providers.utils import ( + canonicalize_date_numbered_title, + date_numbering_search_strings, + is_valid_release, + normalize_optional_int, + parse_episode_date, +) class ReleaseMatchingUtilsTests(unittest.TestCase): @@ -14,33 +21,93 @@ def test_normalize_optional_int_parses_numbers(self): self.assertEqual(4, normalize_optional_int("4")) def test_date_numbered_tv_release_matches_date_components(self): + episode_date = date(2031, 6, 19) self.assertTrue( is_valid_release( - "Sample.Show.2026.06.19.1080p.WEB.h264-GRP", + "Sample.Show.2031.06.19.1080p.WEB.h264-GRP", SEARCH_CAT_SHOWS, "Sample Show", - season=2026, + season=2031, episode="06/19", - episode_year=2026, - episode_month=6, - episode_day=19, + episode_date=episode_date, ) ) def test_date_numbered_tv_release_rejects_wrong_date(self): + episode_date = date(2031, 6, 19) self.assertFalse( is_valid_release( - "Sample.Show.2026.06.18.1080p.WEB.h264-GRP", + "Sample.Show.2031.06.18.1080p.WEB.h264-GRP", SEARCH_CAT_SHOWS, "Sample Show", - season=2026, + season=2031, episode="06/19", - episode_year=2026, - episode_month=6, - episode_day=19, + episode_date=episode_date, ) ) + def test_date_numbered_tv_release_accepts_verified_imdb_search(self): + episode_date = date(2031, 6, 19) + self.assertTrue( + is_valid_release( + "Sample.Show.2031.06.19.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "tt0000001", + season=2031, + episode="06/19", + episode_date=episode_date, + ) + ) + + def test_parse_episode_date_validates_calendar_date(self): + self.assertEqual(date(2031, 2, 3), parse_episode_date(2031, "02/03")) + self.assertIsNone(parse_episode_date(2031, "02/30")) + self.assertIsNone(parse_episode_date(2031, "2")) + + def test_date_numbering_canonicalizes_generic_scheduled_title(self): + episode_date = date(2031, 2, 3) + self.assertEqual( + "Sample.Monday.Night.Showcase.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "Sample.Showcase.2031.02.03.1080p-GRP", + "Sample Monday Night Showcase", + episode_date, + ), + ) + + def test_wwe_raw_uses_generic_schedule_alias_and_canonical_title(self): + episode_date = date(2031, 2, 3) + search_strings = date_numbering_search_strings( + "WWE Monday Night RAW", episode_date + ) + + self.assertIn("WWE RAW 2031.02.03", search_strings) + self.assertEqual( + "WWE.Monday.Night.RAW.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "WWE.RAW.2031.02.03.1080p-GRP", + "WWE Monday Night RAW", + episode_date, + ), + ) + + def test_wwe_smackdown_uses_generic_schedule_and_case_variants(self): + episode_date = date(2031, 2, 3) + search_strings = date_numbering_search_strings( + "WWE Friday Night SmackDown", episode_date + ) + + self.assertIn("WWE SmackDown 2031.02.03", search_strings) + self.assertIn("WWE Smackdown 2031.02.03", search_strings) + self.assertEqual( + "WWE.Friday.Night.SmackDown.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "WWE.SmackDown.2031.02.03.1080p-GRP", + "WWE Friday Night SmackDown", + episode_date, + ), + ) + if __name__ == "__main__": unittest.main()