diff --git a/quasarr/providers/AGENTS.md b/quasarr/providers/AGENTS.md index 4ede2647..91523298 100644 --- a/quasarr/providers/AGENTS.md +++ b/quasarr/providers/AGENTS.md @@ -20,7 +20,7 @@ The shared-services layer consumed by every other subsystem: cross-process state - `cloudflare.py` — challenge detection, `ensure_session_cf_bypassed`, FlareSolverr get/post/session helpers - `html_templates.py` / `html_images.py` — UI page shell, base64 image constants, and language-flag emoji/SVG fallback assets for setup UI - `hostname_issues.py` — DB-backed source health tracker (`mark_/clear_/get_hostname_issue`) -- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching, online-status checks, `download_package` (the JD linkgrabber submission) +- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching (including shared date-numbering parsing/query/match/canonicalization), online-status checks, `download_package` (the JD linkgrabber submission) - `sessions/` and `notifications/` — see Child DOX Index ## Local Contracts diff --git a/quasarr/providers/utils.py b/quasarr/providers/utils.py index 4e0b7f0e..0696d60a 100644 --- a/quasarr/providers/utils.py +++ b/quasarr/providers/utils.py @@ -1047,12 +1047,160 @@ def match_in_title(title: str, season: int = None, episode: int = None) -> bool: return False +_DATE_NUMBERING_IGNORED_TITLE_WORDS = { + "a", + "an", + "and", + "das", + "der", + "die", + "friday", + "monday", + "night", + "saturday", + "sunday", + "the", + "thursday", + "tuesday", + "wednesday", +} +_DATE_NUMBERING_SCHEDULE_WORDS = { + "friday", + "monday", + "night", + "saturday", + "sunday", + "thursday", + "tuesday", + "wednesday", +} + + +def parse_episode_date(season, episode): + """Return a validated date for Sonarr's year + MM/DD numbering shape.""" + parts = str(episode or "").split("/") + if len(parts) != 2: + return None + + try: + return date(int(season), int(parts[0]), int(parts[1])) + except (TypeError, ValueError): + return None + + +def date_numbering_title_tokens(value): + normalized = replace_umlauts(html.unescape(str(value or ""))).lower() + normalized = re.sub(r"[^a-z0-9]+", " ", normalized) + return { + token + for token in normalized.split() + if token not in _DATE_NUMBERING_IGNORED_TITLE_WORDS + and not re.fullmatch(r"\d+", token) + } + + +def date_numbering_title_matches(title, search_string): + search_tokens = date_numbering_title_tokens(search_string) + if not search_tokens: + return False + return search_tokens.issubset(date_numbering_title_tokens(title)) + + +def date_numbering_release_matches(title, search_string, episode_date): + if episode_date is None: + return False + + date_pattern = re.compile( + rf"(?= 2 and compact_words != words: + title_variants.append(" ".join(compact_words)) + + for value in list(title_variants): + case_variant = _date_numbering_case_variant(value) + if case_variant and case_variant not in title_variants: + title_variants.append(case_variant) + + search_strings = list(title_variants) + for candidate in ( + episode_date, + episode_date - timedelta(days=1), + episode_date + timedelta(days=1), + ): + for title_variant in title_variants: + for date_variant in ( + f"{candidate:%Y %m %d}", + f"{candidate:%Y-%m-%d}", + f"{candidate:%Y.%m.%d}", + ): + value = f"{title_variant} {date_variant}" + if value not in search_strings: + search_strings.append(value) + + return search_strings + + +def canonicalize_date_numbered_title(title, search_string, episode_date): + if is_imdb_id(search_string) or not date_numbering_release_matches( + title, search_string, episode_date + ): + return title + + date_match = re.search( + rf"(? bool: """ Return True if the given release title is valid for the given search parameters. @@ -1061,6 +1209,7 @@ def is_valid_release( - search_string: the original search phrase (could be an IMDb id or plain text) - season: desired season number (or None) - episode: desired episode number (or None) + - episode_date: validated date for a date-numbered TV episode (or None) """ try: is_movie_search = search_category // 1000 * 1000 == SEARCH_CAT_MOVIES @@ -1069,11 +1218,17 @@ def is_valid_release( is_music_search = search_category // 1000 * 1000 == SEARCH_CAT_MUSIC is_xxx_search = search_category // 1000 * 1000 == SEARCH_CAT_XXX - # if search string is NOT an imdb id check search_string_in_sanitized_title - if not match, it is not valid + # if search string is NOT an imdb id, require a title match if not is_docs_search and not is_imdb_id(search_string): - if not search_string_in_sanitized_title(search_string, title): + title_matches = ( + date_numbering_title_matches(title, search_string) + if is_tv_search and episode_date is not None + else search_string_in_sanitized_title(search_string, title) + ) + if not title_matches: trace( - "Skipping {title!r} as it doesn't match sanitized search string: {search_string!r}", + "Skipping {title!r} as it doesn't match sanitized " + "search string: {search_string!r}", title=title, search_string=search_string, ) @@ -1092,6 +1247,18 @@ def is_valid_release( # if it's a TV show search, don't allow any movies (check for season or episode tags in the title) if is_tv_search: + if episode_date is not None: + if not date_numbering_release_matches( + title, search_string, episode_date + ): + trace( + "Skipping {title!r} as it doesn't match date {episode_date}", + title=title, + episode_date=episode_date, + ) + return False + return True + # must have some S/E tag present if not SEASON_EP_REGEX.search(title): trace( @@ -1104,7 +1271,8 @@ def is_valid_release( if season is not None or episode is not None: if not match_in_title(title, season, episode): trace( - "Skipping {title!r} as it doesn't match season {season} and episode {episode}", + "Skipping {title!r} as it doesn't match season " + "{season} and episode {episode}", title=title, season=season, episode=episode, diff --git a/quasarr/providers/version.py b/quasarr/providers/version.py index c9bdbad4..1a8e8177 100644 --- a/quasarr/providers/version.py +++ b/quasarr/providers/version.py @@ -5,7 +5,7 @@ import re import sys -__version__ = "4.6.1" +__version__ = "4.6.2" def get_version(): @@ -127,7 +127,7 @@ def create_version_file(): + str(int(suffix)) + "'),", " StringStruct(u'InternalName', u'Quasarr'),", - " StringStruct(u'LegalCopyright', u'Copyright © RiX'),", + " StringStruct(u'LegalCopyright', u'Copyright \\u00a9 RiX'),", " StringStruct(u'OriginalFilename', u'Quasarr.exe'),", " StringStruct(u'ProductName', u'Quasarr'),", " StringStruct(u'ProductVersion', u'" diff --git a/quasarr/search/AGENTS.md b/quasarr/search/AGENTS.md index e9e0acb0..16a55b94 100644 --- a/quasarr/search/AGENTS.md +++ b/quasarr/search/AGENTS.md @@ -11,7 +11,8 @@ The Newznab-facing search layer: `get_search_results()` fans a single *arr reque ## Local Contracts -- Per-source gating before dispatch: hostname configured, category in `supported_categories`, category whitelist from `get_search_category_sources`, `supports_imdb` for the imdb branch, `supports_phrase` for the phrase branch, `supports_absolute_numbering` when an episode is given without a season. The feed branch checks only hostname/category/whitelist. +- Per-source gating before dispatch: hostname configured, category in `supported_categories`, category whitelist from `get_search_category_sources`, `supports_imdb` for the imdb branch, `supports_phrase` for the phrase branch, `supports_absolute_numbering` when an episode is given without a season, and `supports_date_numbering` for Sonarr's year + `MM/DD` episode shape. The feed branch checks only hostname/category/whitelist. +- Date-numbered requests are parsed once into a validated `datetime.date` and passed to sources as `episode_date`; invalid calendar dates stay on the normal numbering path. - The method names `search` and `feed` are load-bearing — dispatch is `getattr(source, action)`. - Cache TTL is 300s for search, 60s for feed; the key nulls `start_time` and uses the cache-owner category. Cached entries skip execution entirely, so source methods must be safe to skip. - Per-source results are merged, date-sorted descending, title-filtered by `release_matches_search_category`, then offset/limit-sliced; feed responses are never paginated. diff --git a/quasarr/search/__init__.py b/quasarr/search/__init__.py index 5973f162..acdd521f 100644 --- a/quasarr/search/__init__.py +++ b/quasarr/search/__init__.py @@ -44,6 +44,7 @@ def get_search_results( get_search_behavior_category, get_search_cache_owner_category, get_search_capability_category, + parse_episode_date, release_matches_search_category, ) @@ -55,6 +56,8 @@ def get_search_results( if imdb_id: get_imdb_metadata(imdb_id) + episode_date = parse_episode_date(season, episode) + # Determine search category if not provided if not search_category: search_category = determine_search_category(request_from) @@ -106,10 +109,11 @@ def get_search_results( stype += f" S{season}" if episode: stype += f"{'' if season else ' '}E{episode}" + if episode_date: + stype += f" {episode_date:%Y}-{episode_date:%m}-{episode_date:%d}" if base_search_category in [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS]: args = (shared_state, start_time, behavior_search_category) - kwargs = {"search_string": imdb_id, "season": season, "episode": episode} for source in sources.values(): source_logger = get_source_logger(source.initials) @@ -137,6 +141,19 @@ def get_search_results( source_logger.trace("Search with absolute EP number unsupported") continue + kwargs = { + "search_string": imdb_id, + "season": season, + "episode": episode, + } + + if episode_date: + if not source.supports_date_numbering: + source_logger.trace("Search with date unsupported") + continue + + kwargs["episode_date"] = episode_date + search_executor.add( source, args, diff --git a/quasarr/search/sources/AGENTS.md b/quasarr/search/sources/AGENTS.md index 3896cfa3..6a4d91fd 100644 --- a/quasarr/search/sources/AGENTS.md +++ b/quasarr/search/sources/AGENTS.md @@ -11,10 +11,10 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra ## Local Contracts - Registration is by file existence alone: drop `.py` into this folder exposing `class Source(AbstractSearchSource)`. The module filename, `Source.initials`, and the `Config("Hostnames")` key all use the same two-letter key; a same-key download twin exists only when release links need source-specific extraction (FX has none). Adding/renaming a module file changes the Hostnames config key space automatically. -- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller. +- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None, episode_date=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller. `episode_date` is a validated `datetime.date` for Sonarr's year + `MM/DD` numbering shape. - `SearchRelease`: `{"details": {"title", "hostname" (= initials), "imdb_id" (str or None), "link", "size" (bytes), "date" (RFC822 preferred), "source" (original page URL)}, "type": "protected"}` — every emit site uses type `"protected"`. - `details.link` must come from `quasarr.providers.utils.generate_download_link(...)`; the payload is pipe-delimited (`title|url|size_mb|password|imdb_id|source_key`), so field values must not contain `|`. -- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`. +- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `supports_date_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. Date numbering defaults to supported; AL and AT opt out because their anime-specific numbering rewrites require numeric episodes, MX opts out because its API requires numeric season+episode, and movie-only FF is never dispatched for TV dates. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`. - `is_valid_release(...)` is the default validation for each candidate title in `search()` (not `feed()`); AT and AL intentionally deviate with bespoke matching suited to absolute-numbered anime. IMDb convention: if the searched ID and a release-page ID both exist and differ → skip; if the release lacks one → inherit the searched ID. - Call `mark_hostname_issue(self.initials, "feed"|"search", msg)` on fetch/parse errors and `clear_hostname_issue(self.initials)` when releases were produced. - `Source.__init__` must be cheap and never fail — a failure is logged as an error and the source is dropped from the registry. @@ -26,6 +26,7 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra - Timeouts from `constants.FEED_/SEARCH_REQUEST_TIMEOUT_SECONDS`; User-Agent from shared state. Sources without native IMDb search resolve a localized title via `get_localized_title(shared_state, imdb_id, language)` — pass `"de"`, `"en"`, or `"fr"` matching the source site's content language. - Module-private parsing helpers are underscore-prefixed at the module bottom; `size` flows as MB int into `generate_download_link` and as bytes in `details.size`; size 0 is the accepted fallback. - Do not infer payloads or response shapes — the root `Third-Party Source Work` rules require real traffic captures or direct curl confirmation first. +- Date-numbering parsing, title matching, query variants, and canonical title rewriting belong in `quasarr.providers.utils`; source modules only pass `episode_date` through existing verified request/result paths. Keep production logic series-agnostic and add series-specific compatibility cases only as synthetic tests. - A new source adds its entry to the Per-Source Notes below and, when it has a download module, to the notes in `quasarr/downloads/sources/AGENTS.md` — in the same change. ### Per-Source Notes (search side) @@ -37,7 +38,7 @@ Capability flags (`supports_*`, `requires_*`) and categories are class attribute - **BY** — no login. Book/magazine titles run through Magazarr-compatible date/issue normalization; search drops releases without valid resolution/codec (feed keeps the original metadata); per-category fetches use category-ID constants inside the module. - **DD** — login (`providers/sessions/dd`, which applies a fixed quality-profile filter to API responses — new resolutions must be added there). IMDb mismatch between request and API response discards the result; a suspected fake release (the API's `fake` flag) invalidates the cached session. - **DJ** — login (shares the `JUNKIES` credentials section with SJ). IMDb-only; series discovered by HTML scrape to locate a media id, releases then fetched via JSON and aggregated per season block. -- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants. +- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants. Date-numbered thread discovery/pagination is DL-specific, while query variants, title/date matching, and canonicalization use shared generic helpers. - **DT** — no login. Article date parsing assumes a fixed timezone offset; IMDb id parsed from article HTML and propagated; search drops candidates not matching requested resolution/codec (feed keeps them). - **DW** — no login. German month names mapped in a local table (new variants go there); IMDb id read from article HTML validates the result still matches the request. - **FF** — no login, movie-only. Search uses the public title lookup, then opens each movie page to extract IMDb id and the movie-token release API; releases are emitted from API `div.entry` blocks and use the release page URL as the download payload source. Feed reads recent update rows, then cross-references each movie page/API to fill size and IMDb data for the release anchors; cross-reference stops when the source's global feed budget reaches `FEED_REQUEST_TIMEOUT_SECONDS`. diff --git a/quasarr/search/sources/al.py b/quasarr/search/sources/al.py index d6d6a7cc..e4ff3b30 100644 --- a/quasarr/search/sources/al.py +++ b/quasarr/search/sources/al.py @@ -45,6 +45,7 @@ class Source(AbstractSearchSource): requires_flaresolverr = True supports_imdb = True supports_phrase = False + supports_date_numbering = False supports_absolute_numbering = True supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS, SEARCH_CAT_SHOWS_ANIME] requires_login = True diff --git a/quasarr/search/sources/at.py b/quasarr/search/sources/at.py index 01d3da51..38f38bc0 100644 --- a/quasarr/search/sources/at.py +++ b/quasarr/search/sources/at.py @@ -69,6 +69,7 @@ class Source(AbstractSearchSource): language = "en" supports_imdb = True supports_phrase = True + supports_date_numbering = False supports_absolute_numbering = True supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS, SEARCH_CAT_SHOWS_ANIME] diff --git a/quasarr/search/sources/by.py b/quasarr/search/sources/by.py index 2bbe15b4..78a73509 100644 --- a/quasarr/search/sources/by.py +++ b/quasarr/search/sources/by.py @@ -103,6 +103,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: by = shared_state.values["config"]("Hostnames").get(self.initials) password = by @@ -140,6 +141,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: error(f"Error loading search: {e}") @@ -164,6 +166,7 @@ def _parse_posts( search_string=None, season=None, episode=None, + episode_date=None, ): releases = [] @@ -281,7 +284,12 @@ def _parse_posts( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue if XXX_REGEX.search(title) and "xxx" not in search_string.lower(): diff --git a/quasarr/search/sources/dd.py b/quasarr/search/sources/dd.py index 35827c13..1ca663e1 100644 --- a/quasarr/search/sources/dd.py +++ b/quasarr/search/sources/dd.py @@ -54,6 +54,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] dd = shared_state.values["config"]("Hostnames").get(self.initials) @@ -76,7 +77,7 @@ def search( info(f"Could not extract title from IMDb-ID {imdb_id}") return releases search_string = html.unescape(search_string) - if season: + if season and episode_date is None: search_string += f" S{int(season):02d}" if episode: search_string += f"E{int(episode):02d}" @@ -130,7 +131,12 @@ def search( title = release.get("release") if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dj.py b/quasarr/search/sources/dj.py index feca6b66..3824c514 100644 --- a/quasarr/search/sources/dj.py +++ b/quasarr/search/sources/dj.py @@ -131,6 +131,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -220,7 +221,12 @@ def search( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dl.py b/quasarr/search/sources/dl.py index 84dcc750..3d935a02 100644 --- a/quasarr/search/sources/dl.py +++ b/quasarr/search/sources/dl.py @@ -32,6 +32,10 @@ retrieve_and_validate_session, ) from quasarr.providers.utils import ( + canonicalize_date_numbered_title, + date_numbering_release_matches, + date_numbering_search_strings, + date_numbering_title_matches, generate_download_link, get_base_search_category_id, is_imdb_id, @@ -55,6 +59,7 @@ class Source(AbstractSearchSource): SEARCH_CAT_BOOKS, ] requires_login = True + supports_date_numbering = True def feed( self, shared_state: shared_state, start_time: float, search_category: str @@ -190,6 +195,7 @@ def _search_single_page( search_category, season, episode, + episode_date, ): """ Search a single page. This method is called sequentially for each page. @@ -258,11 +264,20 @@ def _search_single_page( title = re.sub(r"\s+", " ", title) title = unescape(title) title_normalized = _normalize_title_for_arr(title) + is_date_thread_candidate = ( + episode_date + and _should_check_thread_for_date_release( + title_normalized, + search_string, + episode_date, + ) + ) # Filter: Skip if no resolution or codec info (unless Magazarr/Lidarr) if base_search_category not in [SEARCH_CAT_BOOKS, SEARCH_CAT_MUSIC]: if not ( - RESOLUTION_REGEX.search(title_normalized) + is_date_thread_candidate + or RESOLUTION_REGEX.search(title_normalized) or CODEC_REGEX.search(title_normalized) ): continue @@ -278,27 +293,46 @@ def _search_single_page( if thread_url.startswith("/"): thread_url = f"https://www.{host}{thread_url}" - if not is_valid_release( + date_release = {} + is_release_valid = is_valid_release( title_normalized, search_category, search_string, season, episode, - ): - continue + episode_date, + ) + if not is_release_valid: + if is_date_thread_candidate: + date_release = _date_release_from_thread( + shared_state, + thread_url, + search_string, + episode_date, + ) + if not date_release: + continue + title_normalized = date_release["title"] + elif episode_date: + title_normalized = canonicalize_date_numbered_title( + title_normalized, + search_string, + episode_date, + ) # Extract date and convert to RFC 2822 format date_elem = item.select_one("time.u-dt") iso_date = date_elem.get("datetime", "") if date_elem else "" published = _convert_to_rss_date(iso_date) - mb = 0 + mb = date_release.get("mb", 0) password = "" + source_url = date_release.get("source", thread_url) link = generate_download_link( shared_state, title_normalized, - thread_url, + source_url, mb, password, imdb_id or "", @@ -314,7 +348,7 @@ def _search_single_page( "link": link, "size": mb * 1024 * 1024, "date": published, - "source": thread_url, + "source": source_url, }, "type": "protected", } @@ -355,6 +389,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search with sequential pagination to find best quality releases. @@ -375,10 +410,20 @@ def search( search_string += f" {year}" search_string = unescape(search_string) - max_search_duration = 7 + search_strings = ( + date_numbering_search_strings( + search_string, + episode_date, + ) + if episode_date + else [search_string] + ) + max_search_duration = 15 if episode_date else 7 trace( - f"Starting sequential paginated search for '{search_string}' (Season: {season}, Episode: {episode}) - max {max_search_duration}s" + f"Starting sequential paginated search for '{search_string}' " + f"(Season: {season}, Episode: {episode}) - " + f"max {max_search_duration}s" ) try: @@ -387,52 +432,64 @@ def search( warn(f"Could not retrieve valid session for {host}") return releases - search_id = None - page_num = 0 search_start_time = time.time() - release_titles_per_page = set() + seen_release_titles = set() - # Sequential search through pages until timeout or no results - while (time.time() - search_start_time) < max_search_duration: - page_num += 1 + for current_search_string in search_strings: + search_id = None + page_num = 0 + release_titles_per_page = set() - page_releases, extracted_search_id = self._search_single_page( - shared_state, - host, - search_string, - search_id, - page_num, - imdb_id, - search_category, - season, - episode, - ) + # Sequential search through pages until timeout or no results + while (time.time() - search_start_time) < max_search_duration: + page_num += 1 - page_release_titles = tuple( - pr["details"]["title"] for pr in page_releases - ) - if page_release_titles in release_titles_per_page: - trace(f"[Page {page_num}] duplicate page detected, stopping") - break - release_titles_per_page.add(page_release_titles) - - # Update search_id from first page - if page_num == 1: - search_id = extracted_search_id - if not search_id: - trace("Could not extract search ID, stopping pagination") + page_releases, extracted_search_id = self._search_single_page( + shared_state, + host, + current_search_string, + search_id, + page_num, + imdb_id, + search_category, + season, + episode, + episode_date, + ) + + page_release_titles = tuple( + pr["details"]["title"] for pr in page_releases + ) + if page_release_titles in release_titles_per_page: + trace(f"[Page {page_num}] duplicate page detected, stopping") break + release_titles_per_page.add(page_release_titles) + + # Update search_id from first page + if page_num == 1: + search_id = extracted_search_id + if not search_id: + trace("Could not extract search ID, stopping pagination") + break + + for release in page_releases: + release_title = release["details"]["title"] + dedupe_key = release_title.strip().casefold() + if dedupe_key in seen_release_titles: + continue + seen_release_titles.add(dedupe_key) + releases.append(release) - # Add releases from this page - releases.extend(page_releases) - trace( - f"[Page {page_num}] completed with {len(page_releases)} valid releases" - ) + trace( + f"[Page {page_num}] completed with {len(page_releases)} valid releases" + ) - # Stop if this page returned 0 results - if len(page_releases) == 0: - trace(f"[Page {page_num}] returned 0 results, stopping pagination") - break + # Stop if this page returned 0 results + if len(page_releases) == 0: + trace( + f"[Page {page_num}] returned 0 results, stopping pagination" + ) + break except Exception as e: info(f"search error: {e}") @@ -482,6 +539,23 @@ def _normalize_title_for_arr(title): return title +def _should_check_thread_for_date_release(title, search_string=None, episode_date=None): + normalized = replace_umlauts(unescape(str(title or ""))).lower() + normalized = re.sub(r"[^a-z0-9]+", " ", normalized) + tokens = set(normalized.split()) + + if episode_date and str(episode_date.year) not in tokens: + return False + + if not search_string: + return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) + + if not date_numbering_title_matches(title, search_string): + return False + + return bool(re.search(r"\b(?:19|20)\d{2}\b", normalized)) + + def _is_current_year_jahresthema_thread(title, search_string, base_search_category): if base_search_category != SEARCH_CAT_BOOKS: return False @@ -510,7 +584,7 @@ def _magazine_title_matches(search_string, title): def _magazine_match_tokens(text): text = replace_umlauts(unescape(str(text or ""))).lower() - text = re.sub(r"\bc\s*['`´’]?\s*t\b", "ct", text) + text = re.sub(r"\bc\s*['`\u00b4\u2019]?\s*t\b", "ct", text) text = re.sub(r"[^a-z0-9]+", " ", text) ignored = { @@ -612,6 +686,116 @@ def _fetch_thread_page(shared_state, page_url): return response +def _date_release_from_thread( + shared_state, + thread_url, + search_string, + episode_date, +): + if episode_date is None: + return {} + + first_page = _fetch_thread_page(shared_state, thread_url) + if first_page is None: + return {} + + last_page = _extract_last_thread_page(first_page.text) + start_page = max(1, last_page - 4) + page_numbers = [1, *range(start_page, last_page + 1)] + page_numbers = list(dict.fromkeys(page_numbers)) + + for page_num in page_numbers: + page_url = ( + thread_url + if page_num == 1 + else _thread_page_url( + thread_url, + page_num, + ) + ) + response = ( + first_page + if page_num == 1 + else _fetch_thread_page( + shared_state, + page_url, + ) + ) + if response is None: + continue + + soup = BeautifulSoup(response.text, "html.parser") + for post in soup.select("article.message--post"): + title = _date_release_title_from_post(post) + if not title: + continue + if date_numbering_release_matches(title, search_string, episode_date): + arr_title = canonicalize_date_numbered_title( + title, search_string, episode_date + ) + source = thread_url + if _post_contains_supported_download(post): + source = _post_url(page_url, post) + return { + "title": arr_title, + "mb": _date_release_size_mb_from_post(post), + "source": source, + } + + return {} + + +def _date_release_title_from_post(post): + content = _own_message_content(post) + text = content.get_text("\n", strip=True) + lines = text.splitlines() + + for index, line in enumerate(lines): + stripped = line.strip() + match = re.match(r"(?i)^(?:title|titel)\s*:\s*(.+)$", stripped) + if match: + return _normalize_title_for_arr(_clean_issue_title(match.group(1))) + + if stripped.lower() in {"title:", "titel:"} and index + 1 < len(lines): + return _normalize_title_for_arr(_clean_issue_title(lines[index + 1])) + + text_flat = " ".join(lines) + match = re.search( + r"(?i)\b([A-Z0-9][A-Z0-9.\s'&-]{1,120}?" + r"[.\s]+(?:19|20)\d{2}[.\s]+\d{2}[.\s]+\d{2}" + r".{0,120}?)\b", + text_flat, + ) + if match: + return _normalize_title_for_arr(_clean_issue_title(match.group(1))) + + return "" + + +def _date_release_size_mb_from_post(post): + content = _own_message_content(post) + text = content.get_text("\n", strip=True) + match = re.search( + r"(?i)\b(?:size|gr\u00f6\u00dfe|groesse|grosse)\s*:\s*" + r"(\d+(?:[.,]\d+)?)\s*([kmgt]i?b|[kmgt]b)\b", + text, + ) + if not match: + return 0 + + size = float(match.group(1).replace(",", ".")) + unit = match.group(2).lower() + if unit.startswith("k"): + return round(size / 1024) + if unit.startswith("m"): + return round(size) + if unit.startswith("g"): + return round(size * 1024) + if unit.startswith("t"): + return round(size * 1024 * 1024) + return 0 + + def _extract_last_thread_page(html): soup = BeautifulSoup(html, "html.parser") page_numbers = [1] @@ -796,7 +980,7 @@ def _looks_like_issue_title(title, search_string): return False if re.search( - r"\b(?:download|mirror|passwort|password|size|groesse|grosse|größe|mb|gb)\b", + r"\b(?:download|mirror|passwort|password|size|groesse|grosse|gr\u00f6\u00dfe|mb|gb)\b", replace_umlauts(title_lower), ): return False diff --git a/quasarr/search/sources/dt.py b/quasarr/search/sources/dt.py index a546f6a0..81dd7727 100644 --- a/quasarr/search/sources/dt.py +++ b/quasarr/search/sources/dt.py @@ -174,6 +174,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] dt = shared_state.values["config"]("Hostnames").get(self.initials) @@ -250,7 +251,12 @@ def search( ) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/dw.py b/quasarr/search/sources/dw.py index 89ec40a8..1c2f3cde 100644 --- a/quasarr/search/sources/dw.py +++ b/quasarr/search/sources/dw.py @@ -139,6 +139,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] dw = shared_state.values["config"]("Hostnames").get(self.initials) @@ -184,7 +185,12 @@ def search( title = result.a.text.strip() if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/ff.py b/quasarr/search/sources/ff.py index d63a7965..d68fbab8 100644 --- a/quasarr/search/sources/ff.py +++ b/quasarr/search/sources/ff.py @@ -174,9 +174,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, - episode_year: int = None, - episode_month: int = None, - episode_day: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) diff --git a/quasarr/search/sources/fx.py b/quasarr/search/sources/fx.py index 68f153ca..5ca028c5 100644 --- a/quasarr/search/sources/fx.py +++ b/quasarr/search/sources/fx.py @@ -159,6 +159,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] fx = shared_state.values["config"]("Hostnames").get(self.initials) @@ -228,7 +229,12 @@ def search( title = sanitize_title(title.text) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/he.py b/quasarr/search/sources/he.py index 23b1c4fc..6ab90a8f 100644 --- a/quasarr/search/sources/he.py +++ b/quasarr/search/sources/he.py @@ -52,6 +52,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) @@ -92,7 +93,7 @@ def search( search_type = "search" timeout = SEARCH_REQUEST_TIMEOUT_SECONDS - if season: + if season and episode_date is None: source_search += f" S{int(season):02d}" if episode: @@ -142,7 +143,12 @@ def search( title = head_split[0].strip() if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): trace("invalid release {}", title) continue diff --git a/quasarr/search/sources/helpers/search_source.py b/quasarr/search/sources/helpers/search_source.py index cf51608c..34df7c19 100644 --- a/quasarr/search/sources/helpers/search_source.py +++ b/quasarr/search/sources/helpers/search_source.py @@ -30,6 +30,10 @@ def supports_phrase(self) -> bool: def supports_absolute_numbering(self) -> bool: return False + @property + def supports_date_numbering(self) -> bool: + return True + @property @abstractmethod def supported_categories(self) -> list[int]: @@ -71,6 +75,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: pass diff --git a/quasarr/search/sources/hs.py b/quasarr/search/sources/hs.py index f93c81cb..42f5949c 100644 --- a/quasarr/search/sources/hs.py +++ b/quasarr/search/sources/hs.py @@ -149,6 +149,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """Search HS for releases by IMDb ID""" releases = [] @@ -183,6 +184,7 @@ def search( search_string, season, episode, + episode_date, ) except Exception as e: @@ -207,6 +209,7 @@ def _parse_search_results( search_string, season, episode, + episode_date, ): """Parse search results page and extract releases with filecrypt links. @@ -290,7 +293,12 @@ def _parse_search_results( for title in unique_episodes: # Validate release against search criteria if not is_valid_release( - title, base_search_category, search_string, season, episode + title, + base_search_category, + search_string, + season, + episode, + episode_date, ): continue @@ -328,7 +336,12 @@ def _parse_search_results( # Also add the main title (season pack) with full size - if not duplicate if main_title.lower() not in seen: if is_valid_release( - main_title, base_search_category, search_string, season, episode + main_title, + base_search_category, + search_string, + season, + episode, + episode_date, ): link = generate_download_link( shared_state, diff --git a/quasarr/search/sources/mb.py b/quasarr/search/sources/mb.py index b8a99a1e..cc00c7b7 100644 --- a/quasarr/search/sources/mb.py +++ b/quasarr/search/sources/mb.py @@ -52,6 +52,7 @@ def _parse_posts( search_string=None, season=None, episode=None, + episode_date=None, ): releases = [] one_hour_ago = (datetime.now() - timedelta(hours=1)).strftime( @@ -85,7 +86,12 @@ def _parse_posts( if is_search: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue @@ -198,6 +204,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: mb = shared_state.values["config"]("Hostnames").get(self.initials) @@ -226,6 +233,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: warn(f"Error loading search: {e}") diff --git a/quasarr/search/sources/mx.py b/quasarr/search/sources/mx.py index 62bde7ba..e94faae7 100644 --- a/quasarr/search/sources/mx.py +++ b/quasarr/search/sources/mx.py @@ -51,6 +51,7 @@ class Source(AbstractSearchSource): language = "fr" supports_imdb = True supports_phrase = False + supports_date_numbering = False supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS] # The movie feed reads Radarr and the show feed reads Sonarr (ID search # needs neither). Setup prompts remain source-wide, but feed() degrades diff --git a/quasarr/search/sources/nk.py b/quasarr/search/sources/nk.py index f03709cf..15b27664 100644 --- a/quasarr/search/sources/nk.py +++ b/quasarr/search/sources/nk.py @@ -51,6 +51,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] host = shared_state.values["config"]("Hostnames").get(self.initials) @@ -81,7 +82,7 @@ def search( search_type = "search" timeout = SEARCH_REQUEST_TIMEOUT_SECONDS - if season: + if season and episode_date is None: source_search += f" S{int(season):02d}" if episode: @@ -137,7 +138,12 @@ def search( release_imdb_id = imdb_id if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/nx.py b/quasarr/search/sources/nx.py index 4615e478..7eb0678e 100644 --- a/quasarr/search/sources/nx.py +++ b/quasarr/search/sources/nx.py @@ -155,6 +155,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search using internal API. @@ -212,7 +213,12 @@ def search( title = item["name"] if title: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/rm.py b/quasarr/search/sources/rm.py index 9d0568de..92b7af25 100644 --- a/quasarr/search/sources/rm.py +++ b/quasarr/search/sources/rm.py @@ -98,6 +98,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] match_search_string = search_string @@ -141,6 +142,7 @@ def search( imdb_id=imdb_id, season=season, episode=episode, + episode_date=episode_date, ) except Exception as e: warn(f"Error loading search: {e}") @@ -296,8 +298,19 @@ def _matches_requested_release( search_string, season=None, episode=None, + episode_date=None, ): base_search_category = get_base_search_category_id(search_category) + if episode_date is not None: + return is_valid_release( + title, + search_category, + search_string, + season, + episode, + episode_date, + ) + if base_search_category != SEARCH_CAT_SHOWS: return is_valid_release(title, search_category, search_string, season, episode) @@ -345,6 +358,7 @@ def _build_search_results( imdb_id, season=None, episode=None, + episode_date=None, is_feed=False, ): base_url = _get_base_url(shared_state) @@ -370,6 +384,7 @@ def _build_search_results( search_string, season, episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sf.py b/quasarr/search/sources/sf.py index d97a2c3b..3965afeb 100644 --- a/quasarr/search/sources/sf.py +++ b/quasarr/search/sources/sf.py @@ -147,6 +147,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] sf = shared_state.values["config"]("Hostnames").get(self.initials) @@ -310,7 +311,7 @@ def search( debug(f"Error extracting size for {title}: {e}") mb = 0 - if episode: + if episode and episode_date is None: try: if not re.search(r"S\d{1,3}E\d{1,3}", title): episodes_in_release = len(mirrors["episodes"]) @@ -354,7 +355,12 @@ def search( # check down here on purpose, because the title may be modified at episode stage if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sj.py b/quasarr/search/sources/sj.py index d8380da0..bb288730 100644 --- a/quasarr/search/sources/sj.py +++ b/quasarr/search/sources/sj.py @@ -132,6 +132,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -222,7 +223,12 @@ def search( continue if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/sl.py b/quasarr/search/sources/sl.py index 2958f24f..1f1df8ee 100644 --- a/quasarr/search/sources/sl.py +++ b/quasarr/search/sources/sl.py @@ -177,6 +177,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] @@ -271,7 +272,12 @@ def fetch(url): title = a.get_text(strip=True) if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/wd.py b/quasarr/search/sources/wd.py index 00b977ca..012cfb5d 100644 --- a/quasarr/search/sources/wd.py +++ b/quasarr/search/sources/wd.py @@ -132,6 +132,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: releases = [] wd = shared_state.values["config"]("Hostnames").get(self.initials) @@ -196,6 +197,7 @@ def search( search_string=search_string, season=season, episode=episode, + episode_date=episode_date, imdb_id=imdb_id, ) except Exception as e: @@ -220,6 +222,7 @@ def _parse_rows( search_string=None, season=None, episode=None, + episode_date=None, imdb_id=None, ): """ @@ -261,7 +264,12 @@ def _parse_rows( # search context contains non-video releases (ebooks, games, etc.) if is_search: if not is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): continue diff --git a/quasarr/search/sources/wx.py b/quasarr/search/sources/wx.py index 007f2ad2..900faa10 100644 --- a/quasarr/search/sources/wx.py +++ b/quasarr/search/sources/wx.py @@ -168,6 +168,7 @@ def search( search_string: str = "", season: int = None, episode: int = None, + episode_date=None, ) -> list[SearchRelease]: """ Search using internal API. @@ -301,7 +302,12 @@ def search( title = title.replace(" ", ".") if is_valid_release( - title, search_category, search_string, season, episode + title, + search_category, + search_string, + season, + episode, + episode_date, ): # Skip if we've already seen this exact title if title in seen_titles: @@ -364,6 +370,7 @@ def search( search_string, season, episode, + episode_date, ): continue diff --git a/tests/AGENTS.md b/tests/AGENTS.md index 35d70975..2ba5900b 100644 --- a/tests/AGENTS.md +++ b/tests/AGENTS.md @@ -14,6 +14,7 @@ Hermetic unit tests for Quasarr, built exclusively on the standard-library `unit - Full-suite command: `uv run python -X utf8 -m unittest discover -s tests` (the `-X utf8` flag avoids Windows console encoding noise in log output). - Tests must not perform network I/O or touch JDownloader. Patch in the consuming module's namespace (e.g. `quasarr.downloads.sources..requests.Session`), not the `requests` library globally. Only `test_sqlite_database.py` touches disk, via `tempfile.TemporaryDirectory`. - Synthetic-data rule (security-critical): source hostnames in tests are fake domains on the reserved `.invalid` TLD; use synthetic release titles (never paste real ones). Real public hoster/crypter domains are permitted only where the production matching logic keys on those literal domains — they are hoster/crypter services, not protected sources. +- Date-numbering regression tests may name a motivating real series to prove title-alias compatibility, but every complete release string, date, quality/group suffix, hostname, and URL remains synthetic. - `shared_state` is always faked (MagicMock with a `.values` dict, SimpleNamespace, or a small local class whose `values["config"]` is a callable returning dicts) — except `test_sqlite_database.py`, which mutates the real module in `setUp`. - There is no fixtures directory and no shared test-helpers module: each file defines its own `FakeResponse`/`FakeSession`/fake shared_state inline. - Run the full suite after touching shared providers, download flow, search behavior, or notification logic. Per root change discipline, tests change only when the intended behavior in the covered area changed or the existing test is incorrect. diff --git a/tests/test_dl_jahresthema.py b/tests/test_dl_jahresthema.py index 0310cbe1..09bf5737 100644 --- a/tests/test_dl_jahresthema.py +++ b/tests/test_dl_jahresthema.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import unittest -from datetime import datetime +from datetime import date, datetime from unittest.mock import patch from bs4 import BeautifulSoup @@ -12,10 +12,12 @@ Source as SearchSource, ) from quasarr.search.sources.dl import ( + _date_release_from_thread, _expand_jahresthema_thread_releases, _is_current_year_jahresthema_thread, _post_contains_supported_download, _release_from_jahresthema_post, + _should_check_thread_for_date_release, ) @@ -45,6 +47,172 @@ def config(self, section): class DlJahresthemaSearchTests(unittest.TestCase): + def test_date_thread_candidate_uses_search_tokens_without_release_group_lock(self): + self.assertTrue( + _should_check_thread_for_date_release( + "Sample Show 2026 Collection", + "Sample Show", + date(2026, 6, 19), + ) + ) + + def test_date_thread_candidate_rejects_unrelated_series(self): + self.assertFalse( + _should_check_thread_for_date_release( + "Other Show 2026 Collection", + "Sample Show", + date(2026, 6, 19), + ) + ) + + def test_date_release_from_thread_uses_post_url_only_for_downloadable_post(self): + html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

Metadata only.

+
+
+
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse(html, "https://www.source.invalid/thread.1/"), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + date(2026, 6, 19), + ) + + self.assertEqual( + "https://www.source.invalid/thread.1/", + release["source"], + ) + + def test_date_release_from_thread_pins_downloadable_post(self): + html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse(html, "https://www.source.invalid/thread.1/"), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + date(2026, 6, 19), + ) + + self.assertEqual( + "https://www.source.invalid/thread.1/#post-2", + release["source"], + ) + + def test_date_release_from_thread_scans_recent_thread_pages(self): + first_page_html = """ + + 2 +
+
+

Title: Sample.Show.2026.06.12.1080p.WEB.h264-GRP

+

https://ddownload.com/old-example

+
+
+ + """ + second_page_html = """ +
+
+

Title: Sample.Show.2026.06.19.1080p.WEB.h264-GRP

+

https://ddownload.com/example

+
+
+ """ + fetched_thread_urls = [] + + def fake_fetch(_shared_state, page_url): + fetched_thread_urls.append(page_url) + if page_url.endswith("/thread.1/"): + return FakeResponse(first_page_html, page_url) + if page_url.endswith("/thread.1/page-2"): + return FakeResponse(second_page_html, page_url) + raise AssertionError(f"unexpected fetch: {page_url}") + + with patch("quasarr.search.sources.dl._fetch_thread_page", fake_fetch): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + "Sample Show", + date(2026, 6, 19), + ) + + self.assertEqual( + [ + "https://www.source.invalid/thread.1/", + "https://www.source.invalid/thread.1/page-2", + ], + fetched_thread_urls, + ) + self.assertEqual( + "https://www.source.invalid/thread.1/page-2#post-2", + release["source"], + ) + + def test_date_release_from_thread_finds_wwe_scheduled_series_generically(self): + episode_date = date(2031, 2, 3) + cases = ( + ( + "WWE Monday Night RAW", + "WWE.RAW.2031.02.03.1080p.WEB.h264-GRP", + "WWE.Monday.Night.RAW.2031.02.03.1080p.WEB.h264-GRP", + ), + ( + "WWE Friday Night SmackDown", + "WWE.SmackDown.2031.02.03.1080p.WEB.h264-GRP", + "WWE.Friday.Night.SmackDown.2031.02.03.1080p.WEB.h264-GRP", + ), + ) + + for search_string, posted_title, expected_title in cases: + with self.subTest(search_string=search_string): + html = f""" +
+
+

Title: {posted_title}

+

https://ddownload.com/example

+
+
+ """ + with patch( + "quasarr.search.sources.dl._fetch_thread_page", + return_value=FakeResponse( + html, "https://www.source.invalid/thread.1/" + ), + ): + release = _date_release_from_thread( + FakeSharedState(), + "https://www.source.invalid/thread.1/", + search_string, + episode_date, + ) + + self.assertEqual(expected_title, release["title"]) + def test_matches_compact_ct_style_spelling(self): current_year = datetime.now().year diff --git a/tests/test_hostname_capabilities.py b/tests/test_hostname_capabilities.py index ec8c2186..4e7b24d0 100644 --- a/tests/test_hostname_capabilities.py +++ b/tests/test_hostname_capabilities.py @@ -1,5 +1,7 @@ +import inspect import unittest +from quasarr.constants import SEARCH_CAT_SHOWS from quasarr.providers.html_images import FLAG_SVGS, LANGUAGE_FLAG_EMOJI from quasarr.search.sources import get_sources from quasarr.search.sources.helpers import get_source_metadata @@ -30,6 +32,48 @@ def test_flag_assets_cover_every_used_language(self): self.assertIn(language, LANGUAGE_FLAG_EMOJI) self.assertIn(language, FLAG_SVGS) + def test_date_numbering_sources_accept_shared_date_context(self): + for key, source in get_sources().items(): + if ( + SEARCH_CAT_SHOWS not in source.supported_categories + or not source.supports_date_numbering + ): + continue + with self.subTest(source=key): + self.assertIn( + "episode_date", + inspect.signature(source.search).parameters, + ) + + def test_date_numbering_enabled_for_all_compatible_tv_sources(self): + expected = { + "by", + "dd", + "dj", + "dl", + "dt", + "dw", + "fx", + "he", + "hs", + "mb", + "nk", + "nx", + "rm", + "sf", + "sj", + "sl", + "wd", + "wx", + } + actual = { + key + for key, source in get_sources().items() + if SEARCH_CAT_SHOWS in source.supported_categories + and source.supports_date_numbering + } + self.assertEqual(expected, actual) + class SourceMetadataTests(unittest.TestCase): def test_metadata_exposes_expected_keys_for_every_source(self): diff --git a/tests/test_utils_release_matching.py b/tests/test_utils_release_matching.py index edfef14b..f104d0ff 100644 --- a/tests/test_utils_release_matching.py +++ b/tests/test_utils_release_matching.py @@ -1,8 +1,16 @@ # -*- coding: utf-8 -*- import unittest +from datetime import date -from quasarr.providers.utils import normalize_optional_int +from quasarr.constants import SEARCH_CAT_SHOWS +from quasarr.providers.utils import ( + canonicalize_date_numbered_title, + date_numbering_search_strings, + is_valid_release, + normalize_optional_int, + parse_episode_date, +) class ReleaseMatchingUtilsTests(unittest.TestCase): @@ -12,6 +20,94 @@ def test_normalize_optional_int_returns_none_for_empty_string(self): def test_normalize_optional_int_parses_numbers(self): self.assertEqual(4, normalize_optional_int("4")) + def test_date_numbered_tv_release_matches_date_components(self): + episode_date = date(2031, 6, 19) + self.assertTrue( + is_valid_release( + "Sample.Show.2031.06.19.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "Sample Show", + season=2031, + episode="06/19", + episode_date=episode_date, + ) + ) + + def test_date_numbered_tv_release_rejects_wrong_date(self): + episode_date = date(2031, 6, 19) + self.assertFalse( + is_valid_release( + "Sample.Show.2031.06.18.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "Sample Show", + season=2031, + episode="06/19", + episode_date=episode_date, + ) + ) + + def test_date_numbered_tv_release_accepts_verified_imdb_search(self): + episode_date = date(2031, 6, 19) + self.assertTrue( + is_valid_release( + "Sample.Show.2031.06.19.1080p.WEB.h264-GRP", + SEARCH_CAT_SHOWS, + "tt0000001", + season=2031, + episode="06/19", + episode_date=episode_date, + ) + ) + + def test_parse_episode_date_validates_calendar_date(self): + self.assertEqual(date(2031, 2, 3), parse_episode_date(2031, "02/03")) + self.assertIsNone(parse_episode_date(2031, "02/30")) + self.assertIsNone(parse_episode_date(2031, "2")) + + def test_date_numbering_canonicalizes_generic_scheduled_title(self): + episode_date = date(2031, 2, 3) + self.assertEqual( + "Sample.Monday.Night.Showcase.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "Sample.Showcase.2031.02.03.1080p-GRP", + "Sample Monday Night Showcase", + episode_date, + ), + ) + + def test_wwe_raw_uses_generic_schedule_alias_and_canonical_title(self): + episode_date = date(2031, 2, 3) + search_strings = date_numbering_search_strings( + "WWE Monday Night RAW", episode_date + ) + + self.assertIn("WWE RAW 2031.02.03", search_strings) + self.assertEqual( + "WWE.Monday.Night.RAW.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "WWE.RAW.2031.02.03.1080p-GRP", + "WWE Monday Night RAW", + episode_date, + ), + ) + + def test_wwe_smackdown_uses_generic_schedule_and_case_variants(self): + episode_date = date(2031, 2, 3) + search_strings = date_numbering_search_strings( + "WWE Friday Night SmackDown", episode_date + ) + + self.assertIn("WWE SmackDown 2031.02.03", search_strings) + self.assertIn("WWE Smackdown 2031.02.03", search_strings) + self.assertEqual( + "WWE.Friday.Night.SmackDown.2031.02.03.1080p-GRP", + canonicalize_date_numbered_title( + "WWE.SmackDown.2031.02.03.1080p-GRP", + "WWE Friday Night SmackDown", + episode_date, + ), + ) + if __name__ == "__main__": unittest.main() diff --git a/uv.lock b/uv.lock index af16e639..0f5ed06b 100644 --- a/uv.lock +++ b/uv.lock @@ -653,27 +653,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.19" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d5/e6/15800dfde183a1a106594016c912b4c12d050a301989d1aca6cb63759fe8/ruff-0.15.19.tar.gz", hash = "sha256:edc27f7172a93b32b102687009d6a588508815072141543ae603a8b9b0823063", size = 4772071, upload-time = "2026-06-24T01:10:46.942Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/4c/9ded7626c39a0440c575bf69e2bf500d443388272c842662c59852ee7fcd/ruff-0.15.19-py3-none-linux_armv6l.whl", hash = "sha256:922d1eb283161564759bd49f507e91dc6112c15da8bd5b84ed714e086243cf86", size = 10950859, upload-time = "2026-06-24T01:10:38.491Z" }, - { url = "https://files.pythonhosted.org/packages/fb/ef/c211505ece1d00ef493d58e54e3b6383c946a21e9874774eb531f2512cf3/ruff-0.15.19-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4d190d8f62a0b94aba8f721116538a9ee29b1e74d26650846ba9b99f0ae21c40", size = 11294529, upload-time = "2026-06-24T01:10:36.481Z" }, - { url = "https://files.pythonhosted.org/packages/fe/93/78d462e7d39968e58094dc57be7d09ffb14ce37da5b68ed70338a35a1f21/ruff-0.15.19-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5a2c86ba6870dd415a9d9eb8be94d7924ebec6a26ffc7958ec7ca29d4bff967d", size = 10641416, upload-time = "2026-06-24T01:10:48.923Z" }, - { url = "https://files.pythonhosted.org/packages/76/c4/5cb66cfd1f865d5cca908b86c93ac785e7f572193d3c7426079ca6643e24/ruff-0.15.19-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82b432bc087264aea70fd25ac198918b70bd9e2aa0db4297b0bb91bbfbbc63ce", size = 11015582, upload-time = "2026-06-24T01:10:30.089Z" }, - { url = "https://files.pythonhosted.org/packages/51/9f/8ecfaec10cf5eecd28fbc00ff4fb867db90a1be54bf3d39ebf93f893cd52/ruff-0.15.19-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8530a09d03b3a8c994f8b559a7dcdabc690bcd3f78ef276c38c83166798ebf56", size = 10744059, upload-time = "2026-06-24T01:10:32.48Z" }, - { url = "https://files.pythonhosted.org/packages/35/6b/983249d04562bc2d590edd75f32455cdb473affb3ba4bc8d883e939c697d/ruff-0.15.19-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87bf21fb3875fe69f0eacc825411657e2e85589cce633c35c0adf1113649c62b", size = 11568461, upload-time = "2026-06-24T01:10:17.435Z" }, - { url = "https://files.pythonhosted.org/packages/eb/39/bc7794f127b18f492a3b4ee82bba5a900c985ff13b72b46f46e3c171ba34/ruff-0.15.19-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9b229cb3ef56ecc2c1c8ebeca64b7a7740ccaef40a9eb097e78dde5a8560b83", size = 12429690, upload-time = "2026-06-24T01:10:40.638Z" }, - { url = "https://files.pythonhosted.org/packages/0a/3b/0de6859e698ed11c8a49e765196c8d333599b6a546c0715df39b6ba1aa2e/ruff-0.15.19-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c754515be7b76afe6e7e62df7776709571bcfc1631183828afcf3bafa869e3", size = 11693067, upload-time = "2026-06-24T01:10:25.681Z" }, - { url = "https://files.pythonhosted.org/packages/89/3d/0b1f30f84bee9ae6ae8d349c2ba8b6f4b040966744efdd3acc804ae7c024/ruff-0.15.19-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a498f82e0f4d8904c4e0aea5139cdfac1f39d19a3c51d491292f63a36e83b2e", size = 11616911, upload-time = "2026-06-24T01:10:44.809Z" }, - { url = "https://files.pythonhosted.org/packages/4d/eb/c90bd3dfc12eed9032c2c1bfe05105b93a1b2c8bce555db6308315b853ce/ruff-0.15.19-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:d48caa34488fb521fd0ef4aea2b0e8fe758298df044138f0d67b687a6a0d07ed", size = 11649343, upload-time = "2026-06-24T01:10:23.472Z" }, - { url = "https://files.pythonhosted.org/packages/82/91/01caa13602a2f12fae5edbe8caf78b3c1e6db1293132aee6959eecce095c/ruff-0.15.19-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4171b6613effa9363cd46dd4f75bd1827b6d1b946b5e278ed0c600d305379445", size = 10977610, upload-time = "2026-06-24T01:10:50.892Z" }, - { url = "https://files.pythonhosted.org/packages/3c/51/acb817922feab9ecbb3201377d4dbe7a25f1395e46545820061973f03468/ruff-0.15.19-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:27c15b2a241dd4d995557949a094fe78b8ad99122a38ccae1595849bcc947b3f", size = 10744900, upload-time = "2026-06-24T01:10:42.726Z" }, - { url = "https://files.pythonhosted.org/packages/84/bc/5c8ca46b8a7a3f2b16cfbec88721d772b1c93912904e8f8c2e49470fea63/ruff-0.15.19-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ed03b7862d68f0a8771d50ee129980cbf1b113f96e250b73954bc292f689e0bb", size = 11293560, upload-time = "2026-06-24T01:10:21.262Z" }, - { url = "https://files.pythonhosted.org/packages/81/e0/4a888cbe4d5523b3f77a2b1fa043f46cfeba1b32eac35dcfadee0578fa8a/ruff-0.15.19-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:08143f0685ae278b30727ea72e90c61e5bd9c31b91aac4f5bb989538f73d24b8", size = 11696533, upload-time = "2026-06-24T01:10:53.046Z" }, - { url = "https://files.pythonhosted.org/packages/98/43/c34b2fcd79262a85161764a97aaca89c3e4f574340ab61430cefa2bdd2c1/ruff-0.15.19-py3-none-win32.whl", hash = "sha256:8f47f0f92952af2557212bb10cf3e695cd4cf28b2c6e42cdb18ec6c9ebfa19da", size = 10986299, upload-time = "2026-06-24T01:10:55.185Z" }, - { url = "https://files.pythonhosted.org/packages/22/e8/15fd23e02b2442b56b2026b455977bc3057aa34b26e6323d1e99e8531a9f/ruff-0.15.19-py3-none-win_amd64.whl", hash = "sha256:efeca47ee3f9d4a7162655a3b8e6ee4a878646044233978d4d2c1ff8cdd914f0", size = 12123473, upload-time = "2026-06-24T01:10:27.74Z" }, - { url = "https://files.pythonhosted.org/packages/30/66/9a73695e31eaee04f35d8475998bf8ab354465f9c638936d76111603dcc5/ruff-0.15.19-py3-none-win_arm64.whl", hash = "sha256:6c6b607466e47349332eb1d9be52fb1467423fc07c217341af41cd0f3f0573be", size = 11376779, upload-time = "2026-06-24T01:10:34.465Z" }, +version = "0.15.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/dc/35b341fc554ba02f217fc10da57d1a75168cfbcf75b0ef2202176d4c4f2d/ruff-0.15.20.tar.gz", hash = "sha256:1416eb04349192646b54de98f146c4f59afe37d0decfc02c3cbbf396f3a28566", size = 4755489, upload-time = "2026-06-25T17:20:37.578Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/d9/2d5014f0253ba541d2061d9fa7193f48e941c8b21bb88a7ff9bbe0bd0596/ruff-0.15.20-py3-none-linux_armv6l.whl", hash = "sha256:00e188c53e499c3c1637f73c91dcf2fb56d576cab76ce1be50a27c4e80e37078", size = 10839665, upload-time = "2026-06-25T17:19:44.702Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d3/ac1798ba64f670698867fcfc591d50e7e421bef137db564858f619a30fcf/ruff-0.15.20-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9ebd1fd9b9c95fc0bd7b2761aebec1f030013d2e193a2901b224af68fe47251b", size = 11208649, upload-time = "2026-06-25T17:19:48.787Z" }, + { url = "https://files.pythonhosted.org/packages/47/47/d3ac899991202095dfcf3d5176be4272642be3cf981a2f1a30f72a2afb95/ruff-0.15.20-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c5b16cdd67ca108185cd36dce98c576350c03b1660a751de725fb049193a0632", size = 10622638, upload-time = "2026-06-25T17:19:51.354Z" }, + { url = "https://files.pythonhosted.org/packages/33/13/4e043fe30aa94d4ff5213a9881fc296d12960f5971b234a5263fdc225312/ruff-0.15.20-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3413bb3c3d2ca6a8208f1f4809cd2dca3c6de6d0b491c0e70847672bde6e6efd", size = 10984227, upload-time = "2026-06-25T17:19:54.044Z" }, + { url = "https://files.pythonhosted.org/packages/76/e6/92e7bf40388bc5800073b96564f56264f7e48bfd1a498f5ced6ae6d5a769/ruff-0.15.20-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd7ec42b3bb3da066488db093308a69c4ac5ee6d2af333a86ba6e2eb2e7dd44b", size = 10622882, upload-time = "2026-06-25T17:19:57.037Z" }, + { url = "https://files.pythonhosted.org/packages/13/7a/43460be3f24495a3aa46d4b16873e2c4941b3b5f0b00cf88c03b7b94b339/ruff-0.15.20-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1a36ad0eb77fba9aabfb69ede54de6f376d04ac18ebea022847046d340a8267", size = 11474808, upload-time = "2026-06-25T17:20:00.357Z" }, + { url = "https://files.pythonhosted.org/packages/27/a0/f37077884873221c6b33b4ab49eb18f9f88e54a16a25a5bca59bef46dd66/ruff-0.15.20-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b6df3b1e4610432f0386dba04d853b5f08cbbc903410c6fcc02f620f05aff53c", size = 12293094, upload-time = "2026-06-25T17:20:03.446Z" }, + { url = "https://files.pythonhosted.org/packages/a6/74/165545b60256a9704c21ac0ec4a0d07933b320812f9584836c9f4aca4292/ruff-0.15.20-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e89f198a1ea6ef0d727c1cf16088bc91a6cb0ab947dedc966715691647186eae", size = 11526176, upload-time = "2026-06-25T17:20:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/86/b1/a976a136d40ade83ce743578399865f57001003a409acadc0ecbb3051082/ruff-0.15.20-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309809086c2acb67624950a3c8133e80f32d0d3e27106c0cd60ff26657c9f24b", size = 11520767, upload-time = "2026-06-25T17:20:09.191Z" }, + { url = "https://files.pythonhosted.org/packages/19/0f/f032696cb01c9b54c0263fa393474d7758f1cdc021a01b04e3cbc2500999/ruff-0.15.20-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:2d2374caa2f2c2f9e2b7da0a50802cfb8b79f55a9b5e49379f564544fbf56487", size = 11500132, upload-time = "2026-06-25T17:20:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f4/51b1a14bc69e8c224b15dab9cce8e99b425e0455d462caa2b3c9be2b6a8e/ruff-0.15.20-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a1ed17b65293e0c2f22fc387bc13198a5de94bf4429589b0ff6946b0feaf21a3", size = 10943828, upload-time = "2026-06-25T17:20:16.635Z" }, + { url = "https://files.pythonhosted.org/packages/71/4b/fe267640783cd02bf6c5cc290b1df1051be2ec294c678b5c15fe19e52343/ruff-0.15.20-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f701305e66b38ea6c91882490eb73459796808e4c6362a1b765255e0cdcd4053", size = 10645418, upload-time = "2026-06-25T17:20:19.4Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c0/a65aa4ec2f5e87a1df32dc3ec1fede434fe3dfd5cbcf3b503cafc676ab54/ruff-0.15.20-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b9c0c367ad8e5d0d5b5b8537864c469a0a0e55417aadfbeca41fa61333be9f4", size = 11211770, upload-time = "2026-06-25T17:20:22.033Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a4/0caa331d954ae2723d729d351c989cb4ca8b6077d5c6c2cb6de75e98c041/ruff-0.15.20-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:01cc00dd58f0df339d0e902219dd53990ea99996a0344e5d9cc8d45d5307e460", size = 11618698, upload-time = "2026-06-25T17:20:25.259Z" }, + { url = "https://files.pythonhosted.org/packages/10/9b/5f14927848d2fd4aa891fd88d883788c5a7baba561c7874732364045708c/ruff-0.15.20-py3-none-win32.whl", hash = "sha256:ed65ef510e43a137207e0f01cfcf998aeddb1aeeda5c9d35023e910284d7cf21", size = 10857322, upload-time = "2026-06-25T17:20:28.612Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f0/fe47c501f9dea92a26d788ff98bb5d92ed4cb4c88792c5c88af6b697dc8e/ruff-0.15.20-py3-none-win_amd64.whl", hash = "sha256:a525c81c70fb0380344dd1d8745d8cc1c890b7fc94a58d5a07bd8eb9557b8415", size = 11993274, upload-time = "2026-06-25T17:20:31.871Z" }, + { url = "https://files.pythonhosted.org/packages/d7/2b/9555445e1201d92b3195f45cdb153a0b68f24e0a4273f6e3d5ab46e212bb/ruff-0.15.20-py3-none-win_arm64.whl", hash = "sha256:2f5b2a6d614e8700388806a14996c40fab2c47b819ef57d790a34878858ed9ca", size = 11343498, upload-time = "2026-06-25T17:20:35.03Z" }, ] [[package]]