Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion quasarr/providers/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The shared-services layer consumed by every other subsystem: cross-process state
- `cloudflare.py` — challenge detection, `ensure_session_cf_bypassed`, FlareSolverr get/post/session helpers
- `html_templates.py` / `html_images.py` — UI page shell, base64 image constants, and language-flag emoji/SVG fallback assets for setup UI
- `hostname_issues.py` — DB-backed source health tracker (`mark_/clear_/get_hostname_issue`)
- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching, online-status checks, `download_package` (the JD linkgrabber submission)
- `utils.py` — grab-bag: payload generate/parse, category resolvers, title matching (including shared date-numbering parsing/query/match/canonicalization), online-status checks, `download_package` (the JD linkgrabber submission)
- `sessions/` and `notifications/` — see Child DOX Index

## Local Contracts
Expand Down
176 changes: 172 additions & 4 deletions quasarr/providers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,12 +1047,160 @@ def match_in_title(title: str, season: int = None, episode: int = None) -> bool:
return False


# Weekday names plus "night"; these are stripped from a title to build the
# shorter query variant in date_numbering_search_strings().
_DATE_NUMBERING_SCHEDULE_WORDS = {
    "friday",
    "monday",
    "night",
    "saturday",
    "sunday",
    "thursday",
    "tuesday",
    "wednesday",
}
# Tokens ignored when comparing titles: English/German articles and "and",
# together with every schedule word above.
_DATE_NUMBERING_IGNORED_TITLE_WORDS = {
    "a",
    "an",
    "and",
    "das",
    "der",
    "die",
    "the",
} | _DATE_NUMBERING_SCHEDULE_WORDS


def parse_episode_date(season, episode):
    """Return a validated date for Sonarr's year + MM/DD numbering shape.

    ``season`` is read as the year and ``episode`` as a ``"MM/DD"`` string.

    Returns:
        A ``date`` when ``episode`` splits into exactly two ``/``-separated
        parts and year, month and day convert to integers that form a real
        calendar date; otherwise ``None``.
    """
    parts = str(episode or "").split("/")
    if len(parts) != 2:
        return None

    try:
        return date(int(season), int(parts[0]), int(parts[1]))
    except (TypeError, ValueError, OverflowError):
        # TypeError: season is None. ValueError: a non-numeric part or an
        # impossible calendar date. OverflowError: a number too large for the
        # C-int arguments of date(), which must not escape to the caller.
        return None


def date_numbering_title_tokens(value):
    """Return the set of significant lowercase word tokens in *value*.

    The text is HTML-unescaped, passed through ``replace_umlauts``,
    lowercased, and split on every run of characters outside ``a-z0-9``.
    Tokens listed in ``_DATE_NUMBERING_IGNORED_TITLE_WORDS`` and tokens made
    only of digits are left out.
    """
    text = html.unescape(str(value or ""))
    text = replace_umlauts(text).lower()
    words = re.sub(r"[^a-z0-9]+", " ", text).split()

    tokens = set()
    for word in words:
        if word in _DATE_NUMBERING_IGNORED_TITLE_WORDS:
            continue
        if re.fullmatch(r"\d+", word):
            continue
        tokens.add(word)
    return tokens


def date_numbering_title_matches(title, search_string):
    """Return True when every significant search token occurs in *title*.

    Both inputs are tokenized with ``date_numbering_title_tokens``. A search
    string that yields no tokens never matches.
    """
    wanted = date_numbering_title_tokens(search_string)
    if wanted:
        return wanted <= date_numbering_title_tokens(title)
    return False


def date_numbering_release_matches(title, search_string, episode_date):
    """Return True when *title* carries *episode_date* and fits the search.

    The date must appear in the title as year, month, day separated by runs
    of whitespace, dots or hyphens, and must not be directly adjacent to
    other digits. Once the date is found, the release matches if the search
    string is an IMDb id or its tokens match the title.
    """
    if episode_date is None:
        return False

    haystack = str(title or "")
    found = re.search(
        rf"(?<!\d){episode_date:%Y}[\s.-]+{episode_date:%m}[\s.-]+"
        rf"{episode_date:%d}(?!\d)",
        haystack,
    )
    if found is None:
        return False

    if is_imdb_id(search_string):
        return True
    return bool(date_numbering_title_matches(title, search_string))


def _date_numbering_case_variant(value):
    """Return *value* with inner capitals of mixed-case words lowercased.

    A word is rewritten when it contains a lowercase character and also an
    uppercase character after its first position; the first character is
    kept as is and the rest is lowercased. Words are re-joined with single
    spaces. Returns an empty string when no word was rewritten.
    """
    pieces = []
    touched = False
    for word in str(value or "").split():
        mixed = any(map(str.islower, word)) and any(map(str.isupper, word[1:]))
        if mixed:
            touched = True
            pieces.append(word[:1] + word[1:].lower())
        else:
            pieces.append(word)

    if not touched:
        return ""
    return " ".join(pieces)


def date_numbering_search_strings(search_string, episode_date):
    """Build generic title/date variants without series-specific aliases.

    Args:
        search_string: Title to search for; whitespace runs are collapsed.
        episode_date: ``date`` of the wanted episode, or ``None``.

    Returns:
        An ordered list without duplicates. With an empty title or no date it
        holds only the normalized search string. Otherwise it starts with the
        title variants (the title itself, the title without schedule words,
        and case-flattened forms of both), followed by every title variant
        combined with the episode date, the day before and the day after,
        each written as ``YYYY MM DD``, ``YYYY-MM-DD`` and ``YYYY.MM.DD``.
    """
    search_string = " ".join(str(search_string or "").split())
    if not search_string or episode_date is None:
        return [search_string]

    title_variants = [search_string]
    words = search_string.split()
    compact_words = [
        word
        for word in words
        if re.sub(r"[^a-z0-9]+", "", word.lower()) not in _DATE_NUMBERING_SCHEDULE_WORDS
    ]
    # Only use the shortened title when it still has at least two words.
    if len(compact_words) >= 2 and compact_words != words:
        title_variants.append(" ".join(compact_words))

    for value in list(title_variants):
        case_variant = _date_numbering_case_variant(value)
        if case_variant and case_variant not in title_variants:
            title_variants.append(case_variant)

    # Exact date first, then the neighbouring days (presumably to tolerate
    # releases dated one day off -- confirm against the callers).
    candidate_dates = [episode_date]
    for offset in (-1, 1):
        try:
            candidate_dates.append(episode_date + timedelta(days=offset))
        except OverflowError:
            # date.min / date.max have no neighbour on one side; skip it
            # instead of failing the whole search.
            continue

    search_strings = list(title_variants)
    for candidate in candidate_dates:
        for title_variant in title_variants:
            for date_variant in (
                f"{candidate:%Y %m %d}",
                f"{candidate:%Y-%m-%d}",
                f"{candidate:%Y.%m.%d}",
            ):
                value = f"{title_variant} {date_variant}"
                if value not in search_strings:
                    search_strings.append(value)

    return search_strings


def canonicalize_date_numbered_title(title, search_string, episode_date):
    """Rewrite a date-numbered release title to start with the search title.

    The title is returned unchanged when the search string is an IMDb id,
    when the release does not match the search string and date, or when no
    prefix/suffix can be built. Otherwise everything before the date is
    replaced by the search string, with its non-word runs turned into dots,
    joined to the remainder of the title by a single dot.
    """
    if is_imdb_id(search_string):
        return title
    if not date_numbering_release_matches(title, search_string, episode_date):
        return title

    raw_title = str(title or "")
    located = re.search(
        rf"(?<!\d){episode_date:%Y}[\s.-]+{episode_date:%m}[\s.-]+"
        rf"{episode_date:%d}(?!\d)",
        raw_title,
    )
    if located is None:
        return title

    prefix = re.sub(r"[^\w]+", ".", str(search_string)).strip(".")
    remainder = str(title)[located.start() :].lstrip(" .-_")
    if prefix and remainder:
        return f"{prefix}.{remainder}"
    return title


def is_valid_release(
title: str,
search_category: int,
search_string: str,
season: int = None,
episode: int = None,
episode_date: date = None,
) -> bool:
"""
Return True if the given release title is valid for the given search parameters.
Expand All @@ -1061,6 +1209,7 @@ def is_valid_release(
- search_string: the original search phrase (could be an IMDb id or plain text)
- season: desired season number (or None)
- episode: desired episode number (or None)
- episode_date: validated date for a date-numbered TV episode (or None)
"""
try:
is_movie_search = search_category // 1000 * 1000 == SEARCH_CAT_MOVIES
Expand All @@ -1069,11 +1218,17 @@ def is_valid_release(
is_music_search = search_category // 1000 * 1000 == SEARCH_CAT_MUSIC
is_xxx_search = search_category // 1000 * 1000 == SEARCH_CAT_XXX

# if search string is NOT an imdb id check search_string_in_sanitized_title - if not match, it is not valid
# if search string is NOT an imdb id, require a title match
if not is_docs_search and not is_imdb_id(search_string):
if not search_string_in_sanitized_title(search_string, title):
title_matches = (
date_numbering_title_matches(title, search_string)
if is_tv_search and episode_date is not None
else search_string_in_sanitized_title(search_string, title)
)
if not title_matches:
trace(
"Skipping {title!r} as it doesn't match sanitized search string: {search_string!r}",
"Skipping {title!r} as it doesn't match sanitized "
"search string: {search_string!r}",
title=title,
search_string=search_string,
)
Expand All @@ -1092,6 +1247,18 @@ def is_valid_release(

# if it's a TV show search, don't allow any movies (check for season or episode tags in the title)
if is_tv_search:
if episode_date is not None:
if not date_numbering_release_matches(
title, search_string, episode_date
):
trace(
"Skipping {title!r} as it doesn't match date {episode_date}",
title=title,
episode_date=episode_date,
)
return False
return True

# must have some S/E tag present
if not SEASON_EP_REGEX.search(title):
trace(
Expand All @@ -1104,7 +1271,8 @@ def is_valid_release(
if season is not None or episode is not None:
if not match_in_title(title, season, episode):
trace(
"Skipping {title!r} as it doesn't match season {season} and episode {episode}",
"Skipping {title!r} as it doesn't match season "
"{season} and episode {episode}",
title=title,
season=season,
episode=episode,
Expand Down
4 changes: 2 additions & 2 deletions quasarr/providers/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
import sys

__version__ = "4.6.1"
__version__ = "4.6.2"


def get_version():
Expand Down Expand Up @@ -127,7 +127,7 @@ def create_version_file():
+ str(int(suffix))
+ "'),",
" StringStruct(u'InternalName', u'Quasarr'),",
" StringStruct(u'LegalCopyright', u'Copyright © RiX'),",
" StringStruct(u'LegalCopyright', u'Copyright \\u00a9 RiX'),",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

" StringStruct(u'OriginalFilename', u'Quasarr.exe'),",
" StringStruct(u'ProductName', u'Quasarr'),",
" StringStruct(u'ProductVersion', u'"
Expand Down
3 changes: 2 additions & 1 deletion quasarr/search/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ The Newznab-facing search layer: `get_search_results()` fans a single *arr reque

## Local Contracts

- Per-source gating before dispatch: hostname configured, category in `supported_categories`, category whitelist from `get_search_category_sources`, `supports_imdb` for the imdb branch, `supports_phrase` for the phrase branch, `supports_absolute_numbering` when an episode is given without a season. The feed branch checks only hostname/category/whitelist.
- Per-source gating before dispatch: hostname configured, category in `supported_categories`, category whitelist from `get_search_category_sources`, `supports_imdb` for the imdb branch, `supports_phrase` for the phrase branch, `supports_absolute_numbering` when an episode is given without a season, and `supports_date_numbering` for Sonarr's year + `MM/DD` episode shape. The feed branch checks only hostname/category/whitelist.
- Date-numbered requests are parsed once into a validated `datetime.date` and passed to sources as `episode_date`; invalid calendar dates stay on the normal numbering path.
- The method names `search` and `feed` are load-bearing — dispatch is `getattr(source, action)`.
- Cache TTL is 300s for search, 60s for feed; the key nulls `start_time` and uses the cache-owner category. Cached entries skip execution entirely, so source methods must be safe to skip.
- Per-source results are merged, date-sorted descending, title-filtered by `release_matches_search_category`, then offset/limit-sliced; feed responses are never paginated.
Expand Down
19 changes: 18 additions & 1 deletion quasarr/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def get_search_results(
get_search_behavior_category,
get_search_cache_owner_category,
get_search_capability_category,
parse_episode_date,
release_matches_search_category,
)

Expand All @@ -55,6 +56,8 @@ def get_search_results(
if imdb_id:
get_imdb_metadata(imdb_id)

episode_date = parse_episode_date(season, episode)

# Determine search category if not provided
if not search_category:
search_category = determine_search_category(request_from)
Expand Down Expand Up @@ -106,10 +109,11 @@ def get_search_results(
stype += f" <g>S{season}</g>"
if episode:
stype += f"{'' if season else ' '}<e>E{episode}</e>"
if episode_date:
stype += f" <g>{episode_date:%Y}</g>-<e>{episode_date:%m}</e>-<y>{episode_date:%d}</y>"

if base_search_category in [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS]:
args = (shared_state, start_time, behavior_search_category)
kwargs = {"search_string": imdb_id, "season": season, "episode": episode}
for source in sources.values():
source_logger = get_source_logger(source.initials)

Expand Down Expand Up @@ -137,6 +141,19 @@ def get_search_results(
source_logger.trace("Search with absolute EP number unsupported")
continue

kwargs = {
"search_string": imdb_id,
"season": season,
"episode": episode,
}

if episode_date:
if not source.supports_date_numbering:
source_logger.trace("Search with date unsupported")
continue

kwargs["episode_date"] = episode_date

search_executor.add(
source,
args,
Expand Down
7 changes: 4 additions & 3 deletions quasarr/search/sources/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra
## Local Contracts

- Registration is by file existence alone: drop `<xy>.py` into this folder exposing `class Source(AbstractSearchSource)`. The module filename, `Source.initials`, and the `Config("Hostnames")` key all use the same two-letter key; a same-key download twin exists only when release links need source-specific extraction (FX has none). Adding/renaming a module file changes the Hostnames config key space automatically.
- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller.
- Signatures: `search(shared_state, start_time, search_category, search_string="", season=None, episode=None, episode_date=None)` and `feed(shared_state, start_time, search_category)`, both returning `list[SearchRelease]` — empty list on failure, never raise to the caller. `episode_date` is a validated `datetime.date` for Sonarr's year + `MM/DD` numbering shape.
- `SearchRelease`: `{"details": {"title", "hostname" (= initials), "imdb_id" (str or None), "link", "size" (bytes), "date" (RFC822 preferred), "source" (original page URL)}, "type": "protected"}` — every emit site uses type `"protected"`.
- `details.link` must come from `quasarr.providers.utils.generate_download_link(...)`; the payload is pipe-delimited (`title|url|size_mb|password|imdb_id|source_key`), so field values must not contain `|`.
- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`.
- Capabilities are plain class attributes: `initials`, `language` (`"de"`, `"en"`, or `"fr"`), `supports_imdb`, `supports_phrase`, `supported_categories` (constants `SEARCH_CAT_*`), plus optional `supports_absolute_numbering` / `supports_date_numbering` / `requires_login` / `requires_account` / `invite_only` / `requires_flaresolverr` / `requires_radarr` / `requires_sonarr`. Date numbering defaults to supported; AL and AT opt out because their anime-specific numbering rewrites require numeric episodes, MX opts out because its API requires numeric season+episode, and movie-only FF is never dispatched for TV dates. `language`, category, account, invite, login, FlareSolverr, and *arr-client metadata is surfaced in the hostname editor through `helpers.get_source_metadata()`.
- `is_valid_release(...)` is the default validation for each candidate title in `search()` (not `feed()`); AT and AL intentionally deviate with bespoke matching suited to absolute-numbered anime. IMDb convention: if the searched ID and a release-page ID both exist and differ → skip; if the release lacks one → inherit the searched ID.
- Call `mark_hostname_issue(self.initials, "feed"|"search", msg)` on fetch/parse errors and `clear_hostname_issue(self.initials)` when releases were produced.
- `Source.__init__` must be cheap and never fail — a failure is logged as an error and the source is dropped from the registry.
Expand All @@ -26,6 +26,7 @@ Two-letter lowercase source modules plus `helpers/`: `search_source.py` (`Abstra
- Timeouts from `constants.FEED_/SEARCH_REQUEST_TIMEOUT_SECONDS`; User-Agent from shared state. Sources without native IMDb search resolve a localized title via `get_localized_title(shared_state, imdb_id, language)` — pass `"de"`, `"en"`, or `"fr"` matching the source site's content language.
- Module-private parsing helpers are underscore-prefixed at the module bottom; `size` flows as MB int into `generate_download_link` and as bytes in `details.size`; size 0 is the accepted fallback.
- Do not infer payloads or response shapes — the root `Third-Party Source Work` rules require real traffic captures or direct curl confirmation first.
- Date-numbering parsing, title matching, query variants, and canonical title rewriting belong in `quasarr.providers.utils`; source modules only pass `episode_date` through existing verified request/result paths. Keep production logic series-agnostic and add series-specific compatibility cases only as synthetic tests.
- A new source adds its entry to the Per-Source Notes below and, when it has a download module, to the notes in `quasarr/downloads/sources/AGENTS.md` — in the same change.

### Per-Source Notes (search side)
Expand All @@ -37,7 +38,7 @@ Capability flags (`supports_*`, `requires_*`) and categories are class attribute
- **BY** — no login. Book/magazine titles run through Magazarr-compatible date/issue normalization; search drops releases without valid resolution/codec (feed keeps the original metadata); per-category fetches use category-ID constants inside the module.
- **DD** — login (`providers/sessions/dd`, which applies a fixed quality-profile filter to API responses — new resolutions must be added there). IMDb mismatch between request and API response discards the result; a suspected fake release (the API's `fake` flag) invalidates the cached session.
- **DJ** — login (shares the `JUNKIES` credentials section with SJ). IMDb-only; series discovered by HTML scrape to locate a media id, releases then fetched via JSON and aggregated per season block.
- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants.
- **DL** — login (`providers/sessions/dl`); umlauts normalized when building queries. Paginated search is sequential, bounded by a wall-clock budget, and stops on an empty page; yearly magazine threads ("Jahresthema") expand into per-issue entries (requires the current year in the thread); magazine titles use a token-normalized matcher to align month/issue variants. Date-numbered thread discovery/pagination is DL-specific, while query variants, title/date matching, and canonicalization use shared generic helpers.
- **DT** — no login. Article date parsing assumes a fixed timezone offset; IMDb id parsed from article HTML and propagated; search drops candidates not matching requested resolution/codec (feed keeps them).
- **DW** — no login. German month names mapped in a local table (new variants go there); IMDb id read from article HTML validates the result still matches the request.
- **FF** — no login, movie-only. Search uses the public title lookup, then opens each movie page to extract IMDb id and the movie-token release API; releases are emitted from API `div.entry` blocks and use the release page URL as the download payload source. Feed reads recent update rows, then cross-references each movie page/API to fill size and IMDb data for the release anchors; cross-reference stops when the source's global feed budget reaches `FEED_REQUEST_TIMEOUT_SECONDS`.
Expand Down
1 change: 1 addition & 0 deletions quasarr/search/sources/al.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class Source(AbstractSearchSource):
requires_flaresolverr = True
supports_imdb = True
supports_phrase = False
supports_date_numbering = False
supports_absolute_numbering = True
supported_categories = [SEARCH_CAT_MOVIES, SEARCH_CAT_SHOWS, SEARCH_CAT_SHOWS_ANIME]
requires_login = True
Expand Down
Loading
Loading