diff --git a/.env.example b/.env.example index 5e4189a..c65eec5 100644 --- a/.env.example +++ b/.env.example @@ -12,3 +12,21 @@ CORS_ORIGINS=http://127.0.0.1,http://localhost LOG_LEVEL=INFO LOG_JSON=false DEBUG=false + +# --- hum-subsonic-shim (optional sibling service; docs/SONOS_SPEC.md) --- +# Required to run the shim: the Subsonic password bonob/Amperfy log in with +SHIM_SUBSONIC_PASSWORD= +SHIM_SUBSONIC_USER=hum +# Token for the shim's calls to Hum; falls back to API_BEARER_TOKEN above. +# Leave commented unless the shim talks to a different Hum instance. +#SHIM_HUM_BEARER_TOKEN= +# Optional +SHIM_HUM_BASE_URL=http://127.0.0.1:8000 +SHIM_HOST=127.0.0.1 +SHIM_PORT=8001 +SHIM_ALLOW_PLAIN_PASSWORD=false +SHIM_FFMPEG_PATH=ffmpeg +SHIM_MP3_BITRATE_KBPS=256 +# Sonos-facing browse surfaces (via bonob) +SHIM_PINNED_PLAYLISTS= # comma-separated YouTube playlist IDs for the Playlist shelf +SHIM_PUBLIC_URL= # LAN-reachable base for radio streams, e.g. http://192.168.1.10:8001 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1495a0b..149d0e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,10 +33,10 @@ jobs: run: uv sync --extra dev --python ${{ matrix.python }} - name: Lint (ruff) - run: uv run ruff check app/ + run: uv run ruff check app/ shim/ - name: Type-check (mypy --strict) - run: uv run mypy app/ --strict + run: uv run mypy app/ shim/ --strict - name: Test (pytest, unit only) # Integration tests hit live YouTube and are deselected by default diff --git a/.gitignore b/.gitignore index 081be91..a0573ba 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,6 @@ frontend/node_modules/ frontend/dist/ frontend/.svelte-kit/ frontend/coverage/ + +# Shim runtime state +.shim-data/ diff --git a/README.md b/README.md index 0af6a91..ebb55ad 100644 --- a/README.md +++ b/README.md @@ -64,16 +64,20 @@ All `/api/*` endpoints require `Authorization: Bearer `. 
| Method | Path | Purpose | |---|---|---| -| GET | `/api/search?q=...&limit=20` | Search videos | +| GET | `/api/search?q=...&limit=20&category=music&live=false` | Search videos (optional music/live filters) | +| GET | `/api/radio?limit=20` | Currently-live music streams (filtered live search) | | GET | `/api/video/{id}` | Video metadata + signed proxy URLs | | GET | `/api/channel/{id}` | Channel info | | GET | `/api/playlist/{id}` | Playlist with items | +| GET | `/api/hls/{id}.m3u8?itag&exp&sig` | HLS byterange wrapper over fMP4/AAC audio (Safari seeking) | +| GET | `/api/live/{id}/manifest.m3u8?exp&sig` | Proxied live-stream HLS playlist (rewritten segment URIs) | | GET | `/proxy/audio/{id}?itag&exp&sig` | Audio stream (signed, range-aware) | | GET | `/proxy/stream/{id}?itag&exp&sig` | Video stream (signed, range-aware) | | GET | `/proxy/thumbnail/{id}?itag=0&exp&sig` | Thumbnail (signed) | +| GET | `/proxy/live-segment/{id}?u&exp&sig` | Live HLS segment proxy (signed, host-allowlisted) | | GET | `/health` | Health check | -Proxy URLs are minted by `/api/video/{id}` — call it first, hand the returned URLs to your player. +Proxy URLs are minted by `/api/video/{id}` — call it first, hand the returned URLs to your player. For AAC (`audio/mp4`) formats it additionally returns an `hls_url`; for live videos it returns a signed `live_stream_url` manifest. A bearer-protected `/api/debug/live/{id}/upstream` endpoint exposes the raw upstream playlists for live-stream debugging. 
## Testing @@ -118,8 +122,10 @@ app/ ├── adapters/ │ ├── youtube.py Only file that imports pytubefix │ └── upstream_http.py Shared httpx.AsyncClient + YouTube host allowlist -├── api/ GET routes: search, video, channel, playlist -└── proxy/ GET routes: audio, video, thumbnail (range pass-through) +├── api/ GET routes: search, radio, video, channel, playlist, hls, live +├── proxy/ GET routes: audio, video, thumbnail, live-segment (range pass-through) +├── hls/ sidx box parsing for the fMP4 byterange HLS wrapper +└── live/ HLS master/media playlist parsing + segment-URI rewriting ``` ## Why pytubefix? @@ -132,7 +138,7 @@ If pytubefix breaks (it eventually will), the fix lives in `app/adapters/youtube - Single uvicorn worker is correct at this scale (single user) - No rate limiting beyond the bearer token gate -- In-memory stream URL cache only (5-min TTL); restart loses it +- In-memory stream URL cache only (capped at 1h, bounded by YouTube's own `expire=`); restart loses it - Not for public deployment without further hardening - pytubefix is reverse-engineered; YouTube can break it without notice diff --git a/app/config.py b/app/config.py index 9e050c5..fc6b70d 100644 --- a/app/config.py +++ b/app/config.py @@ -2,15 +2,20 @@ from __future__ import annotations from functools import lru_cache +from pathlib import Path from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict +# Anchor .env to the repo root (this file is <repo>/app/config.py), not the +# current working directory — launching from a subdir must not lose settings. 
+_ENV_FILE = str(Path(__file__).resolve().parents[1] / ".env") + class Settings(BaseSettings): """Single source of truth for runtime configuration.""" - model_config = SettingsConfigDict(env_file=".env", case_sensitive=False, extra="ignore") + model_config = SettingsConfigDict(env_file=_ENV_FILE, case_sensitive=False, extra="ignore") # App app_name: str = "Hum" diff --git a/app/main.py b/app/main.py index cf8f1ab..b886e20 100644 --- a/app/main.py +++ b/app/main.py @@ -119,6 +119,15 @@ async def health() -> dict[str, str]: def main() -> None: + import sys + + # The `hum` script takes no subcommands; `hum shim` is a common slip — the + # shim has its own entry point. Fail loudly instead of silently starting Hum. + if len(sys.argv) > 1: + raise SystemExit( + f"hum: unknown argument {sys.argv[1]!r} — did you mean 'hum-shim'?" + ) + import uvicorn settings = get_settings() diff --git a/docs/SONOS_SPEC.md b/docs/SONOS_SPEC.md new file mode 100644 index 0000000..5d78aec --- /dev/null +++ b/docs/SONOS_SPEC.md @@ -0,0 +1,460 @@ +# Hum → Sonos Integration Spec + +**Status:** audited against the Hum codebase at `c68470e` (Hum 0.1.0). This +revision corrects the original draft where the repo had moved past the README +it was written from, and resolves the open decisions the code already answers. + +**Goal:** Make Hum (self-hosted YouTube audio streamer) appear as a first-class +music source *inside the Sonos app* — browsable, searchable, queueable — +without writing a Sonos SMAPI server from scratch. + +**Strategy:** Implement a **Subsonic-compatible API adapter** in front of Hum, +then point **bonob** (an existing, mature SMAPI⇄Subsonic bridge) at it. bonob +handles all Sonos-facing complexity; we only build the Hum→Subsonic translation. 
+ +``` +Sonos app ─SMAPI(SOAP)─► bonob ─Subsonic API─► hum-subsonic-shim ─► Hum backend ─► YouTube + │ │ │ │ + controller (unmodified, (new component (existing FastAPI, + (phone, out mature project) we build) pytubefix, signed URLs) + of audio path) + Sonos speaker pulls audio directly ◄─── shim /stream +``` + +The phone is only a controller. The **Sonos speaker fetches audio directly** +from the shim's `/stream` endpoint — true handoff, phone can sleep/leave. + +----- + +## 1. Architecture decision record + +### Why the shim-over-bonob path (Option A) instead of a native SMAPI server (Option B) + +|Factor |Shim + bonob (chosen) |From-scratch SMAPI | +|------------------------------------------|--------------------------------------------|---------------------------------------| +|Sonos SOAP / auth / S2 cloud handling |Handled by bonob |We build & maintain it | +|Browse/search/art/now-playing in Sonos app|Inherited from bonob |We build it | +|API surface to implement |Subsonic subset (well-documented, REST/JSON)|SMAPI (SOAP, fiddlier) | +|Local test client before Sonos |**Amperfy** (already in use) |None — must test against Sonos directly| +|Reuse of existing stack |Aligns with existing Gonic/Subsonic setup |Parallel new stack | +|Long-term maintenance |Track bonob releases |Solo-maintain SMAPI quirks | + +The deciding factor: an existing Gonic+Amperfy setup means a Subsonic shim can +be validated end-to-end with a trusted client (Amperfy) **before Sonos ever +sees it**. That feedback loop is worth more than the control gained by going +native. + +### Accepted constraints + +- Sonos network is already internet-connected; the S2 "expose to internet" + requirement is therefore an accepted cost, not a blocker. (See §6.) +- Hum is pytubefix-based and inherently ToS-fragile. This integration does not + change that; it inherits it. + +### Audit deltas from the original draft + +1. 
**Hum exposes AAC, not just Opus.** `AudioFormat.codec` is `"aac" | "opus"`; + the adapter maps `mp4a → aac` and signs an `hls_url` for `audio/mp4` + formats. The transcode strategy is therefore **remux-first** (§4), which + downgrades the draft's biggest technical risk. +2. **Hum has live-stream and radio surfaces** (`/api/radio`, `/api/live/...`, + `/proxy/live-segment/...`). Live content is **out of scope** for the shim: + it doesn't fit the Subsonic track model and can't be transcoded the same + way. `search3` must filter live hits out (§3.4). +3. **Hum has zero persistence.** No play history, no favourites, in-memory + caches only. "Recently played" and `star`/`unstar` are shim-side features + or omitted (§3.2–3.3, §9). +4. **Internal upstream URL cache is 1 h** (capped by YouTube's own `expire=`), + not the 5 min the old README claimed. Signed *proxy* URL TTL remains 6 h. + Latency budgeting in §4/§5 reflects this. +5. **`/api/video/{id}` is the expensive call** (full pytubefix extraction, + seconds when cold; single-flighted and cached inside Hum). The shim must + cache its responses (§5) so `getCoverArt` and repeat `stream` calls don't + re-trigger extraction. + +----- + +## 2. Component inventory + +|Component |Status |Role | +|---------------------|------------|----------------------------------------------------------| +|Hum backend |Exists |YouTube search + signed, range-aware proxy URLs; AAC+Opus formats; live/radio (unused here)| +|Gonic |Exists |Current Subsonic server feeding Amperfy (untouched) | +|Amperfy |Exists |Subsonic client — used here as the shim's **test harness**| +|**hum-subsonic-shim**|**To build**|Translates Subsonic API → Hum API; remuxes/transcodes audio| +|bonob |To deploy |SMAPI⇄Subsonic bridge; registers Hum as a Sonos source | +|Sonos |Exists |Endpoint; speakers pull audio directly from the shim | + +Key point: the shim is a **separate** Subsonic endpoint from Gonic. Do not graft +YouTube content into the Gonic library. 
bonob supports multiple registrations; +Hum becomes its own Sonos source alongside the existing music. + +**Shim location & stack (resolved):** Python/FastAPI, living in the Hum repo as +a sibling service. The shim reuses Hum's idioms — ports-and-adapters layout, +`StreamingResponse` lifecycle handling modelled on `app/proxy/_common.py` +(`body_iterator`'s guaranteed-close pattern is exactly what the ffmpeg pipe +needs), pydantic-settings config, same deploy story. + +----- + +## 3. The shim: Subsonic API surface to implement + +bonob calls a subset of the Subsonic API. Implement only what it touches. All +Subsonic endpoints live under `/rest/` and accept `u`, `p`/`t`+`s`, `v`, `c`, +`f=json` query params. Respond in JSON (`subsonic-response` envelope). + +### 3.1 Must-have (minimum playable) + +|Subsonic endpoint|Maps to Hum |Notes | +|-----------------|----------------------------------------|-----------------------------------------------------------------| +|`ping` |— |Health/auth check. Return OK envelope. | +|`getLicense` |— |Return `valid=true`. | +|`stream?id=` |Hum `/api/video/{id}` → signed proxy URL|**Remux/transcode to a Sonos-safe format** (see §4). | +|`getCoverArt?id=`|see §3.5 |Shim fetches and re-serves bytes; never redirects the client. | +|`search3?query=` |Hum `/api/search` |Map YouTube results → Subsonic `song`/`album`/`artist` shapes. **Filter live hits** (§3.4).| + +### 3.2 Browsing (makes it feel native in the Sonos app) + +|Subsonic endpoint |Maps to / strategy | +|----------------------------------|---------------------------------------------------------------------| +|`getMusicFolders` |Return one synthetic folder, e.g. "Hum". | +|`getIndexes` / `getArtists` |Synthetic top-level: "Search", "Playlists". ("Recently Played" only if built shim-side — Hum has no history.)| +|`getPlaylists` / `getPlaylist?id=`|Hum `/api/playlist/{id}` → Subsonic playlist. 
| +|`getAlbumList2?type=` |Hum has no history; return empty for `recent`/`frequent` unless the shim records its own.| +|`getArtist` / `getAlbum` |Synthesize from playlist/channel (`/api/channel/{id}`) where it makes sense.| + +### 3.3 Optional / nice-to-have + +|Subsonic endpoint|Purpose | +|-----------------|------------------------------------------------------------------| +|`scrobble` |bonob reports now-playing/scrobble; accept and no-op or log. Doubles as the data source if the shim grows its own "recently played".| +|`star` / `unstar`|Favourites from the Sonos app — requires shim-side storage (Hum stores nothing).| +|`getGenres` |Skip unless you want genre browsing. | + +### 3.4 The modelling problem (the real design work) + +Subsonic assumes a **library** (artists → albums → tracks). Hum is **search + +stream**. You are mapping a search engine onto a library shape. Decide the +synthetic hierarchy up front: + +- **Search** as the primary entry (Sonos search box → `search3` → Hum search). + Pass `category=music` to Hum's search for better signal where appropriate. +- **Playlists** map cleanly (Hum already has `/api/playlist/{id}`). +- A single video → model as a single-track "album" so it slots into Subsonic's + album/track expectations. +- **Live hits must be excluded.** `SearchHit.is_live` is unreliable from + pytubefix (often `None` for live streams); use the inverse of the heuristic + already proven in `app/api/radio.py:_looks_live` — zero/missing duration + means live, so drop the hit. Concretely: + ```python + results = [h for h in raw if not _looks_live(h)] + ``` + (`radio.py` keeps hits where `_looks_live` is True; `search3` drops them.) + A "Radio" browse entry for live streams is a possible later phase, not Phase 1/2. + +A stable ID scheme is critical: Subsonic IDs must round-trip to YouTube IDs. +Prefixing: `vid:<video-id>`, `pl:<playlist-id>`, `art:<channel-id>` so the shim can +route any incoming `id` back to the right Hum call. 
Note that Hum validates video +IDs as exactly 11 chars of `[A-Za-z0-9_-]`; playlist IDs 2–64 chars; channel +IDs 5–64 chars — the shim should reject malformed IDs before calling Hum. + +### 3.5 Cover art strategy + +Two sources, both server-side (the Sonos client never sees a YouTube URL): + +- **Search hits / playlist items** carry raw `i.ytimg.com` thumbnail URLs in + Hum's responses. The shim may fetch these directly and re-serve the bytes — + cheap, no extraction triggered. +- **Video details** (`/api/video/{id}`) return a **signed** + `/proxy/thumbnail/{id}` URL. Use this when the shim already has a cached + details response; do **not** call `/api/video/{id}` solely for cover art — + that triggers a full pytubefix extraction. + +Resize/normalize to the `size=` Subsonic param as needed; cache aggressively. + +----- + +## 4. Audio delivery: remux first, transcode as fallback + +**The original draft assumed Opus/WebM only and made mp3 re-encode the plan of +record. The codebase says otherwise:** Hum surfaces YouTube's AAC (`audio/mp4`, +itag-140 class) formats with `codec="aac"`, alongside Opus. Sonos plays AAC +natively. So: + +### Mode 1 (default): AAC remux — `ffmpeg -c copy` + +``` +Sonos GET /rest/stream?id=vid:<video-id> + → shim calls Hum /api/video/<video-id> (cached; picks audio format with codec=="aac") + → shim spawns: ffmpeg -i <signed-hum-proxy-audio-url> -c:a copy -f mp4 -movflags frag_keyframe+empty_moov - (pipe) + → shim streams stdout to Sonos with Content-Type: audio/mp4 +``` + +Near-zero CPU, no quality loss, first byte limited only by Hum URL resolution. +Fragmented MP4 (fMP4) is preferred over ADTS: it preserves the timing atom so +Sonos can display track length and seek, and is the same container Hum's own +HLS path produces (`app/api/hls.py`). ADTS is a secondary experiment — try it +if fMP4 causes buffering issues, but expect no seek bar. Mode 2 is the last +resort. + +**Mode 1 container preference order:** +1. 
`-f mp4 -movflags frag_keyframe+empty_moov` → `Content-Type: audio/mp4` (start here) +2. `-f adts` → `Content-Type: audio/aac` (try if fMP4 causes issues) +3. Mode 2 (mp3 re-encode) if both AAC containers are rejected + +### Mode 2 (fallback): mp3 re-encode + +``` + → shim spawns: ffmpeg -i <signed-hum-proxy-audio-url> -f mp3 -b:a 256k - (pipe) + → Content-Type: audio/mpeg +``` + +Used when no AAC format exists for a video (rare) or if Sonos rejects the +remuxed AAC in Amperfy/Sonos testing. mp3 CBR 256k, no exotic sample rates. + +### Requirements / gotchas (both modes) + +- **Range requests:** Sonos issues HTTP range requests for seeking. Live ffmpeg + pipes are not seekable. Options: + - (a) Ignore ranges, stream straight through (works for play-through; seeking + may be degraded). Simplest. Start here. + - (b) Pre-transcode to a temp file, then serve with proper range support. + Adds latency + disk, gains seeking. Optimize to this later. + - (c) *Possible Mode-1-only shortcut, verify before relying on it:* since + remux is byte-cheap, a ranged request can be served by restarting the remux + and discarding output up to the offset — still (a)-class simplicity with + approximate seek support. +- **Container/timing:** fMP4/ADTS/mp3 over a chunked pipe generally works; some + renderers want `Content-Length`. If Sonos balks, fall back to temp-file mode (b). +- **Process lifecycle:** kill ffmpeg when Sonos disconnects (skip/stop) or you + leak processes. Wire to the request lifecycle — follow the + `app/proxy/_common.py` `body_iterator` pattern: a generator whose `finally` + reaps the process, wrapped in `StreamingResponse`. +- **Latency:** first-byte latency = Hum `/api/video` call (seconds when the + extraction is cold; Hum single-flights and caches upstream URLs for up to + 1 h) + ffmpeg spin-up (negligible in copy mode). The shim's own details + cache (§5) keeps the warm path fast; consider prefetching the *next* queue + item's details on `stream` calls. 
+ +Test this in isolation before involving Sonos: curl the shim's `/stream` into +a file, inspect with `ffprobe`, play locally — then Amperfy — then Sonos. + +----- + +## 5. URL signing, TTLs, and the shim's cache + +Three clocks to keep straight: + +|Layer |TTL |Where | +|-------------------------------------|-------------|--------------------| +|Hum signed proxy URLs (`exp`/`sig`) |6 h default (`STREAM_URL_TTL_SECONDS`)|minted by `/api/video/{id}`, per-itag| +|Hum internal upstream-URL cache |≤ 1 h, also capped by YouTube's `expire=`|inside `app/adapters/youtube.py`; transparent to the shim| +|Shim details cache (to build) |`min(1800, exp − now − 60)` s|shim-side| + +- Hum mints signed proxy URLs per-itag with `exp`/`sig` query params. The + **shim** consumes those internally and re-exposes its own `/stream` to Sonos + — Sonos never sees a Hum signed URL or a raw YouTube CDN URL. (Preserves Hum + invariant #3.) +- The shim's own `/stream` URL is what bonob/Sonos hold; it embeds only the + `vid:` ID, so it never expires. The shim mints/refreshes Hum URLs per stream + request — the 6 h TTL is never the limiting factor, including for gapless/ + queued playback. +- **Cache `/api/video/{id}` responses in the shim** (metadata + signed URLs + + signed thumbnail URL). This is what keeps `getCoverArt`, repeated `stream` + calls, and queue prefetch from hammering pytubefix extraction. Evict each + entry using `ttl = min(1800, exp − now − 60)` — the 60 s safety margin + prevents serving an already-expired signed URL to ffmpeg mid-stream. +- The shim holds Hum's `API_BEARER_TOKEN` server-side; it is never exposed to + bonob/Sonos. + +----- + +## 6. S1 vs S2 deployment (network exposure) + +bonob behaves differently by Sonos generation: + +- **S1:** works fully **local** — bonob only needs to be reachable by Sonos + devices on the LAN (`BNB_URL` = LAN IP). No internet exposure. 
+- **S2 (since May 2024):** bonob **must be reachable from the internet** because + Sonos's cloud calls into it. `BNB_URL` must be a public DNS name (HTTPS). + +Since the Sonos network is already internet-connected, the S2 path is acceptable +here. Hardening checklist for S2: + +- Expose **only bonob** publicly (HTTPS/443). Keep the shim and Hum on the LAN, + reachable by bonob but **not** from the internet. (Hum already defaults to + binding `127.0.0.1`; the shim must be reachable by the Sonos *speakers* on + the LAN for `/stream`, but not by the internet.) +- Restrict the firewall to Sonos's published IP ranges where possible. +- Terminate TLS at a reverse proxy (Caddy/nginx/Cloudflare Tunnel — bonob docs + confirm cloudflared works). +- Long random `BNB_SECRET`. +- Note the trust boundary: bonob is the only internet-facing surface; the + YouTube-extracting shim + Hum stay private. + +If any Sonos units can run S1, you could keep everything LAN-only — worth +checking per device, but not required given current setup. + +----- + +## 7. Development trajectory (build order = feedback order) + +Each phase is independently testable. Do not start a phase until the previous +one is green. + +> **Build status (code phases done; hardware phases pending).** Phases 0, 1, 2, +> and the code-able parts of Phase 5 are implemented, unit-tested (ruff + mypy +> --strict + pytest green), and on PR #2. What remains is human/hardware: the +> Amperfy listening/browse test (Phase 1/2 exit), then bonob + Sonos (Phases +> 3–4). An opt-in live harness (`pytest -m integration`) produces the audio +> evidence for the Amperfy step. + +### Phase 0 — Scaffolding ✅ done + +- Stand up the shim as a sibling FastAPI service in the Hum repo (stack + decision resolved, §2), with `/rest/ping` returning a valid + `subsonic-response` JSON envelope. +- **Exit test:** `curl` ping returns the correct envelope. 
✅ + +### Phase 1 — Auth + search + stream (minimum playable) ✅ code done + +- Implement `ping`, `getLicense`, `search3`, `stream`, `getCoverArt`. +- Implement Subsonic token auth (`t`+`s` salted-MD5; plaintext `p` fallback + for dev) — bonob sends token auth by default (§9). +- Implement the ID scheme (`vid:`/`pl:`/`art:`) with Hum's ID validation rules. +- Implement live-hit filtering in `search3` (§3.4). +- Implement the shim details cache (§5). +- Implement `stream` as **AAC remux (Mode 1)** with mp3 re-encode (Mode 2) + fallback, range-ignoring first. +- **Exit test:** point **Amperfy** at the shim. Search a track, play it, hear + audio. Verify both an AAC-remuxed and a forced-mp3 stream play. This + validates the hardest parts with a trusted client. **Do not proceed to Sonos + until Amperfy plays cleanly.** ⏳ awaiting human test. + +### Phase 2 — Browsing ✅ code done + +- Implement `getMusicFolders`, `getArtists`/`getIndexes`, `getPlaylists`, + `getPlaylist`, `getAlbumList2` (empty `recent`/`frequent`). ✅ +- Build the synthetic hierarchy (Search / Playlists). ✅ Playlist hits surface + as drill-in albums (`getAlbum`/`getPlaylist` on `pl:` ids); artist/album + catalog is intentionally empty (discovery via search). +- **Exit test:** Amperfy shows browsable structure; playlists load and play. + ⏳ awaiting human test. + +**Sonos/bonob note (verified against bonob source).** bonob is a browse bridge, +not a library-syncing client: it fetches from the shim on demand and Sonos keeps +no persistent catalog, so `search3` results are **not** accumulated into a +library (the DB-pollution seen in Amperfy does not occur on Sonos). bonob's +top-level containers map to `getArtists`/`getAlbumList2`/`getGenres` (empty +here — no catalog), `getStarred2` (Favourites ✅), `getPlaylists`, **Internet +Radio** (`getInternetRadioStations`), and Search. 
The populated, non-polluting +Sonos surfaces are therefore: +- **Playlists** ✅ — `getPlaylists` returns `SHIM_PINNED_PLAYLISTS` + starred + `pl:` playlists (Hum can't enumerate; curated only). +- **Internet Radio** ✅ (listing) — `getInternetRadioStations` maps Hum + `/api/radio` live streams to stations; `streamUrl` → the shim's + unauthenticated `/radio/{id}` mp3 pipe (needs `SHIM_PUBLIC_URL`). + ⚠️ **live-stream → Sonos-radio playback is unverified** (hardware gate). +- **Favourites** ✅ — `getStarred2`. + +### Phase 3 — bonob (LAN, S1-style first if possible) + +- Run bonob (Docker, pinned tag) pointed at the shim via `BNB_SUBSONIC_URL`. +- If any S1-capable device: register locally, validate end to end on LAN. +- **Exit test:** Hum appears as a source in the Sonos app; search + play works. + +### Phase 4 — S2 / internet exposure (if required) + +- Put bonob behind HTTPS reverse proxy with public DNS. +- Lock down firewall; keep shim + Hum private. +- Re-register service for S2; confirm Sonos cloud reaches bonob. +- **Exit test:** play from Sonos app on S2 hardware. + +### Phase 5 — Polish (code-able parts ✅ done) + +- ✅ scrobble (accepts/no-ops; Hum has no history to write). +- ✅ favourites — `star`/`unstar`/`getStarred2` over a shim-side JSON store + (`SHIM_DATA_DIR`), titles rendered from a recently-emitted cache (no + extraction). +- ✅ cover-art sizing — `size` selects an `i.ytimg` variant (no decode/dep). +- ✅ richer error envelopes — Hum 4xx (unplayable/region/live) → Subsonic 70, + 5xx/transport (Hum down) → 0, instead of bare 500s. +- ✅ ffmpeg lifecycle hardening — stderr logged on real failure; disconnect vs + failure distinguished via GeneratorExit/cancel, not returncode. +- ✅ transcode mode (b) seeking — `SHIM_SEEKABLE_REMUX` materializes the remux + to a cached `+faststart` file served with Range. **Off by default** (adds + first-byte latency); flip on and validate during the Sonos phase. 
+- **Queue-ahead prefetch — investigated, not built.** The per-track Subsonic + `stream` contract never exposes the next queue item to the shim, so there's + nothing to prefetch from; speculative warming would trigger expensive + extractions. The details cache (§5) remains the latency mitigation. +- Possible later phase: "Radio" browse entry backed by `/api/radio` if live + HLS → Sonos-safe stream proves workable. + +----- + +## 8. Risk register + +|Risk |Likelihood|Impact|Mitigation | +|-------------------------------------------------|----------|------|------------------------------------------------------------------------| +|Remuxed AAC rejected by Sonos |Low-Med |Med |Validate with Amperfy first; ADTS variant second; mp3 256k re-encode fallback| +|Range requests break seeking |High |Med |Ship range-ignoring first; temp-file mode later | +|ffmpeg process leaks |Med |Med |Tie process lifecycle to request (`body_iterator` pattern); reap on disconnect| +|Cold-extraction first-byte latency (seconds) |Med |Med |Shim details cache; queue-ahead prefetch; Hum single-flights extraction | +|pytubefix breaks (YouTube change) |Med |High |Inherited from Hum; fix isolated to `app/adapters/youtube.py` | +|Live hits leak into search results, fail playback|Med |Med |`_looks_live`-style filter in `search3` | +|Subsonic↔library model mismatch confuses Sonos UI|Med |Low |Keep hierarchy minimal; lead with Search | +|S2 internet exposure surface |Accepted |Med |Only bonob public; firewall to Sonos IPs; TLS; private shim/Hum | +|bonob version drift |Low |Low |Pin bonob image to a fixed `vX.Y.Z` tag | + +----- + +## 9. Decisions + +Resolved by the codebase audit: + +1. ~~Shim language~~ → **Python/FastAPI** (matches Hum; reuses its streaming + and config idioms). +2. ~~Shim location~~ → **sibling service in the Hum repo**. +3. ~~Transcode format~~ → **AAC remux first, mp3 256k re-encode fallback** (§4). +4. 
~~History/Recently-played~~ → **Hum has none.** Omit at Phase 1/2; optional + shim-side feature later (fed by `scrobble`). +5. ~~Auth model~~ → **shim implements Subsonic token auth.** bonob sends + **salted-MD5 token auth** by default — `t` (MD5 hex of `password + s`) + + `s` (random salt). The shim must implement the token-auth hash check + (`MD5(password + salt)`). Plain `p` (password in clear/hex) is acceptable + as a fallback for Amperfy dev mode only. Treating auth as fully optional + means bonob's credential handshake will silently fail. + +6. ~~Hierarchy scope~~ → **Search + Playlists.** search3 returns video songs + + playlist albums; `getAlbum`/`getPlaylist` expand a playlist's items. The + artist/album catalog (`getArtists`/`getAlbumList2`) is intentionally empty — + a search-centric source has no static library. + +----- + +## 10. Reference endpoints (Hum, verified against code) + +In scope for the shim: + +|Hum endpoint |Use in shim | +|--------------------------------------|-------------------------------------| +|`GET /api/search?q=&limit=&category=` |`search3` (filter live hits) | +|`GET /api/video/{id}` |signed audio/thumbnail URLs + metadata for `stream`/`getCoverArt`; **expensive — cache**| +|`GET /api/playlist/{id}` |`getPlaylist` | +|`GET /api/channel/{id}` |`getArtist` (optional) | +|`GET /proxy/audio/{id}?itag&exp&sig` |ffmpeg input for `stream` (range-aware)| +|`GET /proxy/thumbnail/{id}?itag=0&exp&sig`|`getCoverArt` (when details cached) | +|`GET /health` |shim → Hum liveness | + +Exists in Hum but **out of scope** for the shim (live/radio/HLS surfaces): + +|Hum endpoint |Why excluded | +|------------------------------------------|--------------------------------------| +|`GET /api/radio` |Live streams don't fit the Subsonic track model; possible Phase 5+| +|`GET /api/hls/{id}.m3u8` |Safari-specific byterange wrapper; Subsonic wants progressive streams| +|`GET /api/live/{id}/manifest.m3u8` |Live HLS; out of scope | +|`GET 
/proxy/live-segment/{id}` |Live HLS; out of scope | + +All Hum `/api/*` calls require `Authorization: Bearer <token>`; the shim holds +that token server-side and never exposes it to bonob/Sonos. diff --git a/docs/SONOS_VALIDATION.md b/docs/SONOS_VALIDATION.md new file mode 100644 index 0000000..3483403 --- /dev/null +++ b/docs/SONOS_VALIDATION.md @@ -0,0 +1,198 @@ +# Sonos shim — validation runbook (pick-up guide) + +Everything code-able is done and on **PR #2** (branch `smapi`). What remains is +**human/hardware validation** — things a test suite can't do: listening by ear, +running bonob, and playing through a real Sonos speaker. Work top to bottom; +each step says what to expect and what to capture if it fails. + +Status legend: ✅ done in code · 🎧 needs you (ear) · 📦 needs bonob · 🔊 needs Sonos hardware + +--- + +## 0. Where things stand + +- **Implemented + unit-tested (245 tests, green):** Subsonic `ping`, `getLicense`, + `search3`, `stream` (AAC remux → mp3 fallback), `getCoverArt`, browsing + (`getMusicFolders`/`getArtists`/`getIndexes`/`getAlbumList2`/`getPlaylists`/ + `getPlaylist`/`getAlbum`), favourites (`star`/`unstar`/`getStarred2`), + `scrobble`, **Internet Radio** (`getInternetRadioStations` + `/radio/{id}`), + XML+JSON responses, token + legacy auth. +- **Already validated this session:** Phase 0 ping; live integration suite + (search3 + stream→ffprobe); Amperfy **login works** after the XML fix; search + returns results; one track streamed end-to-end. +- **Not yet validated (this runbook):** Amperfy clean playback by ear; bonob; + Sonos browse + playback; Internet Radio playback (the big unknown); seekable + remux; S2 internet exposure. + +--- + +## 1. One-time `.env` settings + +Edit the repo-root `.env` (the shim and Hum share it): + +```bash +# Required +SHIM_SUBSONIC_PASSWORD=<your-password> # what bonob/Amperfy log in with (>=8 chars) + +# For Amperfy "legacy login" and curl poking (plain password). 
Turn OFF before any +# internet exposure (S2) — legacy sends the password in the clear. +SHIM_ALLOW_PLAIN_PASSWORD=true + +# For Internet Radio: the LAN address Sonos speakers can reach the shim at. +# Find your IP: ipconfig getifaddr en0 +SHIM_PUBLIC_URL=http://<lan-ip>:8001 + +# Optional: curated playlists for the Sonos Playlist shelf (comma-separated YT playlist IDs) +SHIM_PINNED_PLAYLISTS= + +# Optional: gives Sonos a seek bar (adds first-byte latency). Validate separately. +SHIM_SEEKABLE_REMUX=false +``` + +`SHIM_HUM_BEARER_TOKEN` is **not** needed on the same box — it falls back to `API_BEARER_TOKEN`. + +--- + +## 2. Start the stack (and the stale-process gotcha) + +Always pull and **restart both** so the running processes have current code — a +stale shim caused several false failures this session. + +```bash +cd /Volumes/Data/Workspace/dev/audio-tools/hum +git pull +uv sync --extra dev # refresh entry points after pulling + +# stop any old shim still holding :8001 +kill $(lsof -ti :8001) 2>/dev/null + +hum & # Hum on :8000 (MUST be up — the shim proxies it) +hum-shim & # shim on :8001 +``` + +Gotchas: +- If you see `address already in use` after "Application startup complete", a + stale shim is on :8001 — `kill $(lsof -ti :8001)` and retry. (uvicorn logs the + bind error *after* the startup line; misleading but harmless.) +- `hum shim` (space) is wrong and now errors — the shim is `hum-shim` (hyphen). +- Run from the repo root. Hum down → the shim returns clean "Hum unreachable" + errors, but nothing will play. + +--- + +## 3. 
Smoke test with curl (30 seconds, no client needed) + +```bash +P=<your-shim-password> +# XML by default (what Amperfy/bonob get) — should be a <subsonic-response status="ok" .../> element +curl -s "http://127.0.0.1:8001/rest/ping.view?u=hum&p=$P" +# JSON when asked +curl -s "http://127.0.0.1:8001/rest/ping.view?u=hum&p=$P&f=json" +# Root probe (Amperfy Auto-Detect needs this non-404) +curl -s "http://127.0.0.1:8001/" +# Search (slow: 6–60s, real pytubefix) +curl -s "http://127.0.0.1:8001/rest/search3?u=hum&p=$P&f=json&query=lofi" +# Radio shelf (needs Hum live search working) +curl -s "http://127.0.0.1:8001/rest/getInternetRadioStations?u=hum&p=$P&f=json" +``` + +Expect `status="ok"` envelopes; search returns `song` entries. If any return +`status="failed"`, the `error.message` says why (it surfaces Hum's error). + +--- + +## 4. Integration suite (automated end-to-end, with the stack up) + +```bash +pytest -m integration tests/shim/integration +``` + +Hits the running shim → Hum → YouTube, and ffprobes a stream as audio. Skips +cleanly if the shim or Hum is down. Green here = the server side is sound. + +--- + +## 5. 🎧 Amperfy (Phase 1/2 exit) — confirms playback by ear + +Amperfy is the trusted test client *before* Sonos. Note: Amperfy caches search +results into a local library (it's a syncing client) — that's an Amperfy trait, +**not** how Sonos behaves, so ignore the library "pollution" here. + +1. Add server: URL `http://<lan-ip>:8001` (or `localhost` if same machine), + user `hum`, your password. +2. **API mode: pick `Subsonic` or `Subsonic (legacy login)`** — not Ampache + (wrong protocol). Auto-Detect now works too. +3. Search a track → it appears under **Songs**. Play it. + +**Pass:** audio plays cleanly, no stutter/dropout. Try an AAC track (most are) +and confirm it sounds right. **This is the gate to proceed to Sonos.** + +Capture if it fails: the shim's log lines for that play (the `/rest/stream` call) +and whether audio started at all. + +--- + +## 6.
📦🔊 bonob + Sonos (Phase 3) — the real target + +bonob bridges Sonos (SMAPI/SOAP) to the shim (Subsonic). Unlike Amperfy, Sonos +keeps **no persistent library** — it browses live, so search doesn't accumulate. + +1. Run bonob (Docker, **pin a version tag**), pointed at the shim: + ``` + BNB_SUBSONIC_URL=http://<lan-ip>:8001 + BNB_SECRET=<secret> + BNB_URL=http://<lan-ip>:4534 # bonob's own LAN URL (S1/local) + ``` +2. Register bonob as a music service in the Sonos app (it prints setup steps). +3. In the Sonos app, open the Hum service and check each shelf: + +| Shelf | Expect | Backed by | +|---|---|---| +| **Search** | finds + plays tracks | `search3` → `stream` | +| **Playlists** | your `SHIM_PINNED_PLAYLISTS` + starred playlists | `getPlaylists` | +| **Favourites** | tracks/playlists you starred | `getStarred2` | +| **Internet Radio** | live stations from Hum `/api/radio` | `getInternetRadioStations` | +| Artists/Albums/Genres | **empty** (no catalog — expected) | — | + +**Pass:** Hum appears as a Sonos source; Search finds and **plays** a track on a +speaker. Capture if it fails: bonob's container logs + which shelf is empty/erroring. + +--- + +## 7. 🔊 Internet Radio playback — the known unknown + +Listing the stations is done; whether Sonos *plays* one is **unverified**. It +chains live YouTube → Hum HLS → ffmpeg mp3 → Sonos, and the last hop couldn't be +tested without hardware. + +- Requires `SHIM_PUBLIC_URL` set to a LAN IP (not `127.0.0.1`) so the speaker can + fetch `/radio/{id}` directly. +- Test: open **Internet Radio** in Sonos, pick a station, hit play. +- If it **doesn't** play: capture the shim log for the `/radio/` request and + run this to see if the pipe itself produces audio: + ```bash + curl -s "http://<lan-ip>:8001/radio/<id>" --max-time 10 -o /tmp/r.mp3 + ffprobe /tmp/r.mp3 # should show an mp3 audio stream + ``` + That isolates "shim can't produce the stream" from "Sonos won't accept it." + +--- + +## 8.
Optional / later + +- **Seekable remux (seek bar):** set `SHIM_SEEKABLE_REMUX=true`, restart, re-test + a track in Sonos/Amperfy. Trades first-byte latency for a seek bar. Falls back + to the streaming pipe if it fails. +- **S2 / internet exposure (Phase 4):** only if Sonos units are S2. Put **bonob + only** behind HTTPS with a public DNS name; keep shim + Hum LAN-only. Set + `SHIM_ALLOW_PLAIN_PASSWORD=false` and use token auth. (Spec §6.) + +--- + +## 9. If you hand it back to Claude + +For any failure, paste: (a) which step, (b) the shim log lines for the failing +request, (c) for bonob issues, the bonob container logs. The shim maps Hum errors +into the response `message`, so a `status="failed"` envelope usually names the +cause. Known perf note: search is slow (pytubefix, ~6–60s) and Amperfy fires one +search per keystroke — that's Hum-side latency, not a shim bug. diff --git a/pyproject.toml b/pyproject.toml index d77b26a..c894f53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,9 +41,10 @@ dev = [ [project.scripts] hum = "app.main:main" +hum-shim = "shim.main:main" [tool.setuptools.packages.find] -include = ["app*"] +include = ["app*", "shim*"] [tool.ruff] line-length = 100 diff --git a/shim/__init__.py b/shim/__init__.py new file mode 100644 index 0000000..4bb353e --- /dev/null +++ b/shim/__init__.py @@ -0,0 +1,6 @@ +"""hum-subsonic-shim — a Subsonic-compatible adapter in front of Hum. + +Sibling service to the Hum backend (spec: docs/SONOS_SPEC.md). bonob points +at this shim via BNB_SUBSONIC_URL; the shim translates the Subsonic subset +bonob calls into Hum API calls and remuxes/transcodes audio for Sonos. +""" diff --git a/shim/auth.py b/shim/auth.py new file mode 100644 index 0000000..80d258a --- /dev/null +++ b/shim/auth.py @@ -0,0 +1,70 @@ +"""Subsonic credential check: salted-MD5 token auth (spec §9.5). + +bonob sends `t` (MD5 hex of password + salt) and `s` (random salt) by +default. 
Plaintext `p` — optionally hex-encoded as `enc:` — is accepted +only when SHIM_ALLOW_PLAIN_PASSWORD is on (Amperfy dev mode). +""" +from __future__ import annotations + +import hashlib +import hmac + +from fastapi import Request + +from shim.config import ShimSettings, get_settings +from shim.subsonic import ( + MISSING_PARAMETER, + WRONG_CREDENTIALS, + SubsonicError, + set_response_format, +) + +_WRONG = "wrong username or password" + + +def _md5_hex(text: str) -> str: + # MD5 is mandated by the Subsonic token-auth scheme, not a security choice. + return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest() + + +def check_credentials( + settings: ShimSettings, + u: str | None, + p: str | None, + t: str | None, + s: str | None, +) -> None: + if not u: + raise SubsonicError(MISSING_PARAMETER, "missing required parameter: u") + if not hmac.compare_digest(u.encode(), settings.subsonic_user.encode()): + raise SubsonicError(WRONG_CREDENTIALS, _WRONG) + + if t is not None and s is not None: + expected = _md5_hex(settings.subsonic_password + s) + if not hmac.compare_digest(t.lower().encode(), expected.encode()): + raise SubsonicError(WRONG_CREDENTIALS, _WRONG) + return + + if p is not None and settings.allow_plain_password: + password = p + if p.startswith("enc:"): + try: + password = bytes.fromhex(p[4:]).decode() + except (ValueError, UnicodeDecodeError) as e: + raise SubsonicError(WRONG_CREDENTIALS, _WRONG) from e + if not hmac.compare_digest(password.encode(), settings.subsonic_password.encode()): + raise SubsonicError(WRONG_CREDENTIALS, _WRONG) + return + + raise SubsonicError( + MISSING_PARAMETER, "token auth required: pass t=md5(password+salt) and s=salt" + ) + + +async def require_subsonic_auth(request: Request) -> None: + """FastAPI dependency: every /rest endpoint authenticates, ping included. 
+ Also records the requested response format (runs before the handler, so + ok_response picks JSON vs XML correctly).""" + q = request.query_params + set_response_format(q.get("f")) + check_credentials(get_settings(), q.get("u"), q.get("p"), q.get("t"), q.get("s")) diff --git a/shim/config.py b/shim/config.py new file mode 100644 index 0000000..8f62573 --- /dev/null +++ b/shim/config.py @@ -0,0 +1,105 @@ +"""Shim settings loaded from environment variables (SHIM_* prefix).""" +from __future__ import annotations + +from functools import lru_cache +from pathlib import Path + +from pydantic import AliasChoices, Field, ValidationError +from pydantic_settings import BaseSettings, SettingsConfigDict + +# Anchor paths to the repo root (this file is /shim/config.py), not the +# current working directory — launching from a subdir must not lose settings. +_REPO_ROOT = Path(__file__).resolve().parents[1] +_ENV_FILE = str(_REPO_ROOT / ".env") +# Default location for shim-side runtime state (favourites). Gitignored. +DATA_DIR_DEFAULT = _REPO_ROOT / ".shim-data" + + +class ShimSettings(BaseSettings): + """Single source of truth for shim runtime configuration.""" + + model_config = SettingsConfigDict( + env_prefix="SHIM_", + env_file=_ENV_FILE, + case_sensitive=False, + extra="ignore", + populate_by_name=True, + ) + + # Upstream Hum — the bearer token lives here server-side and is never + # exposed to bonob/Sonos (spec §5). Falls back to Hum's own + # API_BEARER_TOKEN since the services share an .env in this repo. + hum_base_url: str = "http://127.0.0.1:8000" + hum_bearer_token: str = Field( + ..., + min_length=16, + validation_alias=AliasChoices("shim_hum_bearer_token", "api_bearer_token"), + ) + + # Subsonic credentials bonob/Amperfy authenticate with. bonob sends + # salted-MD5 token auth by default (spec §9.5); plaintext `p` is a + # dev-only fallback for Amperfy testing — keep it off in production. 
+ subsonic_user: str = "hum" + subsonic_password: str = Field(..., min_length=8) + allow_plain_password: bool = False + + # Server + host: str = "127.0.0.1" + port: int = 8001 + debug: bool = False + # Base URL Sonos speakers use to fetch radio streams directly (must be + # LAN-reachable, e.g. http://192.168.1.10:8001). Empty → http://host:port, + # which only works if host isn't 127.0.0.1. + public_url: str = "" + + # Audio delivery (spec §4) + ffmpeg_path: str = "ffmpeg" + mp3_bitrate_kbps: int = 256 + + # Shim-side state (favourites). Empty → DATA_DIR_DEFAULT (/.shim-data). + data_dir: str = "" + + # Curated playlists for the Sonos "Playlist" shelf — comma-separated YouTube + # playlist IDs. Combined with starred (pl:) playlists; Hum can't enumerate. + pinned_playlists: str = "" + + # Seekable remux (spec §4 mode (b)): materialize the remuxed fMP4 to a + # cached temp file and serve it with Range support (gives Sonos a seek bar) + # at the cost of first-byte latency. Off by default — the streaming pipe + # (mode (a)) stays the default; flip on and validate during the Sonos phase. 
+ seekable_remux: bool = False + temp_dir: str = "" # empty → DATA_DIR_DEFAULT/cache + temp_cache_mb: int = 512 + + # Details cache (spec §5): ttl = min(max_ttl, exp - now - safety) + details_cache_max_ttl_seconds: float = 1800.0 + details_cache_safety_seconds: float = 60.0 + + # Upstream HTTP + upstream_connect_timeout: float = 10.0 + upstream_read_timeout: float = 30.0 + + log_level: str = "INFO" + + def pinned_playlist_ids(self) -> list[str]: + return [p.strip() for p in self.pinned_playlists.split(",") if p.strip()] + + def public_base_url(self) -> str: + return (self.public_url or f"http://{self.host}:{self.port}").rstrip("/") + + +@lru_cache +def get_settings() -> ShimSettings: + try: + return ShimSettings() # type: ignore[call-arg] + except ValidationError as e: + missing = sorted( + str(err["loc"][0]).upper() for err in e.errors() if err["type"] == "missing" + ) + if missing: + names = ", ".join(n if n.startswith("SHIM_") else f"SHIM_{n}" for n in missing) + # A concise exit beats a pydantic traceback that echoes .env values. + raise SystemExit( + f"hum-subsonic-shim: missing required settings: {names} (see .env.example)" + ) from e + raise diff --git a/shim/hum_client.py b/shim/hum_client.py new file mode 100644 index 0000000..7a252cc --- /dev/null +++ b/shim/hum_client.py @@ -0,0 +1,254 @@ +"""HTTP adapter to the Hum backend — the only place the bearer token lives. + +Also owns the details cache (spec §5): /api/video/{id} is the expensive call +(cold pytubefix extraction takes seconds), so responses are cached with +ttl = min(max_ttl, exp − now − safety) derived from the signed URLs' `exp`, +and cover art is sourced without ever triggering an extraction (spec §3.5). 
+""" +from __future__ import annotations + +import contextlib +import time +from urllib.parse import parse_qs, urljoin, urlparse, urlunparse + +import httpx + +from shim.config import get_settings +from shim.models import HumPlaylistInfo, HumSearchHit, HumVideoDetails +from shim.subsonic import GENERIC, NOT_FOUND, SubsonicError + +# YouTube thumbnail hosts and the always-present size variants. We cap at +# hqdefault (480x360): sddefault/maxresdefault 404 for many uploads, and a 404 +# here would surface as "no cover art". default/mqdefault/hqdefault always exist. +_YTIMG_HOSTS = {"i.ytimg.com", "img.youtube.com", "i9.ytimg.com"} +_YTIMG_EXTS = (".jpg", ".jpeg", ".webp") + + +def _exp_param(url: str) -> float | None: + values = parse_qs(urlparse(url).query).get("exp") + if not values: + return None + try: + return float(values[0]) + except ValueError: + return None + + +def _ytimg_variant(url: str, size: int | None) -> str: + """Rewrite a YouTube thumbnail URL to the variant nearest `size` (spec §3.5 + cover-art sizing without decoding/resizing). Non-ytimg URLs pass through.""" + if size is None: + return url + parsed = urlparse(url) + if parsed.hostname not in _YTIMG_HOSTS: + return url + base, sep, name = parsed.path.rpartition("/") + if not sep or "." 
not in name or not name.lower().endswith(_YTIMG_EXTS): + return url + ext = name[name.rfind(".") :] + variant = "default" if size <= 120 else "mqdefault" if size <= 320 else "hqdefault" + return urlunparse(parsed._replace(path=f"{base}/{variant}{ext}")) + + +def _raise_for_hum_error(r: httpx.Response) -> None: + if r.status_code == 200: + return + code = "" + message = "" + with contextlib.suppress(ValueError, KeyError, TypeError): + body = r.json() + code = str(body.get("error", "")) + message = str(body.get("message", "")) + detail = message or f"HTTP {r.status_code}" + if code: + detail = f"{detail} ({code})" + # Client-side conditions (not found / unplayable / region-locked / live) + # → Subsonic 70; transient/upstream → generic 0. + if r.status_code in (403, 404, 410, 415, 422, 451): + raise SubsonicError(NOT_FOUND, detail) + raise SubsonicError(GENERIC, f"Hum upstream error {r.status_code}: {detail}") + + +class HumClient: + def __init__( + self, + base_url: str, + bearer_token: str, + *, + connect_timeout: float, + read_timeout: float, + cache_max_ttl: float, + cache_safety: float, + ) -> None: + timeout = httpx.Timeout(connect_timeout, read=read_timeout) + self._base_url = base_url.rstrip("/") + self._hum = httpx.AsyncClient( + base_url=self._base_url, + headers={"Authorization": f"Bearer {bearer_token}"}, + timeout=timeout, + ) + # External fetches (raw i.ytimg thumbnails) must not carry the Hum token. + self._ext = httpx.AsyncClient(timeout=timeout) + self._cache_max_ttl = cache_max_ttl + self._cache_safety = cache_safety + self._details: dict[str, tuple[HumVideoDetails, float]] = {} + # Keyed by full Subsonic id ("vid:" / "pl:") so cover art for + # both videos and playlists is served without triggering extraction. 
+ self._art_urls: dict[str, str] = {} + + async def close(self) -> None: + await self._hum.aclose() + await self._ext.aclose() + + def absolute(self, signed_path: str) -> str: + """Resolve a Hum-relative signed URL against the Hum base.""" + return urljoin(self._base_url + "/", signed_path.lstrip("/")) + + async def _get( + self, client: httpx.AsyncClient, url: str, *, params: dict[str, str | int] | None = None + ) -> httpx.Response: + """GET that turns a transport failure (Hum down/unreachable) into a + Subsonic error envelope instead of letting httpx.RequestError escape as + a 500 — caught by the integration harness with Hum stopped.""" + try: + return await client.get(url, params=params) + except httpx.RequestError as e: + raise SubsonicError(GENERIC, f"Hum unreachable: {e.__class__.__name__}") from e + + # ----- search ------------------------------------------------------- + + async def search(self, q: str, limit: int) -> list[HumSearchHit]: + r = await self._get(self._hum, "/api/search", params={"q": q, "limit": limit}) + _raise_for_hum_error(r) + hits = [HumSearchHit.model_validate(item) for item in r.json()["items"]] + # Remember raw thumbnail URLs so getCoverArt never needs an extraction. + for hit in hits: + if not hit.thumbnail_url: + continue + if hit.kind == "video": + self._art_urls[f"vid:{hit.id}"] = hit.thumbnail_url + elif hit.kind == "playlist": + self._art_urls[f"pl:{hit.id}"] = hit.thumbnail_url + return hits + + # ----- radio (live music streams) ------------------------------------- + + async def radio(self, limit: int) -> list[HumSearchHit]: + r = await self._get(self._hum, "/api/radio", params={"limit": limit}) + _raise_for_hum_error(r) + return [HumSearchHit.model_validate(item) for item in r.json()["items"]] + + async def live_manifest_url(self, video_id: str) -> str: + """Absolute, signed HLS manifest URL for a live video — ffmpeg input + for the radio stream. 
Raises NOT_FOUND if the video isn't live.""" + r = await self._get(self._hum, f"/api/video/{video_id}") + _raise_for_hum_error(r) + details = HumVideoDetails.model_validate(r.json()) + if not details.is_live or not details.live_stream_url: + raise SubsonicError(NOT_FOUND, f"{video_id} is not a live stream") + return self.absolute(details.live_stream_url) + + # ----- playlist ------------------------------------------------------- + + async def playlist(self, playlist_id: str) -> HumPlaylistInfo: + r = await self._get(self._hum, f"/api/playlist/{playlist_id}") + _raise_for_hum_error(r) + info = HumPlaylistInfo.model_validate(r.json()) + # Remember item thumbnails so per-track getCoverArt stays extraction-free. + for item in info.items: + if item.thumbnail_url: + self._art_urls.setdefault(f"vid:{item.video_id}", item.thumbnail_url) + return info + + # ----- video details (cached) ---------------------------------------- + + async def video_details(self, video_id: str) -> HumVideoDetails: + now = time.time() + cached = self._details.get(video_id) + if cached and cached[1] > now: + return cached[0] + r = await self._get(self._hum, f"/api/video/{video_id}") + _raise_for_hum_error(r) + details = HumVideoDetails.model_validate(r.json()) + ttl = self._cache_ttl(details, now=now) + if ttl > 0: + self._details[video_id] = (details, now + ttl) + return details + + def _cache_ttl(self, details: HumVideoDetails, *, now: float) -> float: + """Eviction rule from spec §5: min(max_ttl, exp − now − safety).""" + exps = [e for f in details.audio_formats if (e := _exp_param(f.url)) is not None] + if not exps: + return 0.0 + return min(self._cache_max_ttl, min(exps) - now - self._cache_safety) + + # ----- cover art ------------------------------------------------------ + + async def fetch_art( + self, kind: str, value: str, size: int | None = None + ) -> tuple[bytes, str]: + """Fetch cover art bytes server-side (spec §3.5) — never an extraction. 
+ + Video: cached signed Hum thumbnail → remembered search-hit thumbnail → + the predictable i.ytimg URL. Playlist: remembered playlist thumbnail → + the playlist's first item thumbnail (cheap /api/playlist call). `size` + selects a ytimg variant where applicable; signed Hum thumbnails are + served as-is (resizing them would mean decoding). + """ + if kind == "video": + url, client = self._video_art_source(value) + elif kind == "playlist": + url, client = await self._playlist_art_source(value) + else: + raise SubsonicError(NOT_FOUND, f"no cover art for {kind} ids") + r = await self._get(client, _ytimg_variant(url, size)) + if r.status_code != 200: + raise SubsonicError(NOT_FOUND, f"cover art unavailable for {kind}:{value}") + return r.content, r.headers.get("content-type", "image/jpeg") + + def _video_art_source(self, video_id: str) -> tuple[str, httpx.AsyncClient]: + now = time.time() + cached = self._details.get(video_id) + if cached and cached[1] > now and cached[0].thumbnail_url: + return self.absolute(cached[0].thumbnail_url), self._hum + remembered = self._art_urls.get(f"vid:{video_id}") + if remembered: + return remembered, self._ext + return f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg", self._ext + + async def _playlist_art_source(self, playlist_id: str) -> tuple[str, httpx.AsyncClient]: + remembered = self._art_urls.get(f"pl:{playlist_id}") + if remembered: + return remembered, self._ext + info = await self.playlist(playlist_id) + for item in info.items: + if item.thumbnail_url: + return item.thumbnail_url, self._ext + raise SubsonicError(NOT_FOUND, f"no cover art for playlist {playlist_id}") + + +# ----- module-level singleton (mirrors app/adapters/upstream_http.py) ------- + +_client: HumClient | None = None + + +def get_client() -> HumClient: + global _client + if _client is None: + s = get_settings() + _client = HumClient( + s.hum_base_url, + s.hum_bearer_token, + connect_timeout=s.upstream_connect_timeout, + read_timeout=s.upstream_read_timeout, 
+ cache_max_ttl=s.details_cache_max_ttl_seconds, + cache_safety=s.details_cache_safety_seconds, + ) + return _client + + +async def close_client() -> None: + global _client + if _client is not None: + await _client.close() + _client = None diff --git a/shim/ids.py b/shim/ids.py new file mode 100644 index 0000000..d264188 --- /dev/null +++ b/shim/ids.py @@ -0,0 +1,55 @@ +"""Stable Subsonic↔YouTube ID scheme: vid: