From dae3803e7e2dec142693b070bcc06c968dcf7762 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 1 May 2026 14:03:16 +0000 Subject: [PATCH 1/3] [minor] simplify v1 release surface --- .dockerignore | 1 - .github/workflows/{ci.yml => build-push.yaml} | 0 .github/workflows/github-release.yaml | 1 + Dockerfile | 5 + README.md | 6 + config.example.yaml | 66 +++---- deploy/compose/config.yaml | 17 +- docs/authorization.md | 11 +- docs/caching.md | 39 ++-- docs/configuration.md | 53 +++--- docs/conformance.md | 13 +- docs/deploys.md | 21 --- docs/formats.md | 12 +- docs/index.md | 8 +- docs/libvips.md | 2 +- go.mod | 46 +---- go.sum | 106 +---------- internal/cache/cache.go | 3 +- internal/cache/gcs.go | 106 ----------- internal/cache/gcs_test.go | 20 -- internal/config/bytesize.go | 75 ++++++++ internal/config/config.go | 68 +------ internal/config/config_test.go | 122 ++++-------- .../iiif/auth/v2/authorizer/authorizer.go | 31 --- internal/iiif/auth/v2/handler/handler.go | 176 ------------------ internal/iiif/auth/v2/handler/handler_test.go | 79 -------- internal/iiif/auth/v2/types/types.go | 25 --- .../presentation/v3/handler/handler_test.go | 31 ++- internal/iiif/presentation/v3/types/types.go | 4 +- internal/iiif/search/v2/handler/handler.go | 111 ----------- .../iiif/search/v2/handler/handler_test.go | 97 ---------- internal/iiif/search/v2/searcher/searcher.go | 31 --- internal/iiif/search/v2/types/types.go | 33 ---- internal/server/server.go | 72 ++----- internal/storage/gcs.go | 168 ----------------- internal/storage/gcs_test.go | 23 --- internal/storage/opener.go | 2 +- scripts/benchmark-iiif.sh | 6 +- 38 files changed, 282 insertions(+), 1408 deletions(-) rename .github/workflows/{ci.yml => build-push.yaml} (100%) delete mode 100644 internal/cache/gcs.go delete mode 100644 internal/cache/gcs_test.go create mode 100644 internal/config/bytesize.go delete mode 100644 internal/iiif/auth/v2/authorizer/authorizer.go delete mode 100644 internal/iiif/auth/v2/handler/handler.go delete mode 100644 internal/iiif/auth/v2/handler/handler_test.go delete mode 100644 internal/iiif/auth/v2/types/types.go delete mode 100644 internal/iiif/search/v2/handler/handler.go delete mode 100644 internal/iiif/search/v2/handler/handler_test.go delete mode 100644 internal/iiif/search/v2/searcher/searcher.go delete mode 100644 internal/iiif/search/v2/types/types.go delete mode 100644 internal/storage/gcs.go delete mode 100644 internal/storage/gcs_test.go diff --git a/.dockerignore b/.dockerignore index 0066bf5..7ec282b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,7 +6,6 @@ results site deploy/compose/cache -deploy/compose/images deploy/compose/presentation deploy/compose/source-cache diff --git a/.github/workflows/ci.yml b/.github/workflows/build-push.yaml similarity index 100% rename from .github/workflows/ci.yml rename to .github/workflows/build-push.yaml diff --git a/.github/workflows/github-release.yaml b/.github/workflows/github-release.yaml index f489a79..b692f5d 100644 --- a/.github/workflows/github-release.yaml +++ b/.github/workflows/github-release.yaml @@ -12,6 +12,7 @@ jobs: if: github.event.pull_request.merged == true && !contains(github.event.pull_request.title, 'skip-release') uses: libops/actions/.github/workflows/bump-release.yaml@ef667db8c16533a257d841e75df5c3388152b2d7 # main with: + workflow_file: build-push.yaml prefix: v permissions: contents: write diff --git a/Dockerfile b/Dockerfile index 3ff2793..8c0c112 100644 --- a/Dockerfile +++ b/Dockerfile @@ -216,6 +216,11 @@ RUN rm -rf \ && groupadd --system triplet \ && useradd --system --gid triplet --uid 100 --home-dir /nonexistent --shell /usr/sbin/nologin triplet +WORKDIR /var/lib/triplet +RUN mkdir -p /var/lib/triplet/cache /var/lib/triplet/testdata/images \ + && chown -R triplet:triplet /var/lib/triplet +COPY --chown=triplet:triplet deploy/compose/images/ /var/lib/triplet/testdata/images/ + COPY --from=build /out/triplet /usr/local/bin/triplet COPY --from=build /out/triplet-healthcheck /usr/local/bin/triplet-healthcheck COPY config.example.yaml /etc/triplet/config.yaml diff --git a/README.md b/README.md index 07d4b62..53669ad 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,12 @@ All image processing is done by [libvips] through [govips]. docker run -p 8080:8080 ghcr.io/libops/triplet:main ``` +Then try the bundled sample image: + +```bash +curl http://localhost:8080/iiif/3/sample.png/info.json +``` + ## Documentation The project documentation lives at . diff --git a/config.example.yaml b/config.example.yaml index c2b3c40..db016d5 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -52,8 +52,8 @@ vips: - VipsForeignLoadPdf iiif: - # Optional shared CORS allowlist for IIIF Presentation, Search, Auth, and - # Image unless iiif.image.allowed_origins is set. Entries must be exact + # Optional shared CORS allowlist for IIIF Presentation and Image unless + # iiif.image.allowed_origins is set. Entries must be exact # origins (`https://viewer.example.edu`) or `*`. # When empty, no Access-Control-Allow-Origin header is emitted. # allowed_origins: @@ -77,10 +77,13 @@ iiif: max_source_pixels: 250000000 # Refuse or stop spooling encoded source files larger than this many bytes # when the source is not already available as a file path. 0 disables. - max_source_bytes: 1073741824 - # Refuse encoded derivatives larger than this many bytes after libvips - # export. 0 disables. - max_derivative_bytes: 536870912 + max_source_bytes: 1GiB + # Per-request encoded response limit. Refuse one generated derivative if it + # is larger than this many bytes after libvips export. This protects the + # server from returning or caching a single unexpectedly huge response. + # This is not the total cache size; see cache.max_bytes for the aggregate + # filesystem derivative-cache budget. 0 disables. + max_derivative_bytes: 512MiB # Bound concurrent libvips jobs across image derivatives and info probes. max_concurrent_transforms: 4 # Advertise additional transform limits in info.json so clients can avoid @@ -91,8 +94,14 @@ iiif: # Cantaloupe behavior. `normalize` converts to sRGB/gray. `none` skips # profile conversion and strips metadata where the output codec supports it. color_management: preserve - # `auto` uses random access for region crops and sequential access for - # full/resize requests. You can force sequential or random for profiling. + # How libvips should read source pixels from disk or spooled source files. + # `auto` is the production default: it uses random access for region crops + # and sequential access for full-image or resize requests. Sequential access + # streams forward and can reduce memory and I/O for whole-image reads, but it + # is a poor fit for tile/region workloads that need pixels from arbitrary + # offsets. Random access is better for crops and tiled viewers, but can do + # unnecessary work for simple full-image derivatives. Force `sequential` or + # `random` only when profiling a specific deployment or source format. load_access: auto # Cache info.json dimensions by identifier plus source mtime/size. info_dimension_cache: true @@ -119,20 +128,9 @@ iiif: # Bearer token. Prefer injecting this from the environment. write_enabled: false # write_token: ${TRIPLET_PRESENTATION_WRITE_TOKEN} - search: - # IIIF Content Search 2.0 surface. The default backend is a no-op that - # returns an empty AnnotationPage; indexing adapters are future work. - enabled: false - prefix: /search/v2 - auth: - # IIIF Authorization Flow API 2.0 surface. No production authorizer is - # built in yet; permit-all must be explicitly enabled for development. - enabled: false - prefix: /auth/v2 - development_permit_all: false # Identifier resolution. Exactly one source must be the default; additional -# sources are selected by identifier scheme (e.g. `https://…`, `gs://…`). +# sources are selected by identifier scheme (e.g. `https://…`). sources: default: file file: @@ -180,27 +178,23 @@ sources: # allowed_origins: [https://islandora-stage.lib.lehigh.edu] # allow_private_hosts: false # request_timeout: 2m - # max_bytes: 52428800 - # gcs: - # bucket_url: gs://my-bucket - # prefix: images + # max_bytes: 50MiB cache: - # Derivative cache. Configure either a filesystem root or a blob bucket URL. + # Derivative cache. Configure a filesystem root. root: /var/lib/triplet/cache - # bucket_url: gs://triplet-cache - # prefix: derivatives - # Best-effort eviction target for the file cache. 0 disables size-based - # eviction. - max_bytes: 1073741824 - # Optional source cache for fetched source bytes (primarily HTTP - # identifiers). Configure either a filesystem root or a blob bucket URL. + # Best-effort aggregate size target for all cached derivative payload files + # under cache.root. This controls retained cache footprint over time, not the + # size of any single generated response. A write may temporarily exceed this + # target before eviction runs, and metadata sidecar files are not counted. + # 0 disables size-based eviction. + max_bytes: 500GiB + # Optional filesystem source cache for fetched source bytes (primarily HTTP + # identifiers). # source_root: /var/lib/triplet/source-cache - # source_bucket_url: gs://triplet-source-cache - # source_prefix: sources # Best-effort eviction target for the source cache. 0 disables size-based # eviction. - source_max_bytes: 1073741824 + source_max_bytes: 1GiB # When non-zero, stale source-cache hits are served immediately while a # background refresh fetches a fresh copy for later requests. source_stale_after: 24h @@ -210,7 +204,7 @@ extensions: # → encoded derivative. Same pipeline as the spec routes. transform: enabled: true - max_upload_bytes: 52428800 # 50 MiB + max_upload_bytes: 50MiB # Non-spec endpoint: POST bytes → mints an opaque identifier resolvable # via the standard /iiif/3/{id}/... routes. uploads: diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml index fb7ac40..83167b5 100644 --- a/deploy/compose/config.yaml +++ b/deploy/compose/config.yaml @@ -20,8 +20,8 @@ iiif: max_output_pixels: 100000000 allow_unsafe_unlimited_output_pixels: false max_source_pixels: 250000000 - max_source_bytes: 1073741824 - max_derivative_bytes: 536870912 + max_source_bytes: 1GiB + max_derivative_bytes: 512MiB max_concurrent_transforms: 4 color_management: preserve load_access: auto @@ -32,13 +32,6 @@ iiif: root: /var/lib/triplet/presentation write_enabled: ${TRIPLET_PRESENTATION_WRITE_ENABLED} write_token: "${TRIPLET_PRESENTATION_WRITE_TOKEN}" - search: - enabled: false - prefix: /search/v2 - auth: - enabled: false - prefix: /auth/v2 - development_permit_all: false sources: default: file @@ -52,13 +45,13 @@ sources: # allowed_origins: [https://images.example.org] # allow_private_hosts: false # request_timeout: 2m - # max_bytes: 52428800 + # max_bytes: 50MiB cache: root: /var/lib/triplet/cache - max_bytes: 1073741824 + max_bytes: 500GiB source_root: /var/lib/triplet/source-cache - source_max_bytes: 1073741824 + source_max_bytes: 1GiB source_stale_after: 24h extensions: diff --git a/docs/authorization.md b/docs/authorization.md index 44eb690..90e51cb 100644 --- a/docs/authorization.md +++ b/docs/authorization.md @@ -119,7 +119,7 @@ flowchart TD deriv -- No --> transform[Transform source] --> store[Store if cacheable] --> serve([Serve derivative]) ``` -## IIIF Authorization Flow terminology +## Source authorization terminology Triplet's local URL `auth_probe` is a server-side source authorization check. It is related to, but not the same thing as, an IIIF Authorization Flow API 2.0 @@ -137,12 +137,7 @@ Triplet's `auth_probe` uses the same idea internally: before serving a local fil or a cached derivative, Triplet asks the original source URL what status this request would receive. The source response remains authoritative. -The IIIF auth service declarations can inform viewers and Presentation API -responses, but they should not be treated as a standalone filesystem bypass -inside the Image API path. A single Image API request does not necessarily carry -the Manifest context that referenced it, manifests can be stale, and multiple -manifests can point at the same image service with different access stories. For -Triplet's local-file shortcut, the safe optimization is still based on the source +For Triplet's local-file shortcut, the safe optimization is based on the source authorization result: - If the source allows anonymous access, Triplet can cache that anonymous allow @@ -205,5 +200,5 @@ sources: - https://repository.example.edu allow_private_hosts: false request_timeout: 2m - max_bytes: 52428800 + max_bytes: 50MiB ``` diff --git a/docs/caching.md b/docs/caching.md index 0ea0460..536b6ba 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -7,24 +7,26 @@ authorization decision. ## Derivative cache -Configure either a filesystem root or a blob bucket URL for encoded IIIF image -responses. This is the main cache for public IIIF traffic: once a derivative is -generated, later requests for the same identifier, source version, region, size, -rotation, quality, and format can be served without running libvips again. - -GCS bucket configuration is implemented but has not yet been deployment-tested. +Configure a filesystem root for encoded IIIF image responses. This is the main +cache for public IIIF traffic: once a derivative is generated, later requests +for the same identifier, source version, region, size, rotation, quality, and +format can be served without running libvips again. ```yaml cache: root: /var/lib/triplet/cache - # bucket_url: gs://triplet-cache - # prefix: derivatives - max_bytes: 1073741824 + max_bytes: 500GiB ``` -`max_bytes` is a best-effort filesystem eviction target. It does not apply to -GCS-backed caches; use object lifecycle policy or bucket tooling there. Failed -transforms and HTTP error responses are not stored. +`max_bytes` is a best-effort filesystem eviction target. Failed transforms and +HTTP error responses are not stored. + +`cache.max_bytes` is the approximate total retained size of derivative payload +files under `cache.root`. It is different from +`iiif.image.max_derivative_bytes`, which limits one generated response before it +can be returned or cached. A cache write can temporarily exceed `cache.max_bytes` +before eviction runs, and metadata sidecar files are not counted toward the +target. ### Derivative invalidation @@ -61,15 +63,10 @@ fetch repeatedly, or slower than Triplet's local cache storage. It does not replace the HTTP source allowlist: cache fills still pass through the same host checks. -GCS-backed source cache configuration should be treated as untested until it has -been exercised in a real deployment. - ```yaml cache: source_root: /var/lib/triplet/source-cache - # source_bucket_url: gs://triplet-source-cache - # source_prefix: sources - source_max_bytes: 1073741824 + source_max_bytes: 1GiB source_stale_after: 24h ``` @@ -80,7 +77,7 @@ in the background. Upstream 4xx/5xx responses are not stored. Local URL mappings with `auth_probe: true` cache anonymous and credentialed source authorization decisions in process. See [Authorization](authorization.md) -for the full auth-probe flow, IIIF Authorization Flow terminology, and TTL +for the full auth-probe flow, source authorization terminology, and TTL behavior. ## In-process caches @@ -119,8 +116,8 @@ derivative and source caches. | Layer | Configuration | What is cached | Invalidation / freshness | |---|---|---|---| -| Derivative cache | `cache.root` or `cache.bucket_url`; optional `cache.max_bytes`, `cache.prefix`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. Filesystem caches can evict best-effort by size; GCS/object lifecycle is external. Failed transforms and HTTP error responses are not stored. | -| HTTP source cache | `cache.source_root` or `cache.source_bucket_url`; optional `cache.source_max_bytes`, `cache.source_prefix`, `cache.source_stale_after` | Original source bytes fetched through the HTTP source backend. | Keys are source identifiers. When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. | +| Derivative cache | `cache.root`; optional `cache.max_bytes`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget; `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. | +| HTTP source cache | `cache.source_root`; optional `cache.source_max_bytes`, `cache.source_stale_after` | Original source bytes fetched through the HTTP source backend. | Keys are source identifiers. When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. | | `info.json` dimension cache | `iiif.image.info_dimension_cache` | Source dimensions used to build Image API `info.json`. | In-memory only. Entries are keyed by identifier plus source size/modtime metadata, so source changes with updated metadata miss the cache. | | Local URL auth-probe cache | `sources.file.url_mappings[].auth_*` | Authorization probe results for local URL mappings with `auth_probe: true`. Anonymous and credentialed probes are cached separately. See [Authorization](authorization.md). | In-memory only. Tier defaults are 5 minutes unless overridden by `auth_anonymous_cache_ttl`, `auth_authenticated_cache_ttl`, or `auth_cache_ttl`. The image cache invalidation route also clears matching auth-probe entries when the source backend supports it. | | libvips operation cache | `vips.cache_max_mem`, `vips.cache_max_files` | libvips in-process operation results. | Disabled by default in the example config. This is process-local and separate from Triplet's derivative/source caches. | diff --git a/docs/configuration.md b/docs/configuration.md index 857e857..268ffcc 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -10,6 +10,10 @@ server: public_base_url: "${TRIPLET_PUBLIC_BASE_URL}" ``` +Byte-size fields accept either raw bytes or unit strings such as `50MiB`, +`1GiB`, and `500GiB`. Binary units use powers of 1024 (`KiB`, `MiB`, `GiB`); +decimal units use powers of 1000 (`KB`, `MB`, `GB`). + ## Server The server section controls the HTTP listener and the public URL used to build @@ -41,8 +45,8 @@ metrics: ## IIIF services -The Image API is enabled by default. Presentation, Search, and Auth are -separate surfaces and can be enabled independently. +The Image API is enabled by default. Presentation is a separate surface and can +be enabled independently. ```yaml iiif: @@ -54,14 +58,12 @@ iiif: presentation: enabled: false prefix: /presentation/v3 - search: - enabled: false - prefix: /search/v2 - auth: - enabled: false - prefix: /auth/v2 ``` +When Presentation is enabled, Triplet exposes `ETag` through CORS so browser +annotation editors can read it and send optimistic-concurrency writes with +`If-Match`. + ## Image safety limits These limits bound libvips request work and protect public deployments from @@ -74,8 +76,8 @@ iiif: max_output_pixels: 100000000 allow_unsafe_unlimited_output_pixels: false max_source_pixels: 250000000 - max_source_bytes: 1073741824 - max_derivative_bytes: 536870912 + max_source_bytes: 1GiB + max_derivative_bytes: 512MiB ``` `max_output_pixels` is the decoded derivative size limit after the IIIF region, @@ -88,9 +90,12 @@ pixels for public HTTP deployments. `max_source_pixels` rejects sources whose decoded width multiplied by height is too large before Triplet transforms them. The default is 250,000,000 pixels. `max_source_bytes` applies while Triplet is reading an encoded source that is -not already available as a local file path. `max_derivative_bytes` applies after -export and prevents returning very large encoded derivatives. Both byte limits -default to the values shown above. +not already available as a local file path. + +`max_derivative_bytes` is a per-request encoded response limit. It applies after +libvips export and prevents returning or caching one unexpectedly large +derivative response. It is not the total derivative cache budget; `cache.max_bytes` +controls the aggregate filesystem cache footprint. ## Image processing @@ -118,8 +123,14 @@ profile or normalizes pixels before encoding the derivative: | `normalize` | Optimizes the embedded ICC profile, then converts supported non-sRGB color images to sRGB. Grayscale images remain grayscale. Exported derivatives strip metadata where the codec supports stripping. | Best for web-oriented delivery when predictable browser display is more important than retaining source profiles. It can change pixel values by design, adds conversion cost, and may remove metadata/profiles from derivatives. | | `none` | Does not convert color space and asks the encoder to strip metadata where supported. | Best when derivatives should avoid metadata/profile retention but you do not want Triplet to alter pixel values through color conversion. Non-sRGB images remain non-sRGB, so display still depends on client interpretation. | -`load_access: auto` uses random access for region crops and sequential access for -full-image or resize requests. +`load_access` controls how libvips reads pixels from disk or spooled source +files: + +| Value | Behavior | When to use | +|---|---|---| +| `auto` | Default. Uses random access for region crops and sequential access for full-image or resize requests. | Best production default for mixed IIIF viewer traffic. | +| `sequential` | Streams source pixels forward. | Useful for profiling whole-image derivatives or source formats where sequential reads are materially cheaper. Poor fit for tile-heavy crop workloads. | +| `random` | Allows libvips to seek around the source. | Useful for profiling tile and region workloads. Can do unnecessary work for simple full-image derivatives. | ## Advertised image limits @@ -145,8 +156,8 @@ Cache-related settings, including derivative caches, source caches, ## Source selection -Exactly one source is the default. Additional sources are selected by identifier -scheme, such as `https://...` or `gs://...`. +Exactly one source is the default. HTTP sources are selected by URL identifier +schemes such as `https://...`. ```yaml sources: @@ -158,11 +169,7 @@ sources: - https://repository.example.edu allow_private_hosts: false request_timeout: 2m - max_bytes: 52428800 - gcs: - # Implemented, but not deployment-tested yet. - bucket_url: gs://my-bucket - prefix: images + max_bytes: 50MiB ``` ## Local URL mappings @@ -209,7 +216,7 @@ sources: - https://repository.example.edu allow_private_hosts: false request_timeout: 2m - max_bytes: 52428800 + max_bytes: 50MiB ``` The HTTP host allowlist is a source-fetch boundary. See diff --git a/docs/conformance.md b/docs/conformance.md index 1b15ba8..d47314e 100644 --- a/docs/conformance.md +++ b/docs/conformance.md @@ -15,8 +15,7 @@ imported wire types where the server needs stable names or extension fields beyond the upstream schemas. Triplet also tracks extension support in code and tests. In particular, the -Presentation annotation path validates the IIIF Text Granularity extension, and -the Search 2.0 route exposes a default no-op Content Search surface. +Presentation annotation path validates the IIIF Text Granularity extension. The IIIF API surfaces are configured independently: @@ -31,11 +30,7 @@ iiif: root: ./testdata/presentation # dsn: scribe:scribe@tcp(mariadb:3306)/scribe?parseTime=true write_enabled: false - search: - enabled: false - prefix: /search/v2 - auth: - enabled: false - prefix: /auth/v2 - development_permit_all: false ``` + +Presentation annotation writes use strong ETags and require `If-Match`, and the +Presentation CORS policy exposes `ETag` for browser-based annotation editors. diff --git a/docs/deploys.md b/docs/deploys.md index ddf0e6e..a738ce3 100644 --- a/docs/deploys.md +++ b/docs/deploys.md @@ -9,27 +9,6 @@ server: public_base_url: "${TRIPLET_PUBLIC_BASE_URL}" ``` -GCS support is implemented as a storage/cache backend without leaking Google -APIs above the storage abstraction. This has not yet been deployed against GCS, -so treat the backend as untested until it has been exercised in a real -deployment. - -AWS/S3 is intentionally out of scope for this spike. - -```yaml -sources: - gcs: - # Implemented, but not deployment-tested yet. - bucket_url: gs://my-bucket - prefix: images - -cache: - bucket_url: gs://triplet-cache - prefix: derivatives - source_bucket_url: gs://triplet-source-cache - source_prefix: sources -``` - The runtime exposes Prometheus metrics at `/metrics` when `metrics.enabled` is true. diff --git a/docs/formats.md b/docs/formats.md index 3b77bc3..a46e1b3 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -12,16 +12,14 @@ iiif: prefix: /iiif/3 max_output_pixels: 100000000 max_source_pixels: 250000000 - max_source_bytes: 1073741824 - max_derivative_bytes: 536870912 + max_source_bytes: 1GiB + max_derivative_bytes: 512MiB color_management: preserve load_access: auto ``` Source backends determine where identifiers resolve from. A file source is the -default; HTTP and GCS sources can be added for URL and bucket-backed -identifiers. The GCS backend is implemented but has not yet been -deployment-tested. +default; HTTP sources can be added for URL identifiers. ```yaml sources: @@ -31,10 +29,6 @@ sources: http: allowed_origins: - https://repository.example.edu - gcs: - # Implemented, but not deployment-tested yet. - bucket_url: gs://my-bucket - prefix: images ``` | Format | Source / Input | Response / Output | Notes | diff --git a/docs/index.md b/docs/index.md index 01a601d..bdb1158 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,12 @@ All image processing is done by [libvips] through [govips]. docker run -p 8080:8080 ghcr.io/libops/triplet:main ``` +Then try the bundled sample image: + +```bash +curl http://localhost:8080/iiif/3/sample.png/info.json +``` + Triplet needs a public base URL before generated IIIF identifiers are useful outside the container: @@ -36,7 +42,7 @@ iiif: - [Caching](caching.md) explains Triplet's cache layers and invalidation behavior. - [Format support](formats.md) lists source and response formats. - [libvips build](libvips.md) documents the runtime image feature surface. -- [Deploys](deploys.md) covers deployment notes and storage backends. +- [Deploys](deploys.md) covers deployment notes and Presentation storage migrations. - [Conformance](conformance.md) summarizes IIIF spec integration. - [Benchmarks](benchmarks.md) summarizes Triplet performance measurements. diff --git a/docs/libvips.md b/docs/libvips.md index 16c88eb..588ca12 100644 --- a/docs/libvips.md +++ b/docs/libvips.md @@ -25,7 +25,7 @@ iiif: image: max_output_pixels: 100000000 max_source_pixels: 250000000 - max_derivative_bytes: 536870912 + max_derivative_bytes: 512MiB max_concurrent_transforms: 4 color_management: preserve load_access: auto diff --git a/go.mod b/go.mod index 59ca951..ea21e71 100644 --- a/go.mod +++ b/go.mod @@ -3,69 +3,31 @@ module github.com/libops/triplet go 1.26.2 require ( - cloud.google.com/go/storage v1.62.1 github.com/davidbyttow/govips/v2 v2.18.0 github.com/go-sql-driver/mysql v1.10.0 github.com/libops/iiif-spec v0.1.0 github.com/prometheus/client_golang v1.23.2 - google.golang.org/api v0.277.0 gopkg.in/yaml.v3 v3.0.1 ) require ( - cel.dev/expr v0.25.1 // indirect - cloud.google.com/go v0.123.0 // indirect - cloud.google.com/go/auth v0.20.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect - cloud.google.com/go/compute/metadata v0.9.0 // indirect - cloud.google.com/go/iam v1.9.0 // indirect - cloud.google.com/go/monitoring v1.27.0 // indirect filippo.io/edwards25519 v1.2.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.32.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.56.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.56.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2 // indirect - github.com/envoyproxy/go-control-plane/envoy v1.37.0 // indirect - github.com/envoyproxy/protoc-gen-validate v1.3.3 // indirect - github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/go-jose/go-jose/v4 v4.1.4 // indirect - github.com/go-logr/logr v1.4.3 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/google/s2a-go v0.1.9 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.15 // indirect - github.com/googleapis/gax-go/v2 v2.22.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/kr/text v0.2.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.67.5 // indirect github.com/prometheus/procfs v0.20.1 // indirect + github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect - github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect - go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/detectors/gcp v1.43.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 // indirect - go.opentelemetry.io/otel v1.43.0 // indirect - go.opentelemetry.io/otel/metric v1.43.0 // indirect - go.opentelemetry.io/otel/sdk v1.43.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect - go.opentelemetry.io/otel/trace v1.43.0 // indirect go.yaml.in/yaml/v2 v2.4.4 // indirect - golang.org/x/crypto v0.50.0 // indirect golang.org/x/image v0.39.0 // indirect golang.org/x/net v0.53.0 // indirect - golang.org/x/oauth2 v0.36.0 // indirect - golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.36.0 // indirect - golang.org/x/time v0.15.0 // indirect - google.golang.org/genproto v0.0.0-20260420184626-e10c466a9529 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260420184626-e10c466a9529 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 // indirect - google.golang.org/grpc v1.80.0 // indirect google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index b98f4be..5543720 100644 --- a/go.sum +++ b/go.sum @@ -1,78 +1,18 @@ -cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= -cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= -cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= -cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= -cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA= -cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q= -cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= -cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= -cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= -cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= -cloud.google.com/go/iam v1.9.0 h1:89wyjxT6DL4b5rk/Nk8eBC9DHqf+JiMstrn5IEYxFw4= -cloud.google.com/go/iam v1.9.0/go.mod h1:KP+nKGugNJW4LcLx1uEZcq1ok5sQHFaQehQNl4QDgV4= -cloud.google.com/go/logging v1.16.0 h1:MMNgYRvZ/pEwiNSkcoJTKWfAbAJDqCqAMJiarZx+/CI= -cloud.google.com/go/logging v1.16.0/go.mod h1:ZGKnpBaURITh+g/uom2VhbiFoFWvejcrHPDhxFtU/gI= -cloud.google.com/go/longrunning v0.11.0 h1:fE4XVLJQj+gRnw1HrbDyQXXgC0aiqY3wxP7DDU4cWk0= -cloud.google.com/go/longrunning v0.11.0/go.mod h1:8nqFBPOO1U/XkhWl0I19AMZEphrHi73VNABIpKYaTwM= -cloud.google.com/go/monitoring v1.27.0 h1:BhYwMqao+e5Nn7JtWMM9m6zRtKtVUK6kJWMizXChkLU= -cloud.google.com/go/monitoring v1.27.0/go.mod h1:72NOVjJXHY/HBfoLT0+qlCZBT059+9VXLeAnL2PeeVM= -cloud.google.com/go/storage v1.62.1 h1:Os0G3XbUbjZumkpDUf2Y0rLoXJTCF1kU2kWUujKYXD8= -cloud.google.com/go/storage v1.62.1/go.mod h1:cpYz/kRVZ+UQAF1uHeea10/9ewcRbxGoGNKsS9daSXA= -cloud.google.com/go/trace v1.14.0 h1:jUtnmOrNcu5XJNk4Gz0fv+v5sM0weaOa3z5MPQUjRXs= -cloud.google.com/go/trace v1.14.0/go.mod h1:r+bdAn16dKLSV1G2D5v3e58IlQlizfxWrUfjx7kM7X0= filippo.io/edwards25519 v1.2.0 h1:crnVqOiS4jqYleHd9vaKZ+HKtHfllngJIiOpNpoJsjo= filippo.io/edwards25519 v1.2.0/go.mod h1:xzAOLCNug/yB62zG1bQ8uziwrIqIuxhctzJT18Q77mc= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.32.0 h1:rIkQfkCOVKc1OiRCNcSDD8ml5RJlZbH/Xsq7lbpynwc= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.32.0/go.mod h1:RD2SsorTmYhF6HkTmDw7KmPYQk8OBYwTkuasChwv7R4= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.56.0 h1:O2sXMyJh8b7devAGdE+163xtRurt0RVpB6DIzX5vGfg= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.56.0/go.mod h1:hEpiGU18xf70qb3jbTcIggWAiEfX/cOIVc2OTe4OegA= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.56.0 h1:ZIT85vKP7LBS84XJ0WdJ3dPOX3iz4j3c0+lpajGQMyo= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.56.0/go.mod h1:rqP9UEhOXv9WhQ7Gjz+G5y/pf8+BJZW5/Ts0AhE0PwE= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.56.0 h1:0YP0+/ixwu+Uqeu/FGiBZNQ19huiUxxiPXIc9WsLKuQ= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.56.0/go.mod h1:6ZZMQhZKDvUvkJw2rc+oDP90tMMzuU/J+5HG1ZmPOmE= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2 h1:aBangftG7EVZoUb69Os8IaYg++6uMOdKK83QtkkvJik= -github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2/go.mod h1:qwXFYgsP6T7XnJtbKlf1HP8AjxZZyzxMmc+Lq5GjlU4= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= -github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= -github.com/envoyproxy/go-control-plane/envoy v1.37.0 h1:u3riX6BoYRfF4Dr7dwSOroNfdSbEPe9Yyl09/B6wBrQ= -github.com/envoyproxy/go-control-plane/envoy v1.37.0/go.mod h1:DReE9MMrmecPy+YvQOAOHNYMALuowAnbjjEMkkWOi6A= -github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= -github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= -github.com/envoyproxy/protoc-gen-validate v1.3.3 h1:MVQghNeW+LZcmXe7SY1V36Z+WFMDjpqGAGacLe2T0ds= -github.com/envoyproxy/protoc-gen-validate v1.3.3/go.mod h1:TsndJ/ngyIdQRhMcVVGDDHINPLWB7C82oDArY51KfB0= -github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= -github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA= -github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-sql-driver/mysql v1.10.0 h1:Q+1LV8DkHJvSYAdR83XzuhDaTykuDx0l6fkXxoWCWfw= github.com/go-sql-driver/mysql v1.10.0/go.mod h1:M+cqaI7+xxXGG9swrdeUIoPG3Y3KCkF0pZej+SK+nWk= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= -github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= -github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= -github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.3.15 h1:xolVQTEXusUcAA5UgtyRLjelpFFHWlPQ4XfWGc7MBas= -github.com/googleapis/enterprise-certificate-proxy v0.3.15/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= -github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU5vlZD4= -github.com/googleapis/gax-go/v2 v2.22.0/go.mod h1:irWBbALSr0Sk3qlqb9SyJ1h68WjgeFuiOzI4Rqw5+aY= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -87,8 +27,6 @@ github.com/libops/iiif-spec v0.1.0 h1:qiEokzXE3F8ZFAarA0Q0Ai6dqtndzNRt2mBQj8EvQG github.com/libops/iiif-spec v0.1.0/go.mod h1:0HuD9gEQePZ6+z9SyH9TJoMRf9l1af02DyKQsMgQebU= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= -github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= @@ -103,62 +41,20 @@ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0t github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= -github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= -github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= -go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/detectors/gcp v1.43.0 h1:62yY3dT7/ShwOxzA0RsKRgshBmfElKI4d/Myu2OxDFU= -go.opentelemetry.io/contrib/detectors/gcp v1.43.0/go.mod h1:RyaZMFY7yi1kAs45S6mbFGz8O8rqB0dTY14uzvG4LCs= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 h1:0Qx7VGBacMm9ZENQ7TnNObTYI4ShC+lHI16seduaxZo= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0/go.mod h1:Sje3i3MjSPKTSPvVWCaL8ugBzJwik3u4smCjUeuupqg= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= -go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= -go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0 h1:TC+BewnDpeiAmcscXbGMfxkO+mwYUwE/VySwvw88PfA= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0/go.mod h1:J/ZyF4vfPwsSr9xJSPyQ4LqtcTPULFR64KwTikGLe+A= -go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= -go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= -go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= -go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= -go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= -go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= -go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= -go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ= go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ= -golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= -golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= golang.org/x/image v0.39.0 h1:skVYidAEVKgn8lZ602XO75asgXBgLj9G/FE3RbuPFww= golang.org/x/image v0.39.0/go.mod h1:sIbmppfU+xFLPIG0FoVUTvyBMmgng1/XAMhQ2ft0hpA= golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= -golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= -golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= -golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= -golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= -gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= -gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= -google.golang.org/api v0.277.0 h1:HJfyJUiNeBBUMai7ez8u14wkp/gH/I4wpGbbO9o+cSk= -google.golang.org/api v0.277.0/go.mod h1:B9TqLBwJqVjp1mtt7WeoQwWRwvu/400y5lETOql+giQ= -google.golang.org/genproto v0.0.0-20260420184626-e10c466a9529 h1:QoMBg0moLIlB/eucPzc+ID5SgPZWuirtjAn3l8nW2Dg= -google.golang.org/genproto v0.0.0-20260420184626-e10c466a9529/go.mod h1:EjLmDZ8liSLBrCTK5vP+bGIxRQHE3ovGvOI0CzGk1PI= -google.golang.org/genproto/googleapis/api v0.0.0-20260420184626-e10c466a9529 h1:zUWMZsvo/IJcD1t6MNCPO/azZTwz0TvwCBqr5aifoVY= -google.golang.org/genproto/googleapis/api v0.0.0-20260420184626-e10c466a9529/go.mod h1:a5OGAgyRr4lqco7AG9hQM9Fwh0N2ZV4grR0eXFEsXQg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 h1:tEkOQcXgF6dH1G+MVKZrfpYvozGrzb91k6ha7jireSM= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= -google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= -google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 2186036..316940b 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -6,8 +6,7 @@ // - Derivative cache stores encoded IIIF responses keyed by // (identifier, region, size, rotation, quality, format). // -// Both layers use the same [Store] interface so backends (file, GCS) are -// interchangeable. +// Both layers use the same [Store] interface so backends are interchangeable. package cache import ( diff --git a/internal/cache/gcs.go b/internal/cache/gcs.go deleted file mode 100644 index 69884ba..0000000 --- a/internal/cache/gcs.go +++ /dev/null @@ -1,106 +0,0 @@ -package cache - -import ( - "context" - "fmt" - "io" - "net/url" - "strings" - - gcs "cloud.google.com/go/storage" - "google.golang.org/api/googleapi" -) - -// GCSStore stores cached bytes in a Google Cloud Storage bucket. -type GCSStore struct { - client *gcs.Client - bucket string - prefix string -} - -// NewGCSStore constructs a GCS-backed cache store. bucketURL must be gs://bucket. -func NewGCSStore(ctx context.Context, bucketURL, prefix string) (*GCSStore, error) { - bucket, err := bucketFromURL(bucketURL) - if err != nil { - return nil, err - } - client, err := gcs.NewClient(ctx) - if err != nil { - return nil, fmt.Errorf("cache gcs store: %w", err) - } - return &GCSStore{client: client, bucket: bucket, prefix: strings.Trim(prefix, "/")}, nil -} - -// Close releases resources held by the GCS client. -func (s *GCSStore) Close() error { - return s.client.Close() -} - -// Get implements Store. -func (s *GCSStore) Get(ctx context.Context, key string) (io.ReadCloser, Entry, error) { - obj := s.client.Bucket(s.bucket).Object(s.objectName(key)) - attrs, err := obj.Attrs(ctx) - if err != nil { - if isGCSNotFound(err) { - return nil, Entry{}, ErrMiss - } - return nil, Entry{}, err - } - r, err := obj.NewReader(ctx) - if err != nil { - if isGCSNotFound(err) { - return nil, Entry{}, ErrMiss - } - return nil, Entry{}, err - } - return r, Entry{ - ContentType: attrs.ContentType, - Size: attrs.Size, - StoredAt: attrs.Updated, - }, nil -} - -// Put implements Store. -func (s *GCSStore) Put(ctx context.Context, key, contentType string, value io.Reader) error { - w := s.client.Bucket(s.bucket).Object(s.objectName(key)).NewWriter(ctx) - w.ContentType = contentType - if _, err := io.Copy(w, value); err != nil { - _ = w.Close() - return err - } - return w.Close() -} - -// Delete implements Store. -func (s *GCSStore) Delete(ctx context.Context, key string) error { - err := s.client.Bucket(s.bucket).Object(s.objectName(key)).Delete(ctx) - if isGCSNotFound(err) { - return nil - } - return err -} - -func (s *GCSStore) objectName(key string) string { - if s.prefix == "" { - return key - } - return s.prefix + "/" + key -} - -func bucketFromURL(bucketURL string) (string, error) { - u, err := url.Parse(bucketURL) - if err != nil { - return "", fmt.Errorf("gcs bucket url: %w", err) - } - if u.Scheme != "gs" || u.Host == "" || u.Path != "" { - return "", fmt.Errorf("gcs bucket url: expected gs://bucket, got %q", bucketURL) - } - return u.Host, nil -} - -func isGCSNotFound(err error) bool { - if e, ok := err.(*googleapi.Error); ok && e.Code == 404 { - return true - } - return false -} diff --git a/internal/cache/gcs_test.go b/internal/cache/gcs_test.go deleted file mode 100644 index 07170e8..0000000 --- a/internal/cache/gcs_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package cache - -import "testing" - -func TestGCSStoreObjectName(t *testing.T) { - st := &GCSStore{prefix: "derivatives"} - if got := st.objectName("abc"); got != "derivatives/abc" { - t.Fatalf("objectName = %q", got) - } -} - -func TestBucketFromURL(t *testing.T) { - got, err := bucketFromURL("gs://cache-bucket") - if err != nil { - t.Fatal(err) - } - if got != "cache-bucket" { - t.Fatalf("bucket = %q", got) - } -} diff --git a/internal/config/bytesize.go b/internal/config/bytesize.go new file mode 100644 index 0000000..e09884f --- /dev/null +++ b/internal/config/bytesize.go @@ -0,0 +1,75 @@ +package config + +import ( + "fmt" + "math" + "strconv" + "strings" + + "gopkg.in/yaml.v3" +) + +// ByteSize is an integer byte count that may be written as either raw bytes or +// a human-readable value such as 50MiB, 1GiB, or 500GB. +type ByteSize int64 + +func (s *ByteSize) UnmarshalYAML(node *yaml.Node) error { + if node.Kind != yaml.ScalarNode { + return fmt.Errorf("byte size must be a scalar") + } + n, err := parseByteSize(node.Value) + if err != nil { + return err + } + *s = ByteSize(n) + return nil +} + +func parseByteSize(raw string) (int64, error) { + value := strings.TrimSpace(raw) + if value == "" { + return 0, fmt.Errorf("byte size must not be empty") + } + i := 0 + if value[0] == '-' || value[0] == '+' { + i++ + } + startDigits := i + for i < len(value) && ((value[i] >= '0' && value[i] <= '9') || value[i] == '_') { + i++ + } + if i == startDigits { + return 0, fmt.Errorf("byte size %q must start with an integer", raw) + } + num := strings.ReplaceAll(value[:i], "_", "") + n, err := strconv.ParseInt(num, 10, 64) + if err != nil { + return 0, fmt.Errorf("byte size %q: %w", raw, err) + } + unit := strings.ToLower(strings.TrimSpace(value[i:])) + multiplier, ok := byteSizeMultipliers[unit] + if !ok { + return 0, fmt.Errorf("byte size %q uses unknown unit %q", raw, strings.TrimSpace(value[i:])) + } + if n > 0 && multiplier > 0 && n > math.MaxInt64/multiplier { + return 0, fmt.Errorf("byte size %q overflows int64", raw) + } + if n < 0 && multiplier > 0 && n < math.MinInt64/multiplier { + return 0, fmt.Errorf("byte size %q overflows int64", raw) + } + return n * multiplier, nil +} + +var byteSizeMultipliers = map[string]int64{ + "": 1, + "b": 1, + "kb": 1000, + "mb": 1000 * 1000, + "gb": 1000 * 1000 * 1000, + "tb": 1000 * 1000 * 1000 * 1000, + "pib": 1 << 50, + "tib": 1 << 40, + "gib": 1 << 30, + "mib": 1 << 20, + "kib": 1 << 10, +} diff --git a/internal/config/config.go b/internal/config/config.go index 23e30b5..6912236 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -78,8 +78,6 @@ type IIIF struct { AllowedOrigins []string `yaml:"allowed_origins"` Image Image `yaml:"image"` Presentation Presentation `yaml:"presentation"` - Search Search `yaml:"search"` - Auth Auth `yaml:"auth"` } // Image holds Image API 3.0 settings. @@ -92,8 +90,8 @@ type Image struct { MaxOutputPixels int64 `yaml:"max_output_pixels"` AllowUnsafeUnlimitedOutputPixels bool `yaml:"allow_unsafe_unlimited_output_pixels"` MaxSourcePixels int64 `yaml:"max_source_pixels"` - MaxSourceBytes int64 `yaml:"max_source_bytes"` - MaxDerivativeBytes int64 `yaml:"max_derivative_bytes"` + MaxSourceBytes ByteSize `yaml:"max_source_bytes"` + MaxDerivativeBytes ByteSize `yaml:"max_derivative_bytes"` MaxConcurrentTransforms int `yaml:"max_concurrent_transforms"` MaxWidth int `yaml:"max_width"` MaxHeight int `yaml:"max_height"` @@ -112,26 +110,12 @@ type Presentation struct { WriteToken string `yaml:"write_token"` } -// Search holds Content Search API 2.0 settings. -type Search struct { - Enabled bool `yaml:"enabled"` - Prefix string `yaml:"prefix"` -} - -// Auth holds Authorization Flow API 2.0 settings. -type Auth struct { - Enabled bool `yaml:"enabled"` - Prefix string `yaml:"prefix"` - DevelopmentPermitAll bool `yaml:"development_permit_all"` -} - // Sources declares identifier-resolution backends. Exactly one of the // declared sources must match Default. type Sources struct { Default string `yaml:"default"` File *FileSource `yaml:"file,omitempty"` HTTP *HTTPSource `yaml:"http,omitempty"` - GCS *GCSSource `yaml:"gcs,omitempty"` } // FileSource resolves identifiers as paths under Root. @@ -160,25 +144,15 @@ type HTTPSource struct { AllowedOrigins []string `yaml:"allowed_origins"` AllowPrivateHosts bool `yaml:"allow_private_hosts"` RequestTimeout time.Duration `yaml:"request_timeout"` - MaxBytes int64 `yaml:"max_bytes"` -} - -// GCSSource resolves identifiers as object keys in a GCS bucket. -type GCSSource struct { - BucketURL string `yaml:"bucket_url"` - Prefix string `yaml:"prefix"` + MaxBytes ByteSize `yaml:"max_bytes"` } // Cache declares optional derivative-cache settings. type Cache struct { Root string `yaml:"root"` - MaxBytes int64 `yaml:"max_bytes"` - BucketURL string `yaml:"bucket_url"` - Prefix string `yaml:"prefix"` + MaxBytes ByteSize `yaml:"max_bytes"` SourceRoot string `yaml:"source_root"` - SourceMaxBytes int64 `yaml:"source_max_bytes"` - SourceBucketURL string `yaml:"source_bucket_url"` - SourcePrefix string `yaml:"source_prefix"` + SourceMaxBytes ByteSize `yaml:"source_max_bytes"` SourceStaleAfter time.Duration `yaml:"source_stale_after"` } @@ -190,8 +164,8 @@ type Extensions struct { // Transform configures the POST /v1/transform endpoint. type Transform struct { - Enabled bool `yaml:"enabled"` - MaxUploadBytes int64 `yaml:"max_upload_bytes"` + Enabled bool `yaml:"enabled"` + MaxUploadBytes ByteSize `yaml:"max_upload_bytes"` } // Uploads configures the POST /v1/uploads endpoint. @@ -352,12 +326,6 @@ func (c *Config) applyDefaults() { if c.IIIF.Presentation.Prefix == "" { c.IIIF.Presentation.Prefix = "/presentation/v3" } - if c.IIIF.Search.Prefix == "" { - c.IIIF.Search.Prefix = "/search/v2" - } - if c.IIIF.Auth.Prefix == "" { - c.IIIF.Auth.Prefix = "/auth/v2" - } } func (c *Config) validate() error { @@ -429,15 +397,6 @@ func (c *Config) validate() error { if !strings.HasPrefix(c.IIIF.Presentation.Prefix, "/") { return fmt.Errorf("iiif.presentation.prefix: must start with `/`, got %q", c.IIIF.Presentation.Prefix) } - if !strings.HasPrefix(c.IIIF.Search.Prefix, "/") { - return fmt.Errorf("iiif.search.prefix: must start with `/`, got %q", c.IIIF.Search.Prefix) - } - if !strings.HasPrefix(c.IIIF.Auth.Prefix, "/") { - return fmt.Errorf("iiif.auth.prefix: must start with `/`, got %q", c.IIIF.Auth.Prefix) - } - if c.IIIF.Auth.Enabled && !c.IIIF.Auth.DevelopmentPermitAll { - return errors.New("iiif.auth.development_permit_all is required when iiif.auth.enabled = true") - } if c.IIIF.Image.MaxOutputPixels < 0 { return errors.New("iiif.image.max_output_pixels: must be >= 0") } @@ -531,9 +490,6 @@ func (c *Config) validate() error { } } } - if c.Sources.GCS != nil && c.Sources.GCS.BucketURL == "" { - return errors.New("sources.gcs.bucket_url is required when sources.gcs is configured") - } if c.IIIF.Image.Enabled { switch c.Sources.Default { case "": @@ -546,20 +502,10 @@ func (c *Config) validate() error { if c.Sources.HTTP == nil { return errors.New("sources.http is required when sources.default = http") } - case "gcs": - if c.Sources.GCS == nil || c.Sources.GCS.BucketURL == "" { - return errors.New("sources.gcs.bucket_url is required when sources.default = gcs") - } default: return fmt.Errorf("sources.default: %q not supported in this build", c.Sources.Default) } } - if c.Cache.Root != "" && c.Cache.BucketURL != "" { - return errors.New("cache.root and cache.bucket_url are mutually exclusive") - } - if c.Cache.SourceRoot != "" && c.Cache.SourceBucketURL != "" { - return errors.New("cache.source_root and cache.source_bucket_url are mutually exclusive") - } if c.IIIF.Presentation.Enabled && c.IIIF.Presentation.Root == "" && c.IIIF.Presentation.DSN == "" { return errors.New("iiif.presentation.root or iiif.presentation.dsn is required when iiif.presentation.enabled = true") } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 1e71549..875e1b5 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -255,17 +255,6 @@ sources: `, wantErr: "sources.file.root is required when sources.file.url_prefixes is configured", }, - { - name: "gcs source valid", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: gcs - gcs: - bucket_url: gs://example-bucket -`, - }, { name: "image allowed origins valid", body: ` @@ -450,51 +439,6 @@ iiif: root: /tmp `, }, - { - name: "auth enabled requires permit-all opt-in", - body: ` -server: - public_base_url: http://localhost:8080 -iiif: - auth: - enabled: true -sources: - default: file - file: - root: /tmp -`, - wantErr: "iiif.auth.development_permit_all is required", - }, - { - name: "auth enabled with development permit-all opt-in", - body: ` -server: - public_base_url: http://localhost:8080 -iiif: - auth: - enabled: true - development_permit_all: true -sources: - default: file - file: - root: /tmp -`, - }, - { - name: "cache root and bucket url conflict", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: file - file: - root: /tmp -cache: - root: /tmp/cache - bucket_url: gs://cache-bucket -`, - wantErr: "cache.root and cache.bucket_url are mutually exclusive", - }, { name: "image allowed origins rejects empty entry", body: ` @@ -683,12 +627,6 @@ sources: if len(c.IIIF.AllowedOrigins) != 0 { t.Errorf("IIIF.AllowedOrigins default = %#v", c.IIIF.AllowedOrigins) } - if c.IIIF.Search.Prefix != "/search/v2" { - t.Errorf("Search.Prefix default = %q", c.IIIF.Search.Prefix) - } - if c.IIIF.Auth.Prefix != "/auth/v2" { - t.Errorf("Auth.Prefix default = %q", c.IIIF.Auth.Prefix) - } if c.Logging.Level != "info" { t.Errorf("Logging.Level default = %q", c.Logging.Level) } @@ -730,39 +668,47 @@ cache: } } -func TestLoadRejectsBadSearchPrefix(t *testing.T) { +func TestLoadParsesHumanReadableByteSizes(t *testing.T) { path := writeConfig(t, ` server: public_base_url: http://localhost:8080 iiif: - search: - prefix: search/v2 + image: + max_source_bytes: 1GiB + max_derivative_bytes: 512MiB sources: - default: file - file: - root: /tmp + default: http + http: + allowed_origins: [https://example.org] + max_bytes: 50MiB +cache: + max_bytes: 500GiB + source_max_bytes: 2GB +extensions: + transform: + max_upload_bytes: 25MiB `) - _, err := Load(path) - if err == nil || !strings.Contains(err.Error(), "iiif.search.prefix") { - t.Fatalf("err = %v, want iiif.search.prefix validation error", err) + c, err := Load(path) + if err != nil { + t.Fatalf("load: %v", err) } -} - -func TestLoadRejectsBadAuthPrefix(t *testing.T) { - path := writeConfig(t, ` -server: - public_base_url: http://localhost:8080 -iiif: - auth: - prefix: auth/v2 -sources: - default: file - file: - root: /tmp -`) - _, err := Load(path) - if err == nil || !strings.Contains(err.Error(), "iiif.auth.prefix") { - t.Fatalf("err = %v, want iiif.auth.prefix validation error", err) + if c.IIIF.Image.MaxSourceBytes != 1<<30 { + t.Errorf("MaxSourceBytes = %d", c.IIIF.Image.MaxSourceBytes) + } + if c.IIIF.Image.MaxDerivativeBytes != 512<<20 { + t.Errorf("MaxDerivativeBytes = %d", c.IIIF.Image.MaxDerivativeBytes) + } + if c.Sources.HTTP == nil || c.Sources.HTTP.MaxBytes != 50<<20 { + t.Errorf("HTTP.MaxBytes = %#v", c.Sources.HTTP) + } + if c.Cache.MaxBytes != 500<<30 { + t.Errorf("Cache.MaxBytes = %d", c.Cache.MaxBytes) + } + if c.Cache.SourceMaxBytes != 2_000_000_000 { + t.Errorf("Cache.SourceMaxBytes = %d", c.Cache.SourceMaxBytes) + } + if c.Extensions.Transform.MaxUploadBytes != 25<<20 { + t.Errorf("Transform.MaxUploadBytes = %d", c.Extensions.Transform.MaxUploadBytes) } } diff --git a/internal/iiif/auth/v2/authorizer/authorizer.go b/internal/iiif/auth/v2/authorizer/authorizer.go deleted file mode 100644 index 8669b71..0000000 --- a/internal/iiif/auth/v2/authorizer/authorizer.go +++ /dev/null @@ -1,31 +0,0 @@ -package authorizer - -import ( - "context" - "net/http" -) - -type Request struct { - ItemID string - Token string -} - -type Authorizer interface { - Probe(ctx context.Context, req Request) (int, error) - Token(ctx context.Context, itemID string, r *http.Request) (string, int, error) - Logout(ctx context.Context, itemID string, r *http.Request) error -} - -type PermitAll struct{} - -func (PermitAll) Probe(context.Context, Request) (int, error) { - return http.StatusOK, nil -} - -func (PermitAll) Token(context.Context, string, *http.Request) (string, int, error) { - return "", 0, nil -} - -func (PermitAll) Logout(context.Context, string, *http.Request) error { - return nil -} diff --git a/internal/iiif/auth/v2/handler/handler.go b/internal/iiif/auth/v2/handler/handler.go deleted file mode 100644 index e443e50..0000000 --- a/internal/iiif/auth/v2/handler/handler.go +++ /dev/null @@ -1,176 +0,0 @@ -package handler - -import ( - "encoding/json" - "html" - "log/slog" - "net/http" - "net/url" - "strings" - - "github.com/libops/triplet/internal/cors" - "github.com/libops/triplet/internal/iiif/auth/v2/authorizer" - "github.com/libops/triplet/internal/iiif/auth/v2/types" - "github.com/libops/triplet/internal/redact" -) - -type Handler struct { - prefix string - publicBaseURL string - authz authorizer.Authorizer - cors cors.Policy - logger *slog.Logger -} - -func New(prefix, publicBaseURL string, authz authorizer.Authorizer, corsPolicy cors.Policy, logger *slog.Logger) *Handler { - return &Handler{ - prefix: strings.TrimRight(prefix, "/"), - publicBaseURL: strings.TrimRight(publicBaseURL, "/"), - authz: authz, - cors: corsPolicy, - logger: logger, - } -} - -func (h *Handler) Register(mux *http.ServeMux) { - mux.Handle(h.prefix+"/", h) -} - -func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - if r.Method == http.MethodOptions { - h.writeCORS(w, r) - w.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, POST, OPTIONS") - w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type") - w.WriteHeader(http.StatusNoContent) - return - } - rest := strings.Trim(strings.TrimPrefix(r.URL.Path, h.prefix), "/") - parts := strings.Split(rest, "/") - if len(parts) != 2 { - h.writeError(w, r, http.StatusNotFound, "not found") - return - } - itemID, err := url.PathUnescape(parts[0]) - if err != nil || !validRequestID(itemID) { - h.writeError(w, r, http.StatusBadRequest, "invalid item id") - return - } - switch parts[1] { - case "probe": - h.probe(w, r, itemID) - case "access": - h.access(w, r, itemID) - case "token": - h.token(w, r, itemID) - case "logout": - h.logout(w, r, itemID) - default: - h.writeError(w, r, http.StatusNotFound, "not found") - } -} - -func (h *Handler) probe(w http.ResponseWriter, r *http.Request, itemID string) { - if r.Method != http.MethodGet && r.Method != http.MethodHead { - h.writeError(w, r, http.StatusMethodNotAllowed, "method not allowed") - return - } - status, err := h.authz.Probe(r.Context(), authorizer.Request{ItemID: itemID, Token: bearerToken(r)}) - if err != nil { - h.logger.Error("auth probe", "item_id", redact.Identifier(itemID), "item_id_hash", redact.Hash(itemID), "err", err) - h.writeError(w, r, http.StatusInternalServerError, "probe failed") - return - } - h.writeJSONHeaders(w, r) - w.WriteHeader(http.StatusOK) - if r.Method == http.MethodHead { - return - } - _ = json.NewEncoder(w).Encode(types.ProbeResult{ - Context: types.ContextAuth2, - Type: types.TypeProbeResult, - Status: status, - }) -} - -func (h *Handler) access(w http.ResponseWriter, r *http.Request, itemID string) { - if r.Method != http.MethodGet && r.Method != http.MethodHead { - h.writeError(w, r, http.StatusMethodNotAllowed, "method not allowed") - return - } - h.writeCORS(w, r) - w.Header().Set("Content-Type", "text/html; charset=utf-8") - w.WriteHeader(http.StatusOK) - if r.Method == http.MethodHead { - return - } - _, _ = w.Write([]byte(`Access granted

Access granted for ` + html.EscapeString(itemID) + `.

`)) -} - -func (h *Handler) token(w http.ResponseWriter, r *http.Request, itemID string) { - if r.Method != http.MethodGet && r.Method != http.MethodPost && r.Method != http.MethodHead { - h.writeError(w, r, http.StatusMethodNotAllowed, "method not allowed") - return - } - token, expiresIn, err := h.authz.Token(r.Context(), itemID, r) - if err != nil { - h.logger.Error("auth token", "item_id", redact.Identifier(itemID), "item_id_hash", redact.Hash(itemID), "err", err) - h.writeError(w, r, http.StatusInternalServerError, "token failed") - return - } - h.writeJSONHeaders(w, r) - w.WriteHeader(http.StatusOK) - if r.Method == http.MethodHead { - return - } - _ = json.NewEncoder(w).Encode(types.TokenResult{ - Context: types.ContextAuth2, - AccessToken: token, - ExpiresIn: expiresIn, - }) -} - -func (h *Handler) logout(w http.ResponseWriter, r *http.Request, itemID string) { - if r.Method != http.MethodGet && r.Method != http.MethodPost && r.Method != http.MethodHead { - h.writeError(w, r, http.StatusMethodNotAllowed, "method not allowed") - return - } - if err := h.authz.Logout(r.Context(), itemID, r); err != nil { - h.logger.Error("auth logout", "item_id", redact.Identifier(itemID), "item_id_hash", redact.Hash(itemID), "err", err) - h.writeError(w, r, http.StatusInternalServerError, "logout failed") - return - } - h.writeCORS(w, r) - w.WriteHeader(http.StatusNoContent) -} - -func bearerToken(r *http.Request) string { - auth := r.Header.Get("Authorization") - if !strings.HasPrefix(strings.ToLower(auth), "bearer ") { - return "" - } - return strings.TrimSpace(auth[len("Bearer "):]) -} - -func validRequestID(id string) bool { - return id != "" && len(id) <= 255 && !strings.ContainsAny(id, "\x00\n\r") -} - -func (h *Handler) writeJSONHeaders(w http.ResponseWriter, r *http.Request) { - h.writeCORS(w, r) - w.Header().Set("Content-Type", `application/ld+json;profile="http://iiif.io/api/auth/2/context.json"`) -} - -func (h *Handler) writeCORS(w http.ResponseWriter, r *http.Request) { - h.cors.SetHeaders(w, r) -} - -func (h *Handler) writeError(w http.ResponseWriter, r *http.Request, status int, msg string) { - h.writeCORS(w, r) - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{Error: msg}) -} - -type errorResponse struct { - Error string `json:"error"` -} diff --git a/internal/iiif/auth/v2/handler/handler_test.go b/internal/iiif/auth/v2/handler/handler_test.go deleted file mode 100644 index a87b123..0000000 --- a/internal/iiif/auth/v2/handler/handler_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package handler - -import ( - "encoding/json" - "io" - "log/slog" - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/libops/triplet/internal/cors" - "github.com/libops/triplet/internal/iiif/auth/v2/authorizer" - "github.com/libops/triplet/internal/iiif/auth/v2/types" -) - -func setupAuthServer() *httptest.Server { - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - h := New("/auth/v2", "http://example.test", authorizer.PermitAll{}, cors.New(nil, ""), logger) - mux := http.NewServeMux() - h.Register(mux) - return httptest.NewServer(mux) -} - -func TestProbePermitAll(t *testing.T) { - srv := setupAuthServer() - defer srv.Close() - - resp, err := http.Get(srv.URL + "/auth/v2/item-1/probe") - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Fatalf("status = %d", resp.StatusCode) - } - var got types.ProbeResult - if err := json.NewDecoder(resp.Body).Decode(&got); err != nil { - t.Fatal(err) - } - if got.Context != types.ContextAuth2 || got.Type != types.TypeProbeResult || got.Status != http.StatusOK { - t.Fatalf("probe = %#v", got) - } -} - -func TestTokenPermitAll(t *testing.T) { - srv := setupAuthServer() - defer srv.Close() - - resp, err := http.Get(srv.URL + "/auth/v2/item-1/token") - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Fatalf("status = %d", resp.StatusCode) - } - var got types.TokenResult - if err := json.NewDecoder(resp.Body).Decode(&got); err != nil { - t.Fatal(err) - } - if got.Context != types.ContextAuth2 { - t.Fatalf("token = %#v", got) - } -} - -func TestRejectsOverlongItemID(t *testing.T) { - srv := setupAuthServer() - defer srv.Close() - - resp, err := http.Get(srv.URL + "/auth/v2/" + strings.Repeat("a", 256) + "/probe") - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusBadRequest { - t.Fatalf("status = %d", resp.StatusCode) - } -} diff --git a/internal/iiif/auth/v2/types/types.go b/internal/iiif/auth/v2/types/types.go deleted file mode 100644 index 8ce02a2..0000000 --- a/internal/iiif/auth/v2/types/types.go +++ /dev/null @@ -1,25 +0,0 @@ -package types - -const ( - ContextAuth2 = "http://iiif.io/api/auth/2/context.json" - - TypeProbeResult = "AuthProbeResult2" - TypeProbeService = "AuthProbeService2" - TypeAccessService = "AuthAccessService2" - TypeAccessTokenService = "AuthAccessTokenService2" - TypeLogoutService = "AuthLogoutService2" -) - -type ProbeResult struct { - Context string `json:"@context"` - Type string `json:"type"` - Status int `json:"status"` -} - -type TokenResult struct { - Context string `json:"@context,omitempty"` - AccessToken string `json:"accessToken,omitempty"` - ExpiresIn int `json:"expiresIn,omitempty"` - MessageId string `json:"messageId,omitempty"` - Error string `json:"error,omitempty"` -} diff --git a/internal/iiif/presentation/v3/handler/handler_test.go b/internal/iiif/presentation/v3/handler/handler_test.go index 314d1fb..e1ecaa5 100644 --- a/internal/iiif/presentation/v3/handler/handler_test.go +++ b/internal/iiif/presentation/v3/handler/handler_test.go @@ -21,6 +21,11 @@ func setupTestServer(t *testing.T) *httptest.Server { } func setupTestServerWithWrites(t *testing.T, writeEnabled bool, writeToken string) *httptest.Server { + t.Helper() + return setupTestServerWithWritesAndCORS(t, writeEnabled, writeToken, nil) +} + +func setupTestServerWithWritesAndCORS(t *testing.T, writeEnabled bool, writeToken string, allowedOrigins []string) *httptest.Server { t.Helper() root := t.TempDir() itemDir := filepath.Join(root, "item-1") @@ -44,7 +49,7 @@ func setupTestServerWithWrites(t *testing.T, writeEnabled bool, writeToken strin t.Fatal(err) } logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - h := New("/presentation/v3", st, cors.New(nil, ""), writeEnabled, writeToken, logger) + h := New("/presentation/v3", st, cors.New(allowedOrigins, "ETag"), writeEnabled, writeToken, logger) mux := http.NewServeMux() h.Register(mux) return httptest.NewServer(mux) @@ -124,6 +129,30 @@ func TestAnnotationPageHead(t *testing.T) { } } +func TestAnnotationPageCORSExposesETag(t *testing.T) { + srv := setupTestServerWithWritesAndCORS(t, true, "test-token", []string{"https://editor.example.edu"}) + defer srv.Close() + req, err := http.NewRequest(http.MethodGet, srv.URL+"/presentation/v3/item-1/canvas/canvas-1/annotations", nil) + if err != nil { + t.Fatal(err) + } + req.Header.Set("Origin", "https://editor.example.edu") + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if got := resp.Header.Get("Access-Control-Allow-Origin"); got != "https://editor.example.edu" { + t.Fatalf("Access-Control-Allow-Origin = %q", got) + } + if got := resp.Header.Get("Access-Control-Expose-Headers"); got != "ETag" { + t.Fatalf("Access-Control-Expose-Headers = %q", got) + } + if got := resp.Header.Get("ETag"); got == "" { + t.Fatal("missing ETag") + } +} + func TestAnnotationPagePut(t *testing.T) { srv := setupTestServerWithWrites(t, true, "test-token") defer srv.Close() diff --git a/internal/iiif/presentation/v3/types/types.go b/internal/iiif/presentation/v3/types/types.go index 83d606a..b156519 100644 --- a/internal/iiif/presentation/v3/types/types.go +++ b/internal/iiif/presentation/v3/types/types.go @@ -102,8 +102,8 @@ type Resource struct { type Collection = Resource type Range = Resource -// Service is intentionally open because IIIF services span Image, Search, -// Auth, and extension APIs. +// Service is intentionally open because IIIF services span Image and extension +// APIs. type Service struct { ID string `json:"id,omitempty"` Type string `json:"type"` diff --git a/internal/iiif/search/v2/handler/handler.go b/internal/iiif/search/v2/handler/handler.go deleted file mode 100644 index 046406a..0000000 --- a/internal/iiif/search/v2/handler/handler.go +++ /dev/null @@ -1,111 +0,0 @@ -package handler - -import ( - "encoding/json" - "log/slog" - "net/http" - "net/url" - "strings" - - "github.com/libops/triplet/internal/cors" - "github.com/libops/triplet/internal/iiif/search/v2/searcher" -) - -// Handler serves the IIIF Content Search API 2.0 surface. -type Handler struct { - prefix string - publicBaseURL string - searcher searcher.Searcher - cors cors.Policy - logger *slog.Logger -} - -func New(prefix, publicBaseURL string, s searcher.Searcher, corsPolicy cors.Policy, logger *slog.Logger) *Handler { - return &Handler{ - prefix: strings.TrimRight(prefix, "/"), - publicBaseURL: strings.TrimRight(publicBaseURL, "/"), - searcher: s, - cors: corsPolicy, - logger: logger, - } -} - -func (h *Handler) Register(mux *http.ServeMux) { - mux.Handle(h.prefix+"/", h) -} - -func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - switch r.Method { - case http.MethodGet, http.MethodHead, http.MethodOptions: - default: - h.writeError(w, r, http.StatusMethodNotAllowed, "method not allowed") - return - } - if r.Method == http.MethodOptions { - h.writeCORS(w, r) - w.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, OPTIONS") - w.Header().Set("Access-Control-Allow-Headers", "Content-Type") - w.WriteHeader(http.StatusNoContent) - return - } - - rest := strings.TrimPrefix(r.URL.Path, h.prefix) - rest = strings.Trim(rest, "/") - parts := strings.Split(rest, "/") - if len(parts) != 2 || parts[1] != "search" { - h.writeError(w, r, http.StatusNotFound, "not found") - return - } - itemID, err := url.PathUnescape(parts[0]) - if err != nil || itemID == "" { - h.writeError(w, r, http.StatusBadRequest, "invalid item id") - return - } - query := strings.TrimSpace(r.URL.Query().Get("q")) - if query == "" { - h.writeError(w, r, http.StatusBadRequest, "missing q") - return - } - - page, err := h.searcher.Search(r.Context(), searcher.Request{ - ItemID: itemID, - Query: query, - }) - if err != nil { - h.logger.Error("search", "item_id", itemID, "err", err) - h.writeError(w, r, http.StatusInternalServerError, "search failed") - return - } - if page.ID == "" { - page.ID = h.publicBaseURL + r.URL.RequestURI() - } - - h.writeDocumentHeaders(w, r) - w.WriteHeader(http.StatusOK) - if r.Method == http.MethodHead { - return - } - if err := json.NewEncoder(w).Encode(page); err != nil { - h.logger.Warn("write search response", "item_id", itemID, "err", err) - } -} - -func (h *Handler) writeDocumentHeaders(w http.ResponseWriter, r *http.Request) { - h.writeCORS(w, r) - w.Header().Set("Content-Type", `application/ld+json;profile="http://iiif.io/api/search/2/context.json"`) -} - -func (h *Handler) writeCORS(w http.ResponseWriter, r *http.Request) { - h.cors.SetHeaders(w, r) -} - -func (h *Handler) writeError(w http.ResponseWriter, r *http.Request, status int, msg string) { - h.writeCORS(w, r) - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{Error: msg}) -} - -type errorResponse struct { - Error string `json:"error"` -} diff --git a/internal/iiif/search/v2/handler/handler_test.go b/internal/iiif/search/v2/handler/handler_test.go deleted file mode 100644 index 9e8da5c..0000000 --- a/internal/iiif/search/v2/handler/handler_test.go +++ /dev/null @@ -1,97 +0,0 @@ -package handler - -import ( - "encoding/json" - "io" - "log/slog" - "net/http" - "net/http/httptest" - "testing" - - "github.com/libops/triplet/internal/cors" - "github.com/libops/triplet/internal/iiif/search/v2/searcher" - "github.com/libops/triplet/internal/iiif/search/v2/types" -) - -func setupTestServer() *httptest.Server { - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - h := New("/search/v2", "http://example.test", searcher.Noop{}, cors.New(nil, ""), logger) - mux := http.NewServeMux() - h.Register(mux) - return httptest.NewServer(mux) -} - -func TestSearchNoop(t *testing.T) { - srv := setupTestServer() - defer srv.Close() - - resp, err := http.Get(srv.URL + "/search/v2/item-1/search?q=needle") - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Fatalf("status = %d", resp.StatusCode) - } - if got := resp.Header.Get("Access-Control-Allow-Origin"); got != "" { - t.Fatalf("CORS = %q", got) - } - if got := resp.Header.Get("Content-Type"); got == "" { - t.Fatal("missing content-type") - } - var page types.AnnotationPage - if err := json.NewDecoder(resp.Body).Decode(&page); err != nil { - t.Fatal(err) - } - if page.Context != types.ContextSearch2 { - t.Fatalf("context = %#v", page.Context) - } - if page.Type != types.TypeAnnotationPage { - t.Fatalf("type = %q", page.Type) - } - if page.ID != "http://example.test/search/v2/item-1/search?q=needle" { - t.Fatalf("id = %q", page.ID) - } - if len(page.Items) != 0 { - t.Fatalf("items = %#v", page.Items) - } -} - -func TestSearchHead(t *testing.T) { - srv := setupTestServer() - defer srv.Close() - - req, err := http.NewRequest(http.MethodHead, srv.URL+"/search/v2/item-1/search?q=needle", nil) - if err != nil { - t.Fatal(err) - } - resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Fatalf("status = %d", resp.StatusCode) - } - b, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } - if len(b) != 0 { - t.Fatalf("expected empty body, got %q", string(b)) - } -} - -func TestSearchRequiresQuery(t *testing.T) { - srv := setupTestServer() - defer srv.Close() - - resp, err := http.Get(srv.URL + "/search/v2/item-1/search") - if err != nil { - t.Fatal(err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusBadRequest { - t.Fatalf("status = %d", resp.StatusCode) - } -} diff --git a/internal/iiif/search/v2/searcher/searcher.go b/internal/iiif/search/v2/searcher/searcher.go deleted file mode 100644 index 7a1d65c..0000000 --- a/internal/iiif/search/v2/searcher/searcher.go +++ /dev/null @@ -1,31 +0,0 @@ -package searcher - -import ( - "context" - - "github.com/libops/triplet/internal/iiif/search/v2/types" -) - -// Request captures the spec query parameters triplet needs to route a Content -// Search request. Backend-specific ranking and indexing stay outside triplet. -type Request struct { - ItemID string - Query string -} - -// Searcher resolves a Content Search query into an AnnotationPage. -type Searcher interface { - Search(ctx context.Context, req Request) (types.AnnotationPage, error) -} - -// Noop is the default backend. It preserves the IIIF HTTP surface without -// making triplet responsible for indexing. -type Noop struct{} - -func (Noop) Search(context.Context, Request) (types.AnnotationPage, error) { - return types.AnnotationPage{ - Context: types.ContextSearch2, - Type: types.TypeAnnotationPage, - Items: []types.Annotation{}, - }, nil -} diff --git a/internal/iiif/search/v2/types/types.go b/internal/iiif/search/v2/types/types.go deleted file mode 100644 index 2d77d6f..0000000 --- a/internal/iiif/search/v2/types/types.go +++ /dev/null @@ -1,33 +0,0 @@ -package types - -const ( - ContextSearch2 = "http://iiif.io/api/search/2/context.json" - TypeAnnotation = "Annotation" - TypeAnnotationPage = "AnnotationPage" -) - -// AnnotationPage is the Content Search 2.0 response container. Triplet's -// default searcher currently returns an empty page; backend adapters can fill -// Items with Web Annotation matches. -type AnnotationPage struct { - Context any `json:"@context,omitempty"` - ID string `json:"id"` - Type string `json:"type"` - Items []Annotation `json:"items"` - PartOf []ServiceRef `json:"partOf,omitempty"` -} - -// Annotation is intentionally permissive: Content Search annotations vary by -// body/target selector shape, so adapters can emit JSON-LD-compatible maps. -type Annotation struct { - ID string `json:"id,omitempty"` - Type string `json:"type"` - Motivation any `json:"motivation,omitempty"` - Body any `json:"body,omitempty"` - Target any `json:"target,omitempty"` -} - -type ServiceRef struct { - ID string `json:"id"` - Type string `json:"type,omitempty"` -} diff --git a/internal/server/server.go b/internal/server/server.go index f393fdb..8aa506a 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -17,15 +17,11 @@ import ( "github.com/libops/triplet/internal/cache" "github.com/libops/triplet/internal/config" "github.com/libops/triplet/internal/cors" - authz "github.com/libops/triplet/internal/iiif/auth/v2/authorizer" - authhandler "github.com/libops/triplet/internal/iiif/auth/v2/handler" imghandler "github.com/libops/triplet/internal/iiif/image/v3/handler" "github.com/libops/triplet/internal/iiif/image/v3/pipeline" imgtypes "github.com/libops/triplet/internal/iiif/image/v3/types" preshandler "github.com/libops/triplet/internal/iiif/presentation/v3/handler" presstore "github.com/libops/triplet/internal/iiif/presentation/v3/store" - searchhandler "github.com/libops/triplet/internal/iiif/search/v2/handler" - "github.com/libops/triplet/internal/iiif/search/v2/searcher" "github.com/libops/triplet/internal/metrics" "github.com/libops/triplet/internal/observability" "github.com/libops/triplet/internal/storage" @@ -81,8 +77,8 @@ func Build(cfg *config.Config, logger *slog.Logger) (*http.Server, error) { pipe := pipeline.New(src, pipeline.Limits{ MaxOutputPixels: cfg.IIIF.Image.MaxOutputPixels, MaxSourcePixels: cfg.IIIF.Image.MaxSourcePixels, - MaxSourceBytes: cfg.IIIF.Image.MaxSourceBytes, - MaxDerivativeBytes: cfg.IIIF.Image.MaxDerivativeBytes, + MaxSourceBytes: int64(cfg.IIIF.Image.MaxSourceBytes), + MaxDerivativeBytes: int64(cfg.IIIF.Image.MaxDerivativeBytes), }, pipeline.Options{ ColorManagement: cfg.IIIF.Image.ColorManagement, LoadAccess: cfg.IIIF.Image.LoadAccess, @@ -111,7 +107,7 @@ func Build(cfg *config.Config, logger *slog.Logger) (*http.Server, error) { }, cfg.IIIF.Image.InfoDimensionCache == nil || *cfg.IIIF.Image.InfoDimensionCache, cfg.IIIF.Image.MaxSourcePixels, - cfg.IIIF.Image.MaxSourceBytes, + int64(cfg.IIIF.Image.MaxSourceBytes), cfg.IIIF.Image.MaxConcurrentTransforms, logger, ) @@ -129,7 +125,7 @@ func Build(cfg *config.Config, logger *slog.Logger) (*http.Server, error) { h := preshandler.New( cfg.IIIF.Presentation.Prefix, st, - cors.New(cfg.IIIF.AllowedOrigins, ""), + cors.New(cfg.IIIF.AllowedOrigins, "ETag"), cfg.IIIF.Presentation.WriteEnabled, cfg.IIIF.Presentation.WriteToken, logger, @@ -137,20 +133,6 @@ func Build(cfg *config.Config, logger *slog.Logger) (*http.Server, error) { h.Register(mux) logger.Info("presentation api enabled", "prefix", cfg.IIIF.Presentation.Prefix) } - if cfg.IIIF.Search.Enabled { - h := searchhandler.New(cfg.IIIF.Search.Prefix, cfg.Server.PublicBaseURL, searcher.Noop{}, cors.New(cfg.IIIF.AllowedOrigins, ""), logger) - h.Register(mux) - logger.Info("search api enabled", "prefix", cfg.IIIF.Search.Prefix) - } - if cfg.IIIF.Auth.Enabled { - if !cfg.IIIF.Auth.DevelopmentPermitAll { - return nil, errors.New("iiif auth requires an explicit authorizer") - } - h := authhandler.New(cfg.IIIF.Auth.Prefix, cfg.Server.PublicBaseURL, authz.PermitAll{}, cors.New(cfg.IIIF.AllowedOrigins, ""), logger) - h.Register(mux) - logger.Warn("auth api enabled with development permit-all authorizer", "prefix", cfg.IIIF.Auth.Prefix) - } - var handler http.Handler = mux handler = metrics.Middleware(handler) handler = observability.LoggingMiddleware(logger, observability.LoggingOptions{ @@ -263,7 +245,6 @@ func Run(ctx context.Context, s *http.Server, logger *slog.Logger) error { } func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func(), error) { - ctx := context.Background() var cleanup func() var fileOp storage.Opener @@ -280,19 +261,19 @@ func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func( op := storage.NewHTTPOpener( cfg.Sources.HTTP.AllowedOrigins, cfg.Sources.HTTP.RequestTimeout, - cfg.Sources.HTTP.MaxBytes, + int64(cfg.Sources.HTTP.MaxBytes), ) op.AllowPrivateHosts = cfg.Sources.HTTP.AllowPrivateHosts authOp := storage.NewHTTPOpener( cfg.Sources.HTTP.AllowedOrigins, cfg.Sources.HTTP.RequestTimeout, - cfg.Sources.HTTP.MaxBytes, + int64(cfg.Sources.HTTP.MaxBytes), ) authOp.AllowPrivateHosts = cfg.Sources.HTTP.AllowPrivateHosts authOp.ForwardAuthHeaders = true httpOp = op - if cfg.Cache.SourceRoot != "" || cfg.Cache.SourceBucketURL != "" { - sourceCache, err := buildSourceCache(ctx, cfg) + if cfg.Cache.SourceRoot != "" { + sourceCache, err := buildSourceCache(cfg) if err != nil { return nil, nil, err } @@ -327,30 +308,12 @@ func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func( } } - var gcsOp storage.Opener - if cfg.Sources.GCS != nil && cfg.Sources.GCS.BucketURL != "" { - op, err := storage.NewGCSOpener(ctx, cfg.Sources.GCS.BucketURL, cfg.Sources.GCS.Prefix, cfg.IIIF.Image.MaxSourceBytes) - if err != nil { - return nil, nil, err - } - previousCleanup := cleanup - cleanup = func() { - if previousCleanup != nil { - previousCleanup() - } - _ = op.Close() - } - gcsOp = op - } - var defaultOp storage.Opener switch cfg.Sources.Default { case "file": defaultOp = fileOp case "http": defaultOp = httpOp - case "gcs": - defaultOp = gcsOp default: return nil, nil, fmt.Errorf("source %q not supported", cfg.Sources.Default) } @@ -365,10 +328,7 @@ func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func( storage.Route{HasScheme: "https", Opener: httpOp}, ) } - if gcsOp != nil { - routes = append(routes, storage.Route{HasScheme: "gs", Opener: gcsOp}) - } - if len(routes) > 0 && (fileOp != nil || httpOp != nil || gcsOp != nil) { + if len(routes) > 0 && (fileOp != nil || httpOp != nil) { return &storage.Multiplex{Routes: routes, Default: defaultOp}, cleanup, nil } return defaultOp, cleanup, nil @@ -414,20 +374,14 @@ func buildLocalURLMappings(cfg *config.Config, fileOp storage.Opener) ([]storage func buildDerivativeCache(cfg *config.Config) (cache.Store, error) { if cfg.Cache.Root == "" { - if cfg.Cache.BucketURL == "" { - return cache.Noop{}, nil - } - return cache.NewGCSStore(context.Background(), cfg.Cache.BucketURL, cfg.Cache.Prefix) + return cache.Noop{}, nil } - return cache.NewFileStore(cfg.Cache.Root, cfg.Cache.MaxBytes) + return cache.NewFileStore(cfg.Cache.Root, int64(cfg.Cache.MaxBytes)) } -func buildSourceCache(ctx context.Context, cfg *config.Config) (cache.Store, error) { +func buildSourceCache(cfg *config.Config) (cache.Store, error) { if cfg.Cache.SourceRoot != "" { - return cache.NewFileStore(cfg.Cache.SourceRoot, cfg.Cache.SourceMaxBytes) - } - if cfg.Cache.SourceBucketURL != "" { - return cache.NewGCSStore(ctx, cfg.Cache.SourceBucketURL, cfg.Cache.SourcePrefix) + return cache.NewFileStore(cfg.Cache.SourceRoot, int64(cfg.Cache.SourceMaxBytes)) } return cache.Noop{}, nil } diff --git a/internal/storage/gcs.go b/internal/storage/gcs.go deleted file mode 100644 index 9b76f0f..0000000 --- a/internal/storage/gcs.go +++ /dev/null @@ -1,168 +0,0 @@ -package storage - -import ( - "context" - "fmt" - "io" - "net/url" - "os" - "path" - "strings" - - gcs "cloud.google.com/go/storage" - "google.golang.org/api/googleapi" -) - -// GCSOpener reads objects from a Google Cloud Storage bucket. -type GCSOpener struct { - client *gcs.Client - bucket string - prefix string - maxBytes int64 -} - -// NewGCSOpener constructs a GCS source opener. bucketURL must be gs://bucket. -func NewGCSOpener(ctx context.Context, bucketURL, prefix string, maxBytes int64) (*GCSOpener, error) { - bucket, err := bucketFromURL(bucketURL) - if err != nil { - return nil, err - } - client, err := gcs.NewClient(ctx) - if err != nil { - return nil, fmt.Errorf("gcs opener: %w", err) - } - return &GCSOpener{client: client, bucket: bucket, prefix: strings.Trim(prefix, "/"), maxBytes: maxBytes}, nil -} - -// Close releases resources held by the GCS client. -func (o *GCSOpener) Close() error { - return o.client.Close() -} - -// Open implements Opener. -func (o *GCSOpener) Open(ctx context.Context, identifier string) (io.ReadSeekCloser, Meta, error) { - key, attrs, meta, err := o.meta(ctx, identifier) - if err != nil { - return nil, Meta{}, err - } - obj := o.client.Bucket(o.bucket).Object(key) - if o.maxBytes > 0 && attrs.Size > o.maxBytes { - return nil, Meta{}, fmt.Errorf("gcs source %q: response exceeds max_bytes %d", key, o.maxBytes) - } - r, err := obj.NewReader(ctx) - if err != nil { - if isGCSNotFound(err) { - return nil, Meta{}, ErrNotFound - } - return nil, Meta{}, err - } - defer r.Close() - - f, err := os.CreateTemp("", "triplet-gcs-*") - if err != nil { - return nil, Meta{}, err - } - defer func() { - if err != nil { - _ = os.Remove(f.Name()) - _ = f.Close() - } - }() - var reader io.Reader = r - if o.maxBytes > 0 { - reader = io.LimitReader(r, o.maxBytes+1) - } - n, err := io.Copy(f, reader) - if err != nil { - return nil, Meta{}, err - } - if o.maxBytes > 0 && n > o.maxBytes { - return nil, Meta{}, fmt.Errorf("gcs source %q: response exceeds max_bytes %d", key, o.maxBytes) - } - if _, err = f.Seek(0, io.SeekStart); err != nil { - return nil, Meta{}, err - } - return &tempReadSeekCloser{File: f}, meta, nil -} - -// Meta implements MetaReader. -func (o *GCSOpener) Meta(ctx context.Context, identifier string) (Meta, error) { - _, _, meta, err := o.meta(ctx, identifier) - return meta, err -} - -func (o *GCSOpener) meta(ctx context.Context, identifier string) (string, *gcs.ObjectAttrs, Meta, error) { - key, err := objectKey(o.prefix, identifier) - if err != nil { - return "", nil, Meta{}, err - } - obj := o.client.Bucket(o.bucket).Object(key) - attrs, err := obj.Attrs(ctx) - if err != nil { - if isGCSNotFound(err) { - return "", nil, Meta{}, ErrNotFound - } - return "", nil, Meta{}, err - } - if o.maxBytes > 0 && attrs.Size > o.maxBytes { - return "", nil, Meta{}, fmt.Errorf("gcs source %q: response exceeds max_bytes %d", key, o.maxBytes) - } - return key, attrs, Meta{ - ContentType: attrs.ContentType, - Size: attrs.Size, - ModTime: attrs.Updated, - Version: fmt.Sprintf("gcs:%d:%d", attrs.Generation, attrs.Metageneration), - }, nil -} - -func bucketFromURL(bucketURL string) (string, error) { - u, err := url.Parse(bucketURL) - if err != nil { - return "", fmt.Errorf("gcs bucket url: %w", err) - } - if u.Scheme != "gs" || u.Host == "" || u.Path != "" { - return "", fmt.Errorf("gcs bucket url: expected gs://bucket, got %q", bucketURL) - } - return u.Host, nil -} - -func objectKey(prefix, identifier string) (string, error) { - if identifier == "" || strings.ContainsAny(identifier, "\x00\n\r") { - return "", ErrNotFound - } - if u, err := url.Parse(identifier); err == nil && u.Scheme == "gs" { - identifier = strings.TrimPrefix(identifier, "gs://") - if idx := strings.Index(identifier, "/"); idx >= 0 { - identifier = identifier[idx+1:] - } else { - return "", ErrNotFound - } - } - clean := path.Clean("/" + identifier) - clean = strings.TrimPrefix(clean, "/") - if clean == "" || clean == "." || strings.HasPrefix(clean, "../") { - return "", ErrNotFound - } - if prefix == "" { - return clean, nil - } - return strings.TrimPrefix(path.Join(prefix, clean), "/"), nil -} - -func isGCSNotFound(err error) bool { - if e, ok := err.(*googleapi.Error); ok && e.Code == 404 { - return true - } - return false -} - -type tempReadSeekCloser struct { - *os.File -} - -func (t *tempReadSeekCloser) Close() error { - name := t.Name() - err := t.File.Close() - _ = os.Remove(name) - return err -} diff --git a/internal/storage/gcs_test.go b/internal/storage/gcs_test.go deleted file mode 100644 index b0f14fe..0000000 --- a/internal/storage/gcs_test.go +++ /dev/null @@ -1,23 +0,0 @@ -package storage - -import "testing" - -func TestBucketFromURL(t *testing.T) { - got, err := bucketFromURL("gs://example-bucket") - if err != nil { - t.Fatal(err) - } - if got != "example-bucket" { - t.Fatalf("bucket = %q", got) - } -} - -func TestObjectKey(t *testing.T) { - got, err := objectKey("images", "gs://example-bucket/path/to/sample.tif") - if err != nil { - t.Fatal(err) - } - if got != "images/path/to/sample.tif" { - t.Fatalf("key = %q", got) - } -} diff --git a/internal/storage/opener.go b/internal/storage/opener.go index d7ed62f..9218701 100644 --- a/internal/storage/opener.go +++ b/internal/storage/opener.go @@ -1,7 +1,7 @@ // Package storage resolves IIIF identifiers to readable image bytes. // // Every Image and Presentation handler routes through an [Opener]. New -// backends (HTTP, GCS, in-memory uploads) implement the same interface so +// backends (HTTP and in-memory uploads) implement the same interface so // the transform pipeline never sees backend-specific code. package storage diff --git a/scripts/benchmark-iiif.sh b/scripts/benchmark-iiif.sh index 2c5eb71..f72407a 100755 --- a/scripts/benchmark-iiif.sh +++ b/scripts/benchmark-iiif.sh @@ -155,16 +155,12 @@ iiif: enabled: true prefix: /iiif/3 max_output_pixels: 400000000 - max_source_bytes: 1073741824 + max_source_bytes: 1GiB color_management: "$TRIPLET_COLOR_MANAGEMENT" load_access: "$TRIPLET_LOAD_ACCESS" info_dimension_cache: true presentation: enabled: false - search: - enabled: false - auth: - enabled: false sources: default: file From 78dfb57b0f5096d713616830cb87113dde00b81e Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 1 May 2026 14:30:10 +0000 Subject: [PATCH 2/3] Add remote URI metadata cache --- config.example.yaml | 7 ++ docs/caching.md | 20 ++++++ docs/configuration.md | 12 ++++ internal/config/config.go | 16 +++-- internal/config/config_test.go | 28 ++++++++ internal/server/server.go | 7 ++ internal/storage/meta_cache.go | 105 ++++++++++++++++++++++++++++ internal/storage/meta_cache_test.go | 96 +++++++++++++++++++++++++ 8 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 internal/storage/meta_cache.go create mode 100644 internal/storage/meta_cache_test.go diff --git a/config.example.yaml b/config.example.yaml index db016d5..fdd68ae 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -179,6 +179,13 @@ sources: # allow_private_hosts: false # request_timeout: 2m # max_bytes: 50MiB + # # Optional in-process metadata cache for remote URL identifiers. This lets + # # derivative cache hits reuse recent ETag/Last-Modified/size metadata + # # instead of making a HEAD or range request to the upstream source every + # # time. During this TTL, Triplet may serve a cached derivative without + # # noticing that the remote source changed or disappeared. + # metadata_cache_ttl: 5m + # metadata_cache_max_entries: 4096 cache: # Derivative cache. Configure a filesystem root. diff --git a/docs/caching.md b/docs/caching.md index 536b6ba..09183f0 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -73,6 +73,25 @@ cache: When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. +## HTTP metadata cache + +Remote URL identifiers need source metadata to build derivative cache keys. By +default, Triplet revalidates that metadata with the upstream source before it +checks the derivative cache. Configure `sources.http.metadata_cache_ttl` to +allow recent metadata to stand in for that upstream `HEAD` or range request: + +```yaml +sources: + http: + metadata_cache_ttl: 5m + metadata_cache_max_entries: 4096 +``` + +This is an explicit staleness window. While metadata is cached, a derivative +cache hit can be served without touching the remote source. If the remote source +changes, disappears, or changes authorization during the TTL, Triplet may serve +the cached derivative until the metadata entry expires. + ## Authorization decision cache Local URL mappings with `auth_probe: true` cache anonymous and credentialed @@ -118,6 +137,7 @@ derivative and source caches. |---|---|---|---| | Derivative cache | `cache.root`; optional `cache.max_bytes`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget; `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. | | HTTP source cache | `cache.source_root`; optional `cache.source_max_bytes`, `cache.source_stale_after` | Original source bytes fetched through the HTTP source backend. | Keys are source identifiers. When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. | +| HTTP metadata cache | `sources.http.metadata_cache_ttl`; optional `sources.http.metadata_cache_max_entries` | Successful remote source metadata lookups for URL identifiers. | In-memory only. While fresh, derivative cache checks can avoid upstream metadata requests. This can serve stale derivatives until the TTL expires. | | `info.json` dimension cache | `iiif.image.info_dimension_cache` | Source dimensions used to build Image API `info.json`. | In-memory only. Entries are keyed by identifier plus source size/modtime metadata, so source changes with updated metadata miss the cache. | | Local URL auth-probe cache | `sources.file.url_mappings[].auth_*` | Authorization probe results for local URL mappings with `auth_probe: true`. Anonymous and credentialed probes are cached separately. See [Authorization](authorization.md). | In-memory only. Tier defaults are 5 minutes unless overridden by `auth_anonymous_cache_ttl`, `auth_authenticated_cache_ttl`, or `auth_cache_ttl`. The image cache invalidation route also clears matching auth-probe entries when the source backend supports it. | | libvips operation cache | `vips.cache_max_mem`, `vips.cache_max_files` | libvips in-process operation results. | Disabled by default in the example config. This is process-local and separate from Triplet's derivative/source caches. | diff --git a/docs/configuration.md b/docs/configuration.md index 268ffcc..76ccc71 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -217,8 +217,20 @@ sources: allow_private_hosts: false request_timeout: 2m max_bytes: 50MiB + metadata_cache_ttl: 5m + metadata_cache_max_entries: 4096 ``` The HTTP host allowlist is a source-fetch boundary. See [Authorization](authorization.md#source-fetch-boundary) for origin, redirect, private host, and DNS rebinding behavior. + +`metadata_cache_ttl` gives remote URL identifiers the same kind of explicit +staleness window that local URL auth-probe mappings use. While a remote +identifier's metadata cache entry is fresh, Triplet can build derivative cache +keys from cached `ETag`, `Last-Modified`, and size metadata instead of making a +new upstream `HEAD` or range request before checking the derivative cache. If +the upstream source changes, disappears, or changes authorization during that +TTL, Triplet may continue serving the locally cached derivative until the +metadata entry expires. Leave the TTL unset or `0` when every derivative request +must revalidate source metadata upstream. diff --git a/internal/config/config.go b/internal/config/config.go index 6912236..e27cbf0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -141,10 +141,12 @@ type FileURLMapping struct { // HTTPSource resolves identifiers that are HTTP(S) URLs. type HTTPSource struct { - AllowedOrigins []string `yaml:"allowed_origins"` - AllowPrivateHosts bool `yaml:"allow_private_hosts"` - RequestTimeout time.Duration `yaml:"request_timeout"` - MaxBytes ByteSize `yaml:"max_bytes"` + AllowedOrigins []string `yaml:"allowed_origins"` + AllowPrivateHosts bool `yaml:"allow_private_hosts"` + RequestTimeout time.Duration `yaml:"request_timeout"` + MaxBytes ByteSize `yaml:"max_bytes"` + MetadataCacheTTL time.Duration `yaml:"metadata_cache_ttl"` + MetadataCacheMaxEntries int `yaml:"metadata_cache_max_entries"` } // Cache declares optional derivative-cache settings. @@ -450,6 +452,12 @@ func (c *Config) validate() error { if c.Sources.HTTP.MaxBytes < 0 { return errors.New("sources.http.max_bytes: must be >= 0") } + if c.Sources.HTTP.MetadataCacheTTL < 0 { + return errors.New("sources.http.metadata_cache_ttl: must be >= 0") + } + if c.Sources.HTTP.MetadataCacheMaxEntries < 0 { + return errors.New("sources.http.metadata_cache_max_entries: must be >= 0") + } } if c.Sources.File != nil { for _, prefix := range c.Sources.File.URLPrefixes { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 875e1b5..a7b25a5 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -94,6 +94,8 @@ sources: http: allowed_origins: [https://example.org] max_bytes: 1048576 + metadata_cache_ttl: 24h + metadata_cache_max_entries: 4096 `, }, { @@ -322,6 +324,32 @@ sources: `, wantErr: "sources.http.allowed_origins", }, + { + name: "http source rejects negative metadata cache ttl", + body: ` +server: + public_base_url: http://localhost:8080 +sources: + default: http + http: + allowed_origins: [https://example.org] + metadata_cache_ttl: -1s +`, + wantErr: "sources.http.metadata_cache_ttl", + }, + { + name: "http source rejects negative metadata cache max entries", + body: ` +server: + public_base_url: http://localhost:8080 +sources: + default: http + http: + allowed_origins: [https://example.org] + metadata_cache_max_entries: -1 +`, + wantErr: "sources.http.metadata_cache_max_entries", + }, { name: "pprof enabled requires token", body: ` diff --git a/internal/server/server.go b/internal/server/server.go index 8aa506a..4275d48 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -293,6 +293,13 @@ func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func( RefreshContext: refreshCtx, } } + if cfg.Sources.HTTP.MetadataCacheTTL > 0 { + httpOp = &storage.MetaCaching{ + Inner: httpOp, + TTL: cfg.Sources.HTTP.MetadataCacheTTL, + MaxEntries: cfg.Sources.HTTP.MetadataCacheMaxEntries, + } + } localURLMappings, err := buildLocalURLMappings(cfg, fileOp) if err != nil { return nil, nil, err diff --git a/internal/storage/meta_cache.go b/internal/storage/meta_cache.go new file mode 100644 index 0000000..e635e0e --- /dev/null +++ b/internal/storage/meta_cache.go @@ -0,0 +1,105 @@ +package storage + +import ( + "context" + "io" + "sync" + "time" +) + +// MetaCaching caches successful source metadata lookups in process. It is +// useful for remote sources where metadata checks are required to build stable +// derivative cache keys, but repeated upstream HEAD requests are too expensive. +type MetaCaching struct { + Inner Opener + TTL time.Duration + MaxEntries int + + mu sync.Mutex + items map[string]metaCacheEntry +} + +type metaCacheEntry struct { + meta Meta + storedAt time.Time +} + +// Open delegates to Inner and records the returned metadata. +func (c *MetaCaching) Open(ctx context.Context, identifier string) (io.ReadSeekCloser, Meta, error) { + rc, meta, err := c.Inner.Open(ctx, identifier) + if err != nil { + return nil, Meta{}, err + } + c.store(identifier, meta) + return rc, meta, nil +} + +// Meta implements MetaReader with an in-process TTL cache. +func (c *MetaCaching) Meta(ctx context.Context, identifier string) (Meta, error) { + if meta, ok := c.get(identifier); ok { + return meta, nil + } + metaReader, ok := c.Inner.(MetaReader) + if !ok { + rc, meta, err := c.Inner.Open(ctx, identifier) + if err != nil { + return Meta{}, err + } + _ = rc.Close() + c.store(identifier, meta) + return meta, nil + } + meta, err := metaReader.Meta(ctx, identifier) + if err != nil { + return Meta{}, err + } + c.store(identifier, meta) + return meta, nil +} + +func (c *MetaCaching) get(identifier string) (Meta, bool) { + if c.TTL <= 0 { + return Meta{}, false + } + c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.items[identifier] + if !ok { + return Meta{}, false + } + if time.Since(entry.storedAt) > c.TTL { + delete(c.items, identifier) + return Meta{}, false + } + return entry.meta, true +} + +func (c *MetaCaching) store(identifier string, meta Meta) { + if c.TTL <= 0 { + return + } + c.mu.Lock() + defer c.mu.Unlock() + if c.items == nil { + c.items = make(map[string]metaCacheEntry) + } + if max := c.maxEntries(); len(c.items) >= max { + var oldestKey string + var oldest time.Time + for key, entry := range c.items { + if oldestKey == "" || entry.storedAt.Before(oldest) { + oldestKey = key + oldest = entry.storedAt + } + } + delete(c.items, oldestKey) + } + c.items[identifier] = metaCacheEntry{meta: meta, storedAt: time.Now()} +} + +func (c *MetaCaching) maxEntries() int { + if c.MaxEntries > 0 { + return c.MaxEntries + } + return 4096 +} diff --git a/internal/storage/meta_cache_test.go b/internal/storage/meta_cache_test.go new file mode 100644 index 0000000..5431d45 --- /dev/null +++ b/internal/storage/meta_cache_test.go @@ -0,0 +1,96 @@ +package storage + +import ( + "context" + "io" + "testing" + "time" +) + +type countingMetaOpener struct { + metaCalls int + openCalls int + meta Meta +} + +func (o *countingMetaOpener) Open(context.Context, string) (io.ReadSeekCloser, Meta, error) { + o.openCalls++ + return &bytesReadSeekCloser{r: newSeekableBytes([]byte("source"))}, o.meta, nil +} + +func (o *countingMetaOpener) Meta(context.Context, string) (Meta, error) { + o.metaCalls++ + return o.meta, nil +} + +func TestMetaCachingCachesWithinTTL(t *testing.T) { + inner := &countingMetaOpener{meta: Meta{Size: 10, Version: "v1"}} + cached := &MetaCaching{Inner: inner, TTL: time.Hour} + + for range 2 { + meta, err := cached.Meta(context.Background(), "id") + if err != nil { + t.Fatal(err) + } + if meta.Version != "v1" { + t.Fatalf("meta version = %q", meta.Version) + } + } + if inner.metaCalls != 1 { + t.Fatalf("meta calls = %d, want 1", inner.metaCalls) + } +} + +func TestMetaCachingExpires(t *testing.T) { + inner := &countingMetaOpener{meta: Meta{Size: 10, Version: "v1"}} + cached := &MetaCaching{Inner: inner, TTL: time.Nanosecond} + + if _, err := cached.Meta(context.Background(), "id"); err != nil { + t.Fatal(err) + } + time.Sleep(time.Millisecond) + if _, err := cached.Meta(context.Background(), "id"); err != nil { + t.Fatal(err) + } + if inner.metaCalls != 2 { + t.Fatalf("meta calls = %d, want 2", inner.metaCalls) + } +} + +func TestMetaCachingOpenStoresMeta(t *testing.T) { + inner := &countingMetaOpener{meta: Meta{Size: 10, Version: "v1"}} + cached := &MetaCaching{Inner: inner, TTL: time.Hour} + + rc, _, err := cached.Open(context.Background(), "id") + if err != nil { + t.Fatal(err) + } + _ = rc.Close() + if _, err := cached.Meta(context.Background(), "id"); err != nil { + t.Fatal(err) + } + if inner.metaCalls != 0 { + t.Fatalf("meta calls = %d, want 0", inner.metaCalls) + } + if inner.openCalls != 1 { + t.Fatalf("open calls = %d, want 1", inner.openCalls) + } +} + +func TestMetaCachingEvictsOldest(t *testing.T) { + inner := &countingMetaOpener{meta: Meta{Size: 10, Version: "v1"}} + cached := &MetaCaching{Inner: inner, TTL: time.Hour, MaxEntries: 1} + + if _, err := cached.Meta(context.Background(), "one"); err != nil { + t.Fatal(err) + } + if _, err := cached.Meta(context.Background(), "two"); err != nil { + t.Fatal(err) + } + if _, err := cached.Meta(context.Background(), "one"); err != nil { + t.Fatal(err) + } + if inner.metaCalls != 3 { + t.Fatalf("meta calls = %d, want 3", inner.metaCalls) + } +} From 99abb783a60b6bfc330dc2933b902eddc630fee0 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 1 May 2026 15:16:08 +0000 Subject: [PATCH 3/3] Add derivative cache max age --- config.example.yaml | 14 ++-- docs/authorization.md | 45 +++++------- docs/caching.md | 20 +++-- docs/configuration.md | 12 +-- internal/cache/file.go | 53 ++++++++++++-- internal/cache/file_test.go | 56 ++++++++++++++ internal/config/config.go | 46 ++++-------- internal/config/config_test.go | 113 ++++++----------------------- internal/server/server.go | 21 ++---- internal/storage/local_url.go | 34 +++------ internal/storage/local_url_test.go | 49 ++++++------- 11 files changed, 223 insertions(+), 240 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index fdd68ae..4c8661c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -146,10 +146,6 @@ sources: # - prefix: /system/files # root: /private # auth_probe: true - # auth_anonymous_cache_ttl: 720h - # auth_authenticated_cache_ttl: 168h - # auth_error_cache_min_age: 5m - # auth_cache_max_entries: 4096 # - prefix: /fedora # root: /fcrepo # ocfl: true @@ -159,10 +155,8 @@ sources: # `auth_probe: true` means Triplet forwards browser Cookie/Authorization # headers to the original URL and requires 200/206 before reading locally. # Auth probes are tiered: anonymous is checked first and cached separately; - # credentialed probes only run when anonymous access is denied. Long TTLs - # are explicit access-staleness windows; defaults are short when omitted. - # 403/404 probe results are not cached when Last-Modified is newer than - # auth_error_cache_min_age, avoiding permission-publication races. + # credentialed probes only run when anonymous access is denied. Probe + # decisions inherit sources.http.metadata_cache_ttl. # Optional HTTP(S) source. When configured alongside the file source, # `http://...` and `https://...` identifiers are routed here automatically. # @@ -185,7 +179,6 @@ sources: # # time. During this TTL, Triplet may serve a cached derivative without # # noticing that the remote source changed or disappeared. # metadata_cache_ttl: 5m - # metadata_cache_max_entries: 4096 cache: # Derivative cache. Configure a filesystem root. @@ -196,6 +189,9 @@ cache: # target before eviction runs, and metadata sidecar files are not counted. # 0 disables size-based eviction. max_bytes: 500GiB + # Optional age limit for derivative entries. Expired entries are removed on + # read and opportunistically during writes. 0 disables age-based eviction. + max_age: 720h # Optional filesystem source cache for fetched source bytes (primarily HTTP # identifiers). # source_root: /var/lib/triplet/source-cache diff --git a/docs/authorization.md b/docs/authorization.md index 90e51cb..af9372e 100644 --- a/docs/authorization.md +++ b/docs/authorization.md @@ -64,24 +64,25 @@ sources: - prefix: /system/files root: /private auth_probe: true - auth_anonymous_cache_ttl: 720h - auth_authenticated_cache_ttl: 168h - auth_error_cache_min_age: 5m - auth_cache_max_entries: 4096 + http: + allowed_origins: + - https://repository.example.edu + metadata_cache_ttl: 168h ``` The probe answers whether the original source URL would let this request read the file. Triplet uses that source response as the authority before serving the local copy or a cached derivative. -Anonymous access is checked first. If the source allows anonymous access, -Triplet caches that anonymous allow decision for the identifier and all callers -can use it until the TTL expires. If anonymous access is denied and the incoming -request has `Cookie` or `Authorization` headers, Triplet checks the source again -with those headers. Credentialed decisions are cached separately by identifier -and by the exact forwarded `Cookie` and `Authorization` header values, so two -different sessions do not share one authenticated auth decision. A repeated -request with the same headers uses the cached decision until its TTL expires. +Anonymous access is checked first. If `sources.http.metadata_cache_ttl` is set +and the source allows anonymous access, Triplet caches that anonymous allow +decision for the identifier and all callers can use it until the TTL expires. If +anonymous access is denied and the incoming request has `Cookie` or +`Authorization` headers, Triplet checks the source again with those headers. +Credentialed decisions are cached separately by identifier and by the exact +forwarded `Cookie` and `Authorization` header values, so two different sessions +do not share one authenticated auth decision. A repeated request with the same +headers uses the cached decision until the TTL expires. Derivative bytes are shared after authorization. The authorization probe or auth-cache lookup happens before Triplet serves a derivative-cache hit, but the @@ -148,11 +149,9 @@ authorization result: ## Auth decision TTLs -Anonymous and authenticated probe decisions default to 5 minutes. Override them -per mapping with `auth_anonymous_cache_ttl` and -`auth_authenticated_cache_ttl`. If either tier-specific value is omitted, -Triplet falls back to `auth_cache_ttl`. If that is also omitted, the tier uses -the 5 minute default. +Anonymous and authenticated probe decisions inherit +`sources.http.metadata_cache_ttl`. Leave the TTL unset or `0` to disable +auth-probe decision caching and recheck the upstream source on every request. Long TTLs are explicit access-staleness windows. They are appropriate when repository permissions change rarely or when the mapping is used for content @@ -165,14 +164,10 @@ configured TTL. Other upstream errors are not cached. Negative auth-probe caching is conservative. For 401, 403, and 404 probe responses, Triplet checks the upstream `Last-Modified` header before caching the denial. If `Last-Modified` parses and is newer than -`now - auth_error_cache_min_age`, the denial is not cached. This avoids holding a -stale denial while repository access rules or file publication are still -settling. If `Last-Modified` is absent, unparseable, or older than that minimum -age window, the denial can be cached for the configured auth TTL. -`auth_error_cache_min_age` defaults to 5 minutes; increase it when repository -metadata and permissions are known to settle more slowly. - -`auth_cache_max_entries` defaults to 4096 entries when omitted. +5 minutes ago, the denial is not cached. This avoids holding a stale denial +while repository access rules or file publication are still settling. If +`Last-Modified` is absent, unparseable, or older than that minimum age window, +the denial can be cached for the configured auth TTL. The image cache invalidation route also clears matching auth-probe entries when the source backend supports it. diff --git a/docs/caching.md b/docs/caching.md index 09183f0..6766059 100644 --- a/docs/caching.md +++ b/docs/caching.md @@ -16,10 +16,12 @@ format can be served without running libvips again. cache: root: /var/lib/triplet/cache max_bytes: 500GiB + max_age: 720h ``` -`max_bytes` is a best-effort filesystem eviction target. Failed transforms and -HTTP error responses are not stored. +`max_bytes` is a best-effort filesystem eviction target. `max_age` is an +optional age limit for derivative entries. Failed transforms and HTTP error +responses are not stored. `cache.max_bytes` is the approximate total retained size of derivative payload files under `cache.root`. It is different from @@ -28,6 +30,13 @@ can be returned or cached. A cache write can temporarily exceed `cache.max_bytes before eviction runs, and metadata sidecar files are not counted toward the target. +`cache.max_age` is based on when Triplet wrote the derivative entry, not when it +was last requested. When a cached derivative is older than `max_age`, Triplet +removes it and treats the request as a cache miss. Expired entries are also +removed opportunistically when new entries are written. Set `max_age: 0` or omit +it to keep derivative files until size eviction, manual deletion, invalidation, +or cache-key changes make them unused. + ### Derivative invalidation The route writes an invalidation marker into the derivative cache. Subsequent @@ -84,7 +93,6 @@ allow recent metadata to stand in for that upstream `HEAD` or range request: sources: http: metadata_cache_ttl: 5m - metadata_cache_max_entries: 4096 ``` This is an explicit staleness window. While metadata is cached, a derivative @@ -135,9 +143,9 @@ derivative and source caches. | Layer | Configuration | What is cached | Invalidation / freshness | |---|---|---|---| -| Derivative cache | `cache.root`; optional `cache.max_bytes`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget; `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. | +| Derivative cache | `cache.root`; optional `cache.max_bytes`, `cache.max_age`, `iiif.image.cache_invalidation_token` | Encoded IIIF image responses, keyed by identifier, source version, invalidation marker, region, size, rotation, quality, and format. | A changed source version produces a new key. The protected invalidation route bumps the per-identifier invalidation marker. `cache.max_bytes` is a best-effort aggregate cache budget; `cache.max_age` removes derivative entries older than the configured duration. `iiif.image.max_derivative_bytes` is the per-response size limit before return/cache. Failed transforms and HTTP error responses are not stored. | | HTTP source cache | `cache.source_root`; optional `cache.source_max_bytes`, `cache.source_stale_after` | Original source bytes fetched through the HTTP source backend. | Keys are source identifiers. When `source_stale_after` is set, stale hits are served immediately and refreshed in the background. Upstream 4xx/5xx responses are not stored. | -| HTTP metadata cache | `sources.http.metadata_cache_ttl`; optional `sources.http.metadata_cache_max_entries` | Successful remote source metadata lookups for URL identifiers. | In-memory only. While fresh, derivative cache checks can avoid upstream metadata requests. This can serve stale derivatives until the TTL expires. | +| HTTP metadata cache | `sources.http.metadata_cache_ttl` | Successful remote source metadata lookups for URL identifiers. | In-memory only. While fresh, derivative cache checks can avoid upstream metadata requests. This can serve stale derivatives until the TTL expires. | | `info.json` dimension cache | `iiif.image.info_dimension_cache` | Source dimensions used to build Image API `info.json`. | In-memory only. Entries are keyed by identifier plus source size/modtime metadata, so source changes with updated metadata miss the cache. | -| Local URL auth-probe cache | `sources.file.url_mappings[].auth_*` | Authorization probe results for local URL mappings with `auth_probe: true`. Anonymous and credentialed probes are cached separately. See [Authorization](authorization.md). | In-memory only. Tier defaults are 5 minutes unless overridden by `auth_anonymous_cache_ttl`, `auth_authenticated_cache_ttl`, or `auth_cache_ttl`. The image cache invalidation route also clears matching auth-probe entries when the source backend supports it. | +| Local URL auth-probe cache | `sources.http.metadata_cache_ttl` for mappings with `auth_probe: true` | Authorization probe results for local URL mappings. Anonymous and credentialed probes are cached separately. See [Authorization](authorization.md). | In-memory only. The image cache invalidation route also clears matching auth-probe entries when the source backend supports it. | | libvips operation cache | `vips.cache_max_mem`, `vips.cache_max_files` | libvips in-process operation results. | Disabled by default in the example config. This is process-local and separate from Triplet's derivative/source caches. | diff --git a/docs/configuration.md b/docs/configuration.md index 76ccc71..f31ca5e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -150,9 +150,8 @@ iiif: ## Caching Cache-related settings, including derivative caches, source caches, -`info_dimension_cache`, and libvips operation caches are covered in -[Caching](caching.md). Local URL auth-probe TTLs are covered in -[Authorization](authorization.md). +`info_dimension_cache`, local URL auth-probe caching, and libvips operation +caches are covered in [Caching](caching.md). ## Source selection @@ -218,7 +217,6 @@ sources: request_timeout: 2m max_bytes: 50MiB metadata_cache_ttl: 5m - metadata_cache_max_entries: 4096 ``` The HTTP host allowlist is a source-fetch boundary. See @@ -232,5 +230,7 @@ keys from cached `ETag`, `Last-Modified`, and size metadata instead of making a new upstream `HEAD` or range request before checking the derivative cache. If the upstream source changes, disappears, or changes authorization during that TTL, Triplet may continue serving the locally cached derivative until the -metadata entry expires. Leave the TTL unset or `0` when every derivative request -must revalidate source metadata upstream. +metadata entry expires. Local URL mappings with `auth_probe: true` inherit the +same TTL for anonymous and credentialed auth-probe decisions. Leave the TTL +unset or `0` when every derivative request must revalidate source metadata and +every auth-probed local URL request must recheck upstream authorization. diff --git a/internal/cache/file.go b/internal/cache/file.go index e46a975..0b5050f 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -26,6 +26,10 @@ type FileStore struct { // least-recently-modified entries are evicted on the next Put. MaxBytes int64 + // MaxAge optionally bounds how long entries remain usable after Put. + // Expired entries are removed on Get and opportunistically on Put. + MaxAge time.Duration + mu sync.Mutex } @@ -41,6 +45,16 @@ func NewFileStore(root string, maxBytes int64) (*FileStore, error) { return &FileStore{Root: abs, MaxBytes: maxBytes}, nil } +// NewFileStoreWithMaxAge constructs a FileStore with size and age eviction. +func NewFileStoreWithMaxAge(root string, maxBytes int64, maxAge time.Duration) (*FileStore, error) { + store, err := NewFileStore(root, maxBytes) + if err != nil { + return nil, err + } + store.MaxAge = maxAge + return store, nil +} + type fileMeta struct { ContentType string `json:"content_type"` Size int64 `json:"size"` @@ -61,6 +75,11 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er if err := json.Unmarshal(mb, &m); err != nil { return nil, Entry{}, fmt.Errorf("cache meta: %w", err) } + if s.expired(m.StoredAt, time.Now()) { + _ = os.Remove(dataPath) + _ = os.Remove(metaPath) + return nil, Entry{}, ErrMiss + } f, err := os.Open(dataPath) if err != nil { if errors.Is(err, fs.ErrNotExist) { @@ -104,7 +123,7 @@ func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Rea return err } _ = os.Chtimes(metaPath, storedAt, storedAt) - if s.MaxBytes > 0 { + if s.MaxAge > 0 || s.MaxBytes > 0 { s.evict() } return nil @@ -133,11 +152,14 @@ func (s *FileStore) evict() { defer s.mu.Unlock() var total int64 type entry struct { - path string - size int64 - mod time.Time + path string + metaPath string + size int64 + mod time.Time + storedAt time.Time } var entries []entry + now := time.Now() _ = filepath.WalkDir(s.Root, func(p string, d fs.DirEntry, err error) error { if err != nil || d.IsDir() { return nil @@ -149,11 +171,24 @@ func (s *FileStore) evict() { if err != nil { return nil } - entries = append(entries, entry{p, info.Size(), info.ModTime()}) + storedAt := info.ModTime() + metaPath := p + ".meta" + if mb, err := os.ReadFile(metaPath); err == nil { + var m fileMeta + if err := json.Unmarshal(mb, &m); err == nil && !m.StoredAt.IsZero() { + storedAt = m.StoredAt + } + } + if s.expired(storedAt, now) { + _ = os.Remove(p) + _ = os.Remove(metaPath) + return nil + } + entries = append(entries, entry{path: p, metaPath: metaPath, size: info.Size(), mod: info.ModTime(), storedAt: storedAt}) total += info.Size() return nil }) - if total <= s.MaxBytes { + if s.MaxBytes <= 0 || total <= s.MaxBytes { return } sort.Slice(entries, func(i, j int) bool { @@ -167,7 +202,11 @@ func (s *FileStore) evict() { return } _ = os.Remove(e.path) - _ = os.Remove(e.path + ".meta") + _ = os.Remove(e.metaPath) total -= e.size } } + +func (s *FileStore) expired(storedAt, now time.Time) bool { + return s.MaxAge > 0 && !storedAt.IsZero() && now.Sub(storedAt) > s.MaxAge +} diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index 1083ed5..f933f63 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -3,8 +3,10 @@ package cache import ( "bytes" "context" + "encoding/json" "errors" "io" + "os" "strings" "testing" "time" @@ -77,3 +79,57 @@ func TestFileStoreEvictsWhenOversize(t *testing.T) { } t.Fatal("expected oldest entry to be evicted") } + +func TestFileStoreMaxAgeExpiresOnGet(t *testing.T) { + store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Nanosecond) + if err != nil { + t.Fatal(err) + } + if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("payload")); err != nil { + t.Fatal(err) + } + time.Sleep(time.Millisecond) + + _, _, err = store.Get(context.Background(), "old") + if !errors.Is(err, ErrMiss) { + t.Fatalf("err = %v, want miss", err) + } + + dataPath, metaPath := store.paths("old") + if _, err := os.Stat(dataPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("data stat err = %v, want not exist", err) + } + if _, err := os.Stat(metaPath); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("meta stat err = %v, want not exist", err) + } +} + +func TestFileStoreMaxAgeEvictsOnPut(t *testing.T) { + store, err := NewFileStoreWithMaxAge(t.TempDir(), 0, time.Hour) + if err != nil { + t.Fatal(err) + } + if err := store.Put(context.Background(), "old", "text/plain", strings.NewReader("old")); err != nil { + t.Fatal(err) + } + dataPath, metaPath := store.paths("old") + oldTime := time.Now().Add(-2 * time.Hour) + _ = os.Chtimes(dataPath, oldTime, oldTime) + meta := fileMeta{ContentType: "text/plain", Size: 3, StoredAt: oldTime} + mb, _ := json.Marshal(meta) + if err := os.WriteFile(metaPath, mb, 0o640); err != nil { + t.Fatal(err) + } + + if err := store.Put(context.Background(), "new", "text/plain", strings.NewReader("new")); err != nil { + t.Fatal(err) + } + if _, _, err := store.Get(context.Background(), "old"); !errors.Is(err, ErrMiss) { + t.Fatalf("old err = %v, want miss", err) + } + if rc, _, err := store.Get(context.Background(), "new"); err != nil { + t.Fatalf("new err = %v", err) + } else { + _ = rc.Close() + } +} diff --git a/internal/config/config.go b/internal/config/config.go index e27cbf0..150917e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -128,31 +128,26 @@ type FileSource struct { // FileURLMapping maps a URL identifier prefix to a local filesystem root. type FileURLMapping struct { - Prefix string `yaml:"prefix"` - Root string `yaml:"root"` - OCFL bool `yaml:"ocfl"` - AuthProbe bool `yaml:"auth_probe"` - AuthCacheTTL time.Duration `yaml:"auth_cache_ttl"` - AuthAnonymousCacheTTL time.Duration `yaml:"auth_anonymous_cache_ttl"` - AuthAuthenticatedCacheTTL time.Duration `yaml:"auth_authenticated_cache_ttl"` - AuthErrorCacheMinAge time.Duration `yaml:"auth_error_cache_min_age"` - AuthCacheMaxEntries int `yaml:"auth_cache_max_entries"` + Prefix string `yaml:"prefix"` + Root string `yaml:"root"` + OCFL bool `yaml:"ocfl"` + AuthProbe bool `yaml:"auth_probe"` } // HTTPSource resolves identifiers that are HTTP(S) URLs. type HTTPSource struct { - AllowedOrigins []string `yaml:"allowed_origins"` - AllowPrivateHosts bool `yaml:"allow_private_hosts"` - RequestTimeout time.Duration `yaml:"request_timeout"` - MaxBytes ByteSize `yaml:"max_bytes"` - MetadataCacheTTL time.Duration `yaml:"metadata_cache_ttl"` - MetadataCacheMaxEntries int `yaml:"metadata_cache_max_entries"` + AllowedOrigins []string `yaml:"allowed_origins"` + AllowPrivateHosts bool `yaml:"allow_private_hosts"` + RequestTimeout time.Duration `yaml:"request_timeout"` + MaxBytes ByteSize `yaml:"max_bytes"` + MetadataCacheTTL time.Duration `yaml:"metadata_cache_ttl"` } // Cache declares optional derivative-cache settings. type Cache struct { Root string `yaml:"root"` MaxBytes ByteSize `yaml:"max_bytes"` + MaxAge time.Duration `yaml:"max_age"` SourceRoot string `yaml:"source_root"` SourceMaxBytes ByteSize `yaml:"source_max_bytes"` SourceStaleAfter time.Duration `yaml:"source_stale_after"` @@ -436,6 +431,9 @@ func (c *Config) validate() error { if c.Cache.MaxBytes < 0 { return errors.New("cache.max_bytes: must be >= 0") } + if c.Cache.MaxAge < 0 { + return errors.New("cache.max_age: must be >= 0") + } if c.Cache.SourceMaxBytes < 0 { return errors.New("cache.source_max_bytes: must be >= 0") } @@ -455,9 +453,6 @@ func (c *Config) validate() error { if c.Sources.HTTP.MetadataCacheTTL < 0 { return errors.New("sources.http.metadata_cache_ttl: must be >= 0") } - if c.Sources.HTTP.MetadataCacheMaxEntries < 0 { - return errors.New("sources.http.metadata_cache_max_entries: must be >= 0") - } } if c.Sources.File != nil { for _, prefix := range c.Sources.File.URLPrefixes { @@ -481,21 +476,6 @@ func (c *Config) validate() error { if mapping.Root == "" { return fmt.Errorf("sources.file.url_mappings[%d].root is required", i) } - if mapping.AuthCacheTTL < 0 { - return fmt.Errorf("sources.file.url_mappings[%d].auth_cache_ttl: must be >= 0", i) - } - if mapping.AuthAnonymousCacheTTL < 0 { - return fmt.Errorf("sources.file.url_mappings[%d].auth_anonymous_cache_ttl: must be >= 0", i) - } - if mapping.AuthAuthenticatedCacheTTL < 0 { - return fmt.Errorf("sources.file.url_mappings[%d].auth_authenticated_cache_ttl: must be >= 0", i) - } - if mapping.AuthErrorCacheMinAge < 0 { - return fmt.Errorf("sources.file.url_mappings[%d].auth_error_cache_min_age: must be >= 0", i) - } - if mapping.AuthCacheMaxEntries < 0 { - return fmt.Errorf("sources.file.url_mappings[%d].auth_cache_max_entries: must be >= 0", i) - } } } if c.IIIF.Image.Enabled { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index a7b25a5..5325d3a 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "strings" "testing" + "time" ) func writeConfig(t *testing.T, body string) string { @@ -95,7 +96,6 @@ sources: allowed_origins: [https://example.org] max_bytes: 1048576 metadata_cache_ttl: 24h - metadata_cache_max_entries: 4096 `, }, { @@ -129,12 +129,9 @@ sources: root: /bar ocfl: true auth_probe: true - auth_anonymous_cache_ttl: 720h - auth_authenticated_cache_ttl: 168h - auth_error_cache_min_age: 5m - auth_cache_max_entries: 4096 http: allowed_origins: [https://repo.example.edu] + metadata_cache_ttl: 5m `, }, { @@ -152,78 +149,6 @@ sources: `, wantErr: "sources.file.url_mappings[0].root is required", }, - { - name: "file url mapping rejects negative auth ttl", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: http - file: - url_mappings: - - prefix: https://repo.example.edu/system/files - root: /tmp - auth_probe: true - auth_cache_ttl: -1s - http: - allowed_origins: [https://repo.example.edu] -`, - wantErr: "sources.file.url_mappings[0].auth_cache_ttl", - }, - { - name: "file url mapping rejects negative authenticated auth ttl", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: http - file: - url_mappings: - - prefix: https://repo.example.edu/system/files - root: /tmp - auth_probe: true - auth_authenticated_cache_ttl: -1s - http: - allowed_origins: [https://repo.example.edu] -`, - wantErr: "sources.file.url_mappings[0].auth_authenticated_cache_ttl", - }, - { - name: "file url mapping rejects negative auth error cache min age", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: http - file: - url_mappings: - - prefix: https://repo.example.edu/system/files - root: /tmp - auth_probe: true - auth_error_cache_min_age: -1s - http: - allowed_origins: [https://repo.example.edu] -`, - wantErr: "sources.file.url_mappings[0].auth_error_cache_min_age", - }, - { - name: "file url mapping rejects negative auth max entries", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: http - file: - url_mappings: - - prefix: https://repo.example.edu/system/files - root: /tmp - auth_probe: true - auth_cache_max_entries: -1 - http: - allowed_origins: [https://repo.example.edu] -`, - wantErr: "sources.file.url_mappings[0].auth_cache_max_entries", - }, { name: "file url mapping requires http source", body: ` @@ -337,19 +262,6 @@ sources: `, wantErr: "sources.http.metadata_cache_ttl", }, - { - name: "http source rejects negative metadata cache max entries", - body: ` -server: - public_base_url: http://localhost:8080 -sources: - default: http - http: - allowed_origins: [https://example.org] - metadata_cache_max_entries: -1 -`, - wantErr: "sources.http.metadata_cache_max_entries", - }, { name: "pprof enabled requires token", body: ` @@ -711,6 +623,7 @@ sources: max_bytes: 50MiB cache: max_bytes: 500GiB + max_age: 720h source_max_bytes: 2GB extensions: transform: @@ -732,6 +645,9 @@ extensions: if c.Cache.MaxBytes != 500<<30 { t.Errorf("Cache.MaxBytes = %d", c.Cache.MaxBytes) } + if c.Cache.MaxAge != 720*time.Hour { + t.Errorf("Cache.MaxAge = %s", c.Cache.MaxAge) + } if c.Cache.SourceMaxBytes != 2_000_000_000 { t.Errorf("Cache.SourceMaxBytes = %d", c.Cache.SourceMaxBytes) } @@ -875,6 +791,23 @@ cache: } } +func TestLoadRejectsNegativeCacheMaxAge(t *testing.T) { + path := writeConfig(t, ` +server: + public_base_url: http://localhost:8080 +sources: + default: file + file: + root: /tmp +cache: + max_age: -1s +`) + _, err := Load(path) + if err == nil || !strings.Contains(err.Error(), "cache.max_age") { + t.Fatalf("err = %v, want cache.max_age validation error", err) + } +} + func TestLoadRejectsNegativeSourceStaleAfter(t *testing.T) { path := writeConfig(t, ` server: diff --git a/internal/server/server.go b/internal/server/server.go index 4275d48..fb215cf 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -295,9 +295,8 @@ func buildSource(cfg *config.Config, logger *slog.Logger) (storage.Opener, func( } if cfg.Sources.HTTP.MetadataCacheTTL > 0 { httpOp = &storage.MetaCaching{ - Inner: httpOp, - TTL: cfg.Sources.HTTP.MetadataCacheTTL, - MaxEntries: cfg.Sources.HTTP.MetadataCacheMaxEntries, + Inner: httpOp, + TTL: cfg.Sources.HTTP.MetadataCacheTTL, } } localURLMappings, err := buildLocalURLMappings(cfg, fileOp) @@ -352,15 +351,11 @@ func buildLocalURLMappings(cfg *config.Config, fileOp storage.Opener) ([]storage return nil, err } mappings = append(mappings, storage.LocalURLMapping{ - Prefix: mapping.Prefix, - File: op, - OCFL: mapping.OCFL, - AuthProbe: mapping.AuthProbe, - AuthCacheTTL: mapping.AuthCacheTTL, - AuthAnonymousCacheTTL: mapping.AuthAnonymousCacheTTL, - AuthAuthenticatedCacheTTL: mapping.AuthAuthenticatedCacheTTL, - AuthErrorCacheMinAge: mapping.AuthErrorCacheMinAge, - AuthCacheMaxEntries: mapping.AuthCacheMaxEntries, + Prefix: mapping.Prefix, + File: op, + OCFL: mapping.OCFL, + AuthProbe: mapping.AuthProbe, + AuthCacheTTL: cfg.Sources.HTTP.MetadataCacheTTL, }) } if len(cfg.Sources.File.URLPrefixes) > 0 { @@ -383,7 +378,7 @@ func buildDerivativeCache(cfg *config.Config) (cache.Store, error) { if cfg.Cache.Root == "" { return cache.Noop{}, nil } - return cache.NewFileStore(cfg.Cache.Root, int64(cfg.Cache.MaxBytes)) + return cache.NewFileStoreWithMaxAge(cfg.Cache.Root, int64(cfg.Cache.MaxBytes), cfg.Cache.MaxAge) } func buildSourceCache(cfg *config.Config) (cache.Store, error) { diff --git a/internal/storage/local_url.go b/internal/storage/local_url.go index 740af80..c8ead04 100644 --- a/internal/storage/local_url.go +++ b/internal/storage/local_url.go @@ -44,15 +44,13 @@ type LocalURLFallback struct { // LocalURLMapping maps a URL identifier prefix to a local file source. type LocalURLMapping struct { - Prefix string - File *FileOpener - OCFL bool - AuthProbe bool - AuthCacheTTL time.Duration - AuthAnonymousCacheTTL time.Duration - AuthAuthenticatedCacheTTL time.Duration - AuthErrorCacheMinAge time.Duration - AuthCacheMaxEntries int + Prefix string + File *FileOpener + OCFL bool + AuthProbe bool + AuthCacheTTL time.Duration + AuthErrorCacheMinAge time.Duration + AuthCacheMaxEntries int } type authCacheEntry struct { @@ -69,8 +67,6 @@ type authCacheTier string const ( authCacheTierAnonymous authCacheTier = "anonymous" authCacheTierAuthenticated authCacheTier = "authenticated" - defaultAnonymousAuthTTL = 5 * time.Minute - defaultAuthenticatedAuthTTL = 5 * time.Minute defaultAuthErrorCacheMinAge = 5 * time.Minute defaultAuthCacheMaxEntries = 4096 ) @@ -407,23 +403,11 @@ func cacheableAuthProbeResponse(err error, header http.Header, errorMinAge time. } func (m LocalURLMapping) anonymousAuthTTL() time.Duration { - if m.AuthAnonymousCacheTTL > 0 { - return m.AuthAnonymousCacheTTL - } - if m.AuthCacheTTL > 0 { - return m.AuthCacheTTL - } - return defaultAnonymousAuthTTL + return m.AuthCacheTTL } func (m LocalURLMapping) authenticatedAuthTTL() time.Duration { - if m.AuthAuthenticatedCacheTTL > 0 { - return m.AuthAuthenticatedCacheTTL - } - if m.AuthCacheTTL > 0 { - return m.AuthCacheTTL - } - return defaultAuthenticatedAuthTTL + return m.AuthCacheTTL } func (m LocalURLMapping) authErrorCacheMinAge() time.Duration { diff --git a/internal/storage/local_url_test.go b/internal/storage/local_url_test.go index 13ca687..285d065 100644 --- a/internal/storage/local_url_test.go +++ b/internal/storage/local_url_test.go @@ -494,10 +494,10 @@ func TestLocalURLFallbackAnonymousAuthProbeTTL(t *testing.T) { const ttl = 10 * time.Second op := &LocalURLFallback{ Mappings: []LocalURLMapping{{ - Prefix: "/system/files", - File: fileOp, - AuthProbe: true, - AuthAnonymousCacheTTL: ttl, + Prefix: "/system/files", + File: fileOp, + AuthProbe: true, + AuthCacheTTL: ttl, }}, AllowedOrigins: []string{"https://repo.example.edu"}, Fallback: errOpener{}, @@ -546,15 +546,13 @@ func TestLocalURLFallbackAuthenticatedAuthProbeTTL(t *testing.T) { if err != nil { t.Fatal(err) } - const anonTTL = time.Hour - const authTTL = 10 * time.Second + const ttl = 10 * time.Second op := &LocalURLFallback{ Mappings: []LocalURLMapping{{ - Prefix: "/system/files", - File: fileOp, - AuthProbe: true, - AuthAnonymousCacheTTL: anonTTL, - AuthAuthenticatedCacheTTL: authTTL, + Prefix: "/system/files", + File: fileOp, + AuthProbe: true, + AuthCacheTTL: ttl, }}, AllowedOrigins: []string{"https://repo.example.edu"}, Fallback: errOpener{}, @@ -573,7 +571,7 @@ func TestLocalURLFallbackAuthenticatedAuthProbeTTL(t *testing.T) { t.Fatalf("initial authenticated probes = %d", got) } - time.Sleep(authTTL - time.Nanosecond) + time.Sleep(ttl - time.Nanosecond) synctest.Wait() openAndClose(t, op, ctx, identifier) if got := anonProbes.Load(); got != 1 { @@ -586,7 +584,7 @@ func TestLocalURLFallbackAuthenticatedAuthProbeTTL(t *testing.T) { time.Sleep(2 * time.Nanosecond) synctest.Wait() openAndClose(t, op, ctx, identifier) - if got := anonProbes.Load(); got != 1 { + if got := anonProbes.Load(); got != 2 { t.Fatalf("anonymous probes after auth ttl = %d", got) } if got := authProbes.Load(); got != 2 { @@ -618,10 +616,10 @@ func TestLocalURLFallbackAuthProbeAnonymousSucceedsAndCaches(t *testing.T) { } op := &LocalURLFallback{ Mappings: []LocalURLMapping{{ - Prefix: srv.URL + "/system/files", - File: fileOp, - AuthProbe: true, - AuthAnonymousCacheTTL: time.Minute, + Prefix: srv.URL + "/system/files", + File: fileOp, + AuthProbe: true, + AuthCacheTTL: time.Minute, }}, Fallback: errOpener{}, AuthFallback: testAuthHTTP(t, srv), @@ -737,11 +735,10 @@ func TestLocalURLFallbackAuthProbeFallsBackToCredentialedCache(t *testing.T) { } op := &LocalURLFallback{ Mappings: []LocalURLMapping{{ - Prefix: srv.URL + "/system/files", - File: fileOp, - AuthProbe: true, - AuthAnonymousCacheTTL: time.Minute, - AuthAuthenticatedCacheTTL: time.Minute, + Prefix: srv.URL + "/system/files", + File: fileOp, + AuthProbe: true, + AuthCacheTTL: time.Minute, }}, Fallback: errOpener{}, AuthFallback: testAuthHTTP(t, srv), @@ -780,10 +777,10 @@ func TestLocalURLFallbackAuthProbeCoalescesConcurrentRequests(t *testing.T) { } op := &LocalURLFallback{ Mappings: []LocalURLMapping{{ - Prefix: srv.URL + "/system/files", - File: fileOp, - AuthProbe: true, - AuthAnonymousCacheTTL: time.Minute, + Prefix: srv.URL + "/system/files", + File: fileOp, + AuthProbe: true, + AuthCacheTTL: time.Minute, }}, Fallback: errOpener{}, AuthFallback: testAuthHTTP(t, srv),