From 2cde76b2611a897391040d04fffeb31afd2684a9 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 1 May 2026 18:39:50 +0000 Subject: [PATCH 1/2] Handle unsupported source images as client errors --- internal/iiif/image/v3/handler/handler.go | 9 +++ .../iiif/image/v3/handler/handler_test.go | 59 ++++++++++++++++++- internal/iiif/image/v3/pipeline/pipeline.go | 27 ++++++++- 3 files changed, 90 insertions(+), 5 deletions(-) diff --git a/internal/iiif/image/v3/handler/handler.go b/internal/iiif/image/v3/handler/handler.go index d40f681..80a630a 100644 --- a/internal/iiif/image/v3/handler/handler.go +++ b/internal/iiif/image/v3/handler/handler.go @@ -152,6 +152,10 @@ func (h *Handler) serveInfo(w http.ResponseWriter, r *http.Request, identifier s writeError(w, http.StatusBadRequest, err.Error()) return } + if errors.Is(err, pipeline.ErrUnsupportedSource) { + writeError(w, http.StatusUnsupportedMediaType, "unsupported source image") + return + } h.logger.Error("read image dimensions", "identifier", redact.Identifier(identifier), "identifier_hash", redact.Hash(identifier), "err", err) writeError(w, http.StatusInternalServerError, "failed to read image") return @@ -267,6 +271,10 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R writeError(w, http.StatusBadRequest, err.Error()) return } + if errors.Is(err, pipeline.ErrUnsupportedSource) { + writeError(w, http.StatusUnsupportedMediaType, "unsupported source image") + return + } h.logger.Error("pipeline transform", "identifier", redact.Identifier(req.Identifier), "identifier_hash", redact.Hash(req.Identifier), "err", err) writeError(w, http.StatusInternalServerError, "failed to transform image") return @@ -358,6 +366,7 @@ func (h *Handler) imageDimensions(ctx context.Context, identifier string) (int, params.Access.Set(gv.AccessSequential) img, err := gv.LoadImageFromFileDirect(path, params) if err != nil { + err = pipeline.WrapSourceLoadError("load", err) return 0, 0, fmt.Errorf("vips load %q size=%d content_type=%q mod_time=%s: %w", path, meta.Size, meta.ContentType, meta.ModTime.Format(time.RFC3339Nano), err) } defer img.Close() diff --git a/internal/iiif/image/v3/handler/handler_test.go b/internal/iiif/image/v3/handler/handler_test.go index 2685a72..e25c66b 100644 --- a/internal/iiif/image/v3/handler/handler_test.go +++ b/internal/iiif/image/v3/handler/handler_test.go @@ -211,7 +211,7 @@ func TestImageRequestHasCanonicalLink(t *testing.T) { } } -func TestPipelineErrorUsesGenericResponse(t *testing.T) { +func TestUnsupportedSourceUsesClientError(t *testing.T) { root := t.TempDir() if err := os.WriteFile(filepath.Join(root, "bad.png"), []byte("not an image"), 0o600); err != nil { t.Fatal(err) @@ -248,18 +248,71 @@ func TestPipelineErrorUsesGenericResponse(t *testing.T) { t.Fatal(err) } defer resp.Body.Close() - if resp.StatusCode != http.StatusInternalServerError { + if resp.StatusCode != http.StatusUnsupportedMediaType { t.Fatalf("status = %d", resp.StatusCode) } b, err := io.ReadAll(resp.Body) if err != nil { t.Fatal(err) } - if strings.Contains(string(b), "vips") || !strings.Contains(string(b), "failed to transform image") { + if strings.Contains(string(b), "vips") || !strings.Contains(string(b), "unsupported source image") { t.Fatalf("body = %q", string(b)) } } +func TestInfoUnsupportedSourceUsesClientError(t *testing.T) { + var logs bytes.Buffer + root := t.TempDir() + if err := os.WriteFile(filepath.Join(root, "bad.tiff"), []byte("not an image"), 0o600); err != nil { + t.Fatal(err) + } + op, err := storage.NewFileOpener(root) + if err != nil { + t.Fatal(err) + } + logger := slog.New(slog.NewTextHandler(&logs, nil)) + h := New( + "/iiif/3", + "http://example.test", + op, + pipeline.New(op, pipeline.Limits{MaxOutputPixels: 10_000_000}), + cache.Noop{}, + nil, + "", + nil, + nil, + types.Limits{MaxArea: 10_000_000}, + true, + 250_000_000, + 1<<30, + 2, + logger, + ) + mux := http.NewServeMux() + h.Register(mux) + srv := httptest.NewServer(mux) + defer srv.Close() + + resp, err := http.Get(srv.URL + "/iiif/3/bad.tiff/info.json") + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusUnsupportedMediaType { + t.Fatalf("status = %d", resp.StatusCode) + } + b, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(b), "vips") || !strings.Contains(string(b), "unsupported source image") { + t.Fatalf("body = %q", string(b)) + } + if strings.Contains(logs.String(), "read image dimensions") { + t.Fatalf("logs = %q", logs.String()) + } +} + func TestDerivativeCacheFailureWarns(t *testing.T) { var logs bytes.Buffer root := t.TempDir() diff --git a/internal/iiif/image/v3/pipeline/pipeline.go b/internal/iiif/image/v3/pipeline/pipeline.go index f15b0b3..4ca57a6 100644 --- a/internal/iiif/image/v3/pipeline/pipeline.go +++ b/internal/iiif/image/v3/pipeline/pipeline.go @@ -32,6 +32,10 @@ import ( // fulfilled because their resolved parameters violate the Image API rules. var ErrBadRequest = errors.New("pipeline: bad request") +// ErrUnsupportedSource marks source bytes that libvips cannot decode as one of +// Triplet's supported input image formats. +var ErrUnsupportedSource = errors.New("pipeline: unsupported source image") + // Limits caps resource use per request. type Limits struct { // MaxOutputPixels rejects requests whose computed output exceeds this @@ -111,7 +115,7 @@ func (p *Pipeline) Transform(ctx context.Context, req parse.Request, w io.Writer params.Access.Set(p.loadAccess(req)) img, err := gv.LoadImageFromFileDirect(source.Path, params) if err != nil { - return Result{}, tvips.Wrap("govips load", err) + return Result{}, WrapSourceLoadError("govips load", err) } defer func() { if img != nil { @@ -144,7 +148,7 @@ func (p *Pipeline) Transform(ctx context.Context, req parse.Request, w io.Writer params.Page.Set(page) img, err = gv.LoadImageFromFileDirect(source.Path, params) if err != nil { - return Result{}, tvips.Wrap("govips jp2kload page", err) + return Result{}, WrapSourceLoadError("govips jp2kload page", err) } left, top, regionW, regionH = scaleRegionToLoadedPage(left, top, regionW, regionH, dims.width, dims.height, img.Width(), img.Height()) } @@ -275,6 +279,25 @@ func (p *Pipeline) openSource(ctx context.Context, identifier string) (*sourceFi return &sourceFile{Path: tmpPath, Meta: meta, tmpPath: tmpPath}, nil } +// WrapSourceLoadError preserves libvips load details while marking errors that +// mean the source is not a decodable supported image. +func WrapSourceLoadError(op string, err error) error { + if err == nil { + return nil + } + if isUnsupportedSourceLoad(err) { + return fmt.Errorf("%w: vips %s: %w", ErrUnsupportedSource, op, err) + } + return tvips.Wrap(op, err) +} + +func isUnsupportedSourceLoad(err error) bool { + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "is not a known file format") || + strings.Contains(msg, "no known loader") || + strings.Contains(msg, "operation class") && strings.Contains(msg, "is blocked") +} + func applyColorManagement(img *gv.ImageRef, mode string) error { switch mode { case "", "preserve", "none": From 09d73a49dc09fc611b5354cb9278536b6baba8f1 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 1 May 2026 18:41:53 +0000 Subject: [PATCH 2/2] Fix cache miss and range validation issues --- internal/cache/file.go | 4 +- internal/cache/file_test.go | 26 ++++++++++++ internal/iiif/image/v3/handler/handler.go | 6 +-- .../iiif/image/v3/handler/handler_test.go | 17 ++++++++ internal/storage/http.go | 40 +++++++++++++++---- internal/storage/http_test.go | 40 +++++++++++++++++++ 6 files changed, 122 insertions(+), 11 deletions(-) diff --git a/internal/cache/file.go b/internal/cache/file.go index 0b5050f..fa655cc 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -96,6 +96,9 @@ func (s *FileStore) Get(_ context.Context, key string) (io.ReadCloser, Entry, er // Put implements Store. func (s *FileStore) Put(_ context.Context, key, contentType string, value io.Reader) error { dataPath, metaPath := s.paths(key) + if err := os.MkdirAll(filepath.Dir(dataPath), 0o750); err != nil { + return err + } tmp, err := os.CreateTemp(s.Root, ".tmp-*") if err != nil { return err @@ -142,7 +145,6 @@ func (s *FileStore) paths(key string) (data, meta string) { hex := hex.EncodeToString(sum[:]) // Two-level fan-out so a single directory doesn't grow unboundedly. dir := filepath.Join(s.Root, hex[:2], hex[2:4]) - _ = os.MkdirAll(dir, 0o750) base := filepath.Join(dir, hex) return base, base + ".meta" } diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go index f933f63..b37e008 100644 --- a/internal/cache/file_test.go +++ b/internal/cache/file_test.go @@ -7,6 +7,7 @@ import ( "errors" "io" "os" + "path/filepath" "strings" "testing" "time" @@ -54,6 +55,31 @@ func TestFileStorePutGetDelete(t *testing.T) { } } +func TestFileStoreMissDoesNotCreateKeyDirectory(t *testing.T) { + root := t.TempDir() + store, err := NewFileStore(root, 0) + if err != nil { + t.Fatal(err) + } + dataPath, _ := store.paths("missing") + dir := filepath.Dir(dataPath) + + _, _, err = store.Get(context.Background(), "missing") + if !errors.Is(err, ErrMiss) { + t.Fatalf("err = %v, want miss", err) + } + if _, err := os.Stat(dir); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("dir stat err = %v, want not exist", err) + } + + if err := store.Delete(context.Background(), "missing"); err != nil { + t.Fatal(err) + } + if _, err := os.Stat(dir); !errors.Is(err, os.ErrNotExist) { + t.Fatalf("dir stat after delete err = %v, want not exist", err) + } +} + func TestFileStoreEvictsWhenOversize(t *testing.T) { store, err := NewFileStore(t.TempDir(), 5) if err != nil { diff --git a/internal/iiif/image/v3/handler/handler.go b/internal/iiif/image/v3/handler/handler.go index 80a630a..2a4c9a5 100644 --- a/internal/iiif/image/v3/handler/handler.go +++ b/internal/iiif/image/v3/handler/handler.go @@ -235,9 +235,6 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R contentType := contentTypeForFormat(req.Format) w.Header().Set("Content-Type", contentType) w.Header().Set("X-Cache", "miss") - if r.Method == http.MethodHead { - return - } release, err := h.acquireVips(r.Context()) if err != nil { @@ -305,6 +302,9 @@ func (h *Handler) serveImage(w http.ResponseWriter, r *http.Request, req parse.R return } } + if r.Method == http.MethodHead { + return + } if _, err := io.Copy(w, tmp); err != nil { h.logger.Warn("write derivative", "identifier", redact.Identifier(req.Identifier), "identifier_hash", redact.Hash(req.Identifier), "err", err) } diff --git a/internal/iiif/image/v3/handler/handler_test.go b/internal/iiif/image/v3/handler/handler_test.go index e25c66b..966aab6 100644 --- a/internal/iiif/image/v3/handler/handler_test.go +++ b/internal/iiif/image/v3/handler/handler_test.go @@ -444,6 +444,23 @@ func TestUpscaleWithoutCaretReturnsBadRequest(t *testing.T) { } } +func TestImageHeadValidatesPipeline(t *testing.T) { + srv, _ := setupTestServer(t) + defer srv.Close() + req, err := http.NewRequest(http.MethodHead, srv.URL+"/iiif/3/sample.png/full/201,101/0/default.png", nil) + if err != nil { + t.Fatal(err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status = %d", resp.StatusCode) + } +} + func TestBadSyntax(t *testing.T) { srv, _ := setupTestServer(t) defer srv.Close() diff --git a/internal/storage/http.go b/internal/storage/http.go index 1ccdc6e..46c1e94 100644 --- a/internal/storage/http.go +++ b/internal/storage/http.go @@ -353,15 +353,37 @@ func httpMeta(header http.Header, size int64) Meta { } func parseContentRangeSize(v string) (int64, bool) { - slash := strings.LastIndexByte(v, '/') - if slash < 0 || slash == len(v)-1 { - return 0, false + _, _, size, ok := parseContentRange(v) + return size, ok +} + +func parseContentRange(v string) (start, end, size int64, ok bool) { + v = strings.TrimSpace(v) + if !strings.HasPrefix(v, "bytes ") { + return 0, 0, 0, false + } + spec := strings.TrimSpace(strings.TrimPrefix(v, "bytes ")) + rangePart, sizePart, found := strings.Cut(spec, "/") + if !found || sizePart == "" || sizePart == "*" { + return 0, 0, 0, false + } + startPart, endPart, found := strings.Cut(rangePart, "-") + if !found || startPart == "" || endPart == "" { + return 0, 0, 0, false } - if v[slash+1:] == "*" { - return 0, false + start, err := strconv.ParseInt(startPart, 10, 64) + if err != nil || start < 0 { + return 0, 0, 0, false } - n, err := strconv.ParseInt(v[slash+1:], 10, 64) - return n, err == nil && n >= 0 + end, err = strconv.ParseInt(endPart, 10, 64) + if err != nil || end < start { + return 0, 0, 0, false + } + size, err = strconv.ParseInt(sizePart, 10, 64) + if err != nil || size < 0 || end >= size { + return 0, 0, 0, false + } + return start, end, size, true } func (h *HTTPOpener) originAllowed(u *url.URL) bool { @@ -500,6 +522,10 @@ func (r *httpRangeReadSeekCloser) Read(p []byte) (int, error) { if resp.StatusCode != http.StatusPartialContent { return 0, fmt.Errorf("http range reader: upstream status %d", resp.StatusCode) } + rangeStart, rangeEnd, rangeSize, ok := parseContentRange(resp.Header.Get("Content-Range")) + if !ok || rangeStart != r.off || rangeEnd != end || rangeSize != r.size { + return 0, fmt.Errorf("http range reader: invalid content-range %q for bytes=%d-%d/%d", resp.Header.Get("Content-Range"), r.off, end, r.size) + } n, err := io.ReadFull(resp.Body, p) r.off += int64(n) if err == io.ErrUnexpectedEOF { diff --git a/internal/storage/http_test.go b/internal/storage/http_test.go index c5eb7f1..624d551 100644 --- a/internal/storage/http_test.go +++ b/internal/storage/http_test.go @@ -196,6 +196,46 @@ func TestHTTPOpenerUsesRangeRequests(t *testing.T) { } } +func TestHTTPOpenerRejectsMismatchedContentRange(t *testing.T) { + body := []byte("0123456789") + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + rng := r.Header.Get("Range") + if rng == "" { + t.Fatal("expected range request") + } + start, end := parseTestRange(t, rng) + if end >= int64(len(body)) { + end = int64(len(body)) - 1 + } + w.Header().Set("Content-Type", "image/png") + if rng == "bytes=0-0" { + w.Header().Set("Content-Range", "bytes 0-0/"+strconv.Itoa(len(body))) + w.WriteHeader(http.StatusPartialContent) + _, _ = w.Write(body[:1]) + return + } + w.Header().Set("Content-Range", "bytes 0-"+strconv.FormatInt(end-start, 10)+"/"+strconv.Itoa(len(body))) + w.WriteHeader(http.StatusPartialContent) + _, _ = w.Write(body[start : end+1]) + })) + defer srv.Close() + + op := NewHTTPOpener([]string{srv.URL}, 5*time.Second, 0) + op.AllowPrivateHosts = true + rc, _, err := op.Open(context.Background(), srv.URL+"/range") + if err != nil { + t.Fatal(err) + } + defer rc.Close() + if _, err := rc.Seek(5, io.SeekStart); err != nil { + t.Fatal(err) + } + buf := make([]byte, 2) + if _, err := rc.Read(buf); err == nil || !strings.Contains(err.Error(), "invalid content-range") { + t.Fatalf("err = %v, want invalid content-range", err) + } +} + func TestHTTPOpenerMetaUsesHEAD(t *testing.T) { body := []byte("0123456789") var gotMethods []string