Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 40 additions & 9 deletions xrspatial/geotiff/_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,27 +249,58 @@ def _reject_disagreeing_photometric_override(
# Predictor helpers
# ---------------------------------------------------------------------------

# Compression codecs that do NOT honor the TIFF PREDICTOR tag. The
# horizontal / floating-point predictor rearranges pixel bytes assuming
# the downstream codec is a byte-oriented entropy coder (deflate, lzw,
# zstd, lz4, packbits) that a spec-compliant reader will run the inverse
# predictor over. JPEG, JPEG2000 and LERC do their own pixel modelling,
# so GDAL never applies PREDICTOR for them. Differencing the bytes anyway
# writes a file whose pixels are corrupted to every reader except one that
# happens to invert the predictor symmetrically (see issue #3371).
_PREDICTOR_INCOMPATIBLE_COMPRESSION = {
COMPRESSION_JPEG: 'jpeg',
COMPRESSION_JPEG2000: 'jpeg2000',
COMPRESSION_LERC: 'lerc',
}


def normalize_predictor(predictor, dtype, compression: int) -> int:
"""Normalize a user-supplied predictor value to a TIFF predictor int.

Accepts ``False``/``True`` (legacy) and integers ``1``/``2``/``3``.
Returns ``1`` (no predictor), ``2`` (horizontal differencing), or ``3``
(floating-point predictor).

A predictor other than 1 combined with a codec that does not honor the
PREDICTOR tag (jpeg / jpeg2000 / lerc) is rejected with ``ValueError``.
"""
if predictor is False or predictor == 0:
return 1
if predictor is True or predictor == 2:
return 2
if predictor == 1:
return 1
if predictor == 3:
resolved = 1
elif predictor is True or predictor == 2:
resolved = 2
elif predictor == 1:
resolved = 1
elif predictor == 3:
if np.dtype(dtype).kind != 'f':
raise ValueError(
"predictor=3 (floating-point) requires float data, "
f"got dtype={np.dtype(dtype)}")
return 3
raise ValueError(
f"predictor must be False/True or 1/2/3, got {predictor!r}")
resolved = 3
else:
raise ValueError(
f"predictor must be False/True or 1/2/3, got {predictor!r}")

if resolved != 1 and compression in _PREDICTOR_INCOMPATIBLE_COMPRESSION:
codec = _PREDICTOR_INCOMPATIBLE_COMPRESSION[compression]
raise ValueError(
f"predictor={resolved} is not supported with "
f"compression={codec!r}. The TIFF PREDICTOR tag only applies to "
f"byte-oriented codecs (deflate, lzw, zstd, lz4, packbits); "
f"{codec!r} does its own pixel modelling and GDAL ignores the "
f"tag, so differencing the bytes corrupts the pixels for "
f"spec-compliant readers. Drop predictor= (or set predictor=1) "
f"for this codec.")
return resolved


def _apply_predictor_encode(buf: np.ndarray, predictor: int,
Expand Down
89 changes: 89 additions & 0 deletions xrspatial/geotiff/tests/unit/test_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,3 +1234,92 @@ def test_normalize_predictor_table():

with pytest.raises(ValueError, match="predictor must be"):
normalize_predictor(99, f32, deflate)


# ---------------------------------------------------------------------------
# Predictor vs. codec compatibility (issue #3371). The TIFF PREDICTOR tag
# only applies to byte-oriented entropy codecs. jpeg / jpeg2000 / lerc do
# their own pixel modelling and ignore the tag, so differencing the bytes
# first writes a file that no spec-compliant reader can recover.
# ---------------------------------------------------------------------------


def test_normalize_predictor_rejects_predictor_codec_mismatch():
"""predictor 2/3 + jpeg/jpeg2000/lerc raises; byte codecs are fine."""
from xrspatial.geotiff._compression import (COMPRESSION_DEFLATE, COMPRESSION_JPEG,
COMPRESSION_JPEG2000, COMPRESSION_LERC,
COMPRESSION_LZ4, COMPRESSION_LZW,
COMPRESSION_PACKBITS, COMPRESSION_ZSTD)
f32 = np.dtype("float32")
u16 = np.dtype("uint16")

incompatible = {
"jpeg": COMPRESSION_JPEG,
"jpeg2000": COMPRESSION_JPEG2000,
"lerc": COMPRESSION_LERC,
}
for name, tag in incompatible.items():
with pytest.raises(ValueError, match=f"compression={name!r}"):
normalize_predictor(2, u16, tag)
with pytest.raises(ValueError, match=f"compression={name!r}"):
normalize_predictor(3, f32, tag)

# predictor=1 (the default for these codecs) is always allowed.
for tag in incompatible.values():
assert normalize_predictor(1, u16, tag) == 1
assert normalize_predictor(False, u16, tag) == 1

# Byte-oriented codecs keep honouring the predictor.
for tag in (COMPRESSION_DEFLATE, COMPRESSION_LZW, COMPRESSION_ZSTD,
COMPRESSION_LZ4, COMPRESSION_PACKBITS):
assert normalize_predictor(2, u16, tag) == 2


@pytest.mark.parametrize("codec", ["jpeg2000", "j2k", "lerc"])
def test_to_geotiff_rejects_predictor2_with_codec(tmp_path, codec):
"""Eager writer refuses predictor=2 with jpeg2000/lerc (issue #3371)."""
arr = (np.arange(64 * 64, dtype=np.uint16).reshape(64, 64) % 1000)
da = _da_xy(arr)
path = tmp_path / f"pred2_{codec}_3371.tif"
with pytest.raises(ValueError, match="PREDICTOR"):
to_geotiff(da, str(path), compression=codec, predictor=2,
allow_experimental_codecs=True)


@pytest.mark.parametrize("codec", ["jpeg2000", "lerc"])
def test_to_geotiff_rejects_predictor3_with_codec(tmp_path, codec):
"""Eager writer refuses predictor=3 (float) with jpeg2000/lerc."""
arr = _smooth_float((64, 64), np.float32)
da = _da_xy(arr)
path = tmp_path / f"pred3_{codec}_3371.tif"
with pytest.raises(ValueError, match="PREDICTOR"):
to_geotiff(da, str(path), compression=codec, predictor=3,
allow_experimental_codecs=True)


@pytest.mark.parametrize("codec", ["deflate", "lzw", "zstd"])
def test_to_geotiff_predictor2_byte_codec_round_trips(tmp_path, codec):
"""The gate is codec-scoped: byte-oriented codecs still round-trip."""
arr = (np.arange(64 * 64, dtype=np.uint16).reshape(64, 64) % 1000)
da = _da_xy(arr)
path = tmp_path / f"pred2_{codec}_3371.tif"
to_geotiff(da, str(path), compression=codec, predictor=2)
rt, _ = read_to_array(str(path))
np.testing.assert_array_equal(rt, arr)


@_gpu_only
@pytest.mark.parametrize("codec", ["jpeg2000", "lerc"])
def test_gpu_writer_rejects_predictor2_with_codec(tmp_path, codec):
"""GPU writer shares normalize_predictor, so the gate fires there too."""
import cupy

from xrspatial.geotiff import _write_geotiff_gpu

arr = (np.arange(64 * 64, dtype=np.uint16).reshape(64, 64) % 1000)
da = _da_xy(arr.copy())
da = da.copy(data=cupy.asarray(da.data))
path = tmp_path / f"gpu_pred2_{codec}_3371.tif"
with pytest.raises(ValueError, match="PREDICTOR"):
_write_geotiff_gpu(da, str(path), compression=codec, predictor=2,
allow_experimental_codecs=True)
Loading