Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions docs/examples/sentinel2-tci-to-geozarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import zarr
from async_geotiff import GeoTIFF
from geozarr_toolkit import (
MultiscalesConventionMetadata,
ProjConventionMetadata,
SpatialConventionMetadata,
create_multiscales_layout,
create_proj_attrs,
create_spatial_attrs,
create_zarr_conventions,
)
from obstore.store import S3Store
from zarr.storage import LocalStore

DIMENSIONS = ("band", "Y", "X")


store = S3Store("sentinel-cogs", region="us-west-2", skip_signature=True)
# path = "sentinel-s2-l2a-cogs/12/S/UF/2022/6/S2B_12SUF_20220609_0_L2A/TCI.tif"
path = "sentinel-s2-l2a-cogs/18/T/WL/2026/1/S2B_18TWL_20260101_0_L2A/TCI.tif"

geotiff = await GeoTIFF.open(path, store=store)

proj_attrs = create_proj_attrs(code=f"EPSG:{geotiff.crs.to_epsg()}")
spatial_attrs = create_spatial_attrs(
dimensions=["Y", "X"],
bbox=geotiff.bounds,
)

# Build multiscales layout from the COG's overviews
# The base (full-resolution) image is level 0; each overview is a coarser level.
levels = [
{"asset": "0", "transform": {"scale": [1.0, 1.0], "translation": [0.0, 0.0]}},
]
for i, overview in enumerate(geotiff.overviews):
ov_res = overview.transform.a
scale_factor_x = overview.transform.a / geotiff.transform.a
scale_factor_y = overview.transform.e / geotiff.transform.e
Comment on lines +37 to +38

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a change from the example on main. I think it's best to always calculate the scale factor in terms of both x and y, in case it differs. It does differ in some cases (such as in an NAIP sample image I use often) due to pixel rounding for overviews.

levels.append(
{
"asset": str(i + 1),
"derived_from": "0",
"transform": {
"scale": [scale_factor_x, scale_factor_y],
"translation": [0.0, 0.0],
},
}
)

multiscales_attrs = create_multiscales_layout(levels)
multiscales_attrs["multiscales"]["layout"][0]["spatial:transform"] = geotiff.transform[:6]
multiscales_attrs["multiscales"]["layout"][0]["spatial:shape"] = geotiff.shape
for item, overview in zip(
multiscales_attrs["multiscales"]["layout"][1:], geotiff.overviews, strict=True
):
item["spatial:transform"] = overview.transform[:6]
item["spatial:shape"] = overview.height, overview.width


zarr_conventions = create_zarr_conventions(
MultiscalesConventionMetadata(),
ProjConventionMetadata(),
SpatialConventionMetadata(),
)

geozarr_attrs = {
**proj_attrs,
**spatial_attrs,
**multiscales_attrs,
"zarr_conventions": zarr_conventions,
}

local_path = "data/TCI.zarr"
zarr_store = LocalStore(local_path)

root: zarr.Group = zarr.open_group(zarr_store, mode="w", zarr_format=3)

# Set convention attributes on the group
root.attrs.update(geozarr_attrs)

# Write the full-resolution image as level "0"
base_array = await geotiff.read()
root.create_array(
"0",
data=base_array.data,
chunks=(3, 512, 512),
dimension_names=DIMENSIONS,
)
print(f"Level 0 (base): shape={base_array.data.shape}, dtype={base_array.data.dtype}")

# Write each overview as a separate level
for i, overview in enumerate(geotiff.overviews):
ov_array = await overview.read()
root.create_array(
str(i + 1),
data=ov_array.data,
chunks=(3, 512, 512),
dimension_names=DIMENSIONS,
)
print(f"Level {i + 1} (overview): shape={ov_array.data.shape}")