"""Data pipeline example for the Weather Routing Tool.

Working with large weather datasets can be memory-intensive and slow when the
whole file is loaded eagerly.  This example shows a more scalable workflow:

* load the dataset lazily in chunks (xarray + Dask),
* subset a geographic region,
* interpolate the data at a single location,
* save the result as NetCDF and as Zarr,
* compare the save time and on-disk size of both formats.

Files involved
--------------
``weather.nc``   input dataset (default, looked up in the working directory)
``output.nc``    NetCDF result written by this script
``output.zarr``  Zarr store (a directory) written by this script

Both outputs are regenerated on every run, so they are build artifacts and
should not be committed to version control.

Usage::

    python analysis.py [--input weather.nc] [--output-dir .]
"""

import argparse
import os
import time
from pathlib import Path

# Number of time steps per Dask chunk when opening the input lazily.
TIME_CHUNK = 10

# Bounding box of the region of interest, as (low, high) in degrees.
REGION_LATITUDE = (10, 20)
REGION_LONGITUDE = (80, 90)

# Interpolation target.  Floats are used on purpose: integer targets make
# xarray store the resulting scalar coordinates as int64.
POINT_LATITUDE = 15.0
POINT_LONGITUDE = 85.0


def ordered_slice(first, last, low, high):
    """Return a label slice covering ``[low, high]`` that matches the axis order.

    A label-based ``sel`` with ``slice(low, high)`` selects nothing when the
    coordinate is stored in descending order (common for latitude), so the
    bounds are swapped in that case.

    Args:
        first: First value of the coordinate axis.
        last: Last value of the coordinate axis.
        low: Lower bound of the requested range.
        high: Upper bound of the requested range.

    Returns:
        ``slice(low, high)`` for an ascending axis, ``slice(high, low)`` for a
        descending one.
    """
    return slice(low, high) if first <= last else slice(high, low)


def store_size_mb(path):
    """Return the on-disk size of ``path`` in MiB.

    Works for a single file (NetCDF) as well as for a directory tree (a Zarr
    store is a directory of chunk and metadata files).

    Args:
        path: File or directory to measure.

    Returns:
        Size in MiB as a float; ``0.0`` when ``path`` does not exist.
    """
    target = Path(path)
    if target.is_dir():
        total = sum(f.stat().st_size for f in target.rglob("*") if f.is_file())
    elif target.is_file():
        total = target.stat().st_size
    else:
        total = 0
    return total / (1024 * 1024)


def faster_format(netcdf_time, zarr_time):
    """Return the summary line naming the format that saved faster.

    Args:
        netcdf_time: Seconds spent writing the NetCDF file.
        zarr_time: Seconds spent writing the Zarr store.

    Returns:
        The Zarr message only when Zarr was strictly faster, otherwise the
        NetCDF message.
    """
    if zarr_time < netcdf_time:
        return "Zarr is faster for this dataset"
    return "NetCDF is faster for this dataset"


def run_pipeline(input_path="weather.nc", output_dir="."):
    """Run the load -> subset -> interpolate -> save -> compare pipeline.

    Args:
        input_path: NetCDF file to read.
        output_dir: Directory receiving ``output.nc`` and ``output.zarr``;
            created if missing.

    Returns:
        Tuple ``(netcdf_time, zarr_time)`` with the save durations in seconds.

    Raises:
        ValueError: If the region subset contains no grid points.
    """
    # Imported here so that ``--help`` and the pure helpers above work even
    # when xarray/dask are not installed.
    import xarray as xr

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    netcdf_path = out_dir / "output.nc"
    zarr_path = out_dir / "output.zarr"

    # -------------------------------
    # LOAD DATASET WITH CHUNKING
    # -------------------------------
    print("\n--- Loading Dataset ---")
    # The context manager closes the source file once everything is written.
    with xr.open_dataset(input_path, chunks={"time": TIME_CHUNK}) as ds:
        print(ds)

        # -------------------------------
        # SUBSETTING REGION
        # -------------------------------
        lat_axis = ds["latitude"].values
        lon_axis = ds["longitude"].values
        region = ds.sel(
            latitude=ordered_slice(lat_axis[0], lat_axis[-1], *REGION_LATITUDE),
            longitude=ordered_slice(lon_axis[0], lon_axis[-1], *REGION_LONGITUDE),
        )

        print("\n--- After Subsetting ---")
        print(region)

        if region.sizes["latitude"] == 0 or region.sizes["longitude"] == 0:
            raise ValueError(
                f"Region latitude={REGION_LATITUDE}, longitude={REGION_LONGITUDE} "
                f"selects no grid points from {input_path!r}"
            )

        # -------------------------------
        # INTERPOLATION
        # -------------------------------
        point = region.interp(latitude=POINT_LATITUDE, longitude=POINT_LONGITUDE)

        print("\n--- After Interpolation ---")
        print(point)

        # Evaluate the lazy read + interpolation once, keeping the Dask
        # chunking.  Otherwise each save below would redo that work and the
        # timings would measure computation instead of storage.
        point = point.persist()

        # -------------------------------
        # SAVE AS NETCDF
        # -------------------------------
        print("\n--- Saving as NetCDF ---")
        start = time.perf_counter()  # monotonic clock, suited to intervals
        point.to_netcdf(netcdf_path)
        netcdf_time = time.perf_counter() - start

        print("NetCDF Save Time:", netcdf_time, "seconds")

        # -------------------------------
        # SAVE AS ZARR
        # -------------------------------
        print("\n--- Saving as Zarr ---")
        start = time.perf_counter()
        point.to_zarr(zarr_path, mode="w")
        zarr_time = time.perf_counter() - start

        print("Zarr Save Time:", zarr_time, "seconds")

        # -------------------------------
        # VARIABLES INFO
        # -------------------------------
        variable_names = list(point.data_vars)

    # -------------------------------
    # FILE SIZE (NetCDF and Zarr)
    # -------------------------------
    print("\nNetCDF File Size:", round(store_size_mb(netcdf_path), 2), "MB")
    print("Zarr Store Size:", round(store_size_mb(zarr_path), 2), "MB")

    print("\n--- Dataset Variables ---")
    print(variable_names)

    print("\n--- Performance Summary ---")
    print("NetCDF Time:", netcdf_time, "seconds")
    print("Zarr Time:", zarr_time, "seconds")
    print(faster_format(netcdf_time, zarr_time))

    print("\n✔ Data pipeline example completed successfully")
    return netcdf_time, zarr_time


def main(argv=None):
    """Parse command-line options and run the pipeline.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Weather data pipeline example")
    parser.add_argument(
        "--input",
        default="weather.nc",
        help="input NetCDF dataset (default: %(default)s)",
    )
    parser.add_argument(
        "--output-dir",
        default=os.curdir,
        help="directory for output.nc and output.zarr (default: %(default)s)",
    )
    options = parser.parse_args(argv)
    run_pipeline(options.input, options.output_dir)


if __name__ == "__main__":
    main()
a/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/c/2 b/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/c/2 new file mode 100644 index 00000000..b264fbaa Binary files /dev/null and b/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/c/2 differ diff --git a/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/zarr.json b/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/zarr.json new file mode 100644 index 00000000..a8572d60 --- /dev/null +++ b/examples/data_pipeline/output.zarr/Pressure_reduced_to_MSL_msl/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/Temperature_surface/c/0 b/examples/data_pipeline/output.zarr/Temperature_surface/c/0 new file mode 100644 index 00000000..d02be703 Binary files /dev/null and b/examples/data_pipeline/output.zarr/Temperature_surface/c/0 differ diff --git a/examples/data_pipeline/output.zarr/Temperature_surface/c/1 b/examples/data_pipeline/output.zarr/Temperature_surface/c/1 new file mode 100644 index 00000000..d02be703 Binary files /dev/null and b/examples/data_pipeline/output.zarr/Temperature_surface/c/1 differ diff --git a/examples/data_pipeline/output.zarr/Temperature_surface/c/2 b/examples/data_pipeline/output.zarr/Temperature_surface/c/2 new file mode 100644 index 
00000000..f6c84711 Binary files /dev/null and b/examples/data_pipeline/output.zarr/Temperature_surface/c/2 differ diff --git a/examples/data_pipeline/output.zarr/Temperature_surface/zarr.json b/examples/data_pipeline/output.zarr/Temperature_surface/zarr.json new file mode 100644 index 00000000..a8572d60 --- /dev/null +++ b/examples/data_pipeline/output.zarr/Temperature_surface/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/VHM0/c/0 b/examples/data_pipeline/output.zarr/VHM0/c/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VHM0/c/0 differ diff --git a/examples/data_pipeline/output.zarr/VHM0/c/1 b/examples/data_pipeline/output.zarr/VHM0/c/1 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VHM0/c/1 differ diff --git a/examples/data_pipeline/output.zarr/VHM0/c/2 b/examples/data_pipeline/output.zarr/VHM0/c/2 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/VHM0/c/2 differ diff --git a/examples/data_pipeline/output.zarr/VHM0/zarr.json b/examples/data_pipeline/output.zarr/VHM0/zarr.json new file mode 100644 index 00000000..a8572d60 --- /dev/null +++ 
b/examples/data_pipeline/output.zarr/VHM0/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/VMDR/c/0 b/examples/data_pipeline/output.zarr/VMDR/c/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VMDR/c/0 differ diff --git a/examples/data_pipeline/output.zarr/VMDR/c/1 b/examples/data_pipeline/output.zarr/VMDR/c/1 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VMDR/c/1 differ diff --git a/examples/data_pipeline/output.zarr/VMDR/c/2 b/examples/data_pipeline/output.zarr/VMDR/c/2 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/VMDR/c/2 differ diff --git a/examples/data_pipeline/output.zarr/VMDR/zarr.json b/examples/data_pipeline/output.zarr/VMDR/zarr.json new file mode 100644 index 00000000..a8572d60 --- /dev/null +++ b/examples/data_pipeline/output.zarr/VMDR/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + 
"configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/VTPK/c/0 b/examples/data_pipeline/output.zarr/VTPK/c/0 new file mode 100644 index 00000000..c6884615 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VTPK/c/0 differ diff --git a/examples/data_pipeline/output.zarr/VTPK/c/1 b/examples/data_pipeline/output.zarr/VTPK/c/1 new file mode 100644 index 00000000..c6884615 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VTPK/c/1 differ diff --git a/examples/data_pipeline/output.zarr/VTPK/c/2 b/examples/data_pipeline/output.zarr/VTPK/c/2 new file mode 100644 index 00000000..149ca768 Binary files /dev/null and b/examples/data_pipeline/output.zarr/VTPK/c/2 differ diff --git a/examples/data_pipeline/output.zarr/VTPK/zarr.json b/examples/data_pipeline/output.zarr/VTPK/zarr.json new file mode 100644 index 00000000..a8572d60 --- /dev/null +++ b/examples/data_pipeline/output.zarr/VTPK/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git 
a/examples/data_pipeline/output.zarr/depth/zarr.json b/examples/data_pipeline/output.zarr/depth/zarr.json new file mode 100644 index 00000000..44ef04ca --- /dev/null +++ b/examples/data_pipeline/output.zarr/depth/zarr.json @@ -0,0 +1,43 @@ +{ + "shape": [ + 1 + ], + "data_type": "int32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "dimension_names": [ + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/height_above_ground/c/0 b/examples/data_pipeline/output.zarr/height_above_ground/c/0 new file mode 100644 index 00000000..cab75e9f Binary files /dev/null and b/examples/data_pipeline/output.zarr/height_above_ground/c/0 differ diff --git a/examples/data_pipeline/output.zarr/height_above_ground/zarr.json b/examples/data_pipeline/output.zarr/height_above_ground/zarr.json new file mode 100644 index 00000000..32b5fee1 --- /dev/null +++ b/examples/data_pipeline/output.zarr/height_above_ground/zarr.json @@ -0,0 +1,43 @@ +{ + "shape": [ + 1 + ], + "data_type": "int32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "dimension_names": [ + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No 
newline at end of file diff --git a/examples/data_pipeline/output.zarr/latitude/c b/examples/data_pipeline/output.zarr/latitude/c new file mode 100644 index 00000000..538ee5de Binary files /dev/null and b/examples/data_pipeline/output.zarr/latitude/c differ diff --git a/examples/data_pipeline/output.zarr/latitude/zarr.json b/examples/data_pipeline/output.zarr/latitude/zarr.json new file mode 100644 index 00000000..429f228a --- /dev/null +++ b/examples/data_pipeline/output.zarr/latitude/zarr.json @@ -0,0 +1,36 @@ +{ + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/longitude/c b/examples/data_pipeline/output.zarr/longitude/c new file mode 100644 index 00000000..45a2a882 Binary files /dev/null and b/examples/data_pipeline/output.zarr/longitude/c differ diff --git a/examples/data_pipeline/output.zarr/longitude/zarr.json b/examples/data_pipeline/output.zarr/longitude/zarr.json new file mode 100644 index 00000000..429f228a --- /dev/null +++ b/examples/data_pipeline/output.zarr/longitude/zarr.json @@ -0,0 +1,36 @@ +{ + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + 
"attributes": {}, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/so/c/0/0 b/examples/data_pipeline/output.zarr/so/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/so/c/0/0 differ diff --git a/examples/data_pipeline/output.zarr/so/c/1/0 b/examples/data_pipeline/output.zarr/so/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/so/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/so/c/2/0 b/examples/data_pipeline/output.zarr/so/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/so/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/so/zarr.json b/examples/data_pipeline/output.zarr/so/zarr.json new file mode 100644 index 00000000..21d977b4 --- /dev/null +++ b/examples/data_pipeline/output.zarr/so/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/thetao/c/0/0 b/examples/data_pipeline/output.zarr/thetao/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/thetao/c/0/0 differ diff 
--git a/examples/data_pipeline/output.zarr/thetao/c/1/0 b/examples/data_pipeline/output.zarr/thetao/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/thetao/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/thetao/c/2/0 b/examples/data_pipeline/output.zarr/thetao/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/thetao/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/thetao/zarr.json b/examples/data_pipeline/output.zarr/thetao/zarr.json new file mode 100644 index 00000000..21d977b4 --- /dev/null +++ b/examples/data_pipeline/output.zarr/thetao/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/time/c/0 b/examples/data_pipeline/output.zarr/time/c/0 new file mode 100644 index 00000000..e8fe47d5 Binary files /dev/null and b/examples/data_pipeline/output.zarr/time/c/0 differ diff --git a/examples/data_pipeline/output.zarr/time/zarr.json b/examples/data_pipeline/output.zarr/time/zarr.json new file mode 100644 index 00000000..58f9cbf5 --- /dev/null +++ b/examples/data_pipeline/output.zarr/time/zarr.json @@ -0,0 +1,46 @@ +{ + "shape": [ + 25 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + 
"configuration": { + "chunk_shape": [ + 25 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "hours since 2024-01-01", + "calendar": "proleptic_gregorian" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/0/0 b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/0/0 differ diff --git a/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/1/0 b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/2/0 b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/zarr.json b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/zarr.json new file mode 100644 index 00000000..71cca17f --- /dev/null +++ b/examples/data_pipeline/output.zarr/u-component_of_wind_height_above_ground/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + 
"data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/utotal/c/0/0 b/examples/data_pipeline/output.zarr/utotal/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/utotal/c/0/0 differ diff --git a/examples/data_pipeline/output.zarr/utotal/c/1/0 b/examples/data_pipeline/output.zarr/utotal/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/utotal/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/utotal/c/2/0 b/examples/data_pipeline/output.zarr/utotal/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/utotal/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/utotal/zarr.json b/examples/data_pipeline/output.zarr/utotal/zarr.json new file mode 100644 index 00000000..21d977b4 --- /dev/null +++ b/examples/data_pipeline/output.zarr/utotal/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + 
"endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/0/0 b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/0/0 differ diff --git a/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/1/0 b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/2/0 b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/zarr.json b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/zarr.json new file mode 100644 index 00000000..71cca17f --- /dev/null +++ b/examples/data_pipeline/output.zarr/v-component_of_wind_height_above_ground/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + 
"fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/vtotal/c/0/0 b/examples/data_pipeline/output.zarr/vtotal/c/0/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/vtotal/c/0/0 differ diff --git a/examples/data_pipeline/output.zarr/vtotal/c/1/0 b/examples/data_pipeline/output.zarr/vtotal/c/1/0 new file mode 100644 index 00000000..0c734383 Binary files /dev/null and b/examples/data_pipeline/output.zarr/vtotal/c/1/0 differ diff --git a/examples/data_pipeline/output.zarr/vtotal/c/2/0 b/examples/data_pipeline/output.zarr/vtotal/c/2/0 new file mode 100644 index 00000000..d2411acf Binary files /dev/null and b/examples/data_pipeline/output.zarr/vtotal/c/2/0 differ diff --git a/examples/data_pipeline/output.zarr/vtotal/zarr.json b/examples/data_pipeline/output.zarr/vtotal/zarr.json new file mode 100644 index 00000000..21d977b4 --- /dev/null +++ b/examples/data_pipeline/output.zarr/vtotal/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + 
"dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/examples/data_pipeline/output.zarr/zarr.json b/examples/data_pipeline/output.zarr/zarr.json new file mode 100644 index 00000000..8824f18c --- /dev/null +++ b/examples/data_pipeline/output.zarr/zarr.json @@ -0,0 +1,739 @@ +{ + "attributes": {}, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "depth": { + "shape": [ + 1 + ], + "data_type": "int32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "dimension_names": [ + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "height_above_ground": { + "shape": [ + 1 + ], + "data_type": "int32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "dimension_names": [ + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "latitude": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + 
"name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "longitude": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": {}, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "Pressure_reduced_to_MSL_msl": { + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "so": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + 
"_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "Temperature_surface": { + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "thetao": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "time": { + "shape": [ + 25 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 25 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + 
"units": "hours since 2024-01-01", + "calendar": "proleptic_gregorian" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "u-component_of_wind_height_above_ground": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "utotal": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "v-component_of_wind_height_above_ground": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + 
"endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "height_above_ground" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "VHM0": { + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "VMDR": { + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "vtotal": { + "shape": [ + 25, + 1 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10, + 1 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": 
"bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time", + "depth" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "VTPK": { + "shape": [ + 25 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 10 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": "NaN", + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "coordinates": "latitude longitude", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + } + } + }, + "node_type": "group" +} \ No newline at end of file diff --git a/examples/data_pipeline/weather.nc b/examples/data_pipeline/weather.nc new file mode 100644 index 00000000..b9f27bee Binary files /dev/null and b/examples/data_pipeline/weather.nc differ