Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions examples/data_pipeline/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Data Pipeline Example for Weather Routing Tool

This example demonstrates how to efficiently handle and process weather datasets using scalable techniques.

## Overview

Working with large weather datasets can be memory-intensive and slow when using traditional loading methods. This example shows how to improve performance and scalability by using:

- Chunked data loading with Dask
- Subsetting and interpolation
- Efficient storage formats (NetCDF and Zarr)
- Basic performance comparison

## Features

- Load dataset using chunking (`xarray + Dask`)
- Subset a specific geographic region
- Interpolate data at a given location
- Save processed data in:
  - NetCDF format
  - Zarr format
- Compare execution time for different storage methods

## File Structure
81 changes: 81 additions & 0 deletions examples/data_pipeline/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import xarray as xr
import time
import os

# Data pipeline example: open "weather.nc" lazily in chunks, cut out a
# latitude/longitude box, interpolate to a single point, write the result as
# both NetCDF and Zarr, and report how long each write took.

# -------------------------------
# LOAD DATASET WITH CHUNKING
# -------------------------------
print("\n--- Loading Dataset ---")

# Opening through a context manager guarantees the handle on "weather.nc" is
# released once the pipeline finishes, even if a later step raises.
with xr.open_dataset("weather.nc", chunks={"time": 10}) as source:
    print(source)

    # -------------------------------
    # SUBSETTING REGION
    # -------------------------------
    # NOTE(review): label-based slices follow the stored coordinate order; if
    # latitude in weather.nc is descending these bounds would select nothing
    # -- confirm against the input file.
    ds = source.sel(
        latitude=slice(10, 20),
        longitude=slice(80, 90),
    )

    print("\n--- After Subsetting ---")
    print(ds)

    # -------------------------------
    # INTERPOLATION
    # -------------------------------
    ds = ds.interp(latitude=15, longitude=85)

    print("\n--- After Interpolation ---")
    print(ds)

    # NOTE(review): because the dataset was opened with `chunks=`, it is
    # presumably still lazy at this point, so each timed write below likely
    # also includes reading and interpolating the data, not just storage.

    # -------------------------------
    # SAVE AS NETCDF
    # -------------------------------
    print("\n--- Saving as NetCDF ---")
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start = time.perf_counter()

    ds.to_netcdf("output.nc")

    netcdf_time = time.perf_counter() - start

    print("NetCDF Save Time:", netcdf_time, "seconds")

    # -------------------------------
    # SAVE AS ZARR
    # -------------------------------
    print("\n--- Saving as Zarr ---")
    start = time.perf_counter()

    # mode="w" overwrites any store left behind by a previous run.
    ds.to_zarr("output.zarr", mode="w")

    zarr_time = time.perf_counter() - start

    print("Zarr Save Time:", zarr_time, "seconds")

    # -------------------------------
    # FILE SIZE (NetCDF)
    # -------------------------------
    if os.path.exists("output.nc"):
        nc_size = os.path.getsize("output.nc") / (1024 * 1024)  # bytes -> MiB
        print("\nNetCDF File Size:", round(nc_size, 2), "MB")

    # -------------------------------
    # VARIABLES INFO
    # -------------------------------
    print("\n--- Dataset Variables ---")
    print(list(ds.data_vars))


# -------------------------------
# PERFORMANCE SUMMARY
# -------------------------------
print("\n--- Performance Summary ---")
print("NetCDF Time:", netcdf_time, "seconds")
print("Zarr Time:", zarr_time, "seconds")

if zarr_time < netcdf_time:
    print("Zarr is faster for this dataset")
else:
    print("NetCDF is faster for this dataset")

print("\n✔ Data pipeline example completed successfully")
Binary file added examples/data_pipeline/output.nc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"shape": [
25
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
10
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": "NaN",
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {
"coordinates": "latitude longitude",
"_FillValue": "AAAAAAAA+H8="
},
"dimension_names": [
"time"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
46 changes: 46 additions & 0 deletions examples/data_pipeline/output.zarr/Temperature_surface/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"shape": [
25
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
10
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": "NaN",
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {
"coordinates": "latitude longitude",
"_FillValue": "AAAAAAAA+H8="
},
"dimension_names": [
"time"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Binary file added examples/data_pipeline/output.zarr/VHM0/c/0
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VHM0/c/1
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VHM0/c/2
Binary file not shown.
46 changes: 46 additions & 0 deletions examples/data_pipeline/output.zarr/VHM0/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"shape": [
25
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
10
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": "NaN",
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {
"coordinates": "latitude longitude",
"_FillValue": "AAAAAAAA+H8="
},
"dimension_names": [
"time"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Binary file added examples/data_pipeline/output.zarr/VMDR/c/0
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VMDR/c/1
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VMDR/c/2
Binary file not shown.
46 changes: 46 additions & 0 deletions examples/data_pipeline/output.zarr/VMDR/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"shape": [
25
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
10
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": "NaN",
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {
"coordinates": "latitude longitude",
"_FillValue": "AAAAAAAA+H8="
},
"dimension_names": [
"time"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Binary file added examples/data_pipeline/output.zarr/VTPK/c/0
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VTPK/c/1
Binary file not shown.
Binary file added examples/data_pipeline/output.zarr/VTPK/c/2
Binary file not shown.
46 changes: 46 additions & 0 deletions examples/data_pipeline/output.zarr/VTPK/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"shape": [
25
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
10
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": "NaN",
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {
"coordinates": "latitude longitude",
"_FillValue": "AAAAAAAA+H8="
},
"dimension_names": [
"time"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
43 changes: 43 additions & 0 deletions examples/data_pipeline/output.zarr/depth/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"shape": [
1
],
"data_type": "int32",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
1
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": 0,
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "zstd",
"configuration": {
"level": 0,
"checksum": false
}
}
],
"attributes": {},
"dimension_names": [
"depth"
],
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Binary file not shown.
Loading