Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ atmosphere:
climate:
- NOAA_correlation
- NOAA_correlation_xr
- Global_Carbon_Budget_2025
- Global_Carbon_Budget_2021

shapefiles:
- Countries
Expand Down Expand Up @@ -93,8 +95,28 @@ import intake
cat = intake.open_catalog('https://raw.githubusercontent.com/aaronspring/remote_climate_data/master/master.yaml')
cat.atmosphere.HadCRUT5.to_dask()
```
```
<xarray.Dataset> Size: 42MB
Dimensions: (time: 2028, latitude: 36, longitude: 72, bnds: 2)
...
```

```python
import hvplot.pandas
gcb = cat.climate().Global_Carbon_Budget_2025.read()
gcb.hvplot(y=['fossil emissions excluding carbonation', 'land-use change emissions',
'atmospheric growth', 'ocean sink', 'land sink'],
title='Global Carbon Budget 2025')
gcb
fossil emissions excluding carbonation ... budget imbalance
Year
1959 2.416788 ... 1.168380
...
2024 10.534546 ... -1.691863
[66 rows x 7 columns]
```

To explore the whole catalog, you can try:
Explore the whole catalog:
```python
cat.walk()
```
Expand All @@ -115,7 +137,7 @@ Make data access for climate data easy:
- [`intake_xarray`](https://intake-xarray.readthedocs.io/en/latest/) for:
- `nc` using [`netcdf4`](https://github.com/Unidata/netcdf4-python) [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/atmosphere.yaml#L64)]
- `tif` using [`rioxarray`](https://github.com/corteva/rioxarray) [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/humans.yaml#L42)]
- [`intake_excel`](https://github.com/edjdavid/intake-excel) for Excel `xls` and `xlsx` [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/climate.yaml#L35)]
- [`intake_excel`](https://github.com/edjdavid/intake-excel) for Excel `xls` and `xlsx` [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/climate.yaml#L35)] (see [`excel_source.py`](remote_climate_data/excel_source.py) for a custom driver to use if the package is unavailable)
- [`intake_geopandas`](https://github.com/intake/intake_geopandas) for shapefiles `shp` [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/shapefiles.yaml#L11)], GeoJSON `geo.json` [[example](https://github.com/aaronspring/remote_climate_data/blob/1209c5ebf5877b09b4403ea60da6d97b374b7b5c/catalogs/shapefiles.yaml#L57)], GeoParquet `parquet`, `PostGIS` databases, `Spatialite` databases
- [`regionmask`](https://regionmask.readthedocs.io/) for aggregating over geoshapes

Expand Down
31 changes: 31 additions & 0 deletions catalogs/climate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,38 @@
plugins:
source:
- module: intake_xarray
- module: remote_climate_data
sources:
Global_Carbon_Budget_2025:
description: Global Carbon Budget 2025
metadata:
url: https://globalcarbonbudget.org/
doi:
- https://doi.org/10.5194/essd-2025-659
- https://doi.org/10.18160/gcp-2025
driver: excel_url
args:
urlpath: simplecache::https://data.icos-cp.eu/licence_accept?ids=%5B%22UtUDiUg-PuYWkAiHoUNn83e0%22%5D
sheet_name: Global Carbon Budget
header: 21
index_col: Year
skipfooter: 0

Global_Carbon_Budget_2021:
description: Global Carbon Budget 2021
metadata:
url: https://www.globalcarbonproject.org/carbonbudget/
doi:
- https://doi.org/10.5194/essd-2021-386
- https://doi.org/10.18160/gcp-2021
driver: excel_url
args:
urlpath: simplecache::https://data.icos-cp.eu/licence_accept?ids=%5B%220ST81nXCND5VfAQdOCSJDveT%22%5D
sheet_name: Global Carbon Budget
header: 20
index_col: Year
skipfooter: 0

NOAA_correlation:
description: climate indices from psl.noaa.gov/data/correlation
metadata:
Expand Down
6 changes: 4 additions & 2 deletions remote_climate_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import intake

from remote_climate_data import excel_source # noqa: F401


def cmor_cat(cat="../master.yml", keyword="cmor", depth=3):
    """Return a catalog holding only the entries that match ``keyword``.

    Args:
        cat: Either a path/URL string, which is opened with
            ``intake.open_catalog``, or an already opened YAML file catalog.
        keyword: Text passed to the catalog's ``search`` method.
        depth: Search depth passed to the catalog's ``search`` method.

    Returns:
        An ``intake.Catalog`` built from the walked search results.

    Raises:
        AssertionError: If ``cat`` (after opening, when a string was given)
            is not an ``intake.catalog.local.YAMLFileCatalog``.
    """
    if isinstance(cat, str):
        cat = intake.open_catalog(cat)
    # NOTE: assert is skipped under ``python -O``; kept so callers continue to
    # see AssertionError for an unsupported catalog type.
    assert isinstance(cat, intake.catalog.local.YAMLFileCatalog)
    matches = cat.search(keyword, depth=depth)
    return intake.Catalog.from_dict(matches.walk())
73 changes: 73 additions & 0 deletions remote_climate_data/excel_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import io

import intake
import intake.source


class ExcelSource(intake.source.base.DataSource):
    """Intake driver that reads an Excel workbook located at a URL.

    The location is resolved through ``fsspec``, so a ``simplecache::``
    prefix may be used to cache the download.
    """

    container = "dataframe"
    name = "excel_url"
    version = "0.0.1"

    def __init__(
        self,
        urlpath,
        sheet_name="Global Carbon Budget",
        header=21,
        index_col="Year",
        skipfooter=4,
        metadata=None,
    ):
        """Store the location and the parsing options for a later read.

        Args:
            urlpath: URL (optionally ``simplecache::``-prefixed) of the
                Excel file.
            sheet_name: Sheet to read.
            header: 0-indexed row used as the header.
            index_col: Column used as the index.
            skipfooter: Number of trailing rows to skip.
            metadata: Extra metadata handed to the base class.
        """
        super().__init__(metadata=metadata)
        # Location of the workbook.
        self.urlpath = urlpath
        # Options forwarded unchanged to ``pandas.read_excel`` in ``_load``.
        self.skipfooter = skipfooter
        self.index_col = index_col
        self.header = header
        self.sheet_name = sheet_name

    def _get_schema(self):
        """Build a placeholder schema without touching the file."""
        schema_fields = {
            "datatypes": {"_": "python"},
            "shape": (None, None),
            "npartitions": 1,
            "metadata": self.metadata,
        }
        return intake.source.base.Schema(**schema_fields)

    def _load(self):
        """Fetch the workbook bytes and parse them with ``pandas.read_excel``."""
        import fsspec
        import pandas as pd

        filesystem, _unused_token, resolved = fsspec.get_fs_token_paths(self.urlpath)
        target = resolved[0]
        parse_options = {
            "sheet_name": self.sheet_name,
            "header": self.header,
            "index_col": self.index_col,
            "skipfooter": self.skipfooter,
        }
        with filesystem.open(target, "rb") as handle:
            # Parse from an in-memory copy of the file's bytes.
            payload = io.BytesIO(handle.read())
            return pd.read_excel(payload, **parse_options)

    def read(self):
        """Return the result of ``_load``; the file is read on every call."""
        return self._load()

    def _close(self):
        """Nothing to release: no handle is kept open between reads."""
        return None


# Register ExcelSource with intake under the driver name "excel_url".
# This is a module-level call, so it runs as a side effect of importing
# this module.
intake.source.register_driver("excel_url", ExcelSource)