diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py
index 8346809..ae2f565 100644
--- a/kerchunk/grib2.py
+++ b/kerchunk/grib2.py
@@ -66,12 +66,34 @@ def _split_file(f: io.FileIO, skip=0):
     while f.tell() < size:
         logger.debug(f"extract part {part + 1}")
         head = f.read(1024)
-        if b"GRIB" not in head:
-            f.seek(-4, 1)
+        ind = head.find(b"GRIB")
+        if ind == -1:
+            if len(head) < 1024:  # final partial read -> no more messages
+                break
+            f.seek(-4, 1)  # 'GRIB' may straddle the 1024-byte boundary
             continue
-        ind = head.index(b"GRIB")
         start = f.tell() - len(head) + ind
-        part_size = int.from_bytes(head[ind + 12 : ind + 16], "big")
+        if ind + 16 > len(head):
+            # The 16-byte indicator section runs past the bytes already read.
+            # Re-read it from the message start so the edition/length fields
+            # are never truncated (head[ind + 7] could raise IndexError).
+            f.seek(start)
+            head = f.read(16)
+            ind = 0
+            if len(head) < 16:  # truncated header at end of file
+                break
+        edition = head[ind + 7]
+        if edition == 1:
+            # GRIB1: 24-bit total length at bytes 4-6. ECMWF "large message" extension:
+            # if the top bit is set, the real length is (len & 0x7fffff) * 120 bytes.
+            # NOTE(review): ecCodes appears to also adjust this by the stored section-4
+            # length; confirm the value cannot overshoot into the next message.
+            part_size = int.from_bytes(head[ind + 4 : ind + 7], "big")
+            if part_size & 0x800000:
+                part_size = (part_size & 0x7FFFFF) * 120
+        else:
+            # GRIB2: 64-bit total length at bytes 8-15.
+            part_size = int.from_bytes(head[ind + 8 : ind + 16], "big")
         f.seek(start)
         yield start, part_size, f.read(part_size)
         part += 1
diff --git a/tests/test_grib.py b/tests/test_grib.py
index 2c5387f..0b05162 100644
--- a/tests/test_grib.py
+++ b/tests/test_grib.py
@@ -451,3 +451,37 @@ def test_extract_methods_grib_parameter(zarr_tree_and_datatree_instance):
 
     # checking if level in each series data
     assert all(list(map(lambda data: "level" in data.keys(), grib_metadata)))
+
+
+def test_scan_grib1(tmp_path):
+    # scan_grib must index GRIB1, not just GRIB2 (the decode path is edition-agnostic)
+    p = os.path.join(str(tmp_path), "sample.grib1")
+    gid = eccodes.codes_grib_new_from_samples("regular_ll_sfc_grib1")
+    assert eccodes.codes_get(gid, "editionNumber") == 1
+    with open(p, "wb") as f:
+        eccodes.codes_write(gid, f)
+    eccodes.codes_release(gid)
+
+    refs = scan_grib(p)
+    assert len(refs) >= 1
+    ds = xr.open_dataset(
+        fsspec.filesystem("reference", fo=refs[0]).get_mapper(""),
+        engine="zarr",
+        backend_kwargs={"consolidated": False},
+    )
+    assert len(ds.data_vars) >= 1
+    var = next(iter(ds.data_vars))
+    assert np.isfinite(np.asarray(ds[var].values)).any()
+
+
+def test_scan_grib1_and_grib2(tmp_path):
+    # a file holding both editions yields both messages
+    p = os.path.join(str(tmp_path), "mixed.grib")
+    g1 = eccodes.codes_grib_new_from_samples("regular_ll_sfc_grib1")
+    g2 = eccodes.codes_grib_new_from_samples("regular_ll_sfc_grib2")
+    with open(p, "wb") as f:
+        eccodes.codes_write(g1, f)
+        eccodes.codes_write(g2, f)
+    eccodes.codes_release(g1)
+    eccodes.codes_release(g2)
+    assert len(scan_grib(p)) >= 2