From 3c4076f74c59e50e781c57ddd59be648fa5ee01d Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 16:43:58 -0800
Subject: [PATCH 01/22] remove migrated tests

---
 virtualizarr/tests/test_parsers/test_dmrpp.py | 121 ------------------
 1 file changed, 121 deletions(-)
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index e04d6833..01646250 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -588,127 +588,6 @@ def test_parse_attribute(netcdf4_file, attr_path, expected):
     assert result == expected
 
 
-def test_dmrpp_empty_scalar_warns_container(fill_value_scalar_no_chunks_nc4_url):
-    parsed_dmrpp = dmrparser(
-        DMRPP_XML_STRINGS["fill_value_scalar_no_chunks_nc4_url"],
-        filepath=fill_value_scalar_no_chunks_nc4_url,
-    )
-    store = obstore_local(url=f"file://{parsed_dmrpp.data_filepath}")
-    with pytest.warns(UserWarning):
-        parsed_vds = parsed_dmrpp.parse_dataset(object_store=store)
-        vds_g1 = parsed_vds.to_virtual_dataset()
-        assert vds_g1["data"].attrs == {"_FillValue": -999}
-
-
-def test_dmrpp_phony_dim_naming():
-    dmrpp_xml_str = textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="phony_test.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Float32 name="data">
-                <Dim size="10"/>
-                <Dim size="20"/>
-                <Attribute name="_FillValue" type="Float32">
-                    <Value>NaN</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="100" nBytes="800"/>
-                </dmrpp:chunks>
-            </Float32>
-        </Dataset>
-        """
-    )
-    parser = dmrparser(dmrpp_xml_str, filepath="file:///phony_test.nc")
-    var = parser._parse_variable(parser.find_node_fqn("/data"))
-    assert var.metadata.dimension_names == ("phony_dim_0", "phony_dim_1")
-    assert var.shape == (10, 20)
-
-
-def test_dmrpp_validation_issues_accumulation():
-    dmrpp_xml_str = textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="validation_test.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Dimension name="lat" size="25"/>
-            <Float32 name="data">
-                <Dim name="/lat"/>
-                <Attribute type="Float32">
-                    <Value>1.0</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="100" nBytes="100"/>
-                </dmrpp:chunks>
-            </Float32>
-        </Dataset>
-        """
-    )
-    parser = dmrparser(dmrpp_xml_str, filepath="file:///validation_test.nc")
-    parser._parse_dataset(parser.root)
-    assert len(parser._validation_issues) > 0
-    assert any(
-        "Missing required attribute 'name'" in issue
-        for issue in parser._validation_issues
-    )
-
-
-def test_dmrpp_get_attrib_with_missing_optional():
-    dmrpp_xml_str = textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="test.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Dimension name="lat" size="25"/>
-        </Dataset>
-        """
-    )
-    parser = dmrparser(dmrpp_xml_str, filepath="file:///test.nc")
-    dimension = parser.root.find("dap:Dimension", parser._NS)
-    result = parser._get_attrib(dimension, "nonexistent")
-    assert result is None
-    assert len(parser._validation_issues) == 1
-
-
-def test_dmrpp_get_attrib_with_required_missing():
-    dmrpp_xml_str = textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="test.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Dimension name="lat" size="25"/>
-        </Dataset>
-        """
-    )
-    parser = dmrparser(dmrpp_xml_str, filepath="file:///test.nc")
-    dimension = parser.root.find("dap:Dimension", parser._NS)
-    with pytest.raises(ValueError, match="Missing required attribute 'nonexistent'"):
-        parser._get_attrib(dimension, "nonexistent", required=True)
-
-
-def test_dmrpp_mixed_named_and_unnamed_dimensions():
-    dmrpp_xml_str = textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="mixed_test.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Dimension name="time" size="10"/>
-            <Dimension name="lat" size="30"/>
-            <Float32 name="data">
-                <Dim name="/time"/>
-                <Dim size="20"/>
-                <Dim name="/lat"/>
-                <Attribute name="_FillValue" type="Float32">
-                    <Value>NaN</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="100" nBytes="24000"/>
-                </dmrpp:chunks>
-            </Float32>
-        </Dataset>
-        """
-    )
-    parser = dmrparser(dmrpp_xml_str, filepath="file:///mixed_test.nc")
-    var = parser._parse_variable(parser.find_node_fqn("/data"))
-    assert var.metadata.dimension_names == ("time", "phony_dim_1", "lat")
-    assert var.shape == (10, 20, 30)
-
-
 def test_dmrpp_simple(dmrpp_xml_simple):
     """Test parsing a simple valid DMR++ XML creates virtual chunk manifests."""
     parser = dmrparser(dmrpp_xml_simple, filepath="file:///simple.nc")

From bd8d1e6937cc5fb2d9b6bbf5aeeca7b038f4e35e Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 16:55:22 -0800
Subject: [PATCH 02/22] strip DMRParser class

---
 virtualizarr/parsers/dmrpp.py | 553 +---------------------------------
 1 file changed, 3 insertions(+), 550 deletions(-)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index c6b7b3aa..2887e6bb 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -1,23 +1,13 @@
 import io
-import warnings
-from pathlib import Path
-from typing import Any, Iterable
+from typing import Iterable
 from xml.etree import ElementTree as ET
 
-import numpy as np
-from obspec_utils.protocols import ReadableStore
 from obspec_utils.readers import EagerStoreReader
 from obspec_utils.registry import ObjectStoreRegistry
 
 from virtualizarr.manifests import (
-    ChunkManifest,
-    ManifestArray,
-    ManifestGroup,
     ManifestStore,
 )
-from virtualizarr.manifests.utils import create_v3_array_metadata
-from virtualizarr.parsers.utils import encode_cf_fill_value
-from virtualizarr.types import ChunkKey
 
 
 class DMRPPParser:
@@ -89,27 +79,7 @@ class DMRParser:
         "dap": "http://xml.opendap.org/ns/DAP/4.0#",
         "dmrpp": "http://xml.opendap.org/dap/dmrpp/1.0.0#",
     }
-    # DAP data types to numpy data types
-    _DAP_NP_DTYPE = {
-        "Byte": "uint8",
-        "UByte": "uint8",
-        "Int8": "int8",
-        "UInt8": "uint8",
-        "Int16": "int16",
-        "UInt16": "uint16",
-        "Int32": "int32",
-        "UInt32": "uint32",
-        "Int64": "int64",
-        "UInt64": "uint64",
-        "Url": "object",
-        "Float32": "float32",
-        "Float64": "float64",
-        "String": "object",
-    }
-    # Default zlib compression value
-    _DEFAULT_ZLIB_VALUE = 6
-    # Encoding keys that should be removed from attributes and placed in xarray encoding dict
-    # _ENCODING_KEYS = {"_FillValue", "missing_value", "scale_factor", "add_offset"}
+
     root: ET.Element
     data_filepath: str
 
@@ -132,522 +102,5 @@ def __init__(
         """
         self.root = root
         self._validation_issues: list[str] = []
-        data_filepath_from_root = self._get_attrib(self.root, "name", required=True)
-        assert data_filepath_from_root is not None  # required=True guarantees non-None
-        self.data_filepath = (
-            data_filepath if data_filepath is not None else data_filepath_from_root
-        )
+        self.data_filepath = data_filepath
         self.skip_variables = skip_variables or ()
-
-    def _get_attrib(
-        self, element: ET.Element, attrib_name: str, required: bool = False
-    ) -> str | None:
-        """
-        Safely get an attribute from an XML element, logging validation issues.
-
-        Parameters
-        ----------
-        element
-            The XML element to get the attribute from.
-        attrib_name
-            The name of the attribute to get.
-        required
-            If True, raises a ValueError when the attribute is missing. If False,
-            returns None and logs the issue.
-
-        Returns
-        -------
-        str | None
-            The attribute value if found, None otherwise.
-
-        Raises
-        ------
-        ValueError
-            If required is True and the attribute is not found.
-        """
-        if attrib_name in element.attrib:
-            return element.attrib[attrib_name]
-
-        element_info = (
-            element.tag
-            if "name" not in element.attrib
-            else f"{element.tag}[@name='{element.attrib['name']}']"
-        )
-        issue_msg = (
-            f"Missing required attribute '{attrib_name}' in element: {element_info}"
-        )
-        self._validation_issues.append(issue_msg)
-
-        if required:
-            raise ValueError(issue_msg)
-
-        return None
-
-    def parse_dataset(
-        self,
-        object_store: ReadableStore,
-        group: str | None = None,
-    ) -> ManifestStore:
-        """
-        Parses the given file and creates a ManifestStore.
-
-        Parameters
-        ----------
-        group
-            The group to parse. Ignored if no groups are present, and the entire
-            dataset is parsed. If `None` or "/", and groups are present, the first group
-            is parsed.  If not `None` or "/", and no groups are present, a UserWarning
-            is issued indicating that the group will be ignored.
-
-        Returns
-        -------
-        ManifestStore
-
-        Examples
-        --------
-        Open a sample DMR++ file and parse the dataset
-        """
-        group = group or "/"
-        ngroups = len(self.root.findall("dap:Group", self._NS))
-
-        if ngroups == 0 and group != "/":
-            warnings.warn(
-                f"No groups in DMR++ file {self.data_filepath!r}; "
-                f"ignoring group parameter {group!r}"
-            )
-
-        group_path = Path("/") if ngroups == 0 else Path("/") / group.removeprefix("/")
-        dataset_element = self._split_groups(self.root).get(group_path)
-
-        if dataset_element is None:
-            raise ValueError(
-                f"Group {group_path} not found in DMR++ file {self.data_filepath!r}"
-            )
-
-        manifest_group = self._parse_dataset(dataset_element)
-        registry: ObjectStoreRegistry = ObjectStoreRegistry()
-        registry.register(self.data_filepath, object_store)
-
-        return ManifestStore(registry=registry, group=manifest_group)
-
-    def find_node_fqn(self, fqn: str) -> ET.Element:
-        """
-        Find the element in the root element by converting the fully qualified name to an xpath query.
-
-        E.g. fqn = "/a/b" --> root.find("./*[@name='a']/*[@name='b']")
-
-        See more about OPeNDAP fully qualified names (FQN) here: https://docs.opendap.org/index.php/DAP4:_Specification_Volume_1#Fully_Qualified_Names
-
-        Parameters
-        ----------
-        fqn
-            The fully qualified name of an element. For example, "/a/b".
-
-        Returns
-        -------
-        ET.Element
-            The matching node found within the root element.
-
-        Raises
-        ------
-        ValueError
-            If the fully qualified name is not found in the root element.
-        """
-        if fqn == "/":
-            return self.root
-
-        elements = fqn.strip("/").split("/")  # /a/b/ --> ['a', 'b']
-        xpath_segments = [f"*[@name='{element}']" for element in elements]
-        xpath_query = "/".join([".", *xpath_segments])  # "./[*[@name='a']/*[@name='b']"
-
-        if (element := self.root.find(xpath_query, self._NS)) is None:
-            raise ValueError(f"Path {fqn} not found in provided root")
-
-        return element
-
-    def _split_groups(self, root: ET.Element) -> dict[Path, ET.Element]:
-        """
-        Split the input <Dataset> element into several <Dataset> ET.Elements by <Group> name.
-        E.g. {"/": <Dataset>, "left": <Dataset>, "right": <Dataset>}
-
-        Parameters
-        ----------
-        root : ET.Element
-            The root element of the DMR file.
-
-        Returns
-        -------
-        dict[Path, ET.Element]
-        """
-        all_groups: dict[Path, ET.Element] = {}
-        dataset_tags = [
-            d for d in root if d.tag != "{" + self._NS["dap"] + "}" + "Group"
-        ]
-        if len(dataset_tags) > 0:
-            all_groups[Path("/")] = ET.Element(root.tag, root.attrib)
-            all_groups[Path("/")].extend(dataset_tags)
-        all_groups.update(self._split_groups_recursive(root, Path("/")))
-        return all_groups
-
-    def _split_groups_recursive(
-        self, root: ET.Element, current_path=Path("")
-    ) -> dict[Path, ET.Element]:
-        group_dict: dict[Path, ET.Element] = {}
-        for g in root.iterfind("dap:Group", self._NS):
-            group_name = self._get_attrib(g, "name", required=True)
-            if group_name is None:
-                continue
-            new_path = current_path / Path(group_name)
-            dataset_tags = [
-                d for d in g if d.tag != "{" + self._NS["dap"] + "}" + "Group"
-            ]
-            group_dict[new_path] = ET.Element(g.tag, g.attrib)
-            group_dict[new_path].extend(dataset_tags)
-            group_dict.update(self._split_groups_recursive(g, new_path))
-        return group_dict
-
-    def _parse_dataset(
-        self,
-        root: ET.Element,
-    ) -> ManifestGroup:
-        """
-        Parse the dataset using the root element of the DMR++ file.
-
-        Parameters
-        ----------
-        root : ET.Element
-            The root element of the DMR++ file.
-
-        Returns
-        -------
-        ManifestGroup
-        """
-
-        manifest_dict: dict[str, ManifestArray] = {}
-        for var_tag in self._find_var_tags(root):
-            var_name = self._get_attrib(var_tag, "name")
-            if var_name and var_name not in self.skip_variables:
-                try:
-                    variable = self._parse_variable(var_tag)
-                    manifest_dict[var_name] = variable
-                except (UnboundLocalError, ValueError):
-                    warnings.warn(
-                        f"This DMRpp contains the variable {var_name} that could not"
-                        " be parsed. Consider adding it to the list  of skipped "
-                        "variables, or opening an issue to help resolve this"
-                    )
-
-        # Attributes
-        attrs: dict[str, str] = {}
-        # Look for an attribute tag called "HDF5_GLOBAL" and unpack it
-        hdf5_global_attrs = root.find("dap:Attribute[@name='HDF5_GLOBAL']", self._NS)
-        if hdf5_global_attrs is not None:
-            # Remove the container attribute and add its children to the root dataset
-            root.remove(hdf5_global_attrs)
-            root.extend(hdf5_global_attrs)
-        for attr_tag in root.iterfind("dap:Attribute", self._NS):
-            attrs.update(self._parse_attribute(attr_tag))
-
-        return ManifestGroup(
-            arrays=manifest_dict,
-            attributes=attrs,
-        )
-
-    def _find_var_tags(self, root: ET.Element) -> list[ET.Element]:
-        """
-        Find all variable tags in the DMR++ file. Also known as array tags.
-        Tags are labeled with the DAP data type. E.g. <Float32>, <Int16>, <String>
-
-        Parameters
-        ----------
-        root : ET.Element
-            The root element of the DMR++ file.
-
-        Returns
-        -------
-        list[ET.Element]
-        """
-        vars_tags: list[ET.Element] = []
-        for dap_dtype in self._DAP_NP_DTYPE:
-            vars_tags += root.findall(f"dap:{dap_dtype}", self._NS)
-        return vars_tags
-
-    def _parse_dim(self, root: ET.Element, dim_index: int = 0) -> dict[str, int]:
-        """
-        Parse single <Dim> or <Dimension> tag
-
-        If the tag has no name attribute, it is a phony dimension. E.g. <Dim size="300"/> --> {"phony_dim_0": 300}
-        If the tag has both name and size attributes, it is a regular dimension. E.g. <Dim name="lat" size="1447"/> --> {"lat": 1447}
-
-        Parameters
-        ----------
-        root : ET.Element
-            The root element Dim/Dimension tag
-        dim_index : int
-            Index of the dimension, used for naming phony dimensions
-
-        Returns
-        -------
-        dict
-            E.g. {"time": 1, "lat": 1447, "lon": 2895}, {"phony_dim_0": 300}, {"time": None, "lat": None, "lon": None}
-        """
-        size_attr = self._get_attrib(root, "size")
-        name_attr = self._get_attrib(root, "name")
-
-        if size_attr is not None:
-            size = int(size_attr)
-            if name_attr is not None:
-                return {Path(name_attr).name: size}
-            else:
-                return {f"phony_dim_{dim_index}": size}
-
-        raise ValueError("Not enough information to parse Dim/Dimension tag")
-
-    def _find_dimension_tags(self, root: ET.Element) -> list[ET.Element]:
-        """
-        Find the all tags with dimension information.
-
-        First attempts to find Dimension tags, then falls back to Dim tags.
-        If Dim tags are found, the fully qualified name is used to find the corresponding Dimension tag.
-        If Dim tags have no name attribute, they are phony dimensions and used directly.
-
-        Parameters
-        ----------
-        root : ET.Element
-            An ElementTree Element from a DMR++ file.
-
-        Returns
-        -------
-        list[ET.Element]
-        """
-        dimension_tags = root.findall("dap:Dimension", self._NS)
-        if not dimension_tags:
-            # Dim tags contain a fully qualified name that references a Dimension tag elsewhere in the DMR++
-            # or they are phony dimensions (have size but no name)
-            dim_tags = root.findall("dap:Dim", self._NS)
-            for d in dim_tags:
-                dim_name = self._get_attrib(d, "name")
-                if dim_name is not None:
-                    dimension_tag = self.find_node_fqn(dim_name)
-                    if dimension_tag is not None:
-                        dimension_tags.append(dimension_tag)
-                else:
-                    # Phony dimension - use the Dim tag directly
-                    dimension_tags.append(d)
-        return dimension_tags
-
-    def _parse_variable(self, var_tag: ET.Element) -> ManifestArray:
-        """
-        Parse a variable from a DMR++ tag.
-
-        Parameters
-        ----------
-        var_tag : ET.Element
-            An ElementTree Element representing a variable in the DMR++ file. Will have DAP dtype as tag. E.g. <Float32>
-
-        Returns
-        -------
-        ManifestArray
-        """
-
-        # Dimension info
-        dims: dict[str, int] = {}
-        dimension_tags = self._find_dimension_tags(var_tag)
-        for dim_index, dim in enumerate(dimension_tags):
-            dims.update(self._parse_dim(dim, dim_index=dim_index))
-        # convert DAP dtype to numpy dtype
-        dtype = np.dtype(
-            self._DAP_NP_DTYPE[var_tag.tag.removeprefix("{" + self._NS["dap"] + "}")]
-        )
-        # Chunks and Filters
-        shape: tuple[int, ...] = tuple(dims.values())
-        chunks_shape = shape
-        chunks_tag = var_tag.find("dmrpp:chunks", self._NS)
-        array_fill_value = np.array(0).astype(dtype)[()]
-        if chunks_tag is not None:
-            # Chunks
-            chunk_dim_text = chunks_tag.findtext(
-                "dmrpp:chunkDimensionSizes", namespaces=self._NS
-            )
-            if chunk_dim_text is not None:
-                # 1 1447 2895 -> (1, 1447, 2895)
-                chunks_shape = tuple(map(int, chunk_dim_text.split()))
-            else:
-                chunks_shape = shape
-            fill_value = self._get_attrib(chunks_tag, "fillValue")
-            if fill_value is not None:
-                array_fill_value = np.array(fill_value).astype(dtype)[()]
-            if chunks_shape:
-                chunkmanifest = self._parse_chunks(chunks_tag, chunks_shape)
-            else:
-                chunkmanifest = ChunkManifest(entries={}, shape=array_fill_value.shape)
-            # Filters
-            codecs = self._parse_filters(chunks_tag, dtype)
-
-        # Attributes
-        attrs: dict[str, Any] = {}
-        for attr_tag in var_tag.iterfind("dap:Attribute", self._NS):
-            attrs.update(self._parse_attribute(attr_tag))
-        if "_FillValue" in attrs:
-            encoded_cf_fill_value = encode_cf_fill_value(attrs["_FillValue"], dtype)
-            attrs["_FillValue"] = encoded_cf_fill_value
-
-        metadata = create_v3_array_metadata(
-            shape=shape,
-            data_type=dtype,
-            chunk_shape=chunks_shape,
-            codecs=codecs,
-            dimension_names=dims,
-            attributes=attrs,
-            fill_value=array_fill_value,
-        )
-        return ManifestArray(metadata=metadata, chunkmanifest=chunkmanifest)
-
-    def _parse_attribute(self, attr_tag: ET.Element) -> dict[str, Any]:
-        """
-        Parse an attribute from a DMR++ attr tag. Converts the attribute value to a native python type.
-        Raises an exception if nested attributes are passed. Container attributes must be unwrapped in the parent function.
-
-        Parameters
-        ----------
-        attr_tag : ET.Element
-            An ElementTree Element with an <Attr> tag.
-
-        Returns
-        -------
-        dict
-        """
-        attr: dict[str, Any] = {}
-        values = []
-        attr_type = self._get_attrib(attr_tag, "type")
-        attr_name = self._get_attrib(attr_tag, "name", required=True)
-
-        if attr_name is None:
-            return {}
-
-        if attr_type == "Container":
-            # DMR++ build information that is not part of the dataset
-            if attr_name == "build_dmrpp_metadata":
-                return {}
-            else:
-                warnings.warn(
-                    "This DMRpp contains a nested attribute "
-                    f"{attr_name}. Nested attributes cannot "
-                    "be assigned to a variable or dataset and will be dropped"
-                )
-                return {}
-
-        if attr_type is None:
-            return {}
-
-        dtype = np.dtype(self._DAP_NP_DTYPE[attr_type])
-        # if multiple Value tags are present, store as "key": "[v1, v2, ...]"
-        for value_tag in attr_tag:
-            # cast attribute to native python type using dmr provided dtype
-            val = (
-                dtype.type(value_tag.text).item()
-                if dtype != np.object_
-                else value_tag.text
-            )
-            # "*" may represent nan values in DMR++
-            if val == "*":
-                val = np.nan
-            values.append(val)
-        attr[attr_name] = values[0] if len(values) == 1 else values
-        return attr
-
-    def _parse_filters(
-        self, chunks_tag: ET.Element, dtype: np.dtype
-    ) -> list[dict] | None:
-        """
-        Parse filters from a DMR++ chunks tag.
-
-        Parameters
-        ----------
-        chunks_tag : ET.Element
-            An ElementTree Element with a <chunks> tag.
-
-        dtype : np.dtype
-            The numpy dtype of the variable.
-
-        Returns
-        -------
-        list[dict] | None
-            E.g. [{"id": "shuffle", "elementsize": 4}, {"id": "zlib", "level": 4}]
-        """
-        compression_type = self._get_attrib(chunks_tag, "compressionType")
-        if compression_type is not None:
-            filters: list[dict] = []
-            # shuffle deflate --> ["shuffle", "deflate"]
-            compression_types = compression_type.split(" ")
-            for c in compression_types:
-                if c == "shuffle":
-                    filters.append(
-                        {
-                            "name": "numcodecs.shuffle",
-                            "configuration": {"elementsize": dtype.itemsize},
-                        }
-                    )
-                elif c == "deflate":
-                    deflate_level = self._get_attrib(chunks_tag, "deflateLevel")
-                    level = (
-                        int(deflate_level)
-                        if deflate_level is not None
-                        else self._DEFAULT_ZLIB_VALUE
-                    )
-                    filters.append(
-                        {
-                            "name": "numcodecs.zlib",
-                            "configuration": {
-                                "level": level,
-                            },
-                        }
-                    )
-            return filters
-        return None
-
-    def _parse_chunks(
-        self, chunks_tag: ET.Element, chunks_shape: tuple[int, ...]
-    ) -> ChunkManifest:
-        """
-        Parse the chunk manifest from a DMR++ chunks tag.
-
-        Parameters
-        ----------
-        chunks_tag : ET.Element
-            An ElementTree Element with a <chunks> tag.
-
-        chunks_shape : tuple
-            Chunk sizes for each dimension. E.g. (1, 1447, 2895)
-
-        Returns
-        -------
-        ChunkManifest
-        """
-        chunkmanifest: dict[ChunkKey, object] = {}
-        default_num: list[int] = (
-            [0 for i in range(len(chunks_shape))] if chunks_shape else [0]
-        )
-        chunk_key_template = ".".join(["{}" for i in range(len(default_num))])
-        for chunk_tag in chunks_tag.iterfind("dmrpp:chunk", self._NS):
-            chunk_num = default_num
-            chunk_position = self._get_attrib(chunk_tag, "chunkPositionInArray")
-            if chunk_position is not None:
-                # "[0,1023,10235]" -> ["0","1023","10235"]
-                chunk_pos = chunk_position[1:-1].split(",")
-                # [0,1023,10235] // [1, 1023, 2047] -> [0,1,5]
-                chunk_num = [
-                    int(chunk_pos[i]) // chunks_shape[i]
-                    for i in range(len(chunks_shape))
-                ]
-            # [0,1,5] -> "0.1.5"
-            chunk_key = ChunkKey(chunk_key_template.format(*chunk_num))
-            offset = self._get_attrib(chunk_tag, "offset", required=True)
-            n_bytes = self._get_attrib(chunk_tag, "nBytes", required=True)
-            if offset is not None and n_bytes is not None:
-                chunkmanifest[chunk_key] = {
-                    "path": self.data_filepath,
-                    "offset": int(offset),
-                    "length": int(n_bytes),
-                }
-        return ChunkManifest(entries=chunkmanifest)

From 05e493554e0310b2b1317cf06dd2acd3828f3482 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 17:03:03 -0800
Subject: [PATCH 03/22] update DMRParser class to allow for on-the-fly dmrpps

---
 virtualizarr/parsers/dmrpp.py | 101 +++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 3 deletions(-)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 2887e6bb..f0a7f53d 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -1,13 +1,22 @@
 import io
+import warnings
+from pathlib import Path
 from typing import Iterable
 from xml.etree import ElementTree as ET
 
+from obspec_utils.protocols import ReadableStore
 from obspec_utils.readers import EagerStoreReader
 from obspec_utils.registry import ObjectStoreRegistry
+from pydap.parsers.dmr import DMRPPParser as _DMRPPParser
 
 from virtualizarr.manifests import (
+    ChunkManifest,
+    ManifestArray,
+    ManifestGroup,
     ManifestStore,
 )
+from virtualizarr.manifests.utils import create_v3_array_metadata
+from virtualizarr.parsers.utils import encode_cf_fill_value
 
 
 class DMRPPParser:
@@ -55,9 +64,15 @@ def __call__(
         file_bytes = reader.readall()
         stream = io.BytesIO(file_bytes)
 
+        url = (
+            url.removesuffix(".dap.dmrpp")
+            if url.endswith(".dap.dmrpp")
+            else url.removesuffix(".dmrpp")
+        )
+
         parser = DMRParser(
             root=ET.parse(stream).getroot(),
-            data_filepath=url.removesuffix(".dmrpp"),
+            data_filepath=url,
             skip_variables=self.skip_variables,
         )
         manifest_store = parser.parse_dataset(object_store=store, group=self.group)
@@ -101,6 +116,86 @@ def __init__(
             If None, the data file path is taken from the DMR++ file.
         """
         self.root = root
-        self._validation_issues: list[str] = []
-        self.data_filepath = data_filepath
+        self.data_filepath = (
+            data_filepath if data_filepath is not None else self.root.attrib["name"]
+        )
         self.skip_variables = skip_variables or ()
+
+    def dmrparser(self) -> _DMRPPParser:
+        """Exposes the _DMRParser to external use (avoids breaking changes)"""
+        return _DMRPPParser(
+            root=self.root,
+            data_filepath=self.data_filepath,
+            skip_variables=self.skip_variables,
+        )
+
+    def parse_dataset(
+        self,
+        object_store: ReadableStore,
+        group: str | None = None,
+    ) -> ManifestStore:
+        """
+        Parses the given file and creates a ManifestStore.
+
+        Parameters
+        ----------
+        group
+            The group to parse. Ignored if no groups are present, and the entire
+            dataset is parsed. If `None` or "/", and groups are present, the first group
+            is parsed.  If not `None` or "/", and no groups are present, a UserWarning
+            is issued indicating that the group will be ignored.
+
+        Returns
+        -------
+        ManifestStore
+
+        Examples
+        --------
+        Open a sample DMR++ file and parse the dataset
+        """
+        group = group or "/"
+        ngroups = len(self.root.findall("dap:Group", self._NS))
+
+        if ngroups == 0 and group != "/":
+            warnings.warn(
+                f"No groups in DMR++ file {self.data_filepath!r}; "
+                f"ignoring group parameter {group!r}"
+            )
+
+        group_path = Path("/") if ngroups == 0 else Path("/") / group.removeprefix("/")
+
+        dataset_element = self.dmrparser()._split_groups(self.root).get(group_path)
+
+        if dataset_element is None:
+            raise ValueError(
+                f"Group {group_path} not found in DMR++ file {self.data_filepath!r}"
+            )
+
+        # get two dictionary containing relevant metadata
+        vars_dict, attrs = self.dmrparser()._parse_dataset(dataset_element)
+
+        manifest_dict: dict[str, ManifestArray] = {}
+
+        for var in vars_dict.keys():
+            chunkmanifest = ChunkManifest(vars_dict[var].pop("chunkmanifest", None))
+            meta = dict(
+                [
+                    (k, v)
+                    for k, v in vars_dict[var].items()
+                    if k not in ["Maps", "fqn_dims"]
+                ]
+            )
+            if "_FillValue" in meta["attributes"]:
+                encoded_cf_fill_value = encode_cf_fill_value(
+                    meta["attributes"]["_FillValue"], meta["data_type"]
+                )
+                meta["attributes"]["_FillValue"] = encoded_cf_fill_value
+            metadata = create_v3_array_metadata(**meta)
+            manifest_dict[var] = ManifestArray(
+                metadata=metadata, chunkmanifest=chunkmanifest
+            )
+        manifest_group = ManifestGroup(arrays=manifest_dict, attributes=attrs)
+        registry = ObjectStoreRegistry()
+        registry.register(self.data_filepath, object_store)
+
+        return ManifestStore(registry=registry, group=manifest_group)

From 7ef4b1565bf52f23552c0a41e6c5a194d6bd010f Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 17:06:01 -0800
Subject: [PATCH 04/22] add pydap as optional dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 827e60f4..90d0031b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ remote = [
     "requests",
     "aiohttp",
     "s3fs",
+    "pydap @ git+https://github.com/pydap/pydap.git",
 ]
 
 # non-kerchunk-based parsers

From 278a3df6f7b37468c5d004f66f897de446facd10 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 17:38:09 -0800
Subject: [PATCH 05/22] comment tests as these are migrated

---
 virtualizarr/tests/test_parsers/test_dmrpp.py | 161 ++++++++++--------
 1 file changed, 87 insertions(+), 74 deletions(-)

diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index 01646250..ac4f562b 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -1,7 +1,6 @@
 import platform
 import textwrap
 from contextlib import nullcontext
-from pathlib import Path
 from xml.etree import ElementTree as ET
 
 import pytest
@@ -435,56 +434,56 @@ def test_NASA_dmrpp_load(data_url, dmrpp_url):
         assert ds.load()
 
 
-@pytest.mark.parametrize(
-    "fqn_path, expected_xpath",
-    [
-        ("/", "."),
-        ("/air", "./*[@name='air']"),
-    ],
-)
-def test_find_node_fqn_simple(netcdf4_file, fqn_path, expected_xpath):
-    parser_instance = dmrparser(
-        DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file
-    )
-    result = parser_instance.find_node_fqn(fqn_path)
-    expected = parser_instance.root.find(expected_xpath, parser_instance._NS)
-    assert result == expected
-
-
-def test_find_node_fqn_grouped(hdf5_groups_file):
-    parser_instance = dmrparser(
-        DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
-    )
-    result = parser_instance.find_node_fqn("/test/group/air")
-    expected = parser_instance.root.find(
-        "./*[@name='test']/*[@name='group']/*[@name='air']", parser_instance._NS
-    )
-    assert result == expected
-
-
-@pytest.mark.parametrize(
-    "group_path",
-    [
-        ("/"),
-        ("/test"),
-        ("/test/group"),
-    ],
-)
-def test_split_groups(hdf5_groups_file, group_path):
-    dmrpp_instance = dmrparser(
-        DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
-    )
-
-    # get all tags in a dataset (so all tags excluding nested groups)
-    dataset_tags = lambda x: [
-        d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group"
-    ]
-    # check that contents of the split groups dataset match contents of the original dataset
-    result_tags = dataset_tags(
-        dmrpp_instance._split_groups(dmrpp_instance.root)[Path(group_path)]
-    )
-    expected_tags = dataset_tags(dmrpp_instance.find_node_fqn(group_path))
-    assert result_tags == expected_tags
+# @pytest.mark.parametrize(
+#     "fqn_path, expected_xpath",
+#     [
+#         ("/", "."),
+#         ("/air", "./*[@name='air']"),
+#     ],
+# )
+# def test_find_node_fqn_simple(netcdf4_file, fqn_path, expected_xpath):
+#     parser_instance = dmrparser(
+#         DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file
+#     )
+#     result = parser_instance.find_node_fqn(fqn_path)
+#     expected = parser_instance.root.find(expected_xpath, parser_instance._NS)
+#     assert result == expected
+
+
+# def test_find_node_fqn_grouped(hdf5_groups_file):
+#     parser_instance = dmrparser(
+#         DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
+#     )
+#     result = parser_instance.find_node_fqn("/test/group/air")
+#     expected = parser_instance.root.find(
+#         "./*[@name='test']/*[@name='group']/*[@name='air']", parser_instance._NS
+#     )
+#     assert result == expected
+
+
+# @pytest.mark.parametrize(
+#     "group_path",
+#     [
+#         ("/"),
+#         ("/test"),
+#         ("/test/group"),
+#     ],
+# )
+# def test_split_groups(hdf5_groups_file, group_path):
+#     dmrpp_instance = dmrparser(
+#         DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
+#     )
+
+#     # get all tags in a dataset (so all tags excluding nested groups)
+#     dataset_tags = lambda x: [
+#         d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group"
+#     ]
+#     # check that contents of the split groups dataset match contents of the original dataset
+#     result_tags = dataset_tags(
+#         dmrpp_instance._split_groups(dmrpp_instance.root)[Path(group_path)]
+#     )
+#     expected_tags = dataset_tags(dmrpp_instance.find_node_fqn(group_path))
+#     assert result_tags == expected_tags
 
 
 @pytest.mark.xfail(
@@ -558,9 +557,10 @@ def test_parse_dataset_nested(hdf5_groups_file):
     assert vds_g2.data_vars["air"].dims == ("time", "lat", "lon")
 
 
-def test_parse_variable(netcdf4_file):
-    parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
+# def test_parse_variable(netcdf4_file):
+#     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
 
+<<<<<<< HEAD
     var = parser._parse_variable(parser.find_node_fqn("/air"))
     assert var.metadata.dtype.to_native_dtype() == "int16"
     assert var.metadata.dimension_names == ("time", "lat", "lon")
@@ -572,20 +572,33 @@ def test_parse_variable(netcdf4_file):
         var.metadata.attributes["long_name"]
         == "4xDaily Air temperature at sigma level 995"
     )
-
-
-@pytest.mark.parametrize(
-    "attr_path, expected",
-    [
-        ("air/long_name", {"long_name": "4xDaily Air temperature at sigma level 995"}),
-        ("air/scale_factor", {"scale_factor": 0.01}),
-    ],
-)
-def test_parse_attribute(netcdf4_file, attr_path, expected):
-    parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
-
-    result = parser._parse_attribute(parser.find_node_fqn(attr_path))
-    assert result == expected
+=======
+#     var = parser._parse_variable(parser.find_node_fqn("/air"))
+#     assert var.metadata.dtype.to_native_dtype() == "int16"
+#     assert var.metadata.dimension_names == ("time", "lat", "lon")
+#     assert var.shape == (2920, 25, 53)
+#     assert var.chunks == (2920, 25, 53)
+#     # _FillValue is encoded for array dtype
+#     assert var.metadata.attributes["scale_factor"] == 0.01
+#     assert (
+#         var.metadata.attributes["long_name"]
+#         == "4xDaily Air temperature at sigma level 995"
+#     )
+>>>>>>> 2536cb0 (comment tests as these are migrated)
+
+
+# @pytest.mark.parametrize(
+#     "attr_path, expected",
+#     [
+#         ("air/long_name", {"long_name": "4xDaily Air temperature at sigma level 995"}),
+#         ("air/scale_factor", {"scale_factor": 0.01}),
+#     ],
+# )
+# def test_parse_attribute(netcdf4_file, attr_path, expected):
+#     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
+
+#     result = parser._parse_attribute(parser.find_node_fqn(attr_path))
+#     assert result == expected
 
 
 def test_dmrpp_simple(dmrpp_xml_simple):
@@ -634,14 +647,14 @@ def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
         object_store=obstore_local(url="file:///"), group="/"
     )
 
-    # Verify that validation issues were accumulated
-    assert len(parser._validation_issues) > 0
+    # # Verify that validation issues were accumulated
+    # assert len(parser._validation_issues) > 0
 
-    # Check that the issues mention missing attributes
-    assert any(
-        "Missing required attribute 'name'" in issue
-        for issue in parser._validation_issues
-    )
+    # # Check that the issues mention missing attributes
+    # assert any(
+    #     "Missing required attribute 'name'" in issue
+    #     for issue in parser._validation_issues
+    # )
 
     # Verify manifest store was still created (parser continues despite issues)
     assert manifest_store is not None

From d01b22b03587b8e2b7b36b400e8f6b3f90a06e03 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 4 Mar 2026 17:40:47 -0800
Subject: [PATCH 06/22] raise NotImplementError if inline values

---
 virtualizarr/parsers/dmrpp.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index f0a7f53d..afa359d5 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -190,6 +190,14 @@ def parse_dataset(
                     meta["attributes"]["_FillValue"], meta["data_type"]
                 )
                 meta["attributes"]["_FillValue"] = encoded_cf_fill_value
+
+            if "inline" in meta:
+                raise NotImplementedError(
+                    "Reading inlined reference data is currently not supported."
+                    "See https://github.com/zarr-developers/VirtualiZarr/issues/489",
+                )
+                meta.pop("inline", None)
+
             metadata = create_v3_array_metadata(**meta)
             manifest_dict[var] = ManifestArray(
                 metadata=metadata, chunkmanifest=chunkmanifest

From 421097bc7522ac6e9b7abe21b2f942c83fd249d7 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Thu, 5 Mar 2026 10:47:30 -0800
Subject: [PATCH 07/22] install pydap from unreleased PR

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 90d0031b..25a14890 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ remote = [
     "requests",
     "aiohttp",
     "s3fs",
-    "pydap @ git+https://github.com/pydap/pydap.git",
+    "pydap @ git+https://github.com/pydap/pydap.git@refs/pull/659/merge",
 ]
 
 # non-kerchunk-based parsers

From 161d8312d16bd3e7dae7b45b44f47c6c0180d4cf Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 11 Mar 2026 10:04:39 -0700
Subject: [PATCH 08/22] rebase

---
 virtualizarr/tests/test_parsers/test_dmrpp.py | 36 ++++++-------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index ac4f562b..8b7fe557 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -560,31 +560,17 @@ def test_parse_dataset_nested(hdf5_groups_file):
 # def test_parse_variable(netcdf4_file):
 #     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
 
-<<<<<<< HEAD
-    var = parser._parse_variable(parser.find_node_fqn("/air"))
-    assert var.metadata.dtype.to_native_dtype() == "int16"
-    assert var.metadata.dimension_names == ("time", "lat", "lon")
-    assert var.shape == (2920, 25, 53)
-    assert var.metadata.chunks == (2920, 25, 53)
-    # _FillValue is encoded for array dtype
-    assert var.metadata.attributes["scale_factor"] == 0.01
-    assert (
-        var.metadata.attributes["long_name"]
-        == "4xDaily Air temperature at sigma level 995"
-    )
-=======
-#     var = parser._parse_variable(parser.find_node_fqn("/air"))
-#     assert var.metadata.dtype.to_native_dtype() == "int16"
-#     assert var.metadata.dimension_names == ("time", "lat", "lon")
-#     assert var.shape == (2920, 25, 53)
-#     assert var.chunks == (2920, 25, 53)
-#     # _FillValue is encoded for array dtype
-#     assert var.metadata.attributes["scale_factor"] == 0.01
-#     assert (
-#         var.metadata.attributes["long_name"]
-#         == "4xDaily Air temperature at sigma level 995"
-#     )
->>>>>>> 2536cb0 (comment tests as these are migrated)
+    # var = parser._parse_variable(parser.find_node_fqn("/air"))
+    # assert var.metadata.dtype.to_native_dtype() == "int16"
+    # assert var.metadata.dimension_names == ("time", "lat", "lon")
+    # assert var.shape == (2920, 25, 53)
+    # assert var.metadata.chunks == (2920, 25, 53)
+    # # _FillValue is encoded for array dtype
+    # assert var.metadata.attributes["scale_factor"] == 0.01
+    # assert (
+    #     var.metadata.attributes["long_name"]
+    #     == "4xDaily Air temperature at sigma level 995"
+    # )
 
 
 # @pytest.mark.parametrize(

From 0f5bf2f0edfeed63a7c809b437824c41e36023c5 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 11 Mar 2026 10:04:39 -0700
Subject: [PATCH 09/22] rebase

---
 conftest.py                                   |  9 ++
 virtualizarr/parsers/dmrpp.py                 | 10 ++-
 virtualizarr/tests/test_parsers/test_dmrpp.py | 87 ++++++-------------
 3 files changed, 44 insertions(+), 62 deletions(-)

diff --git a/conftest.py b/conftest.py
index 2c9d1c10..1538e60c 100644
--- a/conftest.py
+++ b/conftest.py
@@ -356,6 +356,15 @@ def hdf5_empty(tmp_path: Path) -> str:
     return str(filepath)
 
 
+@pytest.fixture
+def hdf5_missing_value(tmp_path: Path) -> str:
+    """Create an empty HDF5 file."""
+    filepath = tmp_path / "compact_lowlevel.h5"
+    with h5py.File(filepath, "w") as f:
+        f.create_dataset("my_dataset", shape=(10,), dtype="int64")
+    return str(filepath)
+
+
 @pytest.fixture
 def hdf5_scalar(tmp_path: Path) -> str:
     """Create an HDF5 file with a scalar dataset."""
diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index afa359d5..60ae7c15 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -120,14 +120,17 @@ def __init__(
             data_filepath if data_filepath is not None else self.root.attrib["name"]
         )
         self.skip_variables = skip_variables or ()
+        self._validation_issues: list[str] = []
 
     def dmrparser(self) -> _DMRPPParser:
         """Exposes the _DMRParser to external use (avoids breaking changes)"""
-        return _DMRPPParser(
+        parser = _DMRPPParser(
             root=self.root,
             data_filepath=self.data_filepath,
             skip_variables=self.skip_variables,
         )
+        self._validation_issues = parser._validation_issues
+        return parser
 
     def parse_dataset(
         self,
@@ -177,7 +180,7 @@ def parse_dataset(
         manifest_dict: dict[str, ManifestArray] = {}
 
         for var in vars_dict.keys():
-            chunkmanifest = ChunkManifest(vars_dict[var].pop("chunkmanifest", None))
+            chunkmanifest = vars_dict[var].pop("chunkmanifest", None)
             meta = dict(
                 [
                     (k, v)
@@ -193,11 +196,12 @@ def parse_dataset(
 
             if "inline" in meta:
                 raise NotImplementedError(
-                    "Reading inlined reference data is currently not supported."
+                    "Reading inlined reference data is currently not supported. "
                     "See https://github.com/zarr-developers/VirtualiZarr/issues/489",
                 )
                 meta.pop("inline", None)
 
+            chunkmanifest = ChunkManifest(chunkmanifest)
             metadata = create_v3_array_metadata(**meta)
             manifest_dict[var] = ManifestArray(
                 metadata=metadata, chunkmanifest=chunkmanifest
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index 8b7fe557..fcb6ab88 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -4,6 +4,7 @@
 from xml.etree import ElementTree as ET
 
 import pytest
+import requests
 import xarray as xr
 import xarray.testing as xrt
 from obspec_utils.registry import ObjectStoreRegistry
@@ -434,58 +435,6 @@ def test_NASA_dmrpp_load(data_url, dmrpp_url):
         assert ds.load()
 
 
-# @pytest.mark.parametrize(
-#     "fqn_path, expected_xpath",
-#     [
-#         ("/", "."),
-#         ("/air", "./*[@name='air']"),
-#     ],
-# )
-# def test_find_node_fqn_simple(netcdf4_file, fqn_path, expected_xpath):
-#     parser_instance = dmrparser(
-#         DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file
-#     )
-#     result = parser_instance.find_node_fqn(fqn_path)
-#     expected = parser_instance.root.find(expected_xpath, parser_instance._NS)
-#     assert result == expected
-
-
-# def test_find_node_fqn_grouped(hdf5_groups_file):
-#     parser_instance = dmrparser(
-#         DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
-#     )
-#     result = parser_instance.find_node_fqn("/test/group/air")
-#     expected = parser_instance.root.find(
-#         "./*[@name='test']/*[@name='group']/*[@name='air']", parser_instance._NS
-#     )
-#     assert result == expected
-
-
-# @pytest.mark.parametrize(
-#     "group_path",
-#     [
-#         ("/"),
-#         ("/test"),
-#         ("/test/group"),
-#     ],
-# )
-# def test_split_groups(hdf5_groups_file, group_path):
-#     dmrpp_instance = dmrparser(
-#         DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=hdf5_groups_file
-#     )
-
-#     # get all tags in a dataset (so all tags excluding nested groups)
-#     dataset_tags = lambda x: [
-#         d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group"
-#     ]
-#     # check that contents of the split groups dataset match contents of the original dataset
-#     result_tags = dataset_tags(
-#         dmrpp_instance._split_groups(dmrpp_instance.root)[Path(group_path)]
-#     )
-#     expected_tags = dataset_tags(dmrpp_instance.find_node_fqn(group_path))
-#     assert result_tags == expected_tags
-
-
 @pytest.mark.xfail(
     platform.system() == "Linux",
     reason="See https://github.com/zarr-developers/VirtualiZarr/issues/904.",
@@ -557,6 +506,7 @@ def test_parse_dataset_nested(hdf5_groups_file):
     assert vds_g2.data_vars["air"].dims == ("time", "lat", "lon")
 
 
+<<<<<<< HEAD
 # def test_parse_variable(netcdf4_file):
 #     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
 
@@ -587,6 +537,8 @@ def test_parse_dataset_nested(hdf5_groups_file):
 #     assert result == expected
 
 
+=======
+>>>>>>> 79b9a79 (rebase)
 def test_dmrpp_simple(dmrpp_xml_simple):
     """Test parsing a simple valid DMR++ XML creates virtual chunk manifests."""
     parser = dmrparser(dmrpp_xml_simple, filepath="file:///simple.nc")
@@ -633,15 +585,32 @@ def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
         object_store=obstore_local(url="file:///"), group="/"
     )
 
-    # # Verify that validation issues were accumulated
-    # assert len(parser._validation_issues) > 0
+    # Verify that validation issues were accumulated
+    assert len(parser._validation_issues) > 0
 
-    # # Check that the issues mention missing attributes
-    # assert any(
-    #     "Missing required attribute 'name'" in issue
-    #     for issue in parser._validation_issues
-    # )
+    # Check that the issues mention missing attributes
+    assert any(
+        "Missing required attribute 'name'" in issue
+        for issue in parser._validation_issues
+    )
 
     # Verify manifest store was still created (parser continues despite issues)
     assert manifest_store is not None
     assert manifest_store._group is not None
+
+
+@requires_network
+def test_nonimplement_inlinevalue(hdf5_missing_value):
+    dmrpp_file = (
+        "http://test.opendap.org/opendap/data/dmrpp/compact_lowlevel.h5.dmrpp.file"
+    )
+    session = requests.Session()
+    dmrpp = session.get(dmrpp_file).content.decode()
+    parser = dmrparser(dmrpp, filepath=f"file:///{hdf5_missing_value}")
+    store = obstore_local(url=parser.data_filepath)
+
+    with pytest.raises(
+        NotImplementedError,
+        match="Reading inlined reference data is currently not supported",
+    ):
+        parser.parse_dataset(object_store=store)

From c04f1278fc5b94f6cd435777e4479ef26cc1c481 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 11 Mar 2026 10:07:03 -0700
Subject: [PATCH 10/22] rebase

---
 docs/releases.md | 1 +
 pyproject.toml   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/releases.md b/docs/releases.md
index f28cc441..ecc99096 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -249,6 +249,7 @@ Brings `region`-writing support in `.to_icechunk()`, a `ZarrParser` with orders
 - Fix `ZarrParser` to use public attribute instead of private one ([#916](https://github.com/zarr-developers/VirtualiZarr/pull/916)).
   By [Max Jones](https://github.com/maxrjones).
 
+
 ### Documentation
 
 - Added FAQ answer comparing the Kerchunk and Icechunk serialization formats. ([#818](https://github.com/zarr-developers/VirtualiZarr/pull/818)).
diff --git a/pyproject.toml b/pyproject.toml
index 25a14890..26cc2406 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ remote = [
     "requests",
     "aiohttp",
     "s3fs",
-    "pydap @ git+https://github.com/pydap/pydap.git@refs/pull/659/merge",
+    "pydap>=3.5.9",
 ]
 
 # non-kerchunk-based parsers

From 1df2b87c789b32cc331242f3427c2f941e0553b1 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Thu, 5 Mar 2026 16:44:45 -0800
Subject: [PATCH 11/22] fix mypy

---
 pyproject.toml                | 1 +
 virtualizarr/parsers/dmrpp.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 26cc2406..50d31181 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -268,6 +268,7 @@ module = [
     "ujson",
     "zarr",
     "requests",
+    "pydap.*",
 ]
 ignore_missing_imports = true
 
diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 60ae7c15..5c672d19 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -207,7 +207,7 @@ def parse_dataset(
                 metadata=metadata, chunkmanifest=chunkmanifest
             )
         manifest_group = ManifestGroup(arrays=manifest_dict, attributes=attrs)
-        registry = ObjectStoreRegistry()
+        registry: ObjectStoreRegistry = ObjectStoreRegistry()
         registry.register(self.data_filepath, object_store)
 
         return ManifestStore(registry=registry, group=manifest_group)

From c6b11ff393194a9f455a303e09400e43ac739be2 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Wed, 11 Mar 2026 10:37:14 -0700
Subject: [PATCH 12/22] skip test on macOS too

---
 virtualizarr/tests/test_parsers/test_dmrpp.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index fcb6ab88..946a786f 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -1,4 +1,3 @@
-import platform
 import textwrap
 from contextlib import nullcontext
 from xml.etree import ElementTree as ET
@@ -436,7 +435,6 @@ def test_NASA_dmrpp_load(data_url, dmrpp_url):
 
 
 @pytest.mark.xfail(
-    platform.system() == "Linux",
     reason="See https://github.com/zarr-developers/VirtualiZarr/issues/904.",
 )
 @pytest.mark.parametrize(

From 4f3763adf45557a86c4e646ecd923be996ca3eb8 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Mon, 4 May 2026 12:43:31 -0700
Subject: [PATCH 13/22] update how inline references are handled v1

---
 virtualizarr/parsers/dmrpp.py                 | 16 +++++++++++-----
 virtualizarr/tests/test_parsers/test_dmrpp.py | 14 ++++++--------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 5c672d19..9fecfa1b 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -15,6 +15,7 @@
     ManifestGroup,
     ManifestStore,
 )
+from virtualizarr.manifests.manifest import ChunkEntry
 from virtualizarr.manifests.utils import create_v3_array_metadata
 from virtualizarr.parsers.utils import encode_cf_fill_value
 
@@ -181,6 +182,7 @@ def parse_dataset(
 
         for var in vars_dict.keys():
             chunkmanifest = vars_dict[var].pop("chunkmanifest", None)
+            # remove opendap-related metadata
             meta = dict(
                 [
                     (k, v)
@@ -195,17 +197,21 @@ def parse_dataset(
                 meta["attributes"]["_FillValue"] = encoded_cf_fill_value
 
             if "inline" in meta:
-                raise NotImplementedError(
-                    "Reading inlined reference data is currently not supported. "
-                    "See https://github.com/zarr-developers/VirtualiZarr/issues/489",
+                # chunkmanifest is empty - extract inline data from dict
+                data = meta.pop("inline", None)
+                shape = meta.pop("shape", None)
+                chunk_entry = ChunkEntry(
+                    path="__inlined__", offset=0, length=len(data), data=data
                 )
-                meta.pop("inline", None)
+                chunkmanifest = ChunkManifest(entries=chunk_entry, shape=shape)
+            else:
+                chunkmanifest = ChunkManifest(chunkmanifest)
 
-            chunkmanifest = ChunkManifest(chunkmanifest)
             metadata = create_v3_array_metadata(**meta)
             manifest_dict[var] = ManifestArray(
                 metadata=metadata, chunkmanifest=chunkmanifest
             )
+
         manifest_group = ManifestGroup(arrays=manifest_dict, attributes=attrs)
         registry: ObjectStoreRegistry = ObjectStoreRegistry()
         registry.register(self.data_filepath, object_store)
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index 946a786f..d0c77c10 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -597,18 +597,16 @@ def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
     assert manifest_store._group is not None
 
 
+@pytest.mark.xfail(
+    reason="inline reference is failing locally",
+)
 @requires_network
-def test_nonimplement_inlinevalue(hdf5_missing_value):
+def test_inlinevalue():
     dmrpp_file = (
         "http://test.opendap.org/opendap/data/dmrpp/compact_lowlevel.h5.dmrpp.file"
     )
     session = requests.Session()
     dmrpp = session.get(dmrpp_file).content.decode()
-    parser = dmrparser(dmrpp, filepath=f"file:///{hdf5_missing_value}")
+    parser = dmrparser(dmrpp, filepath="file:///")
     store = obstore_local(url=parser.data_filepath)
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Reading inlined reference data is currently not supported",
-    ):
-        parser.parse_dataset(object_store=store)
+    parser.parse_dataset(object_store=store)

From 8dccd228378f47cbd92fd140df1fe95001c5c6a7 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Mon, 4 May 2026 13:48:52 -0700
Subject: [PATCH 14/22] update how inline references are handled v2

---
 virtualizarr/parsers/dmrpp.py                 | 24 +++++++++++++------
 virtualizarr/tests/test_parsers/test_dmrpp.py | 13 ++++++----
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 9fecfa1b..5c5ae0a9 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -1,3 +1,4 @@
+import base64
 import io
 import warnings
 from pathlib import Path
@@ -15,7 +16,6 @@
     ManifestGroup,
     ManifestStore,
 )
-from virtualizarr.manifests.manifest import ChunkEntry
 from virtualizarr.manifests.utils import create_v3_array_metadata
 from virtualizarr.parsers.utils import encode_cf_fill_value
 
@@ -197,13 +197,23 @@ def parse_dataset(
                 meta["attributes"]["_FillValue"] = encoded_cf_fill_value
 
             if "inline" in meta:
-                # chunkmanifest is empty - extract inline data from dict
+                # extract data already decoded into array/string
                 data = meta.pop("inline", None)
-                shape = meta.pop("shape", None)
-                chunk_entry = ChunkEntry(
-                    path="__inlined__", offset=0, length=len(data), data=data
-                )
-                chunkmanifest = ChunkManifest(entries=chunk_entry, shape=shape)
+                bdata = base64.b64encode(data)
+                # chunk_entry = ChunkEntry(
+                #     path="", offset=0, length=len(bdata), data=bdata
+                # )
+                # chunkmanifest = ChunkManifest(entries=chunk_entry)
+
+                chunks = {
+                    "0.0": {
+                        "path": "__inline__",
+                        "offset": 0,
+                        "length": len(bdata),
+                        "data": bdata,
+                    },
+                }
+                chunkmanifest = ChunkManifest(entries=chunks)
             else:
                 chunkmanifest = ChunkManifest(chunkmanifest)
 
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index d0c77c10..5e382197 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -597,11 +597,13 @@ def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
     assert manifest_store._group is not None
 
 
-@pytest.mark.xfail(
-    reason="inline reference is failing locally",
-)
 @requires_network
 def test_inlinevalue():
+    """
+    Test that inline values can be parsed into manifest
+    """
+    expected_bytes = b"AAAAAAAAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAA="
+
     dmrpp_file = (
         "http://test.opendap.org/opendap/data/dmrpp/compact_lowlevel.h5.dmrpp.file"
     )
@@ -609,4 +611,7 @@ def test_inlinevalue():
     dmrpp = session.get(dmrpp_file).content.decode()
     parser = dmrparser(dmrpp, filepath="file:///")
     store = obstore_local(url=parser.data_filepath)
-    parser.parse_dataset(object_store=store)
+    ms = parser.parse_dataset(object_store=store)
+    vds = ms.to_virtual_dataset()
+    assert "my_dataset" in vds
+    assert ms._group["my_dataset"]._manifest._inlined == {(0, 0): expected_bytes}

From cf838b742f63613649d356d15c075c9272775882 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Mon, 4 May 2026 14:45:42 -0700
Subject: [PATCH 15/22] add pydap to min-deps via `dev` - avoid min-deps ci/cd
 failure

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 50d31181..74b93e05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,6 +123,7 @@ dev = [
     "pandas-stubs",
     "pooch",
     "pre-commit",
+    "pydap>=3.5.9"
     "pytest",
     "pytest-asyncio",
     "pytest-cov",

From 9a85bfdb07c5e3f1aca2aa5e4afe7eb890c62a68 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Mon, 4 May 2026 14:51:29 -0700
Subject: [PATCH 16/22] fix missing `,`

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 74b93e05..e33eb88c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,7 +123,7 @@ dev = [
     "pandas-stubs",
     "pooch",
     "pre-commit",
-    "pydap>=3.5.9"
+    "pydap>=3.5.9",
     "pytest",
     "pytest-asyncio",
     "pytest-cov",

From c1fec298407807158687dd8febbcd00b1d7f837a Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Tue, 9 Jun 2026 09:47:03 -0700
Subject: [PATCH 17/22] add a separate isolated opt-dep entry for dmrpp

---
 pyproject.toml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e33eb88c..ec4bfea4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,6 @@ remote = [
     "requests",
     "aiohttp",
     "s3fs",
-    "pydap>=3.5.9",
 ]
 
 # non-kerchunk-based parsers
@@ -51,6 +50,9 @@ hdf = [
     "imagecodecs-numcodecs==2024.6.1",
 ]
 
+# dmrpp
+dmrpp = ["pydap>=3.5.10"]
+
 zarr = ["arro3-core"]
 
 # kerchunk-based parsers
@@ -84,7 +86,8 @@ all_parsers = [
     "virtualizarr[kerchunk_parquet]",
     "virtualizarr[tiff]",
     "virtualizarr[grib]",
-    "virtualizarr[zarr]"
+    "virtualizarr[zarr]",
+    "virtualizarr[dmrpp]",
 ]
 
 # writers
@@ -123,7 +126,6 @@ dev = [
     "pandas-stubs",
     "pooch",
     "pre-commit",
-    "pydap>=3.5.9",
     "pytest",
     "pytest-asyncio",
     "pytest-cov",

From 2d08220eafb2941dd8916ab234708510b2b408d5 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Fri, 12 Jun 2026 16:30:13 -0700
Subject: [PATCH 18/22] update dmrpp migration - soft imports and code refactor

---
 pyproject.toml                                |   4 +-
 virtualizarr/parsers/dmrpp.py                 | 169 +-------
 virtualizarr/tests/__init__.py                |   2 +
 virtualizarr/tests/test_parsers/test_dmrpp.py | 374 +-----------------
 4 files changed, 23 insertions(+), 526 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ec4bfea4..7567ff4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,9 @@ hdf = [
 ]
 
 # dmrpp
-dmrpp = ["pydap>=3.5.10"]
+dmrpp = [
+    "pydap @ git+https://github.com/pydap/pydap.git@refs/pull/697/head",
+]
 
 zarr = ["arro3-core"]
 
diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 5c5ae0a9..29e183b9 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -1,23 +1,15 @@
-import base64
 import io
-import warnings
-from pathlib import Path
 from typing import Iterable
 from xml.etree import ElementTree as ET
 
-from obspec_utils.protocols import ReadableStore
+# from obspec_utils.protocols import ReadableStore
 from obspec_utils.readers import EagerStoreReader
 from obspec_utils.registry import ObjectStoreRegistry
-from pydap.parsers.dmr import DMRPPParser as _DMRPPParser
 
-from virtualizarr.manifests import (
-    ChunkManifest,
-    ManifestArray,
-    ManifestGroup,
-    ManifestStore,
-)
-from virtualizarr.manifests.utils import create_v3_array_metadata
-from virtualizarr.parsers.utils import encode_cf_fill_value
+from virtualizarr.manifests import ManifestStore
+from virtualizarr.utils import soft_import
+
+pydap = soft_import("pydap", "parsing dmrpp references", strict=False)
 
 
 class DMRPPParser:
@@ -71,6 +63,8 @@ def __call__(
             else url.removesuffix(".dmrpp")
         )
 
+        from pydap.virtualizarr.parser import DMRParser
+
         parser = DMRParser(
             root=ET.parse(stream).getroot(),
             data_filepath=url,
@@ -78,152 +72,3 @@ def __call__(
         )
         manifest_store = parser.parse_dataset(object_store=store, group=self.group)
         return manifest_store
-
-
-class DMRParser:
-    """
-    Parser for the OPeNDAP DMR++ XML format.
-    Reads groups, dimensions, coordinates, data variables, encoding, chunk manifests, and attributes.
-    Highly modular to allow support for older dmrpp schema versions. Includes many utility functions to extract
-    different information such as finding all variable tags, splitting hdf5 groups, parsing dimensions, and more.
-
-    OPeNDAP DMR++ homepage: https://docs.opendap.org/index.php/DMR%2B%2B
-    """
-
-    # DAP and DMRPP XML namespaces
-    _NS = {
-        "dap": "http://xml.opendap.org/ns/DAP/4.0#",
-        "dmrpp": "http://xml.opendap.org/dap/dmrpp/1.0.0#",
-    }
-
-    root: ET.Element
-    data_filepath: str
-
-    def __init__(
-        self,
-        root: ET.Element,
-        data_filepath: str | None = None,
-        skip_variables: Iterable[str] | None = None,
-    ):
-        """
-        Initialize the DMRParser with the given DMR++ file contents and source data file path.
-
-        Parameters
-        ----------
-        root
-            Root of the xml tree structure of a DMR++ file.
-        data_filepath
-            The path to the actual data file that will be set in the chunk manifests.
-            If None, the data file path is taken from the DMR++ file.
-        """
-        self.root = root
-        self.data_filepath = (
-            data_filepath if data_filepath is not None else self.root.attrib["name"]
-        )
-        self.skip_variables = skip_variables or ()
-        self._validation_issues: list[str] = []
-
-    def dmrparser(self) -> _DMRPPParser:
-        """Exposes the _DMRParser to external use (avoids breaking changes)"""
-        parser = _DMRPPParser(
-            root=self.root,
-            data_filepath=self.data_filepath,
-            skip_variables=self.skip_variables,
-        )
-        self._validation_issues = parser._validation_issues
-        return parser
-
-    def parse_dataset(
-        self,
-        object_store: ReadableStore,
-        group: str | None = None,
-    ) -> ManifestStore:
-        """
-        Parses the given file and creates a ManifestStore.
-
-        Parameters
-        ----------
-        group
-            The group to parse. Ignored if no groups are present, and the entire
-            dataset is parsed. If `None` or "/", and groups are present, the first group
-            is parsed.  If not `None` or "/", and no groups are present, a UserWarning
-            is issued indicating that the group will be ignored.
-
-        Returns
-        -------
-        ManifestStore
-
-        Examples
-        --------
-        Open a sample DMR++ file and parse the dataset
-        """
-        group = group or "/"
-        ngroups = len(self.root.findall("dap:Group", self._NS))
-
-        if ngroups == 0 and group != "/":
-            warnings.warn(
-                f"No groups in DMR++ file {self.data_filepath!r}; "
-                f"ignoring group parameter {group!r}"
-            )
-
-        group_path = Path("/") if ngroups == 0 else Path("/") / group.removeprefix("/")
-
-        dataset_element = self.dmrparser()._split_groups(self.root).get(group_path)
-
-        if dataset_element is None:
-            raise ValueError(
-                f"Group {group_path} not found in DMR++ file {self.data_filepath!r}"
-            )
-
-        # get two dictionary containing relevant metadata
-        vars_dict, attrs = self.dmrparser()._parse_dataset(dataset_element)
-
-        manifest_dict: dict[str, ManifestArray] = {}
-
-        for var in vars_dict.keys():
-            chunkmanifest = vars_dict[var].pop("chunkmanifest", None)
-            # remove opendap-related metadata
-            meta = dict(
-                [
-                    (k, v)
-                    for k, v in vars_dict[var].items()
-                    if k not in ["Maps", "fqn_dims"]
-                ]
-            )
-            if "_FillValue" in meta["attributes"]:
-                encoded_cf_fill_value = encode_cf_fill_value(
-                    meta["attributes"]["_FillValue"], meta["data_type"]
-                )
-                meta["attributes"]["_FillValue"] = encoded_cf_fill_value
-
-            if "inline" in meta:
-                # extract data already decoded into array/string
-                data = meta.pop("inline", None)
-                bdata = base64.b64encode(data)
-                # chunk_entry = ChunkEntry(
-                #     path="", offset=0, length=len(bdata), data=bdata
-                # )
-                # chunkmanifest = ChunkManifest(entries=chunk_entry)
-
-                chunks = {
-                    "0.0": {
-                        "path": "__inline__",
-                        "offset": 0,
-                        "length": len(bdata),
-                        "data": bdata,
-                    },
-                }
-                chunkmanifest = ChunkManifest(entries=chunks)
-            else:
-                chunkmanifest = ChunkManifest(chunkmanifest)
-
-            metadata = create_v3_array_metadata(**meta)
-            manifest_dict[var] = ManifestArray(
-                metadata=metadata, chunkmanifest=chunkmanifest
-            )
-
-        manifest_group = ManifestGroup(arrays=manifest_dict, attributes=attrs)
-        registry: ObjectStoreRegistry = ObjectStoreRegistry()
-        registry.register(self.data_filepath, object_store)
-
-        return ManifestStore(registry=registry, group=manifest_group)
diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py
index 026b7c66..78ff40a3 100644
--- a/virtualizarr/tests/__init__.py
+++ b/virtualizarr/tests/__init__.py
@@ -44,3 +44,5 @@ def _importorskip(
 has_arro3, requires_arro3 = _importorskip("arro3.core")
 # The GribberishParser is new in gribberish 1.0.0.
 has_grib, requires_grib = _importorskip("gribberish", minversion="1.0.0")
+# The DMRPPParser is new in pydap 3.5.10
+has_pydap, requires_pydap = _importorskip("pydap")
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index 5e382197..7e2890a9 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -1,20 +1,18 @@
-import textwrap
-from contextlib import nullcontext
 from xml.etree import ElementTree as ET
 
 import pytest
 import requests
 import xarray as xr
-import xarray.testing as xrt
 from obspec_utils.registry import ObjectStoreRegistry
-from packaging import version
 
 from virtualizarr.parsers import DMRPPParser, HDFParser
-from virtualizarr.parsers.dmrpp import DMRParser
-from virtualizarr.tests import requires_network, slow_test
+from virtualizarr.tests import requires_network, requires_pydap, slow_test
 from virtualizarr.tests.utils import obstore_local, obstore_s3
 from virtualizarr.xarray import open_virtual_dataset
 
+pytest.importorskip("pydap")
+from pydap.virtualizarr.parser import DMRParser  # noqa: E402
+
 urls = [
     (
         "s3://its-live-data/test-space/cloud-experiments/dmrpp/20240826090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc",
@@ -23,364 +21,8 @@
     # TODO: later add MUR, SWOT, TEMPO and others by using kerchunk JSON to read refs (rather than reading the whole netcdf file)
 ]
 
-# This DMRPP was created following the instructions from https://opendap.github.io/DMRpp-wiki/DMRpp.html#sec-build-them on the files produced by conftest.py with the key matching the fixture name.
-DMRPP_XML_STRINGS = {
-    "netcdf4_file": textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="air.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Dimension name="lat" size="25"/>
-            <Dimension name="lon" size="53"/>
-            <Dimension name="time" size="2920"/>
-            <Float32 name="lat">
-                <Dim name="/lat"/>
-                <Attribute name="_FillValue" type="Float32">
-                    <Value>NaN</Value>
-                </Attribute>
-                <Attribute name="standard_name" type="String">
-                    <Value>latitude</Value>
-                </Attribute>
-                <Attribute name="long_name" type="String">
-                    <Value>Latitude</Value>
-                </Attribute>
-                <Attribute name="units" type="String">
-                    <Value>degrees_north</Value>
-                </Attribute>
-                <Attribute name="axis" type="String">
-                    <Value>Y</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="5179" nBytes="100"/>
-                </dmrpp:chunks>
-            </Float32>
-            <Float32 name="lon">
-                <Dim name="/lon"/>
-                <Attribute name="_FillValue" type="Float32">
-                    <Value>NaN</Value>
-                </Attribute>
-                <Attribute name="standard_name" type="String">
-                    <Value>longitude</Value>
-                </Attribute>
-                <Attribute name="long_name" type="String">
-                    <Value>Longitude</Value>
-                </Attribute>
-                <Attribute name="units" type="String">
-                    <Value>degrees_east</Value>
-                </Attribute>
-                <Attribute name="axis" type="String">
-                    <Value>X</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="5279" nBytes="212"/>
-                </dmrpp:chunks>
-            </Float32>
-            <Float32 name="time">
-                <Dim name="/time"/>
-                <Attribute name="_FillValue" type="Float32">
-                    <Value>NaN</Value>
-                </Attribute>
-                <Attribute name="standard_name" type="String">
-                    <Value>time</Value>
-                </Attribute>
-                <Attribute name="long_name" type="String">
-                    <Value>Time</Value>
-                </Attribute>
-                <Attribute name="units" type="String">
-                    <Value>hours since 1800-01-01</Value>
-                </Attribute>
-                <Attribute name="calendar" type="String">
-                    <Value>standard</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                    <dmrpp:chunk offset="7757499" nBytes="11680"/>
-                </dmrpp:chunks>
-            </Float32>
-            <Int16 name="air">
-                <Dim name="/time"/>
-                <Dim name="/lat"/>
-                <Dim name="/lon"/>
-                <Attribute name="long_name" type="String">
-                    <Value>4xDaily Air temperature at sigma level 995</Value>
-                </Attribute>
-                <Attribute name="units" type="String">
-                    <Value>degK</Value>
-                </Attribute>
-                <Attribute name="precision" type="Int16">
-                    <Value>2</Value>
-                </Attribute>
-                <Attribute name="GRIB_id" type="Int16">
-                    <Value>11</Value>
-                </Attribute>
-                <Attribute name="GRIB_name" type="String">
-                    <Value>TMP</Value>
-                </Attribute>
-                <Attribute name="var_desc" type="String">
-                    <Value>Air temperature</Value>
-                </Attribute>
-                <Attribute name="dataset" type="String">
-                    <Value>NMC Reanalysis</Value>
-                </Attribute>
-                <Attribute name="level_desc" type="String">
-                    <Value>Surface</Value>
-                </Attribute>
-                <Attribute name="statistic" type="String">
-                    <Value>Individual Obs</Value>
-                </Attribute>
-                <Attribute name="parent_stat" type="String">
-                    <Value>Other</Value>
-                </Attribute>
-                <Attribute name="actual_range" type="Float32">
-                    <Value>185.1600037</Value>
-                    <Value>322.1000061</Value>
-                </Attribute>
-                <Attribute name="scale_factor" type="Float64">
-                    <Value>0.01</Value>
-                </Attribute>
-                <Map name="/time"/>
-                <Map name="/lat"/>
-                <Map name="/lon"/>
-                <dmrpp:chunks fillValue="-32767" byteOrder="LE">
-                    <dmrpp:chunk offset="10283" nBytes="7738000"/>
-                </dmrpp:chunks>
-            </Int16>
-            <Attribute name="Conventions" type="String">
-                <Value>COARDS</Value>
-            </Attribute>
-            <Attribute name="title" type="String">
-                <Value>4x daily NMC reanalysis (1948)</Value>
-            </Attribute>
-            <Attribute name="description" type="String">
-                <Value>Data is from NMC initialized reanalysis
-        (4x/day).  These are the 0.9950 sigma level values.</Value>
-            </Attribute>
-            <Attribute name="platform" type="String">
-                <Value>Model</Value>
-            </Attribute>
-            <Attribute name="references" type="String">
-                <Value>http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html</Value>
-            </Attribute>
-            <Attribute name="build_dmrpp_metadata" type="Container">
-                <Attribute name="created" type="String">
-                    <Value>2025-07-16T18:48:42Z</Value>
-                </Attribute>
-                <Attribute name="build_dmrpp" type="String">
-                    <Value>3.21.1-451</Value>
-                </Attribute>
-                <Attribute name="bes" type="String">
-                    <Value>3.21.1-451</Value>
-                </Attribute>
-                <Attribute name="libdap" type="String">
-                    <Value>libdap-3.21.1-178</Value>
-                </Attribute>
-                <Attribute name="invocation" type="String">
-                    <Value>build_dmrpp -f /usr/share/hyrax/air.nc -r air.nc.dmr -u OPeNDAP_DMRpp_DATA_ACCESS_URL -M</Value>
-                </Attribute>
-            </Attribute>
-        </Dataset>
-        """
-    ),
-    "hdf5_groups_file": textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="hdf5_groups_file.nc" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-451">
-            <Attribute name="build_dmrpp_metadata" type="Container">
-                <Attribute name="created" type="String">
-                    <Value>2025-07-16T21:57:57Z</Value>
-                </Attribute>
-                <Attribute name="build_dmrpp" type="String">
-                    <Value>3.21.1-451</Value>
-                </Attribute>
-                <Attribute name="bes" type="String">
-                    <Value>3.21.1-451</Value>
-                </Attribute>
-                <Attribute name="libdap" type="String">
-                    <Value>libdap-3.21.1-178</Value>
-                </Attribute>
-                <Attribute name="invocation" type="String">
-                    <Value>build_dmrpp -f /usr/share/hyrax/hdf5_groups_file.nc -r hdf5_groups_file.nc.dmr -u OPeNDAP_DMRpp_DATA_ACCESS_URL -M</Value>
-                </Attribute>
-            </Attribute>
-            <Group name="test">
-                <Group name="group">
-                    <Dimension name="lat" size="25"/>
-                    <Dimension name="lon" size="53"/>
-                    <Dimension name="time" size="2920"/>
-                    <Float32 name="lat">
-                        <Dim name="/test/group/lat"/>
-                        <Attribute name="_FillValue" type="Float32">
-                            <Value>NaN</Value>
-                        </Attribute>
-                        <Attribute name="standard_name" type="String">
-                            <Value>latitude</Value>
-                        </Attribute>
-                        <Attribute name="long_name" type="String">
-                            <Value>Latitude</Value>
-                        </Attribute>
-                        <Attribute name="units" type="String">
-                            <Value>degrees_north</Value>
-                        </Attribute>
-                        <Attribute name="axis" type="String">
-                            <Value>Y</Value>
-                        </Attribute>
-                        <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                            <dmrpp:chunk offset="5533" nBytes="100"/>
-                        </dmrpp:chunks>
-                    </Float32>
-                    <Float32 name="lon">
-                        <Dim name="/test/group/lon"/>
-                        <Attribute name="_FillValue" type="Float32">
-                            <Value>NaN</Value>
-                        </Attribute>
-                        <Attribute name="standard_name" type="String">
-                            <Value>longitude</Value>
-                        </Attribute>
-                        <Attribute name="long_name" type="String">
-                            <Value>Longitude</Value>
-                        </Attribute>
-                        <Attribute name="units" type="String">
-                            <Value>degrees_east</Value>
-                        </Attribute>
-                        <Attribute name="axis" type="String">
-                            <Value>X</Value>
-                        </Attribute>
-                        <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                            <dmrpp:chunk offset="5633" nBytes="212"/>
-                        </dmrpp:chunks>
-                    </Float32>
-                    <Float32 name="time">
-                        <Dim name="/test/group/time"/>
-                        <Attribute name="_FillValue" type="Float32">
-                            <Value>NaN</Value>
-                        </Attribute>
-                        <Attribute name="standard_name" type="String">
-                            <Value>time</Value>
-                        </Attribute>
-                        <Attribute name="long_name" type="String">
-                            <Value>Time</Value>
-                        </Attribute>
-                        <Attribute name="units" type="String">
-                            <Value>hours since 1800-01-01</Value>
-                        </Attribute>
-                        <Attribute name="calendar" type="String">
-                            <Value>standard</Value>
-                        </Attribute>
-                        <dmrpp:chunks fillValue="nan" byteOrder="LE">
-                            <dmrpp:chunk offset="7756034" nBytes="11680"/>
-                        </dmrpp:chunks>
-                    </Float32>
-                    <Int16 name="air">
-                        <Dim name="/test/group/time"/>
-                        <Dim name="/test/group/lat"/>
-                        <Dim name="/test/group/lon"/>
-                        <Attribute name="long_name" type="String">
-                            <Value>4xDaily Air temperature at sigma level 995</Value>
-                        </Attribute>
-                        <Attribute name="units" type="String">
-                            <Value>degK</Value>
-                        </Attribute>
-                        <Attribute name="precision" type="Int16">
-                            <Value>2</Value>
-                        </Attribute>
-                        <Attribute name="GRIB_id" type="Int16">
-                            <Value>11</Value>
-                        </Attribute>
-                        <Attribute name="GRIB_name" type="String">
-                            <Value>TMP</Value>
-                        </Attribute>
-                        <Attribute name="var_desc" type="String">
-                            <Value>Air temperature</Value>
-                        </Attribute>
-                        <Attribute name="dataset" type="String">
-                            <Value>NMC Reanalysis</Value>
-                        </Attribute>
-                        <Attribute name="level_desc" type="String">
-                            <Value>Surface</Value>
-                        </Attribute>
-                        <Attribute name="statistic" type="String">
-                            <Value>Individual Obs</Value>
-                        </Attribute>
-                        <Attribute name="parent_stat" type="String">
-                            <Value>Other</Value>
-                        </Attribute>
-                        <Attribute name="actual_range" type="Float32">
-                            <Value>185.1600037</Value>
-                            <Value>322.1000061</Value>
-                        </Attribute>
-                        <Attribute name="scale_factor" type="Float64">
-                            <Value>0.01</Value>
-                        </Attribute>
-                        <Map name="/test/group/time"/>
-                        <Map name="/test/group/lat"/>
-                        <Map name="/test/group/lon"/>
-                        <dmrpp:chunks fillValue="-32767" byteOrder="LE">
-                            <dmrpp:chunk offset="10554" nBytes="7738000"/>
-                        </dmrpp:chunks>
-                    </Int16>
-                    <Attribute name="Conventions" type="String">
-                        <Value>COARDS</Value>
-                    </Attribute>
-                    <Attribute name="title" type="String">
-                        <Value>4x daily NMC reanalysis (1948)</Value>
-                    </Attribute>
-                    <Attribute name="description" type="String">
-                        <Value>Data is from NMC initialized reanalysis
-        (4x/day).  These are the 0.9950 sigma level values.</Value>
-                    </Attribute>
-                    <Attribute name="platform" type="String">
-                        <Value>Model</Value>
-                    </Attribute>
-                    <Attribute name="references" type="String">
-                        <Value>http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html</Value>
-                    </Attribute>
-                </Group>
-            </Group>
-        </Dataset>
-            """
-    ),
-    "fill_value_scalar_no_chunks_nc4_url": textwrap.dedent(
-        """\
-        <?xml version="1.0" encoding="ISO-8859-1"?>
-        <Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="fill_value_scalar_no_chunks.nc4" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-477">
-            <Int32 name="data">
-                <Attribute name="_FillValue" type="Int32">
-                    <Value>-999</Value>
-                </Attribute>
-                <dmrpp:chunks fillValue="-999"/>
-            </Int32>
-            <Attribute name="long_name" type="String">
-                <Value>empty scalar data</Value>
-            </Attribute>
-            <Attribute name="drop_container_attribute" type="Container">
-                <Attribute name="created" type="String">
-                    <Value>2025-08-14T23:32:01Z</Value>
-                </Attribute>
-                <Attribute name="reason" type="String">
-                    <Value>container attributes are no longer supported</Value>
-                </Attribute>
-            </Attribute>
-            <Attribute name="build_dmrpp_metadata" type="Container">
-                <Attribute name="created" type="String">
-                    <Value>2025-08-14T23:32:01Z</Value>
-                </Attribute>
-                <Attribute name="build_dmrpp" type="String">
-                    <Value>3.21.1-477</Value>
-                </Attribute>
-                <Attribute name="bes" type="String">
-                    <Value>3.21.1-477</Value>
-                </Attribute>
-                <Attribute name="libdap" type="String">
-                    <Value>libdap-3.21.1-222</Value>
-                </Attribute>
-                <Attribute name="invocation" type="String">
-                    <Value>build_dmrpp -f /usr/share/hyrax/fill_value_scalar_no_chunks.nc4 -r fill_value_scalar_no_chunks.nc4.dmr -u OPeNDAP_DMRpp_DATA_ACCESS_URL -M</Value>
-                </Attribute>
-            </Attribute>
-        </Dataset>
-        """
-    ),
-}
-
 
+@requires_pydap
 def dmrparser(dmrpp_xml_str: str, filepath: str) -> DMRParser:
     # TODO we should actually create a dmrpp file in a temporary directory
     # this would avoid the need to pass tmp_path separately
@@ -434,6 +76,7 @@ def test_NASA_dmrpp_load(data_url, dmrpp_url):
         assert ds.load()
 
 
+<<<<<<< HEAD
 @pytest.mark.xfail(
     reason="See https://github.com/zarr-developers/VirtualiZarr/issues/904.",
 )
@@ -537,6 +180,9 @@ def test_parse_dataset_nested(hdf5_groups_file):
 
 =======
 >>>>>>> 79b9a79 (rebase)
+=======
+@requires_pydap
+>>>>>>> 5fa3d8f (update dmrpp migration - soft imports and code refactor)
 def test_dmrpp_simple(dmrpp_xml_simple):
     """Test parsing a simple valid DMR++ XML creates virtual chunk manifests."""
     parser = dmrparser(dmrpp_xml_simple, filepath="file:///simple.nc")
@@ -572,6 +218,7 @@ def test_dmrpp_simple(dmrpp_xml_simple):
         assert isinstance(chunk_info["length"], int)
 
 
+@requires_pydap
 def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
     """Test that validation issues are accumulated for missing attributes."""
     parser = dmrparser(
@@ -598,6 +245,7 @@ def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
 
 
 @requires_network
+@requires_pydap
 def test_inlinevalue():
     """
     Test that inline values can be parsed into manifest

From d3c89e9f0a1c24a33bbde47101476142755cef54 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Fri, 12 Jun 2026 16:45:12 -0700
Subject: [PATCH 19/22] add dmrpp testing to py312 and py313

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7567ff4c..a7c6c652 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -220,8 +220,8 @@ run-flaky-tests = { cmd = "pytest -m flaky --run-flaky --run-network-tests --ver
 min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
 # Inherit from min-deps to get all the test commands, along with optional dependencies
 test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "zarr", "py314"]
-test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "grib", "zarr", "py312"] # test against python 3.13
-test-py313 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "grib", "zarr", "py313"] # test against python 3.13
+test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "grib", "dmrpp", "zarr", "py312"] # test against python 3.13
+test-py313 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "grib", "dmrpp", "zarr", "py313"] # test against python 3.13
 test-py314 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "zarr", "py314"] # test against python 3.14
 minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py314", "zarr", "minio"]
 # tiff (virtual-tiff) is excluded: virtual-tiff 0.5.0 imports zarr's CodecJSON

From cf4cefa95dcdf6d273f1fa373a9bc88a0ca156df Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Mon, 22 Jun 2026 22:11:04 -0700
Subject: [PATCH 20/22] migrate dmrpp reading to pydap

---
 virtualizarr/parsers/dmrpp.py                 | 20 +++----------------
 virtualizarr/tests/test_parsers/test_dmrpp.py |  6 +++---
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py
index 29e183b9..ad83a4a4 100644
--- a/virtualizarr/parsers/dmrpp.py
+++ b/virtualizarr/parsers/dmrpp.py
@@ -1,9 +1,5 @@
-import io
 from typing import Iterable
-from xml.etree import ElementTree as ET
 
-# from obspec_utils.protocols import ReadableStore
-from obspec_utils.readers import EagerStoreReader
 from obspec_utils.registry import ObjectStoreRegistry
 
 from virtualizarr.manifests import ManifestStore
@@ -52,23 +48,13 @@ def __call__(
         ManifestStore
             A ManifestStore that provides a Zarr representation of the data source referenced by the DMR++ file.
         """
-        store, path_in_store = registry.resolve(url)
-        reader = EagerStoreReader(store=store, path=path_in_store)
-        file_bytes = reader.readall()
-        stream = io.BytesIO(file_bytes)
-
-        url = (
-            url.removesuffix(".dap.dmrpp")
-            if url.endswith(".dap.dmrpp")
-            else url.removesuffix(".dmrpp")
-        )
 
         from pydap.virtualizarr.parser import DMRParser
 
         parser = DMRParser(
-            root=ET.parse(stream).getroot(),
-            data_filepath=url,
+            url=url,
+            object_store=registry,
             skip_variables=self.skip_variables,
         )
-        manifest_store = parser.parse_dataset(object_store=store, group=self.group)
+        manifest_store = parser.parse_dataset(group=self.group)
         return manifest_store
diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index 7e2890a9..a684f7a2 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -1,4 +1,4 @@
-from xml.etree import ElementTree as ET
+# from xml.etree import ElementTree as ET
 
 import pytest
 import requests
@@ -23,11 +23,11 @@
 
 
 @requires_pydap
-def dmrparser(dmrpp_xml_str: str, filepath: str) -> DMRParser:
+def dmrparser(url: str, filepath: str) -> DMRParser:
     # TODO we should actually create a dmrpp file in a temporary directory
     # this would avoid the need to pass tmp_path separately
 
-    return DMRParser(root=ET.fromstring(dmrpp_xml_str), data_filepath=filepath)
+    return DMRParser(url=url, data_filepath=filepath)
 
 
 @slow_test

From 86d8f6282ebef888e2baf6aa35630a27625a161a Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Tue, 23 Jun 2026 22:05:04 -0700
Subject: [PATCH 21/22] rebase + update tests

---
 virtualizarr/tests/test_parsers/test_dmrpp.py | 142 ++----------------
 1 file changed, 14 insertions(+), 128 deletions(-)

diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py
index a684f7a2..fd90b042 100644
--- a/virtualizarr/tests/test_parsers/test_dmrpp.py
+++ b/virtualizarr/tests/test_parsers/test_dmrpp.py
@@ -1,5 +1,3 @@
-# from xml.etree import ElementTree as ET
-
 import pytest
 import requests
 import xarray as xr
@@ -22,16 +20,9 @@
 ]
 
 
-@requires_pydap
-def dmrparser(url: str, filepath: str) -> DMRParser:
-    # TODO we should actually create a dmrpp file in a temporary directory
-    # this would avoid the need to pass tmp_path separately
-
-    return DMRParser(url=url, data_filepath=filepath)
-
-
 @slow_test
 @requires_network
+@requires_pydap
 @pytest.mark.parametrize("data_url, dmrpp_url", urls)
 def test_NASA_dmrpp(data_url, dmrpp_url):
     store = obstore_s3(
@@ -58,6 +49,7 @@ def test_NASA_dmrpp(data_url, dmrpp_url):
 
 
 @requires_network
+@requires_pydap
 @slow_test
 @pytest.mark.parametrize("data_url, dmrpp_url", urls)
 def test_NASA_dmrpp_load(data_url, dmrpp_url):
@@ -76,121 +68,15 @@ def test_NASA_dmrpp_load(data_url, dmrpp_url):
         assert ds.load()
 
 
-<<<<<<< HEAD
-@pytest.mark.xfail(
-    reason="See https://github.com/zarr-developers/VirtualiZarr/issues/904.",
-)
-@pytest.mark.parametrize(
-    "group,warns",
-    [
-        pytest.param(None, False, id="None"),
-        pytest.param("/", False, id="/"),
-        pytest.param("/no-such-group", True, id="/no-such-group"),
-    ],
-)
-def test_parse_dataset(group: str | None, warns: bool, netcdf4_file):
-    drmpp = dmrparser(
-        DMRPP_XML_STRINGS["netcdf4_file"], filepath=f"file://{netcdf4_file}"
-    )
-    store = obstore_local(url=drmpp.data_filepath)
-
-    with nullcontext() if warns else pytest.raises(BaseException, match="DID NOT WARN"):
-        with pytest.warns(UserWarning, match=f"ignoring group parameter {group!r}"):
-            ms = drmpp.parse_dataset(object_store=store, group=group)
-
-    vds = ms.to_virtual_dataset()
-
-    assert vds.sizes == {"lat": 25, "lon": 53, "time": 2920}
-    assert vds.data_vars.keys() == {"air"}
-    assert vds.data_vars["air"].dims == ("time", "lat", "lon")
-    assert vds.attrs == {
-        "Conventions": "COARDS",
-        "title": "4x daily NMC reanalysis (1948)",
-        "description": "Data is from NMC initialized reanalysis\n(4x/day).  These are the 0.9950 sigma level values.",
-        "platform": "Model",
-        "references": "http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html",
-    }
-    assert vds.coords.keys() == {"lat", "lon", "time"}
-
-
-@pytest.mark.xfail(
-    version.parse(xr.__version__) < version.parse("2025.7.1"),
-    reason="Offsets in file changed",
-)
-def test_parse_dataset_nested(hdf5_groups_file):
-    nested_groups_dmrpp = dmrparser(
-        DMRPP_XML_STRINGS["hdf5_groups_file"], filepath=f"file://{hdf5_groups_file}"
-    )
-    store = obstore_local(url=f"file://{nested_groups_dmrpp.data_filepath}")
-
-    vds_root_implicit = nested_groups_dmrpp.parse_dataset(
-        object_store=store
-    ).to_virtual_dataset(loadable_variables=[])
-    vds_root = nested_groups_dmrpp.parse_dataset(
-        group="/", object_store=store
-    ).to_virtual_dataset(loadable_variables=[])
-
-    xrt.assert_identical(vds_root_implicit, vds_root)
-    assert vds_root.sizes == {}
-
-    vds_g1 = nested_groups_dmrpp.parse_dataset(
-        group="/test", object_store=store
-    ).to_virtual_dataset(loadable_variables=[])
-    assert vds_g1.sizes == {}
-
-    vds_g2 = nested_groups_dmrpp.parse_dataset(
-        group="/test/group", object_store=store
-    ).to_virtual_dataset(loadable_variables=[])
-
-    assert vds_g2.sizes == {"time": 2920, "lat": 25, "lon": 53}
-    assert vds_g2.data_vars.keys() == {"air"}
-    assert vds_g2.data_vars["air"].dims == ("time", "lat", "lon")
-
-
-<<<<<<< HEAD
-# def test_parse_variable(netcdf4_file):
-#     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
-
-    # var = parser._parse_variable(parser.find_node_fqn("/air"))
-    # assert var.metadata.dtype.to_native_dtype() == "int16"
-    # assert var.metadata.dimension_names == ("time", "lat", "lon")
-    # assert var.shape == (2920, 25, 53)
-    # assert var.metadata.chunks == (2920, 25, 53)
-    # # _FillValue is encoded for array dtype
-    # assert var.metadata.attributes["scale_factor"] == 0.01
-    # assert (
-    #     var.metadata.attributes["long_name"]
-    #     == "4xDaily Air temperature at sigma level 995"
-    # )
-
-
-# @pytest.mark.parametrize(
-#     "attr_path, expected",
-#     [
-#         ("air/long_name", {"long_name": "4xDaily Air temperature at sigma level 995"}),
-#         ("air/scale_factor", {"scale_factor": 0.01}),
-#     ],
-# )
-# def test_parse_attribute(netcdf4_file, attr_path, expected):
-#     parser = dmrparser(DMRPP_XML_STRINGS["netcdf4_file"], filepath=netcdf4_file)
-
-#     result = parser._parse_attribute(parser.find_node_fqn(attr_path))
-#     assert result == expected
-
-
-=======
->>>>>>> 79b9a79 (rebase)
-=======
 @requires_pydap
->>>>>>> 5fa3d8f (update dmrpp migration - soft imports and code refactor)
 def test_dmrpp_simple(dmrpp_xml_simple):
     """Test parsing a simple valid DMR++ XML creates virtual chunk manifests."""
-    parser = dmrparser(dmrpp_xml_simple, filepath="file:///simple.nc")
+    parser = DMRParser(
+        dmrpp_xml_simple, object_store=obstore_local(url="file:///simple.nc")
+    )
 
     # Parse dataset
-    manifest_store = parser.parse_dataset(
-        object_store=obstore_local(url="file:///"), group="/"
-    )
+    manifest_store = parser.parse_dataset(group="/")
 
     # Verify manifest store is created
     assert manifest_store is not None
@@ -221,14 +107,13 @@ def test_dmrpp_simple(dmrpp_xml_simple):
 @requires_pydap
 def test_dmrpp_missing_attrib_validation(dmrpp_xml_with_missing_attrib):
     """Test that validation issues are accumulated for missing attributes."""
-    parser = dmrparser(
-        dmrpp_xml_with_missing_attrib, filepath="file:///validation_test.nc"
+    parser = DMRParser(
+        dmrpp_xml_with_missing_attrib,
+        object_store=obstore_local(url="file:///validation_test.nc"),
     )
 
     # Parse dataset - this should accumulate validation issues
-    manifest_store = parser.parse_dataset(
-        object_store=obstore_local(url="file:///"), group="/"
-    )
+    manifest_store = parser.parse_dataset(group="/")
 
     # Verify that validation issues were accumulated
     assert len(parser._validation_issues) > 0
@@ -257,9 +142,10 @@ def test_inlinevalue():
     )
     session = requests.Session()
     dmrpp = session.get(dmrpp_file).content.decode()
-    parser = dmrparser(dmrpp, filepath="file:///")
-    store = obstore_local(url=parser.data_filepath)
-    ms = parser.parse_dataset(object_store=store)
+    parser = DMRParser(
+        dmrpp, object_store=obstore_local(url="file:///compact_lowlevel.h5")
+    )
+    ms = parser.parse_dataset()
     vds = ms.to_virtual_dataset()
     assert "my_dataset" in vds
     assert ms._group["my_dataset"]._manifest._inlined == {(0, 0): expected_bytes}

From 75cab1c77ddd06d7ffbc7f1180b482624d2db2d8 Mon Sep 17 00:00:00 2001
From: Miguel Jimenez-Urias <mjimenez@opendap.org>
Date: Tue, 23 Jun 2026 22:14:51 -0700
Subject: [PATCH 22/22] document the change

---
 docs/releases.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/releases.md b/docs/releases.md
index ecc99096..32ac6f22 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -33,6 +33,9 @@
 
 ### Internal changes
 
+- `DMRPPParser` is now migrated to [pydap](https://github.com/pydap/pydap) (`pydap>=3.5.10`) (see [pydap#417](https://github.com/pydap/pydap/issues/417)). To use the dmrpp parser one must install the extra `pip install "virtualizarr[dmrpp]"`.
+  By [Miguel Jimenez-Urias](https://github.com/Mikejmnez)
+
 - Speed up virtual chunk container validation when writing to Icechunk by passing the supported prefixes as a tuple to `str.startswith`, which runs the loop over prefixes in C rather than in a Python generator. The per-reference check is now ~2.6x faster in the common single-container case, which matters when writing manifests with millions of virtual references. The check is still a per-reference Python loop overall (see [icechunk#1167](https://github.com/earth-mover/icechunk/issues/1167) for pushing it down to Icechunk entirely).
   By [Tom Nicholas](https://github.com/TomNicholas).