From b2fb88b724e2cc571600e437ec4260c1b1a0d434 Mon Sep 17 00:00:00 2001
From: michaelj <michaelj@ie.ibm.com>
Date: Thu, 5 Mar 2026 22:15:30 +0000
Subject: [PATCH 1/2] fix(ray_tune): process_metric not handling virtual
 properties correctly

The code assumed the entity contained the just measured property values, and hence it could be used to do the aggregation - however this was not the case.

At some stage the code was changed to work directly with the MeasurementRequests and updating this piece was missed.
---
 .../ray_tune/ado_ray_tune/operator.py         |  42 +++-
 tests/operators/test_process_metric.py        | 182 ++++++++++++++++++
 2 files changed, 214 insertions(+), 10 deletions(-)
 create mode 100644 tests/operators/test_process_metric.py

diff --git a/plugins/operators/ray_tune/ado_ray_tune/operator.py b/plugins/operators/ray_tune/ado_ray_tune/operator.py
index bc977c9ea..7351ef772 100644
--- a/plugins/operators/ray_tune/ado_ray_tune/operator.py
+++ b/plugins/operators/ray_tune/ado_ray_tune/operator.py
@@ -196,6 +196,8 @@ def process_metric(
         # We use the last result
         return all_results[metric][-1]
 
+    failed = trainable_params.orchestrator_config.failed_metric_value
+
     # The metric is not in the results, so we need to process it
     # Check if this is a virtual metric
     log.debug(f"No measured properties match {metric} - checking if a virtual property")
@@ -204,18 +206,38 @@ def process_metric(
     except ValueError:
         log.warning(
             f"No experiment measured {metric} and it's not a valid virtual property.  "
-            f"Will set value of {metric} for {entity.identifier} to {trainable_params.orchestrator_config.failed_metric_value} "
+            f"Will set value of {metric} for {entity.identifier} to {failed} "
         )
-        processed_metric = trainable_params.orchestrator_config.failed_metric_value
+        processed_metric = failed
     else:
         if properties is not None:
             if len(properties) == 1:
-                value = entity.valueForProperty(property=properties[0])
-                processed_metric = (
-                    value.value
-                    if value is not None
-                    else trainable_params.orchestrator_config.failed_metric_value
-                )
+                virtual_prop = properties[0]
+                # Determine the key in all_results for the base property.
+                # all_results is keyed by targetProperty.identifier (metric_format="target")
+                # or by observed property identifier (metric_format="observed").
+                metric_format = trainable_params.orchestrator_config.metric_format
+                if metric_format == "target":
+                    base_key = (
+                        virtual_prop.baseObservedProperty.targetProperty.identifier
+                    )
+                else:
+                    base_key = virtual_prop.baseObservedProperty.identifier
+                base_values = all_results.get(base_key)
+                if base_values:
+                    aggregated = virtual_prop.aggregate(base_values)
+                    processed_metric = (
+                        aggregated.value
+                        if aggregated is not None and aggregated.value is not None
+                        else failed
+                    )
+                else:
+                    log.warning(
+                        f"{metric} is a valid virtual property name "
+                        f"however no experiment measured an underlying property with the required identifier. "
+                        f"Will set value of {metric} for {entity.identifier} to {failed}"
+                    )
+                    processed_metric = failed
             else:
                 raise ValueError(
                     f"Ambiguous virtual target metric provided - matches multiple observed properties. "
@@ -225,9 +247,9 @@ def process_metric(
             log.warning(
                 f"{metric} is a valid virtual property name "
                 f"however no experiment measured an underlying property with the required identifier. "
-                f"Will set value of {metric} for {entity.identifier} to {trainable_params.orchestrator_config.failed_metric_value}"
+                f"Will set value of {metric} for {entity.identifier} to {failed}"
             )
-            processed_metric = trainable_params.orchestrator_config.failed_metric_value
+            processed_metric = failed
 
     return processed_metric
 
diff --git a/tests/operators/test_process_metric.py b/tests/operators/test_process_metric.py
new file mode 100644
index 000000000..3ea326288
--- /dev/null
+++ b/tests/operators/test_process_metric.py
@@ -0,0 +1,182 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""Tests for the process_metric function in the RayTune operator."""
+
+import math
+from unittest.mock import MagicMock
+
+import pytest
+from ado_ray_tune.operator import process_metric
+
+
+def _make_trainable_params(
+    metric_format: str = "target", failed_value: float = float("nan")
+) -> MagicMock:
+    """Return a minimal mock of OrchTrainableParameters."""
+    params = MagicMock()
+    params.orchestrator_config.metric_format = metric_format
+    params.orchestrator_config.failed_metric_value = failed_value
+    return params
+
+
+def _make_entity(
+    virtual_props: list | None = None, raises: Exception | None = None
+) -> MagicMock:
+    """Return a mock Entity whose virtualObservedPropertiesFromIdentifier returns virtual_props."""
+    entity = MagicMock()
+    entity.identifier = "test-entity"
+    if raises is not None:
+        entity.virtualObservedPropertiesFromIdentifier.side_effect = raises
+    else:
+        entity.virtualObservedPropertiesFromIdentifier.return_value = virtual_props
+    return entity
+
+
+def _make_virtual_prop(
+    target_key: str = "mip_gaps",
+    observed_key: str = "exp-mip_gaps",
+    agg_value: float | None = 0.2,
+) -> MagicMock:
+    """Return a mock VirtualObservedProperty whose aggregate() returns agg_value."""
+    vp = MagicMock()
+    vp.baseObservedProperty.targetProperty.identifier = target_key
+    vp.baseObservedProperty.identifier = observed_key
+    vp.identifier = f"{target_key}-mean"
+    agg_result = MagicMock()
+    agg_result.value = agg_value
+    vp.aggregate.return_value = agg_result
+    return vp
+
+
+class TestProcessMetricDirectHit:
+    """Metrics present directly in all_results are returned without touching the entity."""
+
+    def test_direct_metric_returns_last_value(self) -> None:
+        """When the metric is in all_results, the last entry is returned."""
+        result = process_metric(
+            metric="mip_gaps",
+            all_results={"mip_gaps": [0.1, 0.2, 0.3]},
+            entity=_make_entity(),
+            trainable_params=_make_trainable_params(),
+        )
+        assert result == 0.3
+
+    def test_direct_metric_single_value(self) -> None:
+        """Single-entry list returns that value."""
+        result = process_metric(
+            metric="mip_gaps",
+            all_results={"mip_gaps": [0.05]},
+            entity=_make_entity(),
+            trainable_params=_make_trainable_params(),
+        )
+        assert result == 0.05
+
+
+class TestProcessMetricVirtualTargetFormat:
+    """Virtual property computed from allResults when metric_format='target'."""
+
+    def test_virtual_metric_uses_allresults_not_entity(self) -> None:
+        """Virtual property is computed from all_results, entity.valueForProperty is never called."""
+        vp = _make_virtual_prop(target_key="mip_gaps", agg_value=0.15)
+        entity = _make_entity(virtual_props=[vp])
+
+        result = process_metric(
+            metric="mip_gaps-mean",
+            all_results={"mip_gaps": [[0.1, 0.2]]},
+            entity=entity,
+            trainable_params=_make_trainable_params(metric_format="target"),
+        )
+
+        assert result == 0.15
+        # Confirms aggregate was called with the allResults values
+        vp.aggregate.assert_called_once_with([[0.1, 0.2]])
+        # Confirms entity.valueForProperty was NOT used
+        entity.valueForProperty.assert_not_called()
+
+    def test_virtual_metric_observed_format_uses_observed_key(self) -> None:
+        """With metric_format='observed', the observed property identifier is used as key."""
+        vp = _make_virtual_prop(
+            target_key="mip_gaps", observed_key="exp-mip_gaps", agg_value=0.3
+        )
+        entity = _make_entity(virtual_props=[vp])
+
+        result = process_metric(
+            metric="mip_gaps-mean",
+            all_results={"exp-mip_gaps": [[0.3, 0.3]]},
+            entity=entity,
+            trainable_params=_make_trainable_params(metric_format="observed"),
+        )
+
+        assert result == 0.3
+        vp.aggregate.assert_called_once_with([[0.3, 0.3]])
+
+    def test_virtual_metric_all_none_returns_failed_value(self) -> None:
+        """When aggregate returns None value, failed_metric_value is returned (no crash)."""
+        vp = _make_virtual_prop(target_key="mip_gaps", agg_value=None)
+        entity = _make_entity(virtual_props=[vp])
+
+        failed = float("nan")
+        result = process_metric(
+            metric="mip_gaps-mean",
+            all_results={"mip_gaps": [[None, None, None]]},
+            entity=entity,
+            trainable_params=_make_trainable_params(failed_value=failed),
+        )
+
+        assert math.isnan(result)
+
+    def test_virtual_metric_base_not_in_allresults_returns_failed_value(self) -> None:
+        """When base property has no allResults entry, failed_metric_value is returned."""
+        vp = _make_virtual_prop(target_key="mip_gaps")
+        entity = _make_entity(virtual_props=[vp])
+
+        result = process_metric(
+            metric="mip_gaps-mean",
+            all_results={},  # base property missing from results
+            entity=entity,
+            trainable_params=_make_trainable_params(failed_value=-1.0),
+        )
+
+        assert result == -1.0
+        vp.aggregate.assert_not_called()
+
+    def test_virtual_metric_properties_none_returns_failed_value(self) -> None:
+        """When no observed property matches the virtual identifier, failed_metric_value is returned."""
+        entity = _make_entity(virtual_props=None)
+
+        result = process_metric(
+            metric="mip_gaps-mean",
+            all_results={"mip_gaps": [[0.1, 0.2]]},
+            entity=entity,
+            trainable_params=_make_trainable_params(failed_value=-1.0),
+        )
+
+        assert result == -1.0
+
+    def test_not_virtual_property_returns_failed_value(self) -> None:
+        """When the metric is not virtual, failed_metric_value is returned."""
+        entity = _make_entity(raises=ValueError("not a virtual property"))
+
+        result = process_metric(
+            metric="unknown_metric",
+            all_results={},
+            entity=entity,
+            trainable_params=_make_trainable_params(failed_value=-999.0),
+        )
+
+        assert result == -999.0
+
+    def test_ambiguous_virtual_properties_raises(self) -> None:
+        """Multiple matching virtual properties raises ValueError."""
+        vp1 = _make_virtual_prop(target_key="mip_gaps")
+        vp2 = _make_virtual_prop(target_key="mip_gaps_alt")
+        entity = _make_entity(virtual_props=[vp1, vp2])
+
+        with pytest.raises(ValueError, match="Ambiguous"):
+            process_metric(
+                metric="mip_gaps-mean",
+                all_results={"mip_gaps": [[0.1]]},
+                entity=entity,
+                trainable_params=_make_trainable_params(),
+            )

From 2b2d776e17a4a1010eb52de6ef0ebe309ed42466 Mon Sep 17 00:00:00 2001
From: michaelj <michaelj@ie.ibm.com>
Date: Fri, 6 Mar 2026 14:34:42 +0000
Subject: [PATCH 2/2] test(schema): replace mocks with fixtures

---
 tests/operators/test_process_metric.py | 249 ++++++++++++++++---------
 1 file changed, 158 insertions(+), 91 deletions(-)

diff --git a/tests/operators/test_process_metric.py b/tests/operators/test_process_metric.py
index 3ea326288..31adb70bf 100644
--- a/tests/operators/test_process_metric.py
+++ b/tests/operators/test_process_metric.py
@@ -9,155 +9,160 @@
 import pytest
 from ado_ray_tune.operator import process_metric
 
+from orchestrator.schema.entity import Entity
+from orchestrator.schema.experiment import Experiment, ParameterizedExperiment
+from orchestrator.schema.measurementspace import (
+    MeasurementSpace,
+    MeasurementSpaceConfiguration,
+)
+from orchestrator.schema.property import ConstitutivePropertyDescriptor
+from orchestrator.schema.property_value import ConstitutivePropertyValue
+
 
 def _make_trainable_params(
     metric_format: str = "target", failed_value: float = float("nan")
 ) -> MagicMock:
-    """Return a minimal mock of OrchTrainableParameters."""
+    """Return a minimal mock of OrchTrainableParameters.
+
+    Only orchestrator_config and measurement_space are used by process_metric;
+    other fields of OrchTrainableParameters require live Ray actors so are left as mocks.
+    Tests that exercise the virtual-property path should override
+    params.measurement_space with a real MeasurementSpace instance.
+    """
     params = MagicMock()
     params.orchestrator_config.metric_format = metric_format
     params.orchestrator_config.failed_metric_value = failed_value
+    params.measurement_space.observedProperties = []
     return params
 
 
-def _make_entity(
-    virtual_props: list | None = None, raises: Exception | None = None
-) -> MagicMock:
-    """Return a mock Entity whose virtualObservedPropertiesFromIdentifier returns virtual_props."""
-    entity = MagicMock()
-    entity.identifier = "test-entity"
-    if raises is not None:
-        entity.virtualObservedPropertiesFromIdentifier.side_effect = raises
-    else:
-        entity.virtualObservedPropertiesFromIdentifier.return_value = virtual_props
-    return entity
-
-
-def _make_virtual_prop(
-    target_key: str = "mip_gaps",
-    observed_key: str = "exp-mip_gaps",
-    agg_value: float | None = 0.2,
-) -> MagicMock:
-    """Return a mock VirtualObservedProperty whose aggregate() returns agg_value."""
-    vp = MagicMock()
-    vp.baseObservedProperty.targetProperty.identifier = target_key
-    vp.baseObservedProperty.identifier = observed_key
-    vp.identifier = f"{target_key}-mean"
-    agg_result = MagicMock()
-    agg_result.value = agg_value
-    vp.aggregate.return_value = agg_result
-    return vp
+def _measurement_space(experiment: Experiment) -> MeasurementSpace:
+    """Build a real MeasurementSpace from a single experiment."""
+    return MeasurementSpace(
+        configuration=MeasurementSpaceConfiguration(experiments=[experiment])
+    )
 
 
 class TestProcessMetricDirectHit:
-    """Metrics present directly in all_results are returned without touching the entity."""
+    """Metrics present directly in all_results are returned without virtual property lookup."""
 
-    def test_direct_metric_returns_last_value(self) -> None:
+    def test_direct_metric_returns_last_value(self, entity: Entity) -> None:
         """When the metric is in all_results, the last entry is returned."""
         result = process_metric(
             metric="mip_gaps",
             all_results={"mip_gaps": [0.1, 0.2, 0.3]},
-            entity=_make_entity(),
+            entity=entity,
             trainable_params=_make_trainable_params(),
         )
         assert result == 0.3
 
-    def test_direct_metric_single_value(self) -> None:
+    def test_direct_metric_single_value(self, entity: Entity) -> None:
         """Single-entry list returns that value."""
         result = process_metric(
             metric="mip_gaps",
             all_results={"mip_gaps": [0.05]},
-            entity=_make_entity(),
+            entity=entity,
             trainable_params=_make_trainable_params(),
         )
         assert result == 0.05
 
 
 class TestProcessMetricVirtualTargetFormat:
-    """Virtual property computed from allResults when metric_format='target'."""
+    """Virtual property computed from allResults using measurement space observed properties."""
 
-    def test_virtual_metric_uses_allresults_not_entity(self) -> None:
-        """Virtual property is computed from all_results, entity.valueForProperty is never called."""
-        vp = _make_virtual_prop(target_key="mip_gaps", agg_value=0.15)
-        entity = _make_entity(virtual_props=[vp])
+    def test_virtual_metric_uses_allresults_not_entity(
+        self, entity: Entity, experiment: Experiment
+    ) -> None:
+        """Virtual property is looked up from measurement_space and computed from all_results.
+
+        The experiment fixture exposes "pka" as a target property.  With metric_format="target",
+        all_results is keyed by the target property identifier "pka".
+        mean([0.1, 0.2]) == 0.15.
+        """
+        params = _make_trainable_params(metric_format="target")
+        params.measurement_space = _measurement_space(experiment)
 
         result = process_metric(
-            metric="mip_gaps-mean",
-            all_results={"mip_gaps": [[0.1, 0.2]]},
+            metric="pka-mean",
+            all_results={"pka": [[0.1, 0.2]]},
             entity=entity,
-            trainable_params=_make_trainable_params(metric_format="target"),
+            trainable_params=params,
         )
 
-        assert result == 0.15
-        # Confirms aggregate was called with the allResults values
-        vp.aggregate.assert_called_once_with([[0.1, 0.2]])
-        # Confirms entity.valueForProperty was NOT used
-        entity.valueForProperty.assert_not_called()
+        assert result == pytest.approx(0.15)
 
-    def test_virtual_metric_observed_format_uses_observed_key(self) -> None:
-        """With metric_format='observed', the observed property identifier is used as key."""
-        vp = _make_virtual_prop(
-            target_key="mip_gaps", observed_key="exp-mip_gaps", agg_value=0.3
-        )
-        entity = _make_entity(virtual_props=[vp])
+    def test_virtual_metric_observed_format_uses_observed_key(
+        self, entity: Entity, experiment: Experiment
+    ) -> None:
+        """With metric_format='observed', the observed property identifier is used as key.
+
+        The observed property identifier for target "pka" is "{experiment_id}-pka".
+        mean([0.3, 0.3]) == 0.3.
+        """
+        params = _make_trainable_params(metric_format="observed")
+        params.measurement_space = _measurement_space(experiment)
 
+        obs_key = f"{experiment.identifier}-pka"
         result = process_metric(
-            metric="mip_gaps-mean",
-            all_results={"exp-mip_gaps": [[0.3, 0.3]]},
+            metric=f"{obs_key}-mean",
+            all_results={obs_key: [[0.3, 0.3]]},
             entity=entity,
-            trainable_params=_make_trainable_params(metric_format="observed"),
+            trainable_params=params,
         )
 
-        assert result == 0.3
-        vp.aggregate.assert_called_once_with([[0.3, 0.3]])
+        assert result == pytest.approx(0.3)
 
-    def test_virtual_metric_all_none_returns_failed_value(self) -> None:
-        """When aggregate returns None value, failed_metric_value is returned (no crash)."""
-        vp = _make_virtual_prop(target_key="mip_gaps", agg_value=None)
-        entity = _make_entity(virtual_props=[vp])
+    def test_virtual_metric_all_none_returns_failed_value(
+        self, entity: Entity, experiment: Experiment
+    ) -> None:
+        """When all measurements are None, aggregate returns None and failed_metric_value is used."""
+        params = _make_trainable_params(failed_value=float("nan"))
+        params.measurement_space = _measurement_space(experiment)
 
-        failed = float("nan")
         result = process_metric(
-            metric="mip_gaps-mean",
-            all_results={"mip_gaps": [[None, None, None]]},
+            metric="pka-mean",
+            all_results={"pka": [None, None, None]},
             entity=entity,
-            trainable_params=_make_trainable_params(failed_value=failed),
+            trainable_params=params,
         )
 
         assert math.isnan(result)
 
-    def test_virtual_metric_base_not_in_allresults_returns_failed_value(self) -> None:
-        """When base property has no allResults entry, failed_metric_value is returned."""
-        vp = _make_virtual_prop(target_key="mip_gaps")
-        entity = _make_entity(virtual_props=[vp])
+    def test_virtual_metric_base_not_in_allresults_returns_failed_value(
+        self, entity: Entity, experiment: Experiment
+    ) -> None:
+        """When the base property key is absent from all_results, failed_metric_value is returned."""
+        params = _make_trainable_params(failed_value=-1.0)
+        params.measurement_space = _measurement_space(experiment)
 
         result = process_metric(
-            metric="mip_gaps-mean",
-            all_results={},  # base property missing from results
+            metric="pka-mean",
+            all_results={},
             entity=entity,
-            trainable_params=_make_trainable_params(failed_value=-1.0),
+            trainable_params=params,
         )
 
         assert result == -1.0
-        vp.aggregate.assert_not_called()
 
-    def test_virtual_metric_properties_none_returns_failed_value(self) -> None:
-        """When no observed property matches the virtual identifier, failed_metric_value is returned."""
-        entity = _make_entity(virtual_props=None)
+    def test_virtual_metric_properties_none_returns_failed_value(
+        self, entity: Entity, experiment: Experiment
+    ) -> None:
+        """When no property in measurement_space matches the virtual identifier, failed_metric_value is returned."""
+        params = _make_trainable_params(failed_value=-1.0)
+        params.measurement_space = _measurement_space(experiment)
 
+        # "nonexistent_prop" is not a target or observed property in the experiment
         result = process_metric(
-            metric="mip_gaps-mean",
-            all_results={"mip_gaps": [[0.1, 0.2]]},
+            metric="nonexistent_prop-mean",
+            all_results={"nonexistent_prop": [[0.1]]},
             entity=entity,
-            trainable_params=_make_trainable_params(failed_value=-1.0),
+            trainable_params=params,
         )
 
         assert result == -1.0
 
-    def test_not_virtual_property_returns_failed_value(self) -> None:
-        """When the metric is not virtual, failed_metric_value is returned."""
-        entity = _make_entity(raises=ValueError("not a virtual property"))
-
+    def test_not_virtual_property_returns_failed_value(self, entity: Entity) -> None:
+        """When the metric identifier is not a valid virtual property, failed_metric_value is returned."""
         result = process_metric(
             metric="unknown_metric",
             all_results={},
@@ -167,16 +172,78 @@ def test_not_virtual_property_returns_failed_value(self) -> None:
 
         assert result == -999.0
 
-    def test_ambiguous_virtual_properties_raises(self) -> None:
-        """Multiple matching virtual properties raises ValueError."""
-        vp1 = _make_virtual_prop(target_key="mip_gaps")
-        vp2 = _make_virtual_prop(target_key="mip_gaps_alt")
-        entity = _make_entity(virtual_props=[vp1, vp2])
+    def test_ambiguous_virtual_properties_raises(
+        self,
+        entity: Entity,
+        mock_parameterizable_experiment: Experiment,
+        customParameterization: list[ConstitutivePropertyValue],
+    ) -> None:
+        """Two parameterizations of the same experiment both share the target property
+        identifier, producing two matching virtual properties and raising ValueError.
+
+        mock_parameterizable_experiment has target "measurable_one".  Parameterizing it
+        two ways creates two observed properties with different identifiers but the same
+        target.  from_observed_properties_matching_identifier then returns both, which is
+        the ambiguity the code guards against.
+
+        customParameterization uses test_opt1="C", test_opt2=-1.
+        The second parameterization uses test_opt1="A", test_opt2=-5 — both differ from
+        the experiment's defaults (B / -3) so the validator accepts them.
+        """
+        pe1 = ParameterizedExperiment(
+            parameterization=customParameterization[:-1],
+            **mock_parameterizable_experiment.model_dump(),
+        )
+        second_parameterization = [
+            ConstitutivePropertyValue(
+                value="A",
+                property=ConstitutivePropertyDescriptor(identifier="test_opt1"),
+            ),
+            ConstitutivePropertyValue(
+                value=-5,
+                property=ConstitutivePropertyDescriptor(identifier="test_opt2"),
+            ),
+        ]
+        pe2 = ParameterizedExperiment(
+            parameterization=second_parameterization,
+            **mock_parameterizable_experiment.model_dump(),
+        )
+        ms = MeasurementSpace(
+            configuration=MeasurementSpaceConfiguration(experiments=[pe1, pe2])
+        )
+        params = _make_trainable_params()
+        params.measurement_space = ms
 
         with pytest.raises(ValueError, match="Ambiguous"):
             process_metric(
-                metric="mip_gaps-mean",
-                all_results={"mip_gaps": [[0.1]]},
+                metric="measurable_one-mean",
+                all_results={"measurable_one": [[0.1]]},
                 entity=entity,
-                trainable_params=_make_trainable_params(),
+                trainable_params=params,
             )
+
+    def test_measurement_space_used_not_entity(self, experiment: Experiment) -> None:
+        """Lookup uses measurement_space, not entity observed properties.
+
+        A bare entity (no observed properties) would cause the old entity-based lookup
+        to return None and fall back to failed_metric_value.  With the measurement_space-
+        based lookup the virtual property is found correctly and the result is computed.
+        """
+        bare_entity = Entity(
+            identifier="bare-entity",
+            generatorid="test",
+            constitutive_property_values=(),
+        )
+        params = _make_trainable_params(metric_format="target")
+        params.measurement_space = _measurement_space(experiment)
+
+        result = process_metric(
+            metric="pka-mean",
+            all_results={"pka": [0.5, 0.5]},
+            entity=bare_entity,
+            trainable_params=params,
+        )
+
+        # mean([0.5, 0.5]) == 0.5; would be nan with entity-based lookup (bare entity
+        # has no observed properties)
+        assert result == pytest.approx(0.5)