From b2fb88b724e2cc571600e437ec4260c1b1a0d434 Mon Sep 17 00:00:00 2001 From: michaelj Date: Thu, 5 Mar 2026 22:15:30 +0000 Subject: [PATCH 1/2] fix(ray_tune): process_metric not handling virtual properties correctly The code assumed the entity contained the just measured property values, and hence it could be used to do the aggregation - however this was not the case. At some stage the code was changed to work directly with the MeasurementRequests and updating this piece was missed. --- .../ray_tune/ado_ray_tune/operator.py | 42 +++- tests/operators/test_process_metric.py | 182 ++++++++++++++++++ 2 files changed, 214 insertions(+), 10 deletions(-) create mode 100644 tests/operators/test_process_metric.py diff --git a/plugins/operators/ray_tune/ado_ray_tune/operator.py b/plugins/operators/ray_tune/ado_ray_tune/operator.py index bc977c9ea..7351ef772 100644 --- a/plugins/operators/ray_tune/ado_ray_tune/operator.py +++ b/plugins/operators/ray_tune/ado_ray_tune/operator.py @@ -196,6 +196,8 @@ def process_metric( # We use the last result return all_results[metric][-1] + failed = trainable_params.orchestrator_config.failed_metric_value + # The metric is not in the results, so we need to process it # Check if this is a virtual metric log.debug(f"No measured properties match {metric} - checking if a virtual property") @@ -204,18 +206,38 @@ def process_metric( except ValueError: log.warning( f"No experiment measured {metric} and it's not a valid virtual property. " - f"Will set value of {metric} for {entity.identifier} to {trainable_params.orchestrator_config.failed_metric_value} " + f"Will set value of {metric} for {entity.identifier} to {failed} " ) - processed_metric = trainable_params.orchestrator_config.failed_metric_value + processed_metric = failed else: if properties is not None: if len(properties) == 1: - value = entity.valueForProperty(property=properties[0]) - processed_metric = ( - value.value - if value is not None - else trainable_params.orchestrator_config.failed_metric_value - ) + virtual_prop = properties[0] + # Determine the key in all_results for the base property. + # all_results is keyed by targetProperty.identifier (metric_format="target") + # or by observed property identifier (metric_format="observed"). + metric_format = trainable_params.orchestrator_config.metric_format + if metric_format == "target": + base_key = ( + virtual_prop.baseObservedProperty.targetProperty.identifier + ) + else: + base_key = virtual_prop.baseObservedProperty.identifier + base_values = all_results.get(base_key) + if base_values: + aggregated = virtual_prop.aggregate(base_values) + processed_metric = ( + aggregated.value + if aggregated is not None and aggregated.value is not None + else failed + ) + else: + log.warning( + f"{metric} is a valid virtual property name " + f"however no experiment measured an underlying property with the required identifier. " + f"Will set value of {metric} for {entity.identifier} to {failed}" + ) + processed_metric = failed else: raise ValueError( f"Ambiguous virtual target metric provided - matches multiple observed properties. " @@ -225,9 +247,9 @@ def process_metric( log.warning( f"{metric} is a valid virtual property name " f"however no experiment measured an underlying property with the required identifier. " - f"Will set value of {metric} for {entity.identifier} to {trainable_params.orchestrator_config.failed_metric_value}" + f"Will set value of {metric} for {entity.identifier} to {failed}" ) - processed_metric = trainable_params.orchestrator_config.failed_metric_value + processed_metric = failed return processed_metric diff --git a/tests/operators/test_process_metric.py b/tests/operators/test_process_metric.py new file mode 100644 index 000000000..3ea326288 --- /dev/null +++ b/tests/operators/test_process_metric.py @@ -0,0 +1,182 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Tests for the process_metric function in the RayTune operator.""" + +import math +from unittest.mock import MagicMock + +import pytest +from ado_ray_tune.operator import process_metric + + +def _make_trainable_params( + metric_format: str = "target", failed_value: float = float("nan") +) -> MagicMock: + """Return a minimal mock of OrchTrainableParameters.""" + params = MagicMock() + params.orchestrator_config.metric_format = metric_format + params.orchestrator_config.failed_metric_value = failed_value + return params + + +def _make_entity( + virtual_props: list | None = None, raises: Exception | None = None +) -> MagicMock: + """Return a mock Entity whose virtualObservedPropertiesFromIdentifier returns virtual_props.""" + entity = MagicMock() + entity.identifier = "test-entity" + if raises is not None: + entity.virtualObservedPropertiesFromIdentifier.side_effect = raises + else: + entity.virtualObservedPropertiesFromIdentifier.return_value = virtual_props + return entity + + +def _make_virtual_prop( + target_key: str = "mip_gaps", + observed_key: str = "exp-mip_gaps", + agg_value: float | None = 0.2, +) -> MagicMock: + """Return a mock VirtualObservedProperty whose aggregate() returns agg_value.""" + vp = MagicMock() + vp.baseObservedProperty.targetProperty.identifier = target_key + vp.baseObservedProperty.identifier = observed_key + vp.identifier = f"{target_key}-mean" + agg_result = MagicMock() + agg_result.value = agg_value + vp.aggregate.return_value = agg_result + return vp + + +class TestProcessMetricDirectHit: + """Metrics present directly in all_results are returned without touching the entity.""" + + def test_direct_metric_returns_last_value(self) -> None: + """When the metric is in all_results, the last entry is returned.""" + result = process_metric( + metric="mip_gaps", + all_results={"mip_gaps": [0.1, 0.2, 0.3]}, + entity=_make_entity(), + trainable_params=_make_trainable_params(), + ) + assert result == 0.3 + + def test_direct_metric_single_value(self) -> None: + """Single-entry list returns that value.""" + result = process_metric( + metric="mip_gaps", + all_results={"mip_gaps": [0.05]}, + entity=_make_entity(), + trainable_params=_make_trainable_params(), + ) + assert result == 0.05 + + +class TestProcessMetricVirtualTargetFormat: + """Virtual property computed from allResults when metric_format='target'.""" + + def test_virtual_metric_uses_allresults_not_entity(self) -> None: + """Virtual property is computed from all_results, entity.valueForProperty is never called.""" + vp = _make_virtual_prop(target_key="mip_gaps", agg_value=0.15) + entity = _make_entity(virtual_props=[vp]) + + result = process_metric( + metric="mip_gaps-mean", + all_results={"mip_gaps": [[0.1, 0.2]]}, + entity=entity, + trainable_params=_make_trainable_params(metric_format="target"), + ) + + assert result == 0.15 + # Confirms aggregate was called with the allResults values + vp.aggregate.assert_called_once_with([[0.1, 0.2]]) + # Confirms entity.valueForProperty was NOT used + entity.valueForProperty.assert_not_called() + + def test_virtual_metric_observed_format_uses_observed_key(self) -> None: + """With metric_format='observed', the observed property identifier is used as key.""" + vp = _make_virtual_prop( + target_key="mip_gaps", observed_key="exp-mip_gaps", agg_value=0.3 + ) + entity = _make_entity(virtual_props=[vp]) + + result = process_metric( + metric="mip_gaps-mean", + all_results={"exp-mip_gaps": [[0.3, 0.3]]}, + entity=entity, + trainable_params=_make_trainable_params(metric_format="observed"), + ) + + assert result == 0.3 + vp.aggregate.assert_called_once_with([[0.3, 0.3]]) + + def test_virtual_metric_all_none_returns_failed_value(self) -> None: + """When aggregate returns None value, failed_metric_value is returned (no crash).""" + vp = _make_virtual_prop(target_key="mip_gaps", agg_value=None) + entity = _make_entity(virtual_props=[vp]) + + failed = float("nan") + result = process_metric( + metric="mip_gaps-mean", + all_results={"mip_gaps": [[None, None, None]]}, + entity=entity, + trainable_params=_make_trainable_params(failed_value=failed), + ) + + assert math.isnan(result) + + def test_virtual_metric_base_not_in_allresults_returns_failed_value(self) -> None: + """When base property has no allResults entry, failed_metric_value is returned.""" + vp = _make_virtual_prop(target_key="mip_gaps") + entity = _make_entity(virtual_props=[vp]) + + result = process_metric( + metric="mip_gaps-mean", + all_results={}, # base property missing from results + entity=entity, + trainable_params=_make_trainable_params(failed_value=-1.0), + ) + + assert result == -1.0 + vp.aggregate.assert_not_called() + + def test_virtual_metric_properties_none_returns_failed_value(self) -> None: + """When no observed property matches the virtual identifier, failed_metric_value is returned.""" + entity = _make_entity(virtual_props=None) + + result = process_metric( + metric="mip_gaps-mean", + all_results={"mip_gaps": [[0.1, 0.2]]}, + entity=entity, + trainable_params=_make_trainable_params(failed_value=-1.0), + ) + + assert result == -1.0 + + def test_not_virtual_property_returns_failed_value(self) -> None: + """When the metric is not virtual, failed_metric_value is returned.""" + entity = _make_entity(raises=ValueError("not a virtual property")) + + result = process_metric( + metric="unknown_metric", + all_results={}, + entity=entity, + trainable_params=_make_trainable_params(failed_value=-999.0), + ) + + assert result == -999.0 + + def test_ambiguous_virtual_properties_raises(self) -> None: + """Multiple matching virtual properties raises ValueError.""" + vp1 = _make_virtual_prop(target_key="mip_gaps") + vp2 = _make_virtual_prop(target_key="mip_gaps_alt") + entity = _make_entity(virtual_props=[vp1, vp2]) + + with pytest.raises(ValueError, match="Ambiguous"): + process_metric( + metric="mip_gaps-mean", + all_results={"mip_gaps": [[0.1]]}, + entity=entity, + trainable_params=_make_trainable_params(), + ) From 2b2d776e17a4a1010eb52de6ef0ebe309ed42466 Mon Sep 17 00:00:00 2001 From: michaelj Date: Fri, 6 Mar 2026 14:34:42 +0000 Subject: [PATCH 2/2] test(schema): replace mocks with fixtures --- tests/operators/test_process_metric.py | 249 ++++++++++++++++--------- 1 file changed, 158 insertions(+), 91 deletions(-) diff --git a/tests/operators/test_process_metric.py b/tests/operators/test_process_metric.py index 3ea326288..31adb70bf 100644 --- a/tests/operators/test_process_metric.py +++ b/tests/operators/test_process_metric.py @@ -9,155 +9,160 @@ import pytest from ado_ray_tune.operator import process_metric +from orchestrator.schema.entity import Entity +from orchestrator.schema.experiment import Experiment, ParameterizedExperiment +from orchestrator.schema.measurementspace import ( + MeasurementSpace, + MeasurementSpaceConfiguration, +) +from orchestrator.schema.property import ConstitutivePropertyDescriptor +from orchestrator.schema.property_value import ConstitutivePropertyValue + def _make_trainable_params( metric_format: str = "target", failed_value: float = float("nan") ) -> MagicMock: - """Return a minimal mock of OrchTrainableParameters.""" + """Return a minimal mock of OrchTrainableParameters. + + Only orchestrator_config and measurement_space are used by process_metric; + other fields of OrchTrainableParameters require live Ray actors so are left as mocks. + Tests that exercise the virtual-property path should override + params.measurement_space with a real MeasurementSpace instance. + """ params = MagicMock() params.orchestrator_config.metric_format = metric_format params.orchestrator_config.failed_metric_value = failed_value + params.measurement_space.observedProperties = [] return params -def _make_entity( - virtual_props: list | None = None, raises: Exception | None = None -) -> MagicMock: - """Return a mock Entity whose virtualObservedPropertiesFromIdentifier returns virtual_props.""" - entity = MagicMock() - entity.identifier = "test-entity" - if raises is not None: - entity.virtualObservedPropertiesFromIdentifier.side_effect = raises - else: - entity.virtualObservedPropertiesFromIdentifier.return_value = virtual_props - return entity - - -def _make_virtual_prop( - target_key: str = "mip_gaps", - observed_key: str = "exp-mip_gaps", - agg_value: float | None = 0.2, -) -> MagicMock: - """Return a mock VirtualObservedProperty whose aggregate() returns agg_value.""" - vp = MagicMock() - vp.baseObservedProperty.targetProperty.identifier = target_key - vp.baseObservedProperty.identifier = observed_key - vp.identifier = f"{target_key}-mean" - agg_result = MagicMock() - agg_result.value = agg_value - vp.aggregate.return_value = agg_result - return vp +def _measurement_space(experiment: Experiment) -> MeasurementSpace: + """Build a real MeasurementSpace from a single experiment.""" + return MeasurementSpace( + configuration=MeasurementSpaceConfiguration(experiments=[experiment]) + ) class TestProcessMetricDirectHit: - """Metrics present directly in all_results are returned without touching the entity.""" + """Metrics present directly in all_results are returned without virtual property lookup.""" - def test_direct_metric_returns_last_value(self) -> None: + def test_direct_metric_returns_last_value(self, entity: Entity) -> None: """When the metric is in all_results, the last entry is returned.""" result = process_metric( metric="mip_gaps", all_results={"mip_gaps": [0.1, 0.2, 0.3]}, - entity=_make_entity(), + entity=entity, trainable_params=_make_trainable_params(), ) assert result == 0.3 - def test_direct_metric_single_value(self) -> None: + def test_direct_metric_single_value(self, entity: Entity) -> None: """Single-entry list returns that value.""" result = process_metric( metric="mip_gaps", all_results={"mip_gaps": [0.05]}, - entity=_make_entity(), + entity=entity, trainable_params=_make_trainable_params(), ) assert result == 0.05 class TestProcessMetricVirtualTargetFormat: - """Virtual property computed from allResults when metric_format='target'.""" + """Virtual property computed from allResults using measurement space observed properties.""" - def test_virtual_metric_uses_allresults_not_entity(self) -> None: - """Virtual property is computed from all_results, entity.valueForProperty is never called.""" - vp = _make_virtual_prop(target_key="mip_gaps", agg_value=0.15) - entity = _make_entity(virtual_props=[vp]) + def test_virtual_metric_uses_allresults_not_entity( + self, entity: Entity, experiment: Experiment + ) -> None: + """Virtual property is looked up from measurement_space and computed from all_results. + + The experiment fixture exposes "pka" as a target property. With metric_format="target", + all_results is keyed by the target property identifier "pka". + mean([0.1, 0.2]) == 0.15. + """ + params = _make_trainable_params(metric_format="target") + params.measurement_space = _measurement_space(experiment) result = process_metric( - metric="mip_gaps-mean", - all_results={"mip_gaps": [[0.1, 0.2]]}, + metric="pka-mean", + all_results={"pka": [[0.1, 0.2]]}, entity=entity, - trainable_params=_make_trainable_params(metric_format="target"), + trainable_params=params, ) - assert result == 0.15 - # Confirms aggregate was called with the allResults values - vp.aggregate.assert_called_once_with([[0.1, 0.2]]) - # Confirms entity.valueForProperty was NOT used - entity.valueForProperty.assert_not_called() + assert result == pytest.approx(0.15) - def test_virtual_metric_observed_format_uses_observed_key(self) -> None: - """With metric_format='observed', the observed property identifier is used as key.""" - vp = _make_virtual_prop( - target_key="mip_gaps", observed_key="exp-mip_gaps", agg_value=0.3 - ) - entity = _make_entity(virtual_props=[vp]) + def test_virtual_metric_observed_format_uses_observed_key( + self, entity: Entity, experiment: Experiment + ) -> None: + """With metric_format='observed', the observed property identifier is used as key. + + The observed property identifier for target "pka" is "{experiment_id}-pka". + mean([0.3, 0.3]) == 0.3. + """ + params = _make_trainable_params(metric_format="observed") + params.measurement_space = _measurement_space(experiment) + obs_key = f"{experiment.identifier}-pka" result = process_metric( - metric="mip_gaps-mean", - all_results={"exp-mip_gaps": [[0.3, 0.3]]}, + metric=f"{obs_key}-mean", + all_results={obs_key: [[0.3, 0.3]]}, entity=entity, - trainable_params=_make_trainable_params(metric_format="observed"), + trainable_params=params, ) - assert result == 0.3 - vp.aggregate.assert_called_once_with([[0.3, 0.3]]) + assert result == pytest.approx(0.3) - def test_virtual_metric_all_none_returns_failed_value(self) -> None: - """When aggregate returns None value, failed_metric_value is returned (no crash).""" - vp = _make_virtual_prop(target_key="mip_gaps", agg_value=None) - entity = _make_entity(virtual_props=[vp]) + def test_virtual_metric_all_none_returns_failed_value( + self, entity: Entity, experiment: Experiment + ) -> None: + """When all measurements are None, aggregate returns None and failed_metric_value is used.""" + params = _make_trainable_params(failed_value=float("nan")) + params.measurement_space = _measurement_space(experiment) - failed = float("nan") result = process_metric( - metric="mip_gaps-mean", - all_results={"mip_gaps": [[None, None, None]]}, + metric="pka-mean", + all_results={"pka": [None, None, None]}, entity=entity, - trainable_params=_make_trainable_params(failed_value=failed), + trainable_params=params, ) assert math.isnan(result) - def test_virtual_metric_base_not_in_allresults_returns_failed_value(self) -> None: - """When base property has no allResults entry, failed_metric_value is returned.""" - vp = _make_virtual_prop(target_key="mip_gaps") - entity = _make_entity(virtual_props=[vp]) + def test_virtual_metric_base_not_in_allresults_returns_failed_value( + self, entity: Entity, experiment: Experiment + ) -> None: + """When the base property key is absent from all_results, failed_metric_value is returned.""" + params = _make_trainable_params(failed_value=-1.0) + params.measurement_space = _measurement_space(experiment) result = process_metric( - metric="mip_gaps-mean", - all_results={}, # base property missing from results + metric="pka-mean", + all_results={}, entity=entity, - trainable_params=_make_trainable_params(failed_value=-1.0), + trainable_params=params, ) assert result == -1.0 - vp.aggregate.assert_not_called() - def test_virtual_metric_properties_none_returns_failed_value(self) -> None: - """When no observed property matches the virtual identifier, failed_metric_value is returned.""" - entity = _make_entity(virtual_props=None) + def test_virtual_metric_properties_none_returns_failed_value( + self, entity: Entity, experiment: Experiment + ) -> None: + """When no property in measurement_space matches the virtual identifier, failed_metric_value is returned.""" + params = _make_trainable_params(failed_value=-1.0) + params.measurement_space = _measurement_space(experiment) + # "nonexistent_prop" is not a target or observed property in the experiment result = process_metric( - metric="mip_gaps-mean", - all_results={"mip_gaps": [[0.1, 0.2]]}, + metric="nonexistent_prop-mean", + all_results={"nonexistent_prop": [[0.1]]}, entity=entity, - trainable_params=_make_trainable_params(failed_value=-1.0), + trainable_params=params, ) assert result == -1.0 - def test_not_virtual_property_returns_failed_value(self) -> None: - """When the metric is not virtual, failed_metric_value is returned.""" - entity = _make_entity(raises=ValueError("not a virtual property")) - + def test_not_virtual_property_returns_failed_value(self, entity: Entity) -> None: + """When the metric identifier is not a valid virtual property, failed_metric_value is returned.""" result = process_metric( metric="unknown_metric", all_results={}, @@ -167,16 +172,78 @@ def test_not_virtual_property_returns_failed_value(self) -> None: assert result == -999.0 - def test_ambiguous_virtual_properties_raises(self) -> None: - """Multiple matching virtual properties raises ValueError.""" - vp1 = _make_virtual_prop(target_key="mip_gaps") - vp2 = _make_virtual_prop(target_key="mip_gaps_alt") - entity = _make_entity(virtual_props=[vp1, vp2]) + def test_ambiguous_virtual_properties_raises( + self, + entity: Entity, + mock_parameterizable_experiment: Experiment, + customParameterization: list[ConstitutivePropertyValue], + ) -> None: + """Two parameterizations of the same experiment both share the target property + identifier, producing two matching virtual properties and raising ValueError. + + mock_parameterizable_experiment has target "measurable_one". Parameterizing it + two ways creates two observed properties with different identifiers but the same + target. from_observed_properties_matching_identifier then returns both, which is + the ambiguity the code guards against. + + customParameterization uses test_opt1="C", test_opt2=-1. + The second parameterization uses test_opt1="A", test_opt2=-5 — both differ from + the experiment's defaults (B / -3) so the validator accepts them. + """ + pe1 = ParameterizedExperiment( + parameterization=customParameterization[:-1], + **mock_parameterizable_experiment.model_dump(), + ) + second_parameterization = [ + ConstitutivePropertyValue( + value="A", + property=ConstitutivePropertyDescriptor(identifier="test_opt1"), + ), + ConstitutivePropertyValue( + value=-5, + property=ConstitutivePropertyDescriptor(identifier="test_opt2"), + ), + ] + pe2 = ParameterizedExperiment( + parameterization=second_parameterization, + **mock_parameterizable_experiment.model_dump(), + ) + ms = MeasurementSpace( + configuration=MeasurementSpaceConfiguration(experiments=[pe1, pe2]) + ) + params = _make_trainable_params() + params.measurement_space = ms with pytest.raises(ValueError, match="Ambiguous"): process_metric( - metric="mip_gaps-mean", - all_results={"mip_gaps": [[0.1]]}, + metric="measurable_one-mean", + all_results={"measurable_one": [[0.1]]}, entity=entity, - trainable_params=_make_trainable_params(), + trainable_params=params, ) + + def test_measurement_space_used_not_entity(self, experiment: Experiment) -> None: + """Lookup uses measurement_space, not entity observed properties. + + A bare entity (no observed properties) would cause the old entity-based lookup + to return None and fall back to failed_metric_value. With the measurement_space- + based lookup the virtual property is found correctly and the result is computed. + """ + bare_entity = Entity( + identifier="bare-entity", + generatorid="test", + constitutive_property_values=(), + ) + params = _make_trainable_params(metric_format="target") + params.measurement_space = _measurement_space(experiment) + + result = process_metric( + metric="pka-mean", + all_results={"pka": [0.5, 0.5]}, + entity=bare_entity, + trainable_params=params, + ) + + # mean([0.5, 0.5]) == 0.5; would be nan with entity-based lookup (bare entity + # has no observed properties) + assert result == pytest.approx(0.5)