Skip to content

Commit 64759da

Browse files
he-yufeng authored and boyangsvl committed
fix: handle failed eval inference results
1 parent 9126acb commit 64759da

2 files changed

Lines changed: 57 additions & 2 deletions

File tree

src/google/adk/evaluation/local_eval_service.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,29 @@ async def _evaluate_single_inference_result(
277277
else 'test_user_id'
278278
)
279279

280+
if inference_result.inferences is None:
281+
session_details = None
282+
if inference_result.session_id is not None:
283+
session_details = await self._session_service.get_session(
284+
app_name=inference_result.app_name,
285+
user_id=user_id,
286+
session_id=inference_result.session_id,
287+
)
288+
return (
289+
inference_result,
290+
EvalCaseResult(
291+
eval_set_file=inference_result.eval_set_id,
292+
eval_set_id=inference_result.eval_set_id,
293+
eval_id=inference_result.eval_case_id,
294+
final_eval_status=EvalStatus.FAILED,
295+
overall_eval_metric_results=[],
296+
eval_metric_result_per_invocation=[],
297+
session_id=inference_result.session_id or '',
298+
session_details=session_details,
299+
user_id=user_id,
300+
),
301+
)
302+
280303
if eval_case.conversation_scenario is None and len(
281304
inference_result.inferences
282305
) != len(eval_case.conversation):

tests/unittests/evaluation/test_local_eval_service.py

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
from __future__ import annotations
1616

1717
import asyncio
18-
import sys
1918
from typing import Optional
2019

2120
from google.adk.agents.llm_agent import LlmAgent
@@ -465,6 +464,39 @@ async def test_evaluate_single_inference_result(
465464
assert metric_result.eval_status == EvalStatus.PASSED
466465

467466

@pytest.mark.asyncio
async def test_evaluate_single_inference_result_failed_without_inferences(
    eval_service, mock_eval_sets_manager, mocker
):
  """A failed inference result with no inferences yields a FAILED case result.

  The inference result carries `inferences=None` together with a FAILURE
  status. Evaluating it must return an eval case result that keeps the case
  and session identifiers, is marked FAILED, and has empty metric-result
  lists.
  """
  # Eval case stub returned by the mocked eval sets manager.
  eval_case_stub = mocker.MagicMock(spec=EvalCase)
  eval_case_stub.configure_mock(
      conversation=[],
      conversation_scenario=None,
      session_input=None,
  )
  mock_eval_sets_manager.get_eval_case.return_value = eval_case_stub

  # Inference that failed before producing any invocations.
  failed_inference = InferenceResult(
      app_name="test_app",
      eval_set_id="test_eval_set",
      eval_case_id="case1",
      inferences=None,
      session_id="session1",
      status=InferenceStatus.FAILURE,
      error_message="auth failed",
  )
  config = EvaluateConfig(
      eval_metrics=[EvalMetric(metric_name="fake_metric", threshold=0.5)],
      parallelism=1,
  )

  _, case_result = await eval_service._evaluate_single_inference_result(
      inference_result=failed_inference, evaluate_config=config
  )

  assert case_result.eval_id == "case1"
  assert case_result.session_id == "session1"
  assert case_result.final_eval_status == EvalStatus.FAILED
  assert case_result.overall_eval_metric_results == []
  assert case_result.eval_metric_result_per_invocation == []
498+
499+
468500
@pytest.mark.asyncio
469501
async def test_evaluate_single_inference_result_for_conversation_scenario(
470502
eval_service, mock_eval_sets_manager, mocker
@@ -520,7 +552,7 @@ async def test_evaluate_single_inference_result_for_conversation_scenario(
520552
for i in range(3):
521553
invocation_result = result.eval_metric_result_per_invocation[i]
522554
assert invocation_result.actual_invocation == inference_result.inferences[i]
523-
assert invocation_result.expected_invocation == None
555+
assert invocation_result.expected_invocation is None
524556
assert len(invocation_result.eval_metric_results) == 1
525557
metric_result = invocation_result.eval_metric_results[0]
526558
assert metric_result.metric_name == "fake_single_sided_metric"

0 commit comments

Comments
 (0)