Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/app/components/chat-panel/chat-panel.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,10 @@
<div class="eval-response-header header-expected">{{ i18n.expectedToolUsesLabel }}</div>
<ngx-json-viewer [json]="message.expectedInvocationToolUses"></ngx-json-viewer>
</div>
} @else if (message.actualFinalResponse) {
} @else if (
message.actualFinalResponse != null ||
message.expectedFinalResponse != null
) {
<div class="actual-result">
<div class="eval-response-header header-actual">{{ i18n.actualResponseLabel }}</div>
<div>{{ message.actualFinalResponse }}</div>
Expand Down
21 changes: 21 additions & 0 deletions src/app/components/chat-panel/chat-panel.component.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,27 @@ describe('ChatPanelComponent', () => {
const canvas = fixture.debugElement.query(By.css('app-a2ui-canvas'));
expect(canvas).toBeTruthy();
});

// Verifies that the eval compare container is rendered for a bot message
// whose actual final response is an empty string while an expected final
// response is present.
it(
'should render failed eval response compare when actual response is empty',
async () => {
component.messages = [{
role: 'bot',
evalStatus: 2,
failedMetric: 'response_match_score',
// '' is falsy, so this case only renders if the template checks for
// null/undefined rather than truthiness.
actualFinalResponse: '',
expectedFinalResponse: 'Expected eval response',
}];
// Render, wait for pending async work, then render again before querying.
fixture.detectChanges();
await fixture.whenStable();
fixture.detectChanges();

const compareContainer =
fixture.debugElement.query(By.css('.eval-compare-container'));
expect(compareContainer).toBeTruthy();
// The expected response text must be visible inside the compare container.
expect(compareContainer.nativeElement.textContent)
.toContain('Expected eval response');
});
});

it('should display loading bar if message isLoading', async () => {
Expand Down
39 changes: 24 additions & 15 deletions src/app/components/chat-panel/chat-panel.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,11 @@ import {MatProgressBarModule} from '@angular/material/progress-bar';
import {MatProgressSpinnerModule} from '@angular/material/progress-spinner';
import {MatTooltipModule} from '@angular/material/tooltip';
import {NgxJsonViewerModule} from 'ngx-json-viewer';
import {EMPTY, merge, NEVER, of, Subject} from 'rxjs';
import {defer, EMPTY, merge, NEVER, Subject} from 'rxjs';
import {catchError, filter, first, switchMap, tap} from 'rxjs/operators';

import {isComputerUseResponse, isVisibleComputerUseClick} from '../../core/models/ComputerUse';
import type {EvalCase} from '../../core/models/Eval';
import {FunctionCall, FunctionResponse} from '../../core/models/types';
import {AGENT_SERVICE} from '../../core/services/interfaces/agent';
import {FEATURE_FLAG_SERVICE} from '../../core/services/interfaces/feature-flag';
import {SAFE_VALUES_SERVICE} from '../../core/services/interfaces/safevalues';
Expand Down Expand Up @@ -157,6 +156,10 @@ export class ChatPanelComponent implements OnChanges, AfterViewInit {
this.featureFlagService.isManualStateUpdateEnabled();
readonly isBidiStreamingEnabledObs =
this.featureFlagService.isBidiStreamingEnabled();
readonly isInfinityMessageScrollingEnabled =
toSignal(this.featureFlagService.isInfinityMessageScrollingEnabled(), {
initialValue: false,
});
readonly canEditSession = signal(true);
readonly isUserFeedbackEnabled =
toSignal(this.featureFlagService.isFeedbackServiceEnabled());
Expand All @@ -169,24 +172,30 @@ export class ChatPanelComponent implements OnChanges, AfterViewInit {
constructor() {
effect(() => {
const sessionName = this.sessionName();
if (sessionName) {
this.nextPageToken = '';
this.uiStateService
.lazyLoadMessages(sessionName, {
pageSize: 100,
pageToken: this.nextPageToken,
})
.pipe(first())
.subscribe();
const isInfinityEnabled = this.isInfinityMessageScrollingEnabled();
if (!sessionName || !isInfinityEnabled) {
return;
}

this.loadInitialMessagesPage(sessionName);
});
}

/**
 * Resets the pagination cursor and requests the first page of messages for
 * the given session.
 *
 * The request is built lazily at subscription time, only the first emission
 * is consumed, and any error is swallowed by completing without a value.
 *
 * @param sessionName Session whose messages should be loaded.
 */
private loadInitialMessagesPage(sessionName: string): void {
  // Start again from the first page.
  this.nextPageToken = '';

  const firstPage$ = defer(() => {
    return this.uiStateService.lazyLoadMessages(sessionName, {
      pageSize: 100,
      pageToken: this.nextPageToken,
    });
  });

  firstPage$
      .pipe(
          first(),
          catchError(() => EMPTY),
          )
      .subscribe();
}

ngOnInit() {
this.featureFlagService.isInfinityMessageScrollingEnabled()
.pipe(
first(),
filter((enabled) => enabled),
filter((enabled) => enabled === true),
switchMap(
() => merge(
this.uiStateService.onNewMessagesLoaded().pipe(
Expand All @@ -208,11 +217,11 @@ export class ChatPanelComponent implements OnChanges, AfterViewInit {
}

this.scrollHeight = element.scrollHeight;
return this.uiStateService
.lazyLoadMessages(this.sessionName(), {
return defer(() => this.uiStateService.lazyLoadMessages(
this.sessionName(), {
pageSize: 100,
pageToken: this.nextPageToken,
})
}))
.pipe(first(), catchError(() => NEVER));
})))),
takeUntilDestroyed(this.destroyRef),
Expand Down
48 changes: 48 additions & 0 deletions src/app/components/chat/chat.component.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,36 @@ describe('ChatComponent', () => {
});
});

// Covers the case where the session query parameter carries an ID that starts
// with the synthetic eval prefix ('___eval___session___').
describe('when synthetic eval session ID is provided in URL', () => {
const EVAL_SYNTHETIC_SESSION_ID = '___eval___session___case-1';

// Arrange: one app is listed, session URLs are enabled, and the route's
// query params point at the synthetic eval session ID.
beforeEach(() => {
mockAgentService.listAppsResponse.next([TEST_APP_1_NAME]);
mockFeatureFlagService.isSessionUrlEnabledResponse.next(true);
mockActivatedRoute.snapshot!.queryParams = {
[APP_QUERY_PARAM]: TEST_APP_1_NAME,
[SESSION_QUERY_PARAM]: EVAL_SYNTHETIC_SESSION_ID,
};
});

it('should create a new session instead of restoring from URL',
async () => {
// Clear call history so only calls made by the freshly created component
// below are counted.
mockSessionService.createSession.calls.reset();
mockSessionService.getSession.calls.reset();
mockFeatureFlagService.isApplicationSelectorEnabledResponse.next(
false);
// Re-create the component so it initializes with the query params set in
// beforeEach.
fixture = TestBed.createComponent(ChatComponent);
component = fixture.componentInstance;
fixture.detectChanges();
await fixture.whenStable();

// The synthetic ID must not be looked up; a new session is created instead.
expect(mockSessionService.getSession).not.toHaveBeenCalledWith(
USER_ID, TEST_APP_1_NAME, EVAL_SYNTHETIC_SESSION_ID);
expect(mockSessionService.createSession)
.toHaveBeenCalledWith(USER_ID, TEST_APP_1_NAME);
});
});

describe('when session in URL is not found', () => {
beforeEach(async () => {
mockActivatedRoute.snapshot!.queryParams = {
Expand Down Expand Up @@ -768,6 +798,12 @@ describe('ChatComponent', () => {
id: 'event-2',
author: 'bot',
content: {parts: [{text: 'bot response'}]},
evalStatus: 2,
failedMetric: 'response_match_score',
evalScore: 0.4,
evalThreshold: 0.7,
actualFinalResponse: '',
expectedFinalResponse: 'Expected bot response',
},
],
};
Expand Down Expand Up @@ -798,6 +834,18 @@ describe('ChatComponent', () => {
}));
});

// Asserts that the second message (index 1) is a bot message that still
// carries the eval comparison fields, including an empty-string
// actualFinalResponse.
// NOTE(review): presumably built from the 'event-2' fixture event; confirm
// against the enclosing setup, which is only partly visible here.
it('should preserve eval comparison fields on bot messages', () => {
expect(component.messages()[1]).toEqual(jasmine.objectContaining({
role: 'bot',
evalStatus: 2,
failedMetric: 'response_match_score',
evalScore: 0.4,
evalThreshold: 0.7,
actualFinalResponse: '',
expectedFinalResponse: 'Expected bot response',
}));
});

it('should call getTrace', () => {
expect(mockEventService.getTrace)
.toHaveBeenCalledWith(SESSION_1_ID);
Expand Down
52 changes: 41 additions & 11 deletions src/app/components/chat/chat.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import {HttpErrorResponse} from '@angular/common/http';
import {AfterViewInit, ChangeDetectorRef, Component, ElementRef, HostListener, inject, Injectable, OnDestroy, OnInit, Renderer2, signal, viewChild, WritableSignal} from '@angular/core';
import {toSignal} from '@angular/core/rxjs-interop';
import {FormControl, FormsModule, ReactiveFormsModule} from '@angular/forms';
import {MatButton, MatFabButton} from '@angular/material/button';
import {MatButton} from '@angular/material/button';
import {MatCard} from '@angular/material/card';
import {MatDialog} from '@angular/material/dialog';
import {MatDivider} from '@angular/material/divider';
Expand Down Expand Up @@ -86,6 +86,17 @@ const A2A_DATA_PART_START_TAG = '<a2a_datapart_json>';
const A2A_DATA_PART_END_TAG = '</a2a_datapart_json>';
const A2UI_MIME_TYPE = 'application/json+a2ui';

/**
 * Optional evaluation-comparison fields that are read from an event and
 * placed on a chat message. Every member is optional, so a field missing on
 * the event is simply `undefined` here.
 */
interface EvalCompareFields {
// Numeric eval status code.
// NOTE(review): the meaning of each value is not visible here; confirm
// against the eval model.
evalStatus?: number;
// Name of the metric that failed (e.g. 'response_match_score').
failedMetric?: string;
// Score and threshold reported for the evaluation.
evalScore?: number;
evalThreshold?: number;
// Actual vs. expected tool uses for the invocation; element shape is untyped.
actualInvocationToolUses?: any[];
expectedInvocationToolUses?: any[];
// Actual vs. expected final response text; may be an empty string.
actualFinalResponse?: string;
expectedFinalResponse?: string;
}

function fixBase64String(base64: string): string {
// Replace URL-safe characters if they exist
base64 = base64.replace(/-/g, '+').replace(/_/g, '/');
Expand Down Expand Up @@ -119,6 +130,7 @@ class CustomPaginatorIntl extends MatPaginatorIntl {

const BIDI_STREAMING_RESTART_WARNING =
'Restarting bidirectional streaming is not currently supported. Please refresh the page or start a new session.';
const EVAL_SYNTHETIC_SESSION_PREFIX = '___eval___session___';

@Component({
selector: 'app-chat',
Expand All @@ -141,7 +153,6 @@ const BIDI_STREAMING_RESTART_WARNING =
MatSlideToggle,
MatDivider,
MatCard,
MatFabButton,
ResizableBottomDirective,
TraceEventComponent,
AsyncPipe,
Expand Down Expand Up @@ -438,12 +449,15 @@ export class ChatComponent implements OnInit, AfterViewInit, OnDestroy {
const queryParams = this.activatedRoute.snapshot.queryParams;
const sessionUrl = queryParams['session'];
const userUrl = queryParams['userId'];
const isEvalSyntheticSession =
typeof sessionUrl === 'string' &&
sessionUrl.startsWith(EVAL_SYNTHETIC_SESSION_PREFIX);

if (userUrl) {
this.userId = userUrl;
}

if (!sessionUrlEnabled || !sessionUrl) {
if (!sessionUrlEnabled || !sessionUrl || isEvalSyntheticSession) {
this.createSessionAndReset();

return;
Expand Down Expand Up @@ -973,14 +987,7 @@ export class ChatComponent implements OnInit, AfterViewInit, OnDestroy {

let message: any = {
role,
evalStatus: e?.evalStatus,
failedMetric: e?.failedMetric,
evalScore: e?.evalScore,
evalThreshold: e?.evalThreshold,
actualInvocationToolUses: e?.actualInvocationToolUses,
expectedInvocationToolUses: e?.expectedInvocationToolUses,
actualFinalResponse: e?.actualFinalResponse,
expectedFinalResponse: e?.expectedFinalResponse,
...this.mapEvalCompareFields(e),
invocationIndex: invocationIndex !== undefined ? invocationIndex :
undefined,
finalResponsePartIndex:
Expand Down Expand Up @@ -1092,6 +1099,27 @@ export class ChatComponent implements OnInit, AfterViewInit, OnDestroy {
return `data:${mimeType};base64,${fixedBase64Data}`;
}

/**
 * Projects the eval-comparison fields of an event into a standalone object.
 *
 * Having a single mapper keeps the field set identical wherever a message is
 * built from an event. A null or undefined event yields an object whose
 * fields are all `undefined`.
 *
 * @param event Source event; may be null or undefined.
 * @returns The eight eval-comparison fields read from `event`.
 */
private mapEvalCompareFields(event: any): EvalCompareFields {
  // Fall back to an empty object so the destructuring below never throws.
  const source = event ?? {};
  const {
    evalStatus,
    failedMetric,
    evalScore,
    evalThreshold,
    actualInvocationToolUses,
    expectedInvocationToolUses,
    actualFinalResponse,
    expectedFinalResponse,
  } = source;

  return {
    evalStatus,
    failedMetric,
    evalScore,
    evalThreshold,
    actualInvocationToolUses,
    expectedInvocationToolUses,
    actualFinalResponse,
    expectedFinalResponse,
  };
}

/**
 * Copies the eval-comparison fields from `event` onto `message`, in place.
 * Only messages whose role is 'bot' are touched; any other message is left
 * unchanged.
 *
 * @param event Event supplying the eval-comparison fields.
 * @param message Message object to mutate.
 */
private addEvalFieldsToMessage(event: any, message: any) {
  if (message.role === 'bot') {
    const evalFields = this.mapEvalCompareFields(event);
    Object.assign(message, evalFields);
  }
}

private processPartIntoMessage(part: any, event: any, message: any) {
if (!part) return;

Expand Down Expand Up @@ -1586,6 +1614,7 @@ export class ChatComponent implements OnInit, AfterViewInit, OnDestroy {
role: 'bot',
eventId: event.id
};
this.addEvalFieldsToMessage(event, botMessage);

partsToProcess.forEach((part: any) => {
if (isA2aResponse && this.isA2uiDataPart(part)) {
Expand Down Expand Up @@ -1654,6 +1683,7 @@ export class ChatComponent implements OnInit, AfterViewInit, OnDestroy {
role: 'bot',
eventId: event.id
};
this.addEvalFieldsToMessage(event, botMessage);

event.content?.parts?.forEach((part: any) => {
this.processPartIntoMessage(part, event, botMessage);
Expand Down
8 changes: 4 additions & 4 deletions src/app/components/eval-tab/eval-tab.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,12 @@
}
@if (showEvalHistory()) {
<div>
@for (evalResult of getEvalHistoryOfCurrentSetSorted(); track evalResult) {
@for (evalResult of evalHistorySorted; track evalResult.timestamp) {
<div>
<div class="status-card">
<div class="status-card__overview">
<div class="status-card__info">
<div class="status-card__timestamp">{{ formatTimestamp(evalResult.timestamp) }}</div>
<div class="status-card__timestamp">{{ evalResult.formattedTimestamp }}</div>
<div class="status-card__summary">
<span class="status-card__passed">{{ getPassCountForCurrentResult(evalResult.evaluationResults.evaluationResults) }} {{ i18n.passedSuffix }}</span>
@if (getFailCountForCurrentResult(evalResult.evaluationResults.evaluationResults) > 0) {
Expand All @@ -126,9 +126,9 @@
</div>
}
</div>
@if (getEvalMetrics(evalResult)) {
@if (evalResult.metrics.length > 0) {
<div class="status-card__metrics">
@for (evalMetric of getEvalMetrics(evalResult); track evalMetric) {
@for (evalMetric of evalResult.metrics; track evalMetric.metricName) {
<span class="status-card__metric"> {{ evalMetric.metricName }}:
{{ evalMetric.threshold }}
</span>
Expand Down
Loading