Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions internal/app/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package app
import (
"context"
"fmt"
"time"

"github.com/evalops/asb/internal/core"
)
Expand Down Expand Up @@ -178,12 +179,15 @@ func (s *Service) transitionGrantState(ctx context.Context, session *core.Sessio
if err != nil {
return fmt.Errorf("transition grant %q to %q: resolve connector: %w", grant.ID, state, err)
}
if err := connector.Revoke(ctx, core.RevokeRequest{
startedAt := time.Now()
err = connector.Revoke(ctx, core.RevokeRequest{
Session: session,
Grant: grant,
Artifact: artifact,
Reason: reason,
}); err != nil {
})
s.metrics.recordConnectorOperation(connector.Kind(), "revoke", time.Since(startedAt), err)
if err != nil {
return fmt.Errorf("transition grant %q to %q: revoke connector state: %w", grant.ID, state, err)
}
}
Expand Down
87 changes: 67 additions & 20 deletions internal/app/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,18 @@ type MetricsOptions struct {

// Metrics records ASB domain-level counters, gauges, and histograms.
//
// Each collector is declared exactly once; a repeated field name inside a
// struct is a compile error.
type Metrics struct {
	sessionsActive  *prometheus.GaugeVec
	sessionsTotal   *prometheus.CounterVec
	grantsTotal     *prometheus.CounterVec
	grantTTL        prometheus.Histogram
	approvalsTotal  *prometheus.CounterVec
	approvalWait    *prometheus.HistogramVec
	policyEval      *prometheus.CounterVec
	budgetExhaust   *prometheus.CounterVec
	artifactsActive *prometheus.GaugeVec
	artifactUnwraps *prometheus.CounterVec

	// Connector call instrumentation: an operation counter labelled by
	// connector kind, operation, and outcome, and a latency histogram
	// labelled by connector kind and operation.
	connectorOps     *prometheus.CounterVec
	connectorLatency *prometheus.HistogramVec
}

// NewMetrics creates Prometheus collectors for ASB domain metrics.
Expand Down Expand Up @@ -177,17 +179,48 @@ func NewMetrics(serviceName string, opts MetricsOptions) (*Metrics, error) {
return nil, err
}

connectorOps, err := registerCounterVec(
opts.Registerer,
prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: prefix + "_connector_operations_total",
Help: "Count of ASB connector operations by connector kind, operation, and outcome.",
},
[]string{"connector_kind", "operation", "outcome"},
),
)
if err != nil {
return nil, err
}

connectorLatency, err := registerHistogramVec(
opts.Registerer,
prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: prefix + "_connector_operation_seconds",
Help: "Latency of ASB connector operations by connector kind and operation.",
Buckets: prometheus.DefBuckets,
},
[]string{"connector_kind", "operation"},
),
)
if err != nil {
return nil, err
}

return &Metrics{
sessionsActive: sessionsActive,
sessionsTotal: sessionsTotal,
grantsTotal: grantsTotal,
grantTTL: grantTTL,
approvalsTotal: approvalsTotal,
approvalWait: approvalWait,
policyEval: policyEval,
budgetExhaust: budgetExhaust,
artifactsActive: artifactsActive,
artifactUnwraps: artifactUnwraps,
sessionsActive: sessionsActive,
sessionsTotal: sessionsTotal,
grantsTotal: grantsTotal,
grantTTL: grantTTL,
approvalsTotal: approvalsTotal,
approvalWait: approvalWait,
policyEval: policyEval,
budgetExhaust: budgetExhaust,
artifactsActive: artifactsActive,
artifactUnwraps: artifactUnwraps,
connectorOps: connectorOps,
connectorLatency: connectorLatency,
}, nil
}

Expand Down Expand Up @@ -299,6 +332,20 @@ func (metrics *Metrics) recordArtifactUnwrap(connectorKind string) {
metrics.artifactUnwraps.WithLabelValues(labelOrUnknown(connectorKind)).Inc()
}

// recordConnectorOperation counts one connector call and observes how long it
// took, in seconds.
//
// The counter is labelled with the connector kind, the operation, and an
// outcome of "error" when err is non-nil or "success" otherwise. The latency
// histogram carries only the kind and operation labels, so failed and
// successful calls share one series. Both label values are passed through
// labelOrUnknown first. A nil receiver makes the call a no-op.
func (metrics *Metrics) recordConnectorOperation(connectorKind string, operation string, duration time.Duration, err error) {
	if metrics == nil {
		return
	}

	kindLabel := labelOrUnknown(connectorKind)
	opLabel := labelOrUnknown(operation)

	var outcome string
	switch {
	case err != nil:
		outcome = "error"
	default:
		outcome = "success"
	}

	metrics.connectorOps.WithLabelValues(kindLabel, opLabel, outcome).Inc()
	metrics.connectorLatency.WithLabelValues(kindLabel, opLabel).Observe(duration.Seconds())
}

func labelOrUnknown(value string) string {
value = strings.TrimSpace(value)
if value == "" {
Expand Down
109 changes: 109 additions & 0 deletions internal/app/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ func TestServiceMetrics_CreateSessionAndIssueGrant(t *testing.T) {
if got := metricValueWithLabels(families, "asb_artifacts_active", map[string]string{"connector_kind": "github"}); got != 1 {
t.Fatalf("active github artifacts = %v, want 1", got)
}
if got := metricValueWithLabels(families, "asb_connector_operations_total", map[string]string{"connector_kind": "github", "operation": "issue", "outcome": "success"}); got != 1 {
t.Fatalf("github issue connector ops = %v, want 1", got)
}
if got := histogramCountWithLabels(families, "asb_connector_operation_seconds", map[string]string{"connector_kind": "github", "operation": "issue"}); got != 1 {
t.Fatalf("github issue connector latency count = %d, want 1", got)
}
if got := histogramCountWithLabels(families, "asb_grant_ttl_seconds", nil); got != 1 {
t.Fatalf("grant TTL histogram count = %d, want 1", got)
}
Expand Down Expand Up @@ -376,6 +382,12 @@ func TestServiceMetrics_RevokeSession(t *testing.T) {
if got := metricValueWithLabels(families, "asb_artifacts_active", map[string]string{"connector_kind": "github"}); got != 0 {
t.Fatalf("active github artifacts = %v, want 0", got)
}
if got := metricValueWithLabels(families, "asb_connector_operations_total", map[string]string{"connector_kind": "github", "operation": "revoke", "outcome": "success"}); got != 1 {
t.Fatalf("github revoke connector ops = %v, want 1", got)
}
if got := histogramCountWithLabels(families, "asb_connector_operation_seconds", map[string]string{"connector_kind": "github", "operation": "revoke"}); got != 1 {
t.Fatalf("github revoke connector latency count = %d, want 1", got)
}
grant, err := repo.GetGrant(ctx, grantResp.GrantID)
if err != nil {
t.Fatalf("GetGrant() error = %v", err)
Expand Down Expand Up @@ -779,6 +791,103 @@ func TestServiceMetrics_UnwrapArtifact(t *testing.T) {
}
}

// TestServiceMetrics_ExecuteGitHubProxy seeds an active session, an issued
// grant, and an issued proxy-handle artifact, runs a single GitHub proxy
// operation through the service, and verifies that exactly one successful
// connector operation and one latency observation were recorded for it.
func TestServiceMetrics_ExecuteGitHubProxy(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	now := testNow()
	// Session, grant, and artifact all share the same ten-minute expiry.
	expiresAt := now.Add(10 * time.Minute)

	reg := prometheus.NewRegistry()
	metrics, err := app.NewMetrics("asb", app.MetricsOptions{Registerer: reg})
	if err != nil {
		t.Fatalf("NewMetrics() error = %v", err)
	}

	repo := memstore.NewRepository()
	rt := memstore.NewRuntimeStore()
	cfg := app.Config{
		Clock:         fixedClock(now),
		Metrics:       metrics,
		Repository:    repo,
		Verifier:      fakeVerifier{identity: workloadIdentity()},
		SessionTokens: mustNewSigner(t),
		Policy:        stubPolicyEngine{},
		Tools:         stubToolRegistry{},
		Connectors:    fakeConnectorResolver{connector: &fakeConnector{kind: "github"}},
		Runtime:       rt,
		GitHubProxy:   &fakeGitHubProxyExecutor{payload: []byte(`{"files":[]}`)},
	}
	svc, err := app.NewService(cfg)
	if err != nil {
		t.Fatalf("NewService() error = %v", err)
	}

	// Fixtures: one session owning one grant, which references one proxy-handle artifact.
	artifactID := "art_proxy_metrics"
	session := &core.Session{
		ID:        "sess_proxy_metrics",
		TenantID:  "t_acme",
		AgentID:   "agent_pr_reviewer",
		RunID:     "run_proxy_metrics",
		State:     core.SessionStateActive,
		ExpiresAt: expiresAt,
		CreatedAt: now,
	}
	grant := &core.Grant{
		ID:          "gr_proxy_metrics",
		TenantID:    "t_acme",
		SessionID:   session.ID,
		Tool:        "github",
		Capability:  "repo.read",
		ResourceRef: "github:repo:acme/widgets",
		State:       core.GrantStateIssued,
		ArtifactRef: &artifactID,
		CreatedAt:   now,
		ExpiresAt:   expiresAt,
	}
	artifact := &core.Artifact{
		ID:            artifactID,
		TenantID:      "t_acme",
		SessionID:     session.ID,
		GrantID:       grant.ID,
		Handle:        "ph_proxy_metrics",
		Kind:          core.ArtifactKindProxyHandle,
		ConnectorKind: "github",
		State:         core.ArtifactStateIssued,
		ExpiresAt:     expiresAt,
		CreatedAt:     now,
		// NOTE(review): presumably the operations this handle may perform; confirm against the service.
		Metadata: map[string]string{
			"operations": "pull_request_files",
		},
	}

	err = repo.SaveSession(ctx, session)
	if err != nil {
		t.Fatalf("SaveSession() error = %v", err)
	}
	err = repo.SaveGrant(ctx, grant)
	if err != nil {
		t.Fatalf("SaveGrant() error = %v", err)
	}
	err = repo.SaveArtifact(ctx, artifact)
	if err != nil {
		t.Fatalf("SaveArtifact() error = %v", err)
	}
	err = rt.RegisterProxyHandle(ctx, artifact.Handle, core.ProxyBudget{}, artifact.ExpiresAt)
	if err != nil {
		t.Fatalf("RegisterProxyHandle() error = %v", err)
	}

	proxyReq := &core.ExecuteGitHubProxyRequest{
		ProxyHandle: artifact.Handle,
		Operation:   "pull_request_files",
	}
	_, err = svc.ExecuteGitHubProxy(ctx, proxyReq)
	if err != nil {
		t.Fatalf("ExecuteGitHubProxy() error = %v", err)
	}

	// The counter carries an outcome label; the latency histogram does not.
	counterLabels := map[string]string{"connector_kind": "github", "operation": "pull_request_files", "outcome": "success"}
	latencyLabels := map[string]string{"connector_kind": "github", "operation": "pull_request_files"}

	families := mustGatherMetrics(t, reg)
	if ops := metricValueWithLabels(families, "asb_connector_operations_total", counterLabels); ops != 1 {
		t.Fatalf("github proxy connector ops = %v, want 1", ops)
	}
	if samples := histogramCountWithLabels(families, "asb_connector_operation_seconds", latencyLabels); samples != 1 {
		t.Fatalf("github proxy connector latency count = %d, want 1", samples)
	}
}

func mustGatherMetrics(t *testing.T, gatherer prometheus.Gatherer) []*dto.MetricFamily {
t.Helper()

Expand Down
4 changes: 4 additions & 0 deletions internal/app/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,9 @@ func (s *Service) ExecuteGitHubProxy(ctx context.Context, req *core.ExecuteGitHu
}()
}

startedAt := time.Now()
payload, err := s.githubProxy.Execute(ctx, artifact, req.Operation, req.Params)
s.metrics.recordConnectorOperation(artifact.ConnectorKind, req.Operation, time.Since(startedAt), err)
responseBytes = int64(len(payload))
if err != nil {
return nil, fmt.Errorf("execute github proxy %q operation %q: execute upstream request: %w", req.ProxyHandle, req.Operation, err)
Expand Down Expand Up @@ -698,11 +700,13 @@ func (s *Service) UnwrapArtifact(ctx context.Context, req *core.UnwrapArtifactRe
}

func (s *Service) issueGrant(ctx context.Context, session *core.Session, grant *core.Grant, resource core.ResourceDescriptor, connector core.Connector) (*core.RequestGrantResponse, error) {
startedAt := time.Now()
artifact, err := connector.Issue(ctx, core.IssueRequest{
Session: session,
Grant: grant,
Resource: resource,
})
s.metrics.recordConnectorOperation(connector.Kind(), "issue", time.Since(startedAt), err)
if err != nil {
return nil, fmt.Errorf("issue grant %q: connector %q issue: %w", grant.ID, connector.Kind(), err)
}
Expand Down
Loading