Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 82 additions & 21 deletions src/main/java/dev/braintrust/api/BraintrustApiClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ Optional<Prompt> getPrompt(
Optional<Function> getFunction(
@Nonnull String projectName, @Nonnull String slug, @Nullable String version);

/**
 * Get a function by its ID.
 *
 * @param functionId the ID of the function; must not be null
 * @return the function if found, otherwise an empty {@code Optional}
 */
Optional<Function> getFunctionById(@Nonnull String functionId);

/**
* Invoke a function (scorer, prompt, or tool) by its ID.
*
Expand All @@ -86,6 +94,15 @@ Optional<Function> getFunction(
*/
Object invokeFunction(@Nonnull String functionId, @Nonnull FunctionInvokeRequest request);

/**
 * Execute a BTQL (Braintrust Query Language) query. Supports both BTQL pipe syntax and standard
 * SQL syntax.
 *
 * @param query the BTQL/SQL query string; must not be null
 * @return the query result containing rows of data
 * @throws ApiException if the query cannot be executed (as thrown by the HTTP implementation;
 *     other implementations may differ)
 */
BtqlQueryResponse btqlQuery(@Nonnull String query);

/**
 * Create an API client for the given configuration.
 *
 * @param config the configuration handed to the {@code HttpImpl} constructor
 * @return a new {@code HttpImpl} instance
 */
static BraintrustApiClient of(BraintrustConfig config) {
return new HttpImpl(config);
}
Expand Down Expand Up @@ -351,6 +368,21 @@ public Optional<Function> getFunction(
}
}

/**
 * Fetches a function by ID by calling {@code getAsync} on {@code /v1/function/{functionId}} and
 * blocking for the result.
 *
 * @param functionId the ID of the function; must not be null
 * @return the function, or an empty {@code Optional} when the request fails with an
 *     {@code ApiException} whose message contains "404"
 * @throws NullPointerException if {@code functionId} is null
 * @throws RuntimeException if the request fails for any other reason, or if the calling thread
 *     is interrupted while waiting (the interrupt flag is restored before throwing)
 */
@Override
public Optional<Function> getFunctionById(@Nonnull String functionId) {
    Objects.requireNonNull(functionId, "functionId must not be null");
    try {
        String path = "/v1/function/" + functionId;
        return Optional.of(getAsync(path, Function.class).get());
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        // getMessage() may be null; guard before searching it for the status code.
        if (e.getCause() instanceof ApiException apiEx
                && apiEx.getMessage() != null
                && apiEx.getMessage().contains("404")) {
            return Optional.empty();
        }
        throw new RuntimeException(e);
    }
}

@Override
public Object invokeFunction(
@Nonnull String functionId, @Nonnull FunctionInvokeRequest request) {
Expand All @@ -364,6 +396,17 @@ public Object invokeFunction(
}
}

/**
 * Executes a BTQL query by posting a {@code BtqlQueryRequest} to {@code /btql} and blocking for
 * the response.
 *
 * @param query the BTQL/SQL query string; must not be null
 * @return the deserialized query response
 * @throws NullPointerException if {@code query} is null
 * @throws ApiException if the request fails, or if the calling thread is interrupted while
 *     waiting (the interrupt flag is restored before throwing)
 */
@Override
public BtqlQueryResponse btqlQuery(@Nonnull String query) {
    Objects.requireNonNull(query, "query must not be null");
    try {
        var request = new BtqlQueryRequest(query);
        return postAsync("/btql", request, BtqlQueryResponse.class).get();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new ApiException("Failed to execute BTQL query", e);
    } catch (ExecutionException e) {
        throw new ApiException("Failed to execute BTQL query", e);
    }
}

private <T> CompletableFuture<T> getAsync(String path, Class<T> responseType) {
var request =
HttpRequest.newBuilder()
Expand Down Expand Up @@ -661,11 +704,21 @@ public Optional<Function> getFunction(
throw new RuntimeException("will not be invoked");
}

@Override
public Optional<Function> getFunctionById(@Nonnull String functionId) {
throw new RuntimeException("will not be invoked");
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test double is being replaced with VCR (mostly done), so these functions are intentionally left unimplemented.

}

/**
 * Not implemented in this class: always throws.
 *
 * @throws RuntimeException unconditionally, with the message "will not be invoked"
 */
@Override
public Object invokeFunction(
@Nonnull String functionId, @Nonnull FunctionInvokeRequest request) {
throw new RuntimeException("will not be invoked");
}

/**
 * Not implemented in this class: always throws.
 *
 * @throws RuntimeException unconditionally, with the message "will not be invoked"
 */
@Override
public BtqlQueryResponse btqlQuery(@Nonnull String query) {
throw new RuntimeException("will not be invoked");
}
}

// Request/Response DTOs
Expand Down Expand Up @@ -794,50 +847,58 @@ record FunctionListResponse(List<Function> objects) {}
*
* <p>For remote Python/TypeScript scorers, the scorer handler parameters (input, output,
* expected, metadata) must be wrapped in the outer input field.
*
* <p>The parent field enables distributed tracing by linking the remote function's spans to the
* caller's span context. It can be either a base64-encoded SpanComponents string or an object
* with object_type, object_id, and row_ids.
*/
record FunctionInvokeRequest(@Nullable Object input, @Nullable String version) {
record FunctionInvokeRequest(
@Nullable Object input, @Nullable String version, @Nullable Object parent) {

/** Create a simple invoke request with just input */
public static FunctionInvokeRequest of(Object input) {
return new FunctionInvokeRequest(input, null);
return new FunctionInvokeRequest(input, null, null);
}

/** Create a simple invoke request with input and version */
public static FunctionInvokeRequest of(Object input, @Nullable String version) {
return new FunctionInvokeRequest(input, version);
}

/**
* Create an invoke request for a scorer with input, output, expected, and metadata. This
* maps to the standard scorer handler signature: handler(input, output, expected, metadata)
*
* <p>The scorer args are wrapped in the outer input field as required by the invoke API.
*/
public static FunctionInvokeRequest forScorer(
Object input, Object output, Object expected, Object metadata) {
return forScorer(input, output, expected, metadata, null);
return new FunctionInvokeRequest(input, version, null);
}

/**
* Create an invoke request for a scorer with input, output, expected, metadata, and
* version. This maps to the standard scorer handler signature: handler(input, output,
* expected, metadata)
* Create an invoke request for a scorer with distributed tracing support.
*
* <p>The scorer args are wrapped in the outer input field as required by the invoke API.
* @param input the input to the task being scored
* @param output the output from the task being scored
* @param expected the expected output
* @param metadata additional metadata
* @param version optional function version
* @param parent optional parent for distributed tracing - can be a base64-encoded
* SpanComponents string or a Map with object_type, object_id, and row_ids
*/
public static FunctionInvokeRequest forScorer(
public static FunctionInvokeRequest of(
Object input,
Object output,
Object expected,
Object metadata,
@Nullable String version) {
@Nullable String version,
@Nullable Object parent) {
// Wrap scorer args in an inner map that becomes the outer "input" field
var scorerArgs = new java.util.LinkedHashMap<String, Object>();
scorerArgs.put("input", input);
scorerArgs.put("output", output);
scorerArgs.put("expected", expected);
scorerArgs.put("metadata", metadata);
return new FunctionInvokeRequest(scorerArgs, version);
return new FunctionInvokeRequest(scorerArgs, version, parent);
}
}

/**
 * Request body for BTQL queries.
 *
 * @param query the BTQL/SQL query string to execute
 */
record BtqlQueryRequest(String query) {}

/**
 * Response from a BTQL query. The data field contains the rows returned by the query, where
 * each row is a map of column names to values.
 *
 * @param data the rows returned by the query; each row maps column names to values
 */
record BtqlQueryResponse(List<Map<String, Object>> data) {}
}
18 changes: 0 additions & 18 deletions src/main/java/dev/braintrust/config/BraintrustConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public final class BraintrustConfig extends BaseConfig {
private final boolean enableTraceConsoleLog =
getConfig("BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG", false);
private final boolean debug = getConfig("BRAINTRUST_DEBUG", false);
private final boolean experimentalOtelLogs = getConfig("BRAINTRUST_X_OTEL_LOGS", false);
private final Duration requestTimeout =
Duration.ofSeconds(getConfig("BRAINTRUST_REQUEST_TIMEOUT", 30));

Expand All @@ -47,10 +46,6 @@ public final class BraintrustConfig extends BaseConfig {
/** Custom X509 trust manager for OTLP exporter. Builder-only field, not backed by envars. */
private final X509TrustManager x509TrustManager;

/** Setting for unit testing. Do not use in production. */
private final boolean exportSpansInMemoryForUnitTest =
getConfig("BRAINTRUST_JAVA_EXPORT_SPANS_IN_MEMORY_FOR_UNIT_TEST", false);

/** CORS origins to allow when running remote eval devserver */
private final String devserverCorsOriginWhitelistCsv =
getConfig(
Expand Down Expand Up @@ -192,19 +187,6 @@ public Builder requestTimeout(Duration value) {
return this;
}

// hiding visibility. only used for testing
Builder experimentalOtelLogs(boolean value) {
envOverrides.put("BRAINTRUST_X_OTEL_LOGS", String.valueOf(value));
return this;
}

// only used for testing
public Builder exportSpansInMemoryForUnitTest(boolean value) {
envOverrides.put(
"BRAINTRUST_JAVA_EXPORT_SPANS_IN_MEMORY_FOR_UNIT_TEST", String.valueOf(value));
return this;
}

public Builder sslContext(SSLContext value) {
this.sslContext = value;
return this;
Expand Down
9 changes: 4 additions & 5 deletions src/main/java/dev/braintrust/devserver/Devserver.java
Original file line number Diff line number Diff line change
Expand Up @@ -594,10 +594,12 @@ private void setScoreSpanAttributes(
scoreSpanAttrs.put("generation", braintrustGeneration);
}

var scoresJson = json(scorerScores);
scoreSpan
.setAttribute(PARENT, braintrustParent.toParentValue())
.setAttribute("braintrust.span_attributes", json(scoreSpanAttrs))
.setAttribute("braintrust.output_json", json(scorerScores));
.setAttribute("braintrust.output_json", scoresJson)
.setAttribute("braintrust.scores", scoresJson);
}

private void sendSSEEvent(OutputStream os, String eventType, String data) throws IOException {
Expand Down Expand Up @@ -1075,10 +1077,7 @@ private static Scorer<Object, Object> resolveRemoteScorer(
}

return new ScorerBrainstoreImpl<>(
apiClient,
functionIdSpec.getFunctionId(),
remoteScorer.getName(),
functionIdSpec.getVersion());
apiClient, functionIdSpec.getFunctionId(), functionIdSpec.getVersion());
}

public static class Builder {
Expand Down
39 changes: 20 additions & 19 deletions src/main/java/dev/braintrust/eval/Eval.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,32 +116,33 @@ private void evalOne(String experimentId, DatasetCase<INPUT, OUTPUT> datasetCase
throw new RuntimeException(e);
}
}
{ // run scorers
// run scorers - one span per scorer
for (var scorer : scorers) {
var scoreSpan =
tracer.spanBuilder("score")
.setAttribute(PARENT, "experiment_id:" + experimentId)
.setAttribute(
"braintrust.span_attributes", json(Map.of("type", "score")))
.startSpan();
try (var unused =
BraintrustContext.ofExperiment(experimentId, scoreSpan).makeCurrent()) {
var scores = scorer.score(taskResult);
// linked map to preserve ordering. Not in the spec but nice user experience
final Map<String, Double> nameToScore = new LinkedHashMap<>();
scorers.forEach(
scorer -> {
var scores = scorer.score(taskResult);
scores.forEach(
score -> {
if (score.value() < 0.0 || score.value() > 1.0) {
throw new RuntimeException(
"score must be between 0 and 1: %s : %s"
.formatted(
scorer.getName(), score));
}
nameToScore.put(score.name(), score.value());
});
});
scoreSpan.setAttribute("braintrust.scores", json(nameToScore));
final Map<String, Double> scorerScores = new LinkedHashMap<>();
for (var score : scores) {
if (score.value() < 0.0 || score.value() > 1.0) {
throw new RuntimeException(
"score must be between 0 and 1: %s : %s"
.formatted(scorer.getName(), score));
}
scorerScores.put(score.name(), score.value());
}
// Set span attributes with scorer name
Map<String, Object> spanAttrs = new LinkedHashMap<>();
spanAttrs.put("type", "score");
spanAttrs.put("name", scorer.getName());
scoreSpan.setAttribute("braintrust.span_attributes", json(spanAttrs));
var scoresJson = json(scorerScores);
scoreSpan.setAttribute("braintrust.output_json", scoresJson);
scoreSpan.setAttribute("braintrust.scores", scoresJson);
} finally {
scoreSpan.end();
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/dev/braintrust/eval/Scorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,6 @@ static <INPUT, OUTPUT> Scorer<INPUT, OUTPUT> fetchFromBraintrust(
+ ", slug="
+ scorerSlug));

return new ScorerBrainstoreImpl<>(apiClient, function.id(), function.name(), version);
return new ScorerBrainstoreImpl<>(apiClient, function.id(), version);
}
}
Loading