diff --git a/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_stream_False_openai_chat_completions_post_424f6218.json b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_stream_False_openai_chat_completions_post_424f6218.json
new file mode 100644
index 00000000000..269368321c5
--- /dev/null
+++ b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_stream_False_openai_chat_completions_post_424f6218.json
@@ -0,0 +1,57 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/chat/completions",
+    "headers": {
+      "User-Agent": "OpenAIClientImpl/Java unknown",
+      "X-Stainless-Arch": "arm64",
+      "X-Stainless-Lang": "java",
+      "X-Stainless-OS": "Linux",
+      "X-Stainless-OS-Version": "6.12.65-linuxkit",
+      "X-Stainless-Package-Version": "unknown",
+      "X-Stainless-Retry-Count": "0",
+      "X-Stainless-Runtime": "JRE",
+      "X-Stainless-Runtime-Version": "17.0.9",
+      "X-Stainless-Read-Timeout": "600",
+      "X-Stainless-Timeout": "600",
+      "Content-Type": "application/json",
+      "Content-Length": "96",
+      "Connection": "Keep-Alive",
+      "Accept-Encoding": "gzip"
+    },
+    "body": "{\"messages\":[{\"content\":\"Hello OpenAI!\",\"role\":\"user\"}],\"model\":\"gpt-3.5-turbo\",\"max_tokens\":35}"
+  },
+  "response": {
+    "status": {
+      "code": 200,
+      "message": "OK"
+    },
+    "headers": {
+      "Date": "Fri, 20 Feb 2026 00:00:43 GMT",
+      "Content-Type": "application/json",
+      "Transfer-Encoding": "chunked",
+      "Connection": "keep-alive",
+      "access-control-expose-headers": "X-Request-ID",
+      "openai-processing-ms": "796",
+      "openai-project": "proj_gt6TQZPRbZfoY2J9AQlEJMpd",
+      "openai-version": "2020-10-01",
+      "Server": "cloudflare",
+      "x-ratelimit-limit-requests": "10000",
+      "x-ratelimit-limit-tokens": "50000000",
+      "x-ratelimit-remaining-requests": "9999",
+      "x-ratelimit-remaining-tokens": "49999994",
+      "x-ratelimit-reset-requests": "6ms",
+      "x-ratelimit-reset-tokens": "0s",
+      "x-request-id": "req_ac2897c2c6e840a0a0e12c7684ecfaae",
+      "x-openai-proxy-wasm": "v0.1",
+      "cf-cache-status": "DYNAMIC",
+      "set-cookie": "__cf_bm=gxZLEchqKMKTztQgXT8SUS624bU_sDykIXEy5gIFT_0-1771545642.824098-1.0.1.1-QpgexMX9Wbte8oAZv3LqUJZ1h0k6pkjTClR8qrer4op9P_ZpyvkkAKda8jZv24esL4dcf9wH2J0QFkZwslpRyBNi17Ws.QFAgt18DyHO8dv4c_nHnZ_4GYCIU1r8ldNt; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 20 Feb 2026 00:30:43 GMT",
+      "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload",
+      "X-Content-Type-Options": "nosniff",
+      "Content-Encoding": "gzip",
+      "CF-RAY": "9d09b9aba85630ad-SEA",
+      "alt-svc": "h3=\":443\"; ma=86400"
+    },
+    "body": "{\n  \"id\": \"chatcmpl-DB8ElAOKW90QXI3wOJx1LBxBOdfgf\",\n  \"object\": \"chat.completion\",\n  \"created\": 1771545643,\n  \"model\": \"gpt-3.5-turbo-0125\",\n  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\": \"assistant\",\n        \"content\": \"Hello! How can I assist you today?\",\n        \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 11,\n    \"completion_tokens\": 9,\n    \"total_tokens\": 20,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\": 0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\": \"default\",\n  \"system_fingerprint\": null\n}\n"
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_tool_call_stream_False_openai_chat_completions_post_ec6364ef.json b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_tool_call_stream_False_openai_chat_completions_post_ec6364ef.json
new file mode 100644
index 00000000000..9e4ed325186
--- /dev/null
+++ b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_chat_completion_tool_call_stream_False_openai_chat_completions_post_ec6364ef.json
@@ -0,0 +1,57 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/chat/completions",
+    "headers": {
+      "User-Agent": "OpenAIClientImpl/Java unknown",
+      "X-Stainless-Arch": "arm64",
+      "X-Stainless-Lang": "java",
+      "X-Stainless-OS": "Linux",
+      "X-Stainless-OS-Version": "6.12.65-linuxkit",
+      "X-Stainless-Package-Version": "unknown",
+      "X-Stainless-Retry-Count": "0",
+      "X-Stainless-Runtime": "JRE",
+      "X-Stainless-Runtime-Version": "17.0.9",
+      "X-Stainless-Read-Timeout": "600",
+      "X-Stainless-Timeout": "600",
+      "Content-Type": "application/json",
+      "Content-Length": "537",
+      "Connection": "Keep-Alive",
+      "Accept-Encoding": "gzip"
+    },
+    "body": "{\"messages\":[{\"content\":\"Bob is a student at Stanford University. He is studying computer science.\",\"role\":\"user\"}],\"model\":\"gpt-3.5-turbo\",\"tool_choice\":\"auto\",\"tools\":[{\"function\":{\"name\":\"extract_student_info\",\"description\":\"Get the student information from the body of the input text\",\"parameters\":{\"type\":\"object\",\"properties\":{\"major\":{\"description\":\"Major subject.\",\"type\":\"string\"},\"school\":{\"description\":\"The university name.\",\"type\":\"string\"},\"name\":{\"description\":\"Name of the person\",\"type\":\"string\"}}}},\"type\":\"function\"}]}"
+  },
+  "response": {
+    "status": {
+      "code": 200,
+      "message": "OK"
+    },
+    "headers": {
+      "Date": "Fri, 20 Feb 2026 00:01:06 GMT",
+      "Content-Type": "application/json",
+      "Transfer-Encoding": "chunked",
+      "Connection": "keep-alive",
+      "access-control-expose-headers": "X-Request-ID",
+      "openai-processing-ms": "463",
+      "openai-project": "proj_gt6TQZPRbZfoY2J9AQlEJMpd",
+      "openai-version": "2020-10-01",
+      "Server": "cloudflare",
+      "x-ratelimit-limit-requests": "10000",
+      "x-ratelimit-limit-tokens": "50000000",
+      "x-ratelimit-remaining-requests": "9999",
+      "x-ratelimit-remaining-tokens": "49999979",
+      "x-ratelimit-reset-requests": "6ms",
+      "x-ratelimit-reset-tokens": "0s",
+      "x-request-id": "req_1503996b1cea4bca91a0697db9eec68e",
+      "x-openai-proxy-wasm": "v0.1",
+      "cf-cache-status": "DYNAMIC",
+      "set-cookie": "__cf_bm=Y.uMUvKXcR9Yk6xXzcExCuLDDxpKWoS90CZWc5aj_78-1771545665.585499-1.0.1.1-qZlmanRqlTnlgnA_UKxWN6ccJeGJMZ5cU8hQn4XOsKn2_XJnm1IuHxGV8ejtnSlnbsmSfy6dmFNMOlDANMzF9kJG1U4ZD82BHh6ZqUZnE5Xgyn07s46Z66pomW1vQ4BI; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 20 Feb 2026 00:31:06 GMT",
+      "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload",
+      "X-Content-Type-Options": "nosniff",
+      "Content-Encoding": "gzip",
+      "CF-RAY": "9d09ba39ea9bdf09-SEA",
+      "alt-svc": "h3=\":443\"; ma=86400"
+    },
+    "body": "{\n  \"id\": \"chatcmpl-DB8F79pRqLfT6pTKajxAu4MpCpMZe\",\n  \"object\": \"chat.completion\",\n  \"created\": 1771545665,\n  \"model\": \"gpt-3.5-turbo-0125\",\n  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\": \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n            \"id\": \"call_NAE9VXAzyT6ftmPl4lmeSb4E\",\n            \"type\": \"function\",\n            \"function\": {\n              \"name\": \"extract_student_info\",\n              \"arguments\": \"{\\\"name\\\":\\\"Bob\\\",\\\"school\\\":\\\"Stanford University\\\",\\\"major\\\":\\\"computer science\\\"}\"\n            }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 89,\n    \"completion_tokens\": 26,\n    \"total_tokens\": 115,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\": 0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\": \"default\",\n  \"system_fingerprint\": null\n}\n"
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_frameworks/utils/vcr-cassettes/openai/test_responses_create_stream_False_openai_responses_post_661cf6eb.json b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_responses_create_stream_False_openai_responses_post_661cf6eb.json
new file mode 100644
index 00000000000..3dac61de8ec
--- /dev/null
+++ b/tests/integration_frameworks/utils/vcr-cassettes/openai/test_responses_create_stream_False_openai_responses_post_661cf6eb.json
@@ -0,0 +1,49 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/responses",
+    "headers": {
+      "User-Agent": "OpenAIClientImpl/Java unknown",
+      "X-Stainless-Arch": "arm64",
+      "X-Stainless-Lang": "java",
+      "X-Stainless-OS": "Linux",
+      "X-Stainless-OS-Version": "6.12.65-linuxkit",
+      "X-Stainless-Package-Version": "unknown",
+      "X-Stainless-Retry-Count": "0",
+      "X-Stainless-Runtime": "JRE",
+      "X-Stainless-Runtime-Version": "17.0.9",
+      "X-Stainless-Read-Timeout": "600",
+      "X-Stainless-Timeout": "600",
+      "Content-Type": "application/json",
+      "Content-Length": "151",
+      "Connection": "Keep-Alive",
+      "Accept-Encoding": "gzip"
+    },
+    "body": "{\"input\":\"Where is the nearest Dunkin' Donuts?\",\"instructions\":\"Talk with a Boston accent.\",\"max_output_tokens\":50,\"model\":\"gpt-4.1\",\"temperature\":0.1}"
+  },
+  "response": {
+    "status": {
+      "code": 200,
+      "message": "OK"
+    },
+    "headers": {
+      "Date": "Fri, 20 Feb 2026 00:01:31 GMT",
+      "Content-Type": "application/json",
+      "Transfer-Encoding": "chunked",
+      "Connection": "keep-alive",
+      "Server": "cloudflare",
+      "openai-version": "2020-10-01",
+      "openai-project": "proj_gt6TQZPRbZfoY2J9AQlEJMpd",
+      "x-request-id": "req_796176c9da464aaf8c000e63907a2bf7",
+      "openai-processing-ms": "1286",
+      "cf-cache-status": "DYNAMIC",
+      "set-cookie": "__cf_bm=HilBezoeGvyXngzMc0Hg5DN6LG9wn_W3H8k.lG1Jz20-1771545689.5935988-1.0.1.1-bWm4zCWS6C.HmOejInX98jG7KXqlhLKGLxZlPVQtSvp_ZVUH8sbSxIOSCmYHzImhWSX55AD1yIc.F4swLEWMcUUtZPrBalOmf3KSpx75d.0DPU0BD_n4nstlZ1bNu0LX; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 20 Feb 2026 00:31:31 GMT",
+      "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload",
+      "X-Content-Type-Options": "nosniff",
+      "Content-Encoding": "gzip",
+      "CF-RAY": "9d09bacffb441760-SEA",
+      "alt-svc": "h3=\":443\"; ma=86400"
+    },
+    "body": "{\n  \"id\": \"resp_081d29a052f49e9a016997a459f6fc819681f5286582acecea\",\n  \"object\": \"response\",\n  \"created_at\": 1771545690,\n  \"status\": \"incomplete\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\": \"developer\"\n  },\n  \"completed_at\": null,\n  \"error\": null,\n  \"frequency_penalty\": 0.0,\n  \"incomplete_details\": {\n    \"reason\": \"max_output_tokens\"\n  },\n  \"instructions\": \"Talk with a Boston accent.\",\n  \"max_output_tokens\": 50,\n  \"max_tool_calls\": null,\n  \"model\": \"gpt-4.1-2025-04-14\",\n  \"output\": [\n    {\n      \"id\": \"msg_081d29a052f49e9a016997a45a44608196a2d157dfe2ab47ff\",\n      \"type\": \"message\",\n      \"status\": \"incomplete\",\n      \"content\": [\n        {\n          \"type\": \"output_text\",\n          \"annotations\": [],\n          \"logprobs\": [],\n          \"text\": \"Ah, ya lookin\\u2019 for a Dunkies, huh? Classic! In Boston, ya can\\u2019t throw a rock without hittin\\u2019 a Dunkin\\u2019. 
There\\u2019s prob\\u2019ly one on the next block, right next to the T stop.\"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\": null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\": null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\": \"default\",\n  \"store\": false,\n  \"temperature\": 0.1,\n  \"text\": {\n    \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n  \"tools\": [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n  \"usage\": {\n    \"input_tokens\": 27,\n    \"input_tokens_details\": {\n      \"cached_tokens\": 0\n    },\n    \"output_tokens\": 50,\n    \"output_tokens_details\": {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 77\n  },\n  \"user\": null,\n  \"metadata\": {}\n}"
+  }
+}
\ No newline at end of file
diff --git a/utils/_context/_scenarios/integration_frameworks.py b/utils/_context/_scenarios/integration_frameworks.py
index 68e13b98af9..80927900df0 100644
--- a/utils/_context/_scenarios/integration_frameworks.py
+++ b/utils/_context/_scenarios/integration_frameworks.py
@@ -30,7 +30,7 @@ def __init__(self, name: str, doc: str) -> None:
             name,
             doc=doc,
             github_workflow="endtoend",
-            agent_image="ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.38.0",
+            agent_image="ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.42.0",
             scenario_groups=(groups.integration_frameworks,),
         )
 
@@ -159,3 +159,11 @@ def _set_dd_trace_integrations_enabled(self, library: str) -> None:
             self.environment["DD_TRACE_DNS_ENABLED"] = "false"
             self.environment["DD_TRACE_NET_ENABLED"] = "false"
             self.environment["DD_TRACE_FETCH_ENABLED"] = "false"
+        elif library == "java":
+            # Disable OkHttp and Jetty instrumentation to only see OpenAI spans
+            self.environment["DD_INTEGRATION_OKHTTP_ENABLED"] = "false"
+            self.environment["DD_INTEGRATION_JETTY_ENABLED"] = "false"
+            # Reduce telemetry intervals for faster metric reporting in tests; also turn on DD_TRACE_DEBUG
+            self.environment["DD_TELEMETRY_HEARTBEAT_INTERVAL"] = "1"
+            self.environment["DD_TELEMETRY_METRICS_INTERVAL"] = "1"
+            self.environment["DD_TRACE_DEBUG"] = "true"
diff --git a/utils/build/docker/java/openai-java.Dockerfile b/utils/build/docker/java/openai-java.Dockerfile
new file mode 100644
index 00000000000..d1a2b2c5e04
--- /dev/null
+++ b/utils/build/docker/java/openai-java.Dockerfile
@@ -0,0 +1,32 @@
+FROM maven:3.9.5-eclipse-temurin-17
+
+ARG FRAMEWORK_VERSION
+
+WORKDIR /app
+
+COPY utils/build/docker/java/openai_app /app
+
+RUN ["./gradlew", "init"]
+
+
+# TODO: pass $FRAMEWORK_VERSION through if it is set
+
+
+# Copy DD trace installation scripts and binaries
+COPY utils/build/docker/java/install_ddtrace.sh binaries* /binaries/
+
+RUN /binaries/install_ddtrace.sh
+
+# Build the application
+RUN ["./gradlew", "build"]
+
+# Create logs directory
+RUN mkdir -p /integration-framework-tracer-logs
+
+# Set environment variables
+ENV DD_TRACE_STARTUP_LOGS=true
+
+ENV JAVA_TOOL_OPTIONS="-javaagent:/dd-tracer/dd-java-agent.jar"
+
+# Run the application with DD Java agent
+CMD ["./gradlew", "run", "--no-daemon"]
diff --git a/utils/build/docker/java/openai_app/.gitignore b/utils/build/docker/java/openai_app/.gitignore
new file mode 100644
index 00000000000..67bcc2f7272
--- /dev/null
+++ b/utils/build/docker/java/openai_app/.gitignore
@@ -0,0 +1,2 @@
+.gradle/
+build/
diff --git a/utils/build/docker/java/openai_app/build.gradle b/utils/build/docker/java/openai_app/build.gradle
new file mode 100644
index 00000000000..b3152f088c8
--- /dev/null
+++ b/utils/build/docker/java/openai_app/build.gradle
@@ -0,0 +1,30 @@
+plugins {
+    id 'java'
+    id 'application'
+    id 'com.github.johnrengelman.shadow' version '7.1.2'
+}
+
+group = 'com.example'
+version = '1.0.0'
+
+sourceCompatibility = '17'
+targetCompatibility = '17'
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    implementation 'io.javalin:javalin:5.3.2'
+    implementation 'com.fasterxml.jackson.core:jackson-databind:2.14.2'
+    implementation 'org.json:json:20230227'
+    implementation 'org.slf4j:slf4j-simple:2.0.3'
+    implementation 'com.openai:openai-java:3.0.0'
+    implementation 'com.anthropic:anthropic-java:2.4.0'
+
+    implementation files('/dd-tracer/dd-java-agent.jar')
+}
+
+application {
+    mainClass = 'SingleFileServer'
+}
\ No newline at end of file
diff --git a/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.jar b/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 00000000000..e6441136f3d
Binary files /dev/null and b/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.properties b/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 00000000000..b82aa23a4f0
--- /dev/null
+++ b/utils/build/docker/java/openai_app/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,7 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
+networkTimeout=10000
+validateDistributionUrl=true
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/utils/build/docker/java/openai_app/gradlew b/utils/build/docker/java/openai_app/gradlew
new file mode 100755
index 00000000000..1aa94a42690
--- /dev/null
+++ b/utils/build/docker/java/openai_app/gradlew
@@ -0,0 +1,249 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+#       noncompliant, but you have some other compliant shell such as ksh or
+#       bash, then to run this script, type that shell name before the whole
+#       command line, like:
+#
+#           ksh Gradle
+#
+#       Busybox and similar reduced shells will NOT work, because this script
+#       requires all of these POSIX shell features:
+#         * functions;
+#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+#         * compound commands having a testable exit status, especially «case»;
+#         * various built-in commands including «command», «set», and «ulimit».
+#
+#   Important for patching:
+#
+#   (2) This script targets any POSIX shell, so it avoids extensions provided
+#       by Bash, Ksh, etc; in particular arrays are avoided.
+#
+#       The "traditional" practice of packing multiple parameters into a
+#       space-separated string is a well documented source of bugs and security
+#       problems, so this is (mostly) avoided, by progressively accumulating
+#       options in "$@", and eventually passing that to Java.
+#
+#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+#       see the in-line comments for details.
+#
+#       There are tweaks for specific operating systems such as AIX, CygWin,
+#       Darwin, MinGW, and NonStop.
+#
+#   (3) This script is generated from the Groovy template
+#       https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       within the Gradle project.
+#
+#       You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
+    [ -h "$app_path" ]
+do
+    ls=$( ls -ld "$app_path" )
+    link=${ls#*' -> '}
+    case $link in             #(
+      /*)   app_path=$link ;; #(
+      *)    app_path=$APP_HOME$link ;;
+    esac
+done
+
+# This is normally unused
+# shellcheck disable=SC2034
+APP_BASE_NAME=${0##*/}
+# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
+APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+    echo "$*"
+} >&2
+
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in                #(
+  CYGWIN* )         cygwin=true  ;; #(
+  Darwin* )         darwin=true  ;; #(
+  MSYS* | MINGW* )  msys=true    ;; #(
+  NONSTOP* )        nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD=$JAVA_HOME/jre/sh/java
+    else
+        JAVACMD=$JAVA_HOME/bin/java
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD=java
+    if ! command -v java >/dev/null 2>&1
+    then
+        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+    case $MAX_FD in #(
+      max*)
+        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
+        # shellcheck disable=SC2039,SC3045
+        MAX_FD=$( ulimit -H -n ) ||
+            warn "Could not query maximum file descriptor limit"
+    esac
+    case $MAX_FD in  #(
+      '' | soft) :;; #(
+      *)
+        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
+        # shellcheck disable=SC2039,SC3045
+        ulimit -n "$MAX_FD" ||
+            warn "Could not set maximum file descriptor limit to $MAX_FD"
+    esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+#   * args from the command line
+#   * the main class name
+#   * -classpath
+#   * -D...appname settings
+#   * --module-path (only if needed)
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+    JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    for arg do
+        if
+            case $arg in                                #(
+              -*)   false ;;                            # don't mess with options #(
+              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
+                    [ -e "$t" ] ;;                      #(
+              *)    false ;;
+            esac
+        then
+            arg=$( cygpath --path --ignore --mixed "$arg" )
+        fi
+        # Roll the args list around exactly as many times as the number of
+        # args, so each arg winds up back in the position where it started, but
+        # possibly modified.
+        #
+        # NB: a `for` loop captures its iteration list before it begins, so
+        # changing the positional parameters here affects neither the number of
+        # iterations, nor the values presented in `arg`.
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
+    done
+fi
+
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Collect all arguments for the java command:
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS are not allowed to contain shell fragments,
+#     and any embedded shellness will be escaped.
+#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
+#     treated as '${Hostname}' itself on the command line.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor process substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/utils/build/docker/java/openai_app/settings.gradle b/utils/build/docker/java/openai_app/settings.gradle
new file mode 100644
index 00000000000..03e8aa9fd39
--- /dev/null
+++ b/utils/build/docker/java/openai_app/settings.gradle
@@ -0,0 +1 @@
+rootProject.name = 'single-file-server'
\ No newline at end of file
diff --git a/utils/build/docker/java/openai_app/src/main/java/SingleFileServer.java b/utils/build/docker/java/openai_app/src/main/java/SingleFileServer.java
new file mode 100644
index 00000000000..e4f7cea76de
--- /dev/null
+++ b/utils/build/docker/java/openai_app/src/main/java/SingleFileServer.java
@@ -0,0 +1,591 @@
+import io.javalin.Javalin;
+import io.javalin.http.Context;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.List;
+
+import datadog.trace.api.llmobs.LLMObs;
+import datadog.trace.api.llmobs.LLMObsSpan;
+import datadog.trace.api.GlobalTracer;
+import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
+import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
+import datadog.trace.bootstrap.instrumentation.api.AgentScope;
+
+// OpenAI imports
+import com.openai.client.OpenAIClient;
+import com.openai.client.okhttp.OpenAIOkHttpClient;
+import com.openai.models.completions.*;
+import com.openai.models.chat.completions.*;
+import com.openai.models.embeddings.*;
+import com.openai.models.responses.*;
+import com.openai.core.JsonValue;
+
+// Anthropic imports
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.client.okhttp.AnthropicOkHttpClient;
+import com.anthropic.models.messages.Message;
+import com.anthropic.models.messages.MessageCreateParams;
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.RawMessageStreamEvent;
+import com.anthropic.models.messages.TextBlockParam;
+
+public class SingleFileServer {
+  private static final ObjectMapper objectMapper = new ObjectMapper();
+
+  public static void main(String[] args) {
+    Integer port = Integer.parseInt(System.getenv("FRAMEWORK_TEST_CLIENT_SERVER_PORT"));
+    Javalin app = Javalin.create(config -> {
+      config.plugins.enableDevLogging();
+    });
+
+    OpenAIClient openaiClient = OpenAIOkHttpClient.builder()
+      .fromEnv()
+      .baseUrl(getProxyUrl("openai"))
+      .build();
+
+    AnthropicClient anthropicClient = AnthropicOkHttpClient.builder()
+      .fromEnv()
+      .baseUrl(getProxyUrl("anthropic"))
+      .build();
+
+    // SDK endpoints
+    app.get("/sdk/info", ctx -> {
+      String responseJson = doGetSDKInfo(ctx);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.post("/sdk/trace", ctx -> {
+      String requestBody = ctx.body();
+      JSONObject payload = new JSONObject(requestBody);
+      JSONObject traceStructure = payload.getJSONObject("trace_structure");
+
+      String responseJson = doCreateTrace(traceStructure);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.post("/sdk/submit_evaluation_metric", ctx -> {
+      String responseJson = doSubmitEvaluationMetric(ctx);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    // OpenAI endpoints
+    app.post("/completions", ctx -> {
+      String responseJson = doOpenAICompletion(ctx, openaiClient);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.post("/chat/completions", ctx -> {
+      String responseJson = doOpenAIChatCompletion(ctx, openaiClient);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.post("/embeddings", ctx -> {
+      String responseJson = doOpenAIEmbedding(ctx, openaiClient);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.post("/responses/create", ctx -> {
+      String responseJson = doOpenAIResponsesCreate(ctx, openaiClient);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    // Anthropic endpoints
+    app.post("/anthropic/create", ctx -> {
+      String responseJson = doAnthropicCreate(ctx, anthropicClient);
+      ctx.result(responseJson).contentType("application/json");
+    });
+
+    app.start(port);
+  }
+
+  /**
+   * Builds the base URL used for a provider's client: the value of the DD_TRACE_AGENT_URL
+   * environment variable followed by "/vcr/" and the provider name.
+   */
+  private static String getProxyUrl (String provider) {
+    String agentUrl = System.getenv("DD_TRACE_AGENT_URL");
+    return String.format("%s/vcr/%s", agentUrl, provider);
+  }
+
+  /**
+   * Handles POST /completions: issues an OpenAI completion request built from the JSON body.
+   *
+   * Reads "model" and "prompt" from the top level of the body, and the optional
+   * "max_tokens" and "temperature" from the optional "parameters" object.
+   *
+   * @param ctx          request context whose body is the JSON payload
+   * @param openaiClient client used to send the request
+   * @return an empty JSON object; the API response itself is discarded
+   */
+  private static String doOpenAICompletion (Context ctx, OpenAIClient openaiClient) {
+    JSONObject payload = new JSONObject(ctx.body());
+
+    // "parameters" is optional: optJSONObject returns null when it is absent, so fall
+    // back to an empty object instead of dereferencing null below.
+    JSONObject parameters = payload.optJSONObject("parameters");
+    if (parameters == null) {
+      parameters = new JSONObject();
+    }
+
+    var builder = CompletionCreateParams.builder();
+    builder.model(payload.optString("model"));
+    builder.prompt(payload.optString("prompt"));
+
+    // optDouble yields NaN when the key is missing or not numeric
+    if (!Double.isNaN(parameters.optDouble("max_tokens"))) {
+      builder.maxTokens(parameters.optLong("max_tokens"));
+    }
+    if (!Double.isNaN(parameters.optDouble("temperature"))) {
+      builder.temperature(parameters.optDouble("temperature"));
+    }
+
+    openaiClient.completions().create(builder.build());
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Handles POST /chat/completions: issues an OpenAI chat completion built from the JSON body.
+   *
+   * Reads "model" and the required "messages" array from the top level of the body, and
+   * "stream", "tools", "max_tokens" and "temperature" from the optional "parameters" object.
+   * Only messages whose role is "user", "system" or "assistant" are forwarded; messages
+   * with any other role are skipped.
+   *
+   * @param ctx          request context whose body is the JSON payload
+   * @param openaiClient client used to send the request
+   * @return an empty JSON object; the API response (or stream) is consumed and discarded
+   */
+  private static String doOpenAIChatCompletion(Context ctx, OpenAIClient openaiClient) {
+    JSONObject payload = new JSONObject(ctx.body());
+
+    // "parameters" is optional: optJSONObject returns null when it is absent, so fall
+    // back to an empty object instead of dereferencing null below.
+    JSONObject parameters = payload.optJSONObject("parameters");
+    if (parameters == null) {
+      parameters = new JSONObject();
+    }
+    boolean stream = parameters.optBoolean("stream");
+    JSONArray tools = parameters.optJSONArray("tools");
+
+    var builder = ChatCompletionCreateParams.builder();
+    builder.model(payload.optString("model"));
+
+    // Parse messages array
+    JSONArray messages = payload.getJSONArray("messages");
+    for (int i = 0; i < messages.length(); i++) {
+      JSONObject message = messages.getJSONObject(i);
+      String role = message.getString("role");
+      String content = message.getString("content");
+
+      if ("user".equals(role)) {
+        builder.addUserMessage(content);
+      } else if ("system".equals(role)) {
+        builder.addSystemMessage(content);
+      } else if ("assistant".equals(role)) {
+        builder.addAssistantMessage(content);
+      }
+    }
+
+    // optDouble yields NaN when the key is missing or not numeric
+    if (!Double.isNaN(parameters.optDouble("max_tokens"))) {
+      // using deprecated maxTokens for sake of cassettes
+      // maxCompletionTokens is preferable
+      builder.maxTokens(parameters.optLong("max_tokens"));
+    }
+    if (!Double.isNaN(parameters.optDouble("temperature"))) {
+      builder.temperature(parameters.optDouble("temperature"));
+    }
+    if (tools != null) {
+      for (int i = 0; i < tools.length(); i++) {
+        JSONObject tool = tools.getJSONObject(i);
+
+        // the function definition is passed through as raw JSON
+        Map<String, Object> functionMap = tool.getJSONObject("function").toMap();
+
+        ChatCompletionFunctionTool toolObject = ChatCompletionFunctionTool.builder()
+          .type(JsonValue.from(tool.getString("type")))
+          .function(JsonValue.from(functionMap))
+          .build();
+        builder.addTool(toolObject);
+      }
+
+      builder.toolChoice(JsonValue.from("auto"));
+    }
+
+    if (stream) {
+      builder.streamOptions(ChatCompletionStreamOptions.builder().includeUsage(true).build());
+      try (com.openai.core.http.StreamResponse<ChatCompletionChunk> streamResponse = openaiClient.chat().completions().createStreaming(builder.build())) {
+        streamResponse.stream().forEach(chunk -> {
+            // consume the stream
+        });
+      }
+    } else {
+      openaiClient.chat().completions().create(builder.build());
+    }
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Handles POST /embeddings: forwards the required "model" and "input" strings from the
+   * request body to the OpenAI embeddings API and replies with an empty JSON object.
+   */
+  private static String doOpenAIEmbedding (Context ctx, OpenAIClient openaiClient) {
+    JSONObject payload = new JSONObject(ctx.body());
+
+    // both keys are mandatory: getString throws when one is missing
+    String embeddingModel = payload.getString("model");
+    String embeddingInput = payload.getString("input");
+
+    EmbeddingCreateParams request = EmbeddingCreateParams.builder()
+      .model(embeddingModel)
+      .input(embeddingInput)
+      .build();
+    openaiClient.embeddings().create(request);
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Handles POST /responses/create: issues an OpenAI Responses API request built from the body.
+   *
+   * Reads "model", "input" (a string, or an array passed through as raw JSON) and the
+   * optional "tools" array from the top level of the body. "stream", "max_output_tokens",
+   * "temperature", "instructions" and "reasoning" come from the optional "parameters" object.
+   *
+   * @param ctx          request context whose body is the JSON payload
+   * @param openaiClient client used to send the request
+   * @return an empty JSON object; the API response (or stream) is consumed and discarded
+   */
+  @SuppressWarnings("unchecked")
+  private static String doOpenAIResponsesCreate (Context ctx, OpenAIClient openaiClient) {
+    String requestBody = ctx.body();
+    JSONObject payload = new JSONObject(requestBody);
+
+    String model = payload.getString("model");
+    var input = payload.get("input"); // string or JSONArray<JSONObject>
+    JSONArray tools = payload.optJSONArray("tools");
+
+    // "parameters" is optional: optJSONObject returns null when it is absent, so fall
+    // back to an empty object instead of dereferencing null below.
+    JSONObject parameters = payload.optJSONObject("parameters");
+    if (parameters == null) {
+      parameters = new JSONObject();
+    }
+
+    boolean stream = parameters.optBoolean("stream");
+
+    ResponseCreateParams.Builder builder = ResponseCreateParams.builder();
+
+    builder.model(model);
+    if (input instanceof String) {
+      builder.input(JsonValue.from(input));
+    } else {
+      List<Object> inputList = (List<Object>) deepConvertJsonToJava(input);
+      builder.input(JsonValue.from(inputList));
+    }
+
+    // optDouble yields NaN when the key is missing or not numeric
+    if (!Double.isNaN(parameters.optDouble("max_output_tokens"))) {
+      builder.maxOutputTokens((long) parameters.getDouble("max_output_tokens"));
+    }
+
+    if (!Double.isNaN(parameters.optDouble("temperature"))) {
+      builder.temperature(parameters.getDouble("temperature"));
+    }
+
+    if (!parameters.optString("instructions").isEmpty()) {
+      builder.instructions(parameters.getString("instructions"));
+    }
+
+    if (parameters.optJSONObject("reasoning") != null) {
+      JSONObject reasoning = parameters.getJSONObject("reasoning");
+      builder.reasoning(JsonValue.from(reasoning.toMap()));
+    }
+
+    if (tools != null) {
+      List<Object> toolsList = (List<Object>) deepConvertJsonToJava(tools);
+
+      builder.tools(JsonValue.from(toolsList));
+      builder.toolChoice(JsonValue.from("auto"));
+    }
+
+    if (stream) {
+      try (com.openai.core.http.StreamResponse<ResponseStreamEvent> streamResponse = openaiClient.responses().createStreaming(builder.build())) {
+        streamResponse.stream().forEach(chunk -> {
+            // consume the stream
+        });
+      }
+    } else {
+      openaiClient.responses().create(builder.build());
+    }
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Handles POST /anthropic/create: issues an Anthropic messages request built from the body.
+   *
+   * Reads "model", "messages", "system" (a string or an array), "tools" and "stream" from
+   * the top level of the body. "extra_headers", "temperature" and "max_tokens" come from
+   * the optional "parameters" object. Arrays are passed through to the client as raw JSON.
+   *
+   * @param ctx             request context whose body is the JSON payload
+   * @param anthropicClient client used to send the request
+   * @return an empty JSON object; the API response (or stream) is consumed and discarded
+   */
+  @SuppressWarnings("unchecked")
+  private static String doAnthropicCreate (Context ctx, AnthropicClient anthropicClient) {
+    String requestBody = ctx.body();
+    JSONObject payload = new JSONObject(requestBody);
+
+    String model = payload.getString("model");
+    var messages = payload.optJSONArray("messages");
+    var system = payload.opt("system");
+    JSONArray tools = payload.optJSONArray("tools");
+    boolean stream = payload.optBoolean("stream");
+
+    // "parameters" is optional: optJSONObject returns null when it is absent, so fall
+    // back to an empty object instead of dereferencing null below.
+    JSONObject parameters = payload.optJSONObject("parameters");
+    if (parameters == null) {
+      parameters = new JSONObject();
+    }
+
+    MessageCreateParams.Builder builder = MessageCreateParams.builder();
+
+    JSONObject extraHeaders = parameters.optJSONObject("extra_headers");
+    if (extraHeaders != null) {
+      for (String key : extraHeaders.keySet()) {
+        // header values are sent as text, so non-string JSON values are stringified
+        // rather than failing on a cast
+        builder.putAdditionalHeader(key, String.valueOf(extraHeaders.get(key)));
+      }
+    }
+
+    if (system instanceof String) {
+      builder.system((String) system);
+    } else if (system instanceof JSONArray) {
+      List<Object> systemList = (List<Object>) deepConvertJsonToJava(system);
+      builder.system(com.anthropic.core.JsonValue.from(systemList));
+    }
+
+    builder.model(model);
+
+    if (messages != null) {
+      List<Object> messagesList = (List<Object>) deepConvertJsonToJava(messages);
+      builder.messages(com.anthropic.core.JsonValue.from(messagesList));
+    }
+
+    // optDouble yields NaN when the key is missing or not numeric
+    if (!Double.isNaN(parameters.optDouble("temperature"))) {
+      builder.temperature(parameters.getDouble("temperature"));
+    }
+
+    if (!Double.isNaN(parameters.optDouble("max_tokens"))) {
+      builder.maxTokens(parameters.getLong("max_tokens"));
+    }
+
+    // do tools (optJSONArray already maps a missing or JSON-null value to null)
+    if (tools != null) {
+      List<Object> toolsList = (List<Object>) deepConvertJsonToJava(tools);
+      builder.tools(com.anthropic.core.JsonValue.from(toolsList));
+    }
+
+    if (stream) {
+      try (com.anthropic.core.http.StreamResponse<RawMessageStreamEvent> streamResponse = anthropicClient.messages().createStreaming(builder.build())) {
+        streamResponse.stream().forEach(chunk -> {
+          // consume the stream
+        });
+      }
+    } else {
+      anthropicClient.messages().create(builder.build());
+    }
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Handles GET /sdk/info: reports the implementation version of the package that
+   * contains GlobalTracer.
+   *
+   * @param ctx request context (unused)
+   * @return a JSON object with a single "version" key; the value is JSON null when the
+   *         version is not known
+   */
+  private static String doGetSDKInfo (Context ctx) {
+    Package tracerPackage = GlobalTracer.class.getPackage();
+    String version = tracerPackage != null ? tracerPackage.getImplementationVersion() : null;
+
+    // getImplementationVersion() returns null when the version is not known, and
+    // Map.of() rejects null values; a HashMap lets that case serialize as JSON null
+    // instead of failing the request with a NullPointerException.
+    Map<String, String> responseMap = new HashMap<>();
+    responseMap.put("version", version);
+    return toJson(responseMap);
+  }
+
+  /**
+   * Creates one span described by {@code traceStructure}, then recursively creates its
+   * "children" while that span is open.
+   *
+   * When "sdk" is "llmobs" an LLMObs span of the requested "kind" is started and the
+   * "annotations" are applied to it. When "annotate_after" is true they are applied
+   * after the span has been finished. Otherwise a plain tracer span is started and
+   * activated. In both cases the span is finished (and the scope closed) even if a
+   * child or an annotation throws, so a failed request does not leave an open span or
+   * an active scope behind on the handling thread.
+   *
+   * @param traceStructure JSON description of the span; "sdk" is required, and "kind" is
+   *                       required when "sdk" is "llmobs"
+   * @return a fixed placeholder JSON object
+   */
+  private static String doCreateTrace (JSONObject traceStructure) {
+    boolean isLLMObs = "llmobs".equals(traceStructure.getString("sdk"));
+    String kind = isLLMObs ? traceStructure.getString("kind") : null;
+
+    String name = traceStructure.optString("name");
+    String modelName = traceStructure.optString("model_name", null);
+    String modelProvider = traceStructure.optString("model_provider", null);
+    String mlApp = traceStructure.optString("ml_app", null);
+    String sessionId = traceStructure.optString("session_id", null);
+
+    JSONArray annotations = traceStructure.optJSONArray("annotations");
+    boolean annotateAfter = traceStructure.optBoolean("annotate_after");
+    JSONArray children = traceStructure.optJSONArray("children");
+
+    if (isLLMObs) {
+      LLMObsSpan span = startLLMObsSpan(kind, name, modelName, modelProvider, mlApp, sessionId);
+      try {
+        doTraceChildren(children);
+        if (!annotateAfter) {
+          doApplyAnnotations(span, kind, annotations);
+        }
+      } finally {
+        span.finish();
+      }
+
+      if (annotateAfter) {
+        // to trigger an exception for not being able to apply annotations after the span is finished
+        doApplyAnnotations(span, kind, annotations);
+      }
+    } else {
+      AgentSpan span = AgentTracer
+          .get()
+          .buildSpan(name)
+          .start();
+
+      AgentScope scope = AgentTracer
+        .get()
+        .activateSpan(span);
+
+      try {
+        doTraceChildren(children);
+      } finally {
+        span.finish();
+        scope.close();
+      }
+    }
+
+    return toJson(
+      Map.of(
+        "foo", "bar"
+      )
+    );
+  }
+
+  /**
+   * Creates a trace for every element of {@code children}, in order.
+   * A null array is treated as "no children".
+   */
+  private static void doTraceChildren(JSONArray children) {
+    int childCount = (children == null) ? 0 : children.length();
+    for (int idx = 0; idx < childCount; idx++) {
+      doCreateTrace(children.getJSONObject(idx));
+    }
+  }
+
+  /**
+   * Applies every annotation object in {@code annotations} to {@code span}.
+   *
+   * For each annotation:
+   * - "input_data" / "output_data": when both are strings they are applied as-is;
+   *   otherwise, for spans of kind "llm" only, each side is converted to a list of
+   *   role/content messages. For other kinds, non-string IO data is not applied.
+   * - "metadata" and "tags" objects are applied as maps.
+   * - "metrics" values are read as doubles.
+   *
+   * @param span        span to annotate
+   * @param kind        span kind, used to decide how non-string IO data is handled
+   * @param annotations annotation objects; null means there is nothing to apply
+   */
+  private static void doApplyAnnotations (LLMObsSpan span, String kind, JSONArray annotations) {
+    if (annotations == null) {
+      return;
+    }
+
+    for (int i = 0; i < annotations.length(); i++) {
+      JSONObject annotation = annotations.getJSONObject(i);
+
+      // apply IO annotations - could be a string or a JSON object
+      Object inputDataObject = annotation.opt("input_data");
+      Object outputDataObject = annotation.opt("output_data");
+
+      if (inputDataObject instanceof String && outputDataObject instanceof String) {
+        span.annotateIO((String) inputDataObject, (String) outputDataObject);
+      } else if ("llm".equals(kind)) {
+        List<LLMObs.LLMMessage> inputData = toLLMMessages(inputDataObject);
+        List<LLMObs.LLMMessage> outputData = toLLMMessages(outputDataObject);
+
+        span.annotateIO(inputData, outputData);
+      }
+
+      // apply metadata annotations
+      JSONObject metadata = annotation.optJSONObject("metadata");
+      if (metadata != null) {
+        span.setMetadata(metadata.toMap());
+      }
+
+      // apply metrics annotations
+      JSONObject metrics = annotation.optJSONObject("metrics");
+      if (metrics != null) {
+        Map<String, Number> metricsMap = new HashMap<>();
+        for (String key : metrics.keySet()) {
+          metricsMap.put(key, metrics.getDouble(key));
+        }
+        span.setMetrics(metricsMap);
+      }
+
+      // apply tags annotations
+      JSONObject tags = annotation.optJSONObject("tags");
+      if (tags != null) {
+        span.setTags(tags.toMap());
+      }
+    }
+  }
+
+  /**
+   * Converts IO annotation data into LLM messages. A JSON array is used as-is and any
+   * other value is treated as a one-element array. Elements that are not JSON objects
+   * are skipped.
+   */
+  private static List<LLMObs.LLMMessage> toLLMMessages(Object data) {
+    JSONArray messages;
+    if (data instanceof JSONArray) {
+      messages = (JSONArray) data;
+    } else {
+      messages = new JSONArray();
+      messages.put(data);
+    }
+
+    List<LLMObs.LLMMessage> result = new ArrayList<>();
+    for (int j = 0; j < messages.length(); j++) {
+      JSONObject message = messages.optJSONObject(j);
+      if (message == null) {
+        continue;
+      }
+
+      String role = message.optString("role");
+      String content = message.optString("content");
+      // TODO: add tool calls when tests are added
+
+      result.add(LLMObs.LLMMessage.from(role, content));
+    }
+    return result;
+  }
+
+  /**
+   * Starts the LLMObs span matching {@code kind} ("llm", "task", "agent", "workflow" or
+   * "tool"). The model name and provider are only passed on for "llm" spans.
+   *
+   * @throws RuntimeException for any other kind, including null
+   */
+  private static LLMObsSpan startLLMObsSpan(String kind, String name, String modelName, String modelProvider, String mlApp, String sessionId) {
+    // a null kind is routed to the default branch rather than failing inside the switch
+    switch (kind == null ? "" : kind) {
+      case "llm":
+        return LLMObs.startLLMSpan(name, modelName, modelProvider, mlApp, sessionId);
+      case "task":
+        return LLMObs.startTaskSpan(name, mlApp, sessionId);
+      case "agent":
+        return LLMObs.startAgentSpan(name, mlApp, sessionId);
+      case "workflow":
+        return LLMObs.startWorkflowSpan(name, mlApp, sessionId);
+      case "tool":
+        return LLMObs.startToolSpan(name, mlApp, sessionId);
+      default:
+        // TODO: add embedding and retrieval spans once support is added
+        throw new RuntimeException("Unsupported kind: " + kind);
+    }
+  }
+
+  /**
+   * Handles POST /sdk/submit_evaluation_metric: submits one evaluation for a freshly
+   * started "test-task" span, built from the "label", "value", "tags" and "ml_app"
+   * fields of the JSON body.
+   *
+   * "ml_app" is only passed to the SDK when the body provides it. The span is finished
+   * even if the submission throws.
+   *
+   * @param ctx request context whose body is the JSON payload
+   * @return an empty JSON object
+   */
+  private static String doSubmitEvaluationMetric(Context ctx) {
+    String requestBody = ctx.body();
+    JSONObject payload = new JSONObject(requestBody);
+
+    // TODO: unlike the Python and Node.js SDKs, the Java SDK
+    // does not take a dictionary for a span context, it needs the span itself.
+    // if it is updated to do so, remove this and use payload.trace_id and payload.span_id instead.
+    LLMObsSpan span = LLMObs.startTaskSpan("test-task", null, null);
+
+    try {
+      String label = payload.optString("label");
+      // NOTE(review): optString always yields a String, so numeric values are submitted
+      // as text - confirm whether a numeric overload should be used for them.
+      String value = payload.optString("value");
+
+      JSONObject tagsObject = payload.optJSONObject("tags");
+      Map<String, Object> tags = tagsObject != null ? tagsObject.toMap() : null;
+
+      // the single-argument optString returns "" for a missing key, which made the
+      // null check below always true; a null default keeps the no-ml_app path reachable
+      String mlApp = payload.optString("ml_app", null);
+
+      if (mlApp != null) {
+        LLMObs.SubmitEvaluation(
+          span,
+          label,
+          value,
+          mlApp,
+          tags
+        );
+      } else {
+        LLMObs.SubmitEvaluation(
+          span,
+          label,
+          value,
+          tags
+        );
+      }
+    } finally {
+      span.finish();
+    }
+
+    return toJson(new HashMap<String, String>());
+  }
+
+  /**
+   * Serializes {@code data} to a JSON string with the shared ObjectMapper.
+   *
+   * @throws RuntimeException wrapping the JsonProcessingException if serialization fails
+   */
+  private static String toJson (Object data) {
+    String serialized;
+    try {
+      serialized = objectMapper.writeValueAsString(data);
+    } catch (JsonProcessingException cause) {
+      throw new RuntimeException(cause);
+    }
+    return serialized;
+  }
+
+  /**
+   * Recursively converts org.json values into plain Java values: a JSONObject becomes a
+   * Map, a JSONArray becomes a List, JSONObject.NULL becomes null, and anything else is
+   * returned unchanged.
+   */
+  private static Object deepConvertJsonToJava(Object obj) {
+    if (obj == JSONObject.NULL) {
+      return null;
+    }
+
+    if (obj instanceof JSONArray) {
+      JSONArray source = (JSONArray) obj;
+      int size = source.length();
+      List<Object> converted = new ArrayList<>(size);
+      for (int idx = 0; idx < size; idx++) {
+        converted.add(deepConvertJsonToJava(source.get(idx)));
+      }
+      return converted;
+    }
+
+    if (obj instanceof JSONObject) {
+      JSONObject source = (JSONObject) obj;
+      Map<String, Object> converted = new HashMap<>();
+      for (String field : source.keySet()) {
+        converted.put(field, deepConvertJsonToJava(source.get(field)));
+      }
+      return converted;
+    }
+
+    // strings, numbers, booleans (and null) need no conversion
+    return obj;
+  }
+}
diff --git a/utils/build/docker/java/openai_app/system_tests_library_version.sh b/utils/build/docker/java/openai_app/system_tests_library_version.sh
new file mode 100755
index 00000000000..7b2f22c2c7d
--- /dev/null
+++ b/utils/build/docker/java/openai_app/system_tests_library_version.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+
+cat /binaries/SYSTEM_TESTS_LIBRARY_VERSION
+
diff --git a/utils/docker_fixtures/_test_agent.py b/utils/docker_fixtures/_test_agent.py
index 8fba201dc08..93157062329 100644
--- a/utils/docker_fixtures/_test_agent.py
+++ b/utils/docker_fixtures/_test_agent.py
@@ -2,6 +2,7 @@
 from collections.abc import Generator
 import contextlib
 import datetime
+import gzip
 import hashlib
 from http import HTTPStatus
 import json
@@ -11,6 +12,7 @@
 from typing import TypedDict, Any, cast
 import urllib.parse
 
+import msgpack
 import pytest
 import requests
 from retry import retry
@@ -386,24 +388,47 @@ def info(self):
         self._write_log("info", resp_json)
         return resp_json
 
+    def _decode_llmobs_body(self, body_b64: str) -> list[Any]:
+        """Decode base64 body; handle gzip (Java), then JSON or MessagePack (Java) encoding.
+
+        Returns a list of events (each event is a dict with 'spans'). Java can send multiple
+        concatenated msgpack objects in one request; we use Unpacker to decode all of them
+        (same as llm-obs test/conftest.py).
+        """
+        decoded = base64.b64decode(body_b64)
+        if decoded[:2] == b"\x1f\x8b":
+            decoded = gzip.decompress(decoded)
+        # JSON (Python/Node tracer): starts with { or [
+        if decoded.lstrip().startswith((b"{", b"[")):
+            parsed = json.loads(decoded)
+            return [parsed] if isinstance(parsed, dict) else parsed
+        # MessagePack (Java tracer): binary format; may be multiple concatenated objects
+        unpacker = msgpack.Unpacker(unicode_errors="replace", strict_map_key=False)
+        unpacker.feed(decoded)
+        return list(unpacker)
+
     def llmobs_requests(self) -> list[Any]:
+        """Return the decoded LLMObs payloads, one list of events per matching request.
+
+        A request matches when its URL ends with the v2 or the v4 ``evp_proxy`` LLMObs path.
+        """
-        reqs = [r for r in self.requests() if r["url"].endswith("/evp_proxy/v2/api/v2/llmobs")]
+        reqs = [
+            r
+            for r in self.requests()
+            if r["url"].endswith("/evp_proxy/v2/api/v2/llmobs") or r["url"].endswith("/evp_proxy/v4/api/v2/llmobs")
+        ]
 
         events = []
         for r in reqs:
-            decoded_body = base64.b64decode(r["body"])
-            events.append(json.loads(decoded_body))
+            events.append(self._decode_llmobs_body(r["body"]))
         return events
 
-    def llmobs_evaluations_requests(self):
+    def llmobs_evaluations_requests(self) -> list[Any]:
+        """Return the first decoded object of each evaluation-metric request.
+
+        A request matches when its URL ends with the v1 or v2 ``eval-metric`` intake path.
+        Only the first decoded object of a body is kept; a body that decodes to no
+        objects raises ``IndexError``.
+        """
         reqs = [
             r
             for r in self.requests()
             if r["url"].endswith("/evp_proxy/v2/api/intake/llm-obs/v1/eval-metric")
             or r["url"].endswith("/evp_proxy/v2/api/intake/llm-obs/v2/eval-metric")
         ]
-
-        return [json.loads(base64.b64decode(r["body"])) for r in reqs]
+        # One decoded body per request (evaluations are typically single JSON per request)
+        decoded_per_request = [self._decode_llmobs_body(r["body"]) for r in reqs]
+        return [events[0] for events in decoded_per_request]
 
     @contextlib.contextmanager
     def snapshot_context(self, token: str, ignores: list[str] | None = None):