From f08e92ea5c91e1c85479f990568c0593f07535d9 Mon Sep 17 00:00:00 2001 From: chasefortier Date: Tue, 17 Mar 2026 11:31:35 -0400 Subject: [PATCH] OTel Experiment Example --- opik/otel_with_offline_eval_example/README.md | 87 +++++++++++++++++++ .../evaluate.py | 65 ++++++++++++++ .../otel_with_offline_eval_example/llm_app.py | 79 +++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 opik/otel_with_offline_eval_example/README.md create mode 100644 opik/otel_with_offline_eval_example/evaluate.py create mode 100644 opik/otel_with_offline_eval_example/llm_app.py diff --git a/opik/otel_with_offline_eval_example/README.md b/opik/otel_with_offline_eval_example/README.md new file mode 100644 index 0000000..94c5525 --- /dev/null +++ b/opik/otel_with_offline_eval_example/README.md @@ -0,0 +1,87 @@ +# OTel Tracing + Opik Offline Evaluation + +Demonstrates how to use OpenTelemetry for application tracing **alongside** +Opik's offline evaluation workflow. These are complementary tools, not alternatives. 
+ +## Architecture + +``` +┌─────────────────────────────────────┐ ┌───────────────────────────────────────┐ +│ llm_app.py │ │ evaluate.py │ +│ (your application code) │ │ (offline evaluation script) │ +│ │ │ │ +│ - OTel for tracing │ │ - Opik Python SDK │ +│ - Runs in production/dev │ │ - Runs separately (CI, scheduled) │ +│ - Sends spans → Opik OTLP endpoint │ │ - Imports answer_question() from app │ +│ │ │ - Runs task against a dataset │ +│ │ │ - Creates an Experiment in Opik │ +└─────────────────────────────────────┘ └───────────────────────────────────────┘ + │ │ + └──────────────────────┬───────────────────────┘ + ▼ + ┌─────────────────────┐ + │ Opik │ + │ │ + │ Traces (from OTel) │ + │ Experiments (from │ + │ evaluate()) │ + └─────────────────────┘ +``` + +## Key Insight + +| | OTel tracing | `opik.evaluate()` | +|---|---|---| +| Purpose | Observe your app in real-time | Test quality offline against a dataset | +| When it runs | During application execution | In a separate evaluation script | +| What you get in Opik | Traces / Spans | Experiments with scored results | +| SDK | OTel SDK | Opik Python SDK (`pip install opik`) | + +**There is no OTel-native way to run offline experiments.** The offline evaluation +workflow requires the Opik Python SDK because it needs to: +1. Read items from an Opik Dataset +2. Run your LLM task against each item +3. Score outputs with metrics +4. 
Record results as an Experiment in Opik + +## Setup + +```bash +pip install opentelemetry-sdk opentelemetry-exporter-otlp-proto-http opik +``` + +Environment variables for `llm_app.py`: +```bash +export OPIK_OTLP_ENDPOINT="https://your-opik-host/opik/api/v1/private/otel/v1/traces" +export OPIK_API_KEY="your-api-key" +export OPIK_WORKSPACE="your-workspace" +``` + +Environment variables for `evaluate.py`: +```bash +export OPIK_API_KEY="your-api-key" +export OPIK_WORKSPACE="your-workspace" +export OPIK_URL_OVERRIDE="https://your-opik-host/opik/api" +``` + +## Running + +```bash +# Run your application normally (OTel tracing active) +python llm_app.py + +# Run offline evaluation separately +python evaluate.py +``` + +## Notes for self-hosted Opik + +The OTel OTLP endpoint for self-hosted Opik is: +``` +https://<your-opik-host>/opik/api/v1/private/otel/v1/traces +``` + +The Opik SDK base URL override for self-hosted is: +``` +https://<your-opik-host>/opik/api +``` diff --git a/opik/otel_with_offline_eval_example/evaluate.py b/opik/otel_with_offline_eval_example/evaluate.py new file mode 100644 index 0000000..ab414a0 --- /dev/null +++ b/opik/otel_with_offline_eval_example/evaluate.py @@ -0,0 +1,65 @@ +""" +FILE 2: evaluate.py +Separate offline evaluation script — uses the Opik Python SDK. +Import your application function directly and wrap it as an Opik task. + +This does NOT replace OTel tracing in your app. It runs offline experiments +against a dataset and records scored results as an Experiment in Opik. + +Setup: + pip install opik opentelemetry-sdk opentelemetry-exporter-otlp-proto-http + +Environment variables: + OPIK_API_KEY - your Opik API key + OPIK_WORKSPACE - your Opik workspace name + OPIK_URL_OVERRIDE - your self-hosted Opik URL + (e.g. https://your-opik-host/opik/api) +""" + +import opik +from opik.evaluation.metrics import Equals, Hallucination +# Note: Hallucination is an LLM-judge metric — it requires an LLM provider to be +# configured (e.g. set OPENAI_API_KEY). Replace or remove it if you don't have one. 
+
+# The task under evaluation is the application's own entry point:
+# opik.evaluate() invokes it once per dataset item via evaluation_task().
+from llm_app import answer_question
+
+
+# --- Step 1: Create (or get) a dataset ---
+
+client = opik.Opik()
+dataset = client.get_or_create_dataset("capital-cities-qa")
+
+_QA_ITEMS = [
+    {"question": "What is the capital of France?", "expected": "Paris"},
+    {"question": "What is the capital of Japan?", "expected": "Tokyo"},
+    {"question": "What is the capital of Germany?", "expected": "Berlin"},
+]
+dataset.insert(_QA_ITEMS)
+
+
+# --- Step 2: Define the evaluation task ---
+# Thin adapter: maps dataset item fields onto the app function and back.
+
+def evaluation_task(dataset_item: dict) -> dict:
+    """Run the app on one dataset item and shape the result for the metrics."""
+    question = dataset_item["question"]
+    return {
+        "output": answer_question(question),
+        "reference": dataset_item["expected"],  # passed to Equals metric
+        "input": question,  # passed to Hallucination metric
+    }
+
+
+# --- Step 3: Run the experiment ---
+
+_METRICS = [Equals(name="exact_match"), Hallucination(name="hallucination")]
+
+opik.evaluate(
+    dataset=dataset,
+    task=evaluation_task,
+    scoring_metrics=_METRICS,
+    experiment_name="capital-cities-baseline",
+    project_name="mediatek-llm-eval",
+)
diff --git a/opik/otel_with_offline_eval_example/llm_app.py b/opik/otel_with_offline_eval_example/llm_app.py
new file mode 100644
index 0000000..d9a3af1
--- /dev/null
+++ b/opik/otel_with_offline_eval_example/llm_app.py
@@ -0,0 +1,79 @@
+"""
+FILE 1: llm_app.py
+Your application code — uses OpenTelemetry for tracing.
+No Opik SDK required here.
+
+This is the code that runs in production or dev environments.
+OTel spans are exported to Opik via the OTLP endpoint.
+
+Setup:
+    pip install opentelemetry-sdk opentelemetry-exporter-otlp-proto-http
+
+Environment variables:
+    OPIK_OTLP_ENDPOINT - e.g. 
https://your-opik-host/opik/api/v1/private/otel/v1/traces
+    OPIK_API_KEY - your Opik API key
+    OPIK_WORKSPACE - your Opik workspace name
+"""
+
+import os
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+
+
+def setup_otel():
+    """Install a global TracerProvider that exports spans to Opik over OTLP/HTTP.
+
+    Reads OPIK_OTLP_ENDPOINT, OPIK_API_KEY and OPIK_WORKSPACE from the
+    environment; a KeyError is raised if any of them is unset.
+    """
+    exporter = OTLPSpanExporter(
+        endpoint=os.environ["OPIK_OTLP_ENDPOINT"],
+        headers={
+            # Opik's OTLP docs pass the API key as-is (no "Bearer " prefix).
+            "Authorization": os.environ["OPIK_API_KEY"],
+            "Comet-Workspace": os.environ["OPIK_WORKSPACE"],
+        },
+    )
+
+    provider = TracerProvider()
+    provider.add_span_processor(BatchSpanProcessor(exporter))
+    trace.set_tracer_provider(provider)
+
+
+# Obtained at import time, before setup_otel() has installed a provider.
+tracer = trace.get_tracer(__name__)
+
+
+def call_llm(prompt: str) -> str:
+    """Return the reply to *prompt*, recording both on an "llm_call" span.
+
+    The body is a placeholder that echoes the prompt; replace it with your
+    actual model call (OpenAI, vLLM, etc.).
+    """
+    with tracer.start_as_current_span("llm_call") as span:
+        span.set_attribute("input.prompt", prompt)
+        # --- replace with your actual LLM call ---
+        response = f"[LLM response to: {prompt}]"
+        # -----------------------------------------
+        span.set_attribute("output.response", response)
+        return response
+
+
+def answer_question(user_question: str) -> str:
+    """Answer *user_question* via call_llm() inside an "answer_question" span.
+
+    This is the application function that evaluate.py imports and reuses.
+    """
+    with tracer.start_as_current_span("answer_question") as span:
+        span.set_attribute("input.question", user_question)
+        answer = call_llm(user_question)
+        span.set_attribute("output.answer", answer)
+        return answer
+
+
+if __name__ == "__main__":
+    # Configure the exporter only when run as a script, not on import.
+    setup_otel()
+    print(answer_question("What is the capital of France?"))