Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/commit0/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -393,7 +394,7 @@ def evaluate_instance(
keep_first=self.metadata.condenser_keep_first,
)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/gaia/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
from benchmarks.utils.version import IMAGE_TAG_PREFIX
Expand Down Expand Up @@ -336,7 +337,7 @@ def evaluate_instance(
keep_first=self.metadata.condenser_keep_first,
)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/multiswebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -294,7 +295,7 @@ def evaluate_instance(
)

agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/openagentsafety/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from benchmarks.utils.evaluation import Evaluation
from benchmarks.utils.evaluation_utils import construct_eval_output_dir
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
from openhands.sdk import Agent, Conversation, Tool, get_logger
Expand Down Expand Up @@ -455,7 +456,7 @@ def evaluate_instance(
tools.append(Tool(name=DelegateTool.name))

# Create agent
agent = Agent(llm=self.metadata.llm, tools=tools)
agent = Agent(llm=apply_virtual_key(self.metadata.llm), tools=tools)

# Collect events
received_events = []
Expand Down
5 changes: 2 additions & 3 deletions benchmarks/swebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
setup_acp_workspace,
workspace_keepalive,
)
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.args_parser import add_prompt_path_argument, get_parser
from benchmarks.utils.build_utils import ensure_local_image
from benchmarks.utils.console_logging import summarize_instance
Expand Down Expand Up @@ -268,12 +269,10 @@ def evaluate_instance(
keep_first=self.metadata.condenser_keep_first,
)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
# TODO: we can enable security analyzer later
# security_analyzer=LLMSecurityAnalyzer(),
)

assert isinstance(workspace, RemoteWorkspace)
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swebenchmultilingual/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -250,7 +251,7 @@ def evaluate_instance(
if self.metadata.enable_delegation:
tools.append(Tool(name=DelegateTool.name))
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
# TODO: we can enable condenser and security analyzer later
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swebenchmultimodal/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -254,7 +255,7 @@ def evaluate_instance(
keep_first=self.metadata.condenser_keep_first,
)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swefficiency/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
Expand Down Expand Up @@ -308,7 +309,7 @@ def evaluate_instance(
"""
tools = get_default_tools(enable_browser=False)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
)
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swtbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
create_docker_workspace,
remote_image_exists,
)
from benchmarks.utils.litellm_proxy import apply_virtual_key
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -263,7 +264,7 @@ def evaluate_instance(
keep_first=self.metadata.condenser_keep_first,
)
agent = Agent(
llm=self.metadata.llm,
llm=apply_virtual_key(self.metadata.llm),
tools=tools,
system_prompt_kwargs={"cli_mode": True},
condenser=condenser,
Expand Down
19 changes: 19 additions & 0 deletions benchmarks/utils/litellm_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,22 @@ def set_current_virtual_key(key: str | None) -> None:
def get_current_virtual_key() -> str | None:
    """Look up the virtual key stored for the calling thread.

    Returns:
        The ``virtual_key`` attribute of the module's thread-local
        storage, or ``None`` when the attribute has not been set for
        this thread.
    """
    try:
        return _thread_local.virtual_key
    except AttributeError:
        # Nothing has been stored for this thread yet.
        return None


def apply_virtual_key(llm): # type: ignore[no-untyped-def]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: Missing type hints violate project standards.

Per AGENTS.md: "Avoid `# type: ignore` unless absolutely necessary". The `LLM` type is available from the SDK:

Suggested change
def apply_virtual_key(llm): # type: ignore[no-untyped-def]
def apply_virtual_key(llm: LLM) -> LLM:
"""Return an LLM config copy with the per-instance virtual key as api_key.

You'll need to add `from openhands.sdk.llm import LLM` at the top of the file.

"""Return an LLM config copy with the per-instance virtual key as api_key.

If no virtual key is active for this thread, returns the original config
unchanged. This is thread-safe: ``model_copy`` creates a new instance
and ``get_current_virtual_key`` reads from ``threading.local``.

Use this when creating a default (non-ACP) ``Agent`` so that all LLM
calls go through the proxy with the per-instance virtual key, enabling
accurate per-instance cost tracking.
"""
virtual_key = get_current_virtual_key()
if virtual_key is None:
return llm
from pydantic import SecretStr
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: Move import to top of file.

Per project guidelines: "Place all imports at the top of the file unless... circular imports, conditional imports, or imports that need to be delayed for specific reasons."

No circular import risk here since pydantic is external. Move this to the import block at the top:

from pydantic import SecretStr


return llm.model_copy(update={"api_key": SecretStr(virtual_key)})
Loading