Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
209 commits
Select commit Hold shift + click to select a range
4bdae5a
feat: Minimal agent instrumentation for AuthBridge OTEL (Approach A)
Ladas Feb 13, 2026
f80ba0f
feat: Add Starlette OTEL instrumentation for traceparent extraction
Ladas Feb 14, 2026
4cd3104
feat: add sandbox_agent with per-context workspace isolation
Ladas Feb 15, 2026
aa3dd18
fix: use a2a-sdk[http-server] for starlette/sse deps
Ladas Feb 15, 2026
5838a52
feat: add web_fetch tool with domain allowlist from sources.json
Ladas Feb 17, 2026
0bf5a38
feat: Emit LangGraph events as valid JSON for ext_proc parsing
Ladas Feb 17, 2026
2e4cdaa
fix: add MemorySaver checkpointer for multi-turn memory
Ladas Feb 18, 2026
6d83a8f
fix: address security review — interpreter bypass, HITL interrupt, TT…
Ladas Feb 25, 2026
ac9fbce
feat: add C19 workspace cleanup and C20 sub-agent spawning tools
Ladas Feb 25, 2026
14d8719
fix: harden interpreter bypass, path traversal, and approval checks
Ladas Feb 25, 2026
9822f63
feat: wire AsyncPostgresSaver for persistent session checkpointing
Ladas Feb 25, 2026
9f31312
feat: use A2A SDK DatabaseTaskStore for generic session persistence
Ladas Feb 25, 2026
92fc74c
refactor: rename agent from Sandbox Assistant to Sandbox Legion
Ladas Feb 25, 2026
7cf09ba
fix: correct DatabaseTaskStore import path
Ladas Feb 25, 2026
1649027
chore: update uv.lock after adding postgresql dependencies
Ladas Feb 25, 2026
bdb9e49
fix: lazy-init AsyncPostgresSaver with asyncpg pool
Ladas Feb 25, 2026
517cc45
fix: disable SSL for in-cluster postgres connections
Ladas Feb 25, 2026
36519ea
fix: use psycopg_pool for AsyncPostgresSaver (not asyncpg)
Ladas Feb 25, 2026
36cfc18
fix: use from_conn_string context manager for AsyncPostgresSaver
Ladas Feb 25, 2026
123d18c
fix: extract only text from tool-calling model responses
Ladas Feb 26, 2026
ec6fe43
feat: concurrency locks, interpreter bypass, TOFU verification
Ladas Feb 26, 2026
6d28be7
feat(sandbox): wire LangGraphSerializer into agent streaming loop
Ladas Feb 27, 2026
a74359c
feat(sandbox): emit LLM thinking with tool calls + aggregate multi-ta…
Ladas Feb 28, 2026
66ee018
fix(sandbox): add pool_recycle + pool_pre_ping to prevent stale DB co…
Ladas Feb 28, 2026
2e2590b
fix(sandbox): switch TaskStore from asyncpg to psycopg driver
Ladas Mar 1, 2026
048f0de
fix(sandbox): handle LLM 429/quota errors gracefully in SSE stream
Ladas Mar 1, 2026
e489461
fix(sandbox): add CACHE_BUST arg to Dockerfile for fresh builds
Ladas Mar 1, 2026
b83a366
debug: add agent.py line count check to Dockerfile build
Ladas Mar 2, 2026
dd84219
fix(sandbox): OCP arbitrary UID compatibility
Ladas Mar 2, 2026
b9bdc5c
feat(sandbox): wire multi-mode delegate tool into agent
Ladas Mar 2, 2026
939981e
feat(sandbox): add plan-execute-reflect reasoning loop
Ladas Mar 4, 2026
1d40073
feat(sandbox): add loop_id to all reasoning loop events for UI rendering
Ladas Mar 5, 2026
3772845
feat(sandbox): planner prompts for RCA reports and delegation
Ladas Mar 5, 2026
4a6d5be
feat(sandbox): skill loading + child session DB records
Ladas Mar 6, 2026
e742462
docs: add TODO for Session N skill_pack_loader integration
Ladas Mar 6, 2026
699966d
feat(sandbox): declare all tools as skills in agent card
Ladas Mar 6, 2026
716b513
fix(sandbox): revert to single skill, add dynamic scan TODO
Ladas Mar 6, 2026
5f4b512
feat(sandbox): dynamically scan workspace skills into agent card
Ladas Mar 6, 2026
4eee409
fix(sandbox): add missing os import for dynamic skill scanning
Ladas Mar 6, 2026
6f3f9b0
feat(sandbox): clone skill repos at startup for agent card + invocation
Ladas Mar 6, 2026
d9f1d9c
fix(sandbox): use upstream kagenti repo, support @branch, rm stale clone
Ladas Mar 6, 2026
eaf19db
fix(sandbox): scan SKILL.md files by directory, extract description
Ladas Mar 6, 2026
8cdcdca
fix(sandbox): search shared workspace root for skills, support SKILL.md
Ladas Mar 7, 2026
dc525f2
fix(sandbox): install gh CLI, fix delegation, improve prompts (Sessio…
Ladas Mar 7, 2026
a476b9e
feat(sandbox): text-based tool call parser for vLLM compat (Session L+3)
Ladas Mar 7, 2026
90bffff
fix(sandbox): instruct agent to clone repo before gh commands (Sessio…
Ladas Mar 7, 2026
bbaf7ef
fix(sandbox): set origin remote to upstream repo for gh CLI (Session …
Ladas Mar 7, 2026
3f84dc2
fix(sandbox): handle tuple/InvalidToolCall in event serializer (Sessi…
Ladas Mar 7, 2026
e5a63cf
feat(sandbox): add grep+glob tools, fix tuple error, single tool per …
Ladas Mar 7, 2026
0eb583d
fix(sandbox): crash-proof ToolNode + multi tool call support (Session…
Ladas Mar 7, 2026
377da2c
fix(sandbox): compound command permissions + rate-limit retry (Sessio…
Ladas Mar 8, 2026
d2cda9c
fix(sandbox): tools→reflector edge + duplicate prevention (Session R)
Ladas Mar 8, 2026
1762cab
fix(sandbox): add missing git subcommands to allow list (Session R)
Ladas Mar 8, 2026
f1b6a38
fix(sandbox): revert tools→reflector, restore tools→executor edge (Se…
Ladas Mar 8, 2026
f8d1d9b
feat(sandbox): fast-path planner + tool dedup + LiteLLM metadata (Ses…
Ladas Mar 8, 2026
40e84ad
fix(sandbox): parse Llama 4 tool format + never skip reflection (Sess…
Ladas Mar 8, 2026
43e567d
feat: token emission in SSE events + request_id tracking + recursion …
Ladas Mar 8, 2026
1dc08cd
fix(sandbox): shell tool docstring includes workspace path
Ladas Mar 8, 2026
231e857
fix(sandbox): revert f-string docstring on shell tool
Ladas Mar 8, 2026
29850d1
feat: typed event schema + serializer refactor + unit tests
Ladas Mar 8, 2026
38eed6a
fix: reporter_node detects bare decision keywords from reflector
Ladas Mar 9, 2026
add2f90
feat: emit tool_call events for text-parsed tools + reasoning field
Ladas Mar 9, 2026
d8cbe0c
fix: executor prompt enforces tool calling API usage
Ladas Mar 9, 2026
a7c68e6
fix: catch CancelledError, log every graph event for crash diagnosis
Ladas Mar 9, 2026
78c5ca2
fix: agent continues processing on client disconnect
Ladas Mar 9, 2026
be08f6f
fix: parse /shell and bash code blocks as tool calls, clarify prompt
Ladas Mar 9, 2026
4ea981b
revert: remove slash-command parser hack
Ladas Mar 9, 2026
d015770
fix: force tool calling with tool_choice=any
Ladas Mar 9, 2026
952fef9
feat: increase default budget — 40 iterations, 10 tools/step, 1M tokens
Ladas Mar 9, 2026
1ddf88b
feat: budget 100 iterations, hitl at 50
Ladas Mar 9, 2026
eae7ed6
feat: reflector stall detection — force done after 3 no-progress iter…
Ladas Mar 9, 2026
2b8fbe7
feat: planner gets tool call history on replan
Ladas Mar 9, 2026
2d58c86
fix: replan decision should go back to planner, not reporter
Ladas Mar 9, 2026
b8992b2
fix: improve stall detection, executor reliability, configurable budget
Ladas Mar 9, 2026
a08cf37
fix: escape curly braces in executor prompt to prevent format() error
Ladas Mar 9, 2026
622ab48
fix: use _safe_format for prompt templates to prevent agent crashes
Ladas Mar 9, 2026
40bee51
feat: add SERIALIZE and A2A_EMIT pipeline logging
Ladas Mar 9, 2026
2cc4031
feat: shield graph execution from client disconnect cancellation
Ladas Mar 9, 2026
4926c33
fix: include original plan with step status in replan context
Ladas Mar 10, 2026
558d98f
fix: reset stall detection after replan boundary
Ladas Mar 10, 2026
e7b344d
fix: reflector no longer forces done based on step count
Ladas Mar 10, 2026
891c8c3
fix: planner prompt defaults to proper multi-step planning
Ladas Mar 10, 2026
fa80b53
fix: filter dedup sentinel from reporter to prevent final answer leak
Ladas Mar 10, 2026
5454548
feat: router entry node + structured plan persistence across turns
Ladas Mar 10, 2026
8a86bb7
fix: reflector sees actual tool error instead of dedup sentinel
Ladas Mar 10, 2026
b512098
fix: allow export/curl/wget, enable outbound, fix HITL interrupt prop…
Ladas Mar 10, 2026
1be3345
fix: auto-approve all shell commands, remove web_fetch domain check
Ladas Mar 10, 2026
1be0259
fix: handle __interrupt__ graph events (HITL) without crashing
Ladas Mar 10, 2026
0045be7
fix: shell(*:*) wildcard prefix now matches all commands
Ladas Mar 10, 2026
6575673
fix: planner prompt remove broken export GH_TOKEN, reporter shows fai…
Ladas Mar 10, 2026
27b96d9
fix: break replan loop + add prompt visibility to events
Ladas Mar 10, 2026
a744e02
feat: prompt visibility + no-tool executor stall breaker
Ladas Mar 10, 2026
51b5d51
fix: replan loop — max replan limit, state tracking, reflector context
Ladas Mar 10, 2026
c8bb72e
feat: micro-reflection executor — one tool call at a time
Ladas Mar 10, 2026
eeac280
fix: skip lost+found in workspace cleanup (EBS ext4 metadata)
Ladas Mar 10, 2026
9b467bc
fix: don't stall-fail executor after tool errors with micro-reflection
Ladas Mar 10, 2026
134f072
fix: remove force-done overrides — let budget handle termination
Ladas Mar 10, 2026
c5e2543
fix: scope dedup to current plan iteration only
Ladas Mar 10, 2026
6ee5afd
fix: route reflector continue→executor, replan→planner
Ladas Mar 10, 2026
1d0af4a
fix: rename continue→execute in reflector routing
Ladas Mar 10, 2026
aad7ca1
docs: add mermaid graph diagram to agent code
Ladas Mar 10, 2026
39a62b8
fix: add LLM timeout (120s) and retry (3x) to ChatOpenAI
Ladas Mar 10, 2026
2e14a4d
feat: configurable LLM timeout and retries via budget
Ladas Mar 10, 2026
6e5d0dd
fix: persist background graph events after SSE consumer cancellation
Ladas Mar 10, 2026
2f2418b
feat(agent): add micro_reasoning events and full prompt data
Ladas Mar 11, 2026
1f10955
fix(agent): populate empty micro-reasoning with tool call summary
Ladas Mar 11, 2026
4d53186
fix(agent): preserve backend metadata during A2A task save
Ladas Mar 11, 2026
d0a55a8
fix(agent): add _system_prompt, _prompt_messages, model to SandboxState
Ladas Mar 11, 2026
c5164a7
feat(agent): always emit micro_reasoning, add call_id and status to t…
Ladas Mar 11, 2026
6bf25a1
feat(agent): increase prompt truncation to 50KB for full visibility
Ladas Mar 11, 2026
60712bf
fix(agent): unique step index per node invocation
Ladas Mar 11, 2026
5990d16
feat(agent): wire budget.add_tokens() in all reasoning nodes
Ladas Mar 11, 2026
4c0b2b9
feat(agent): budget_update events + general exceeded check in reflector
Ladas Mar 11, 2026
d59c328
feat(agent): add plan_step and iteration to executor events
Ladas Mar 11, 2026
7199dc5
fix(agent): truncate tool output, window executor messages, reflector…
Ladas Mar 11, 2026
913a9c5
fix(agent): reflector sees complete tool call pairs (args + result)
Ladas Mar 11, 2026
b1c57b4
fix(agent): token-based executor windowing and subagent tool filtering
Ladas Mar 11, 2026
a6649fd
fix(agent): prompt preview includes tool call arguments
Ladas Mar 11, 2026
1825d51
fix(agent): bump default max_iterations to 200
Ladas Mar 11, 2026
ca51925
fix(agent): revert max_iterations to 100, keep recursion_limit at 2000
Ladas Mar 11, 2026
a625887
fix(agent): reflector sees remaining steps, prevents premature "done"
Ladas Mar 11, 2026
b028da6
fix(agent): override reflector "done" when plan steps remain
Ladas Mar 11, 2026
2bff904
fix(agent): executor passes current_step in return dict for serializer
Ladas Mar 11, 2026
7124a25
fix(agent): enforce step boundary — executor must not jump to next step
Ladas Mar 11, 2026
7855485
feat(agent): add step_selector node between planner and executor
Ladas Mar 11, 2026
ac1e1f1
feat(agent): step_selector uses LLM to write focused executor brief
Ladas Mar 11, 2026
859f6cd
fix(agent): set recursion_limit default to 300
Ladas Mar 11, 2026
5a3d0b4
fix(agent): restore tool_choice=any — Llama 4 Scout fabricates output…
Ladas Mar 11, 2026
193f77d
feat(agent): configurable tool_choice via SANDBOX_FORCE_TOOL_CHOICE e…
Ladas Mar 11, 2026
d945fd1
feat(agent): text tool parsing controlled by SANDBOX_TEXT_TOOL_PARSIN…
Ladas Mar 11, 2026
5667ea9
fix(agent): reflector assessment echo and executor step propagation
Ladas Mar 11, 2026
09c84be
feat(agent): debug prompts controlled by SANDBOX_DEBUG_PROMPTS env var
Ladas Mar 11, 2026
7fcd9cd
fix(agent): move _DEBUG_PROMPTS after os import (NameError crash)
Ladas Mar 11, 2026
0f73f06
feat(agent): emit step_selector events for UI visibility
Ladas Mar 11, 2026
55b6fb0
fix(agent): add prompt context to early-termination events + gh CLI h…
Ladas Mar 11, 2026
0e11913
fix(agent): always run LLM in reporter — no single-step shortcut
Ladas Mar 11, 2026
1047703
fix(agent): add _budget_summary to SandboxState for budget_update events
Ladas Mar 11, 2026
7e64695
fix(agent): don't stall-detect when executor hits tool call limit
Ladas Mar 11, 2026
834937a
feat(agent): enforce token budget via LiteLLM as single source of truth
Ladas Mar 11, 2026
0d456f5
fix(agent): remove stall detector — let reflector LLM decide
Ladas Mar 12, 2026
5e1ff07
feat(agent): use LLM Budget Proxy for token budget enforcement
Ladas Mar 12, 2026
deee92c
fix: add jq to sandbox agent base image
Ladas Mar 12, 2026
65c7e57
fix(agent): reporter produces real summary on step limit instead of g…
Ladas Mar 12, 2026
31e30b5
fix(agent): remove token budget from local exceeded check
Ladas Mar 12, 2026
3cd7a9d
feat(agent): include bound tool schemas in debug prompt events
Ladas Mar 13, 2026
3914fb1
feat(agent): show full LLM response in debug mode (OpenAI format)
Ladas Mar 13, 2026
3543d2d
feat(agent): add debug prompts to step_selector + import _DEBUG_PROMPTS
Ladas Mar 13, 2026
dcfb643
feat(agent): per-node tool subsets — planner gets read+write, reflect…
Ladas Mar 13, 2026
5ed3aed
feat(agent): tool_choice=auto everywhere, per-node tool loops
Ladas Mar 13, 2026
8792fe6
fix(agent): executor must use tool_choice=any, not auto
Ladas Mar 13, 2026
e869220
fix(agent): tight context window when starting new step
Ladas Mar 13, 2026
b9cefa2
feat(agent): respond_to_user escape tool + STDERR false positive fix
Ladas Mar 13, 2026
d80b6b4
fix(agent): step counter from plan state + structured OTel logging
Ladas Mar 13, 2026
01a49f1
fix(agent): micro_reasoning includes previous tool result + gh CLI flags
Ladas Mar 13, 2026
b04d25e
fix(agent): restore event_index counter, remove dedup sentinel, gh ca…
Ladas Mar 13, 2026
76279f3
fix(agent): relative paths in prompts, disable delegate tool
Ladas Mar 13, 2026
85e4770
fix(agent): remove delegate tool from prompts
Ladas Mar 13, 2026
86558a4
feat(agent): step-scoped executor context + retry + replan from failure
Ladas Mar 13, 2026
054ac70
fix(agent): revert aggressive message isolation, keep 5K/30K window
Ladas Mar 13, 2026
61bc446
feat(agent): step-scoped executor context + error logging
Ladas Mar 13, 2026
f7e2e96
fix(agent): fix used_chars scoping error in executor context
Ladas Mar 13, 2026
f014597
feat(agent): step boundary marker for executor context isolation
Ladas Mar 13, 2026
1de7430
fix(agent): inject step + event_index into ALL event types
Ladas Mar 13, 2026
6349b5c
fix(agent): workspace path in prompt, SystemMessage boundary, tool ca…
Ladas Mar 13, 2026
9f9b259
feat(agent): extract context builders for node isolation
Ladas Mar 13, 2026
f84f3b2
fix(agent): unique event_index per event, exit-code-based tool status
Ladas Mar 13, 2026
a384a96
feat(agent): invoke_llm wrapper guarantees debug output matches LLM i…
Ladas Mar 13, 2026
30afa6c
feat(agent): universal workspace preamble injected via invoke_llm
Ladas Mar 13, 2026
e7f9f77
fix(agent): unique event_index, correct step tracking, micro_step reset
Ladas Mar 13, 2026
054e83b
fix(agent): planner uses invoke_llm for workspace preamble injection
Ladas Mar 13, 2026
21c6d6d
feat(agent): node_visit indexing model + workspace_path in state
Ladas Mar 13, 2026
0cc396d
fix(agent): remove dedup, fix tool loop + 8 new TDD tests
Ladas Mar 13, 2026
ec56ca3
fix(agent): add reflection prompt after each tool result
Ladas Mar 13, 2026
0c4e3b1
fix(agent): inject reflection HumanMessage after EACH tool result
Ladas Mar 13, 2026
8d866d5
fix(agent): explicit invalid gh flags + --help hint on unknown flag
Ladas Mar 13, 2026
cae8818
fix(agent): generic debugging guidelines, remove gh-specific from pro…
Ladas Mar 13, 2026
b582f31
feat(agent): replanner_output event type for replan visibility
Ladas Mar 13, 2026
3349693
fix(agent): executor tool loop shares same node_visit
Ladas Mar 14, 2026
2588933
feat(agent): switch executor to implicit auto tool_choice
Ladas Mar 14, 2026
8fb6f0f
test(agent): try explicit tool_choice="auto" for executor
Ladas Mar 14, 2026
a5cc813
feat(agent): capture bound tools in invoke_llm debug output
Ladas Mar 14, 2026
67043a5
revert(agent): back to tool_choice="any" — auto doesn't work on vLLM
Ladas Mar 14, 2026
7600399
feat(agent): read SANDBOX_FORCE_TOOL_CHOICE env var for tool_choice
Ladas Mar 14, 2026
29f2d2f
feat(agent): two-phase executor for force tool choice mode
Ladas Mar 14, 2026
580184c
fix(agent): Phase 1 uses bare LLM (no tools) for text reasoning
Ladas Mar 14, 2026
9b54b97
refactor(agent): remove all legacy event types
Ladas Mar 14, 2026
03dfa37
fix(agent): preserve actual LLM response on no-tool-count failure
Ladas Mar 14, 2026
07eb813
feat(agent): thinking iterations loop with configurable budget
Ladas Mar 14, 2026
6bd5863
fix(agent): graph.py imports removed _format_llm_response
Ladas Mar 14, 2026
e339558
debug: add _sub_events logging to serializer
Ladas Mar 14, 2026
6cbe33e
fix(agent): add _sub_events to SandboxState for thinking events
Ladas Mar 14, 2026
dcab9d8
fix(agent): step_done exit tool + thinking context fixes
Ladas Mar 14, 2026
95b07e2
fix(agent): concise thinking prompts + smart parallel tool instructions
Ladas Mar 14, 2026
19abd66
feat(agent): PlanStore — append-only nested plan container
Ladas Mar 14, 2026
2217107
feat(agent): wire PlanStore into reasoning nodes
Ladas Mar 14, 2026
3572db0
fix(agent): fix ps variable shadowing in event_serializer step_selector
Ladas Mar 14, 2026
93baa84
feat(agent): reflector history, reporter tools, prompt visibility, th…
Ladas Mar 14, 2026
b235308
feat(agent): invoke_with_tool_loop executes full multi-cycle loop int…
Ladas Mar 14, 2026
ff89c70
fix(agent): executor uses full multi-cycle tool loop internally
Ladas Mar 14, 2026
79d92d1
fix(agent): emit tool_call and tool_result sub_events for UI visibility
Ladas Mar 14, 2026
0c840fa
fix(agent): revert executor to graph-driven tool loop for SSE streaming
Ladas Mar 14, 2026
56b1975
fix(agent): route to reporter when all steps done, fix prompt echo
Ladas Mar 14, 2026
c933b6b
fix(agent): raise limits, fix reporter prompt echo
Ladas Mar 14, 2026
e21ecfa
fix(agent): always inject workspace_path in invoke_with_tool_loop
Ladas Mar 15, 2026
0b72ff7
feat(agent): per-node LLM model overrides via env vars
Ladas Mar 15, 2026
a1be4f0
feat(agent): AgentGraphCard, OTel observability, langgraph_node events
Ladas Mar 15, 2026
ff127dd
fix(agent): pass required args to build_graph for graph card introspe…
Ladas Mar 15, 2026
eb6975c
fix(agent): provide settings dict to PermissionChecker for graph card
Ladas Mar 15, 2026
ce62b0a
fix(agent): remove redundant _current_node, fix O(n^2) byte concat
Ladas Mar 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions a2a/sandbox_agent/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Image for the sandbox agent: Python 3.12 on Debian bookworm (slim), plus
# git, curl, jq and the GitHub CLI, with Python dependencies installed by uv.
FROM python:3.12-slim-bookworm
# Build-time version string (defaults to "main"); re-exported to the runtime
# environment by the ENV instruction further down.
ARG RELEASE_VERSION="main"

# Install system tools for sandboxed execution (git, curl, jq), then add the
# GitHub CLI apt repository and install gh from it. The apt package lists are
# removed after each install step so they do not remain in the layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
curl \
jq \
&& rm -rf /var/lib/apt/lists/* \
# Install GitHub CLI: fetch the repository signing key, register the apt
# source pinned to that key and to the build architecture, then install gh.
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
-o /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
> /etc/apt/sources.list.d/github-cli.list \
&& apt-get update && apt-get install -y --no-install-recommends gh \
&& rm -rf /var/lib/apt/lists/*

# Install uv (used below to install dependencies and to launch the server).
# NOTE(review): uv is not version-pinned here — confirm that is acceptable
# for reproducible builds.
RUN pip install --no-cache-dir uv

WORKDIR /app
# CACHE_BUST is declared but never referenced by name in this file.
# NOTE(review): presumably passed with a changing value (--build-arg) to force
# the following RUN layers to be rebuilt — confirm against the build pipeline.
ARG CACHE_BUST
# Copy the whole build context into /app.
COPY . .
# Install project dependencies from the lockfile. --locked makes the sync fail
# instead of silently updating the lockfile; --link-mode copy copies packages
# into the environment rather than linking them.
RUN uv sync --no-cache --locked --link-mode copy

# Runtime environment: production flag, the release version captured from the
# build arg, and cache directories redirected under /workspace.
ENV PRODUCTION_MODE=True \
RELEASE_VERSION=${RELEASE_VERSION} \
GH_CACHE_DIR=/workspace/.gh-cache \
XDG_CACHE_HOME=/workspace/.cache

# Create the workspace and both cache directories, then hand /app and
# /workspace to UID 1001 with group 0.
# Use chmod g+w so OCP arbitrary UIDs (same group) can write to /app.
RUN mkdir -p /workspace /workspace/.gh-cache /workspace/.cache \
&& chown -R 1001:0 /app /workspace && chmod -R g+w /app /workspace
# Drop root: everything from here on, including CMD, runs as UID 1001.
USER 1001

# Start the agent through the `server` console script declared in
# pyproject.toml; --no-sync skips re-syncing the environment at start-up.
CMD ["uv", "run", "--no-sync", "server"]
1 change: 1 addition & 0 deletions a2a/sandbox_agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Sandbox Agent
38 changes: 38 additions & 0 deletions a2a/sandbox_agent/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Packaging metadata for the sandbox agent.
[project]
name = "sandbox-agent"
version = "0.0.1"
description = "LangGraph agent with sandboxed shell execution and per-context workspace isolation."
authors = []
readme = "README.md"
# NOTE(review): the license text names no version — confirm whether
# "Apache-2.0" is what is intended.
license = { text = "Apache" }
requires-python = ">=3.11"
# Runtime dependencies, by area: A2A SDK, LangGraph/LangChain, PostgreSQL
# persistence, settings, OpenTelemetry instrumentation, HTTP client/server.
dependencies = [
"a2a-sdk[http-server,postgresql]>=0.2.16",
"langgraph>=0.2.55",
"langchain-community>=0.3.9",
"langchain-openai>=0.3.7",
"langgraph-checkpoint-postgres>=2.0.0",
# NOTE(review): both asyncpg and psycopg are listed — confirm that both
# PostgreSQL drivers are still required.
"asyncpg>=0.30.0",
"psycopg[binary]>=3.1.0",
"pydantic-settings>=2.8.1",
# NOTE(review): the next two packages carry no version bound; the resolved
# versions come from the lockfile only.
"opentelemetry-exporter-otlp",
"opentelemetry-instrumentation-starlette",
"openinference-instrumentation-langchain>=0.1.27",
"opentelemetry-instrumentation-openai>=0.34b0",
"httpx>=0.27.0",
"uvicorn>=0.40.0",
"starlette>=0.52.1",
]

# Console script `server`, resolving to `run` in module `sandbox_agent.agent`.
[project.scripts]
server = "sandbox_agent.agent:run"

# Build backend used to produce the package.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Development-only dependencies (test tooling); not part of the runtime set.
[dependency-groups]
dev = [
"pytest>=9.0.2",
"pytest-asyncio>=1.3.0",
]
20 changes: 20 additions & 0 deletions a2a/sandbox_agent/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_comment": "Agent sandbox operation settings. Operations not in allow or deny go through HITL.",
"context_workspace": "/workspace/${CONTEXT_ID}",
"permissions": {
"allow": [
"shell(*:*)",
"network(outbound:*)",
"file(read:${WORKSPACE}/**)", "file(write:${WORKSPACE}/**)",
"file(delete:${WORKSPACE}/**)"
],
"deny": [
"shell(rm -rf /:*)", "shell(rm -rf /*:*)", "shell(sudo:*)",
"shell(chmod 777:*)",
"shell(nc:*)", "shell(ncat:*)",
"file(read:/etc/shadow:*)", "file(write:/etc/**:*)",
"file(read:/proc/**:*)", "shell(mount:*)", "shell(umount:*)",
"shell(chroot:*)", "shell(nsenter:*)"
]
}
}
32 changes: 32 additions & 0 deletions a2a/sandbox_agent/sources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"_comment": "Declares what this agent can access and install. Baked into agent image.",
"agent_type": "python-data-agent",
"package_managers": {
"pip": {
"enabled": true,
"registries": [
{"name": "pypi", "url": "https://pypi.org/simple/", "trusted": true}
],
"max_install_size_mb": 500,
"blocked_packages": ["subprocess32", "pyautogui"]
},
"conda": {"enabled": false},
"npm": {"enabled": false}
},
"web_access": {
"enabled": true,
"allowed_domains": ["github.com", "api.github.com", "raw.githubusercontent.com", "pypi.org", "huggingface.co", "docs.python.org"],
"blocked_domains": ["*.internal", "metadata.google.internal"]
},
"git": {
"enabled": true,
"allowed_remotes": ["https://github.com/*", "https://gitlab.com/*"],
"max_clone_size_mb": 1000
},
"runtime": {
"languages": ["python3.11", "bash"],
"interpreters": {"python": "/usr/bin/python3", "bash": "/bin/bash"},
"max_execution_time_seconds": 300,
"max_memory_mb": 2048
}
}
Empty file.
Loading