Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/trio_core/api/routers/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,25 @@ class CropDescribeRequest(BaseModel):
"to remote backends; ignored (with a warning) by local backends."
),
)
max_tokens: int | None = Field(
default=None,
ge=1,
le=16384,
description=(
"Maximum tokens to generate. None falls back to engine default "
"(512), which is too small for the structured-JSON scene schema "
"and silently truncates responses mid-output — observed as "
"'Expecting , delimiter' parse failures in cortex."
),
)
extra_body: dict | None = Field(
default=None,
description=(
"Backend-specific kwargs forwarded as the OpenAI SDK extra_body "
"(e.g. DashScope's enable_thinking). Honored by RemoteHTTPBackend; "
"ignored by local backends."
),
)


class CropDescribeResponse(BaseModel):
Expand Down Expand Up @@ -680,8 +699,10 @@ async def _crop_describe_inner(req: CropDescribeRequest):
engine.analyze_frame,
frame_chw,
scene_prompt,
max_tokens=req.max_tokens,
response_format=req.response_format,
model=req.model,
extra_body=req.extra_body,
),
)
text = _strip_thinking(result.text or "")
Expand Down
35 changes: 35 additions & 0 deletions tests/test_inference_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,41 @@ async def test_crop_describe_uses_single_composite_vlm_call(monkeypatch):
assert response.entities["people_count"] == 1


@pytest.mark.asyncio
async def test_crop_describe_forwards_max_tokens_and_extra_body(monkeypatch):
    """Guard against crop-describe dropping ``max_tokens`` / ``extra_body``.

    Regression background: both request fields used to be silently discarded
    before the call to ``engine.analyze_frame``, so structured-JSON prompts
    ran with the engine default (512) and were truncated mid-output. Cortex
    logged 2131 'Expecting , delimiter' parse failures in /tmp/vlm.log over
    a single night as a result. Forwarding both fields also keeps
    crop-describe consistent with /describe on the same router.
    """
    # Canned engine reply: a minimal scene payload plus fixed metrics.
    canned_result = VideoResult(
        text='{"summary":"x","scene_type":"s","activity_level":"quiet"}',
        metrics=InferenceMetrics(latency_ms=10.0),
    )

    # Stand-in engine whose analyze_frame call is recorded for inspection.
    fake_engine = MagicMock()
    fake_engine._profile = SimpleNamespace(merge_factor=32)
    fake_engine.analyze_frame.return_value = canned_result
    monkeypatch.setattr(inference, "_get_vlm", lambda: fake_engine)

    # No crops requested; only the two forwarded fields are under test.
    request = inference.CropDescribeRequest(
        image_b64=_image_b64(),
        crops=[],
        max_crops=0,
        max_tokens=4096,
        extra_body={"enable_thinking": False},
    )

    await inference._crop_describe_inner(request)

    analyze_frame = fake_engine.analyze_frame
    assert analyze_frame.call_count == 1

    # Both values must arrive as keyword arguments, unchanged.
    forwarded = analyze_frame.call_args.kwargs
    assert forwarded["max_tokens"] == 4096, (
        "max_tokens must reach analyze_frame; default-512 truncation was the "
        "root cause of the JSON parse failures."
    )
    assert forwarded["extra_body"] == {"enable_thinking": False}


@pytest.mark.asyncio
async def test_crop_describe_uses_summary_field_from_scene_schema(monkeypatch):
"""SCENE_SCHEMA output (lowercase `summary`) should populate description.
Expand Down
Loading