From fc90b047ee554ca94eafc0b604fed94446d8e795 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 14:38:26 +0000
Subject: [PATCH 01/26] refactor: drop jsonl_export result echo and server
 rewriter

The inference mixin stamped a `jsonl_export` dict into the executor
result and the server's results router rewrote its `path` / `url` /
`relative_path` on subsequent file uploads. Both are redundant: the
JSONL file lives at `artifacts/{spec.postprocess.jsonl_export.path}`,
the spec already declares that relative path, and the existing
`_artifacts` context plus the generic ref resolvers already turn any
`{path: rel}` into a URL or local path on demand. Removing the echo
also drops the only executor-specific field the server was reading
out of an opaque result payload.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/routers/v1/results.py              | 52 -------------------
 src/worker/executors/mixins/inference.py      |  6 ---
 .../test_transformers_chat_inference.py       |  1 -
 3 files changed, 59 deletions(-)

diff --git a/src/server/routers/v1/results.py b/src/server/routers/v1/results.py
index 9edceb13..00086507 100644
--- a/src/server/routers/v1/results.py
+++ b/src/server/routers/v1/results.py
@@ -25,7 +25,6 @@
     result_file_path,
     write_result,
 )
-from shared.utils.atomic import atomic_write_text
 from shared.utils.manifest import ARTIFACTS_DIR, LOGS_DIR, RESULTS_NAME, sync_manifest
 
 from ...app_state import (
@@ -186,8 +185,6 @@ async def upload_result_file(
             detail=f"Failed to store artifact: {exc}",
         ) from exc
 
-    _rewrite_jsonl_export_paths(task_id, base_dir, target_path)
-
     record = runtime.get_record(task_id)
     expected_artifacts: list[str] = []
     if record:
@@ -333,55 +330,6 @@ async def download_task_logs(
     return FileResponse(target_path)
 
 
-def _rewrite_jsonl_export_paths(
-    task_id: str, base_dir: Path, artifact_path: Path
-) -> None:
-    results_path = base_dir / RESULTS_NAME
-    if not results_path.exists():
-        return
-    try:
-        payload = json.loads(results_path.read_text(encoding="utf-8"))
-    except Exception:
-        return
-
-    filename = artifact_path.name
-    new_abs = str(artifact_path)
-    new_relative = f"{ARTIFACTS_DIR}/{filename}"
-    updated = False
-
-    def _update(entry: dict[str, Any]) -> None:
-        nonlocal updated
-        if not isinstance(entry, dict):
-            return
-        block = entry.get("jsonl_export")
-        if isinstance(block, dict):
-            path_value = str(block.get("path") or "")
-            if path_value and Path(path_value).name == filename:
-                if "worker_path" not in block and path_value != new_abs:
-                    block["worker_path"] = path_value
-                block["path"] = new_abs
-                block["relative_path"] = new_relative
-                block.setdefault("url", f"/api/v1/results/{task_id}/files/{filename}")
-                updated = True
-        children = entry.get("children")
-        if isinstance(children, dict):
-            for child_entry in children.values():
-                if isinstance(child_entry, dict):
-                    _update(child_entry)
-
-    result_entry = payload.get("result") if isinstance(payload, dict) else None
-    if isinstance(result_entry, dict):
-        _update(result_entry)
-
-    if updated:
-        try:
-            atomic_write_text(
-                results_path, json.dumps(payload, ensure_ascii=False, indent=2)
-            )
-        except Exception:
-            pass
-
-
 def _resolve_bundle_sections(include: list[str]) -> tuple[str, ...]:
     if not include:
         return _BUNDLE_SECTIONS_DEFAULT
diff --git a/src/worker/executors/mixins/inference.py b/src/worker/executors/mixins/inference.py
index fb7ae7a2..b2c9d7e3 100644
--- a/src/worker/executors/mixins/inference.py
+++ b/src/worker/executors/mixins/inference.py
@@ -15,7 +15,6 @@
 from shared.utils.json import to_json_serializable
 
 from ..base_executor import ExecutionError
-from ..utils.checkpoints import artifact_ref
 from .data import DataMixin, InferenceEntry, PromptInput
 
 logger = logging.getLogger(__name__)
@@ -292,11 +291,6 @@ def _maybe_export_jsonl(
                 fh.write("\n")
 
         rel_path = target_path.relative_to(artifacts_dir).as_posix()
-        result["jsonl_export"] = {
-            **artifact_ref(rel_path),
-            "record_count": len(records),
-            "fields": list(fields_cfg.keys()),
-        }
         logger.info(
             "Task %s exported %d records to artifacts/%s",
             task_id,
diff --git a/tests/worker/test_transformers_chat_inference.py b/tests/worker/test_transformers_chat_inference.py
index 12fa0f4a..ef05f502 100644
--- a/tests/worker/test_transformers_chat_inference.py
+++ b/tests/worker/test_transformers_chat_inference.py
@@ -127,7 +127,6 @@ def test_transformers_executor_supports_chat_prompts_and_jsonl_export(
     assert [item["metadata"]["row_id"] for item in result["items"]] == ["a", "b"]
     assert result["usage"]["num_requests"] == 2
     assert "latency_sec" in result["usage"]
-    assert result["jsonl_export"]["record_count"] == 2
 
     exported = (tmp_path / "artifacts" / "rows.jsonl").read_text(encoding="utf-8")
     assert '"row_id": "a"' in exported

From 69edc346c8767423d8cb0aa266904d6f29b5d1d7 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 14:40:17 +0000
Subject: [PATCH 02/26] feat(shared): add typed executor result and artifact
 schemas
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce ``BaseExecutorResult`` as the shared shape every executor's
result conforms to, plus ``ArtifactRef`` and ``ArtifactContext`` for the
existing relative-path resolution model. ``extra="allow"`` on the base
class lets the server deserialize executor-specific subclass payloads
without losing fields, so the wire format stays decoupled from the
executor registry. No callers yet — wiring lands in follow-up commits.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/shared/schemas/artifact.py        | 12 +++++++++++
 src/shared/schemas/executor_result.py | 30 +++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 src/shared/schemas/artifact.py
 create mode 100644 src/shared/schemas/executor_result.py

diff --git a/src/shared/schemas/artifact.py b/src/shared/schemas/artifact.py
new file mode 100644
index 00000000..d9a232cc
--- /dev/null
+++ b/src/shared/schemas/artifact.py
@@ -0,0 +1,12 @@
+from pydantic import BaseModel, Field
+
+
+class ArtifactRef(BaseModel):
+    path: str = Field(description="Path relative to the task's artifacts/ dir.")
+
+
+class ArtifactContext(BaseModel):
+    base_dir: str = Field(description="Producing task's output directory.")
+    base_url: str | None = Field(
+        default=None, description="HTTP origin (scheme://host[:port]) for upload."
+    )
diff --git a/src/shared/schemas/executor_result.py b/src/shared/schemas/executor_result.py
new file mode 100644
index 00000000..5ee39721
--- /dev/null
+++ b/src/shared/schemas/executor_result.py
@@ -0,0 +1,30 @@
+from pydantic import BaseModel, ConfigDict, Field
+
+from .artifact import ArtifactContext
+
+
+class BaseExecutorResult(BaseModel):
+    """Common shape for every executor's result payload.
+
+    ``extra="allow"`` lets the server round-trip subclass payloads through
+    this base class without losing executor-specific fields.
+    """
+
+    model_config = ConfigDict(
+        extra="allow",
+        populate_by_name=True,
+        serialize_by_alias=True,
+    )
+
+    children: dict[str, "BaseExecutorResult"] = Field(
+        default_factory=dict,
+        description="Per-child result payloads for task merging.",
+    )
+    artifacts: ArtifactContext | None = Field(
+        default=None,
+        alias="_artifacts",
+        description="Resolution context for relative artifact refs.",
+    )
+
+
+BaseExecutorResult.model_rebuild()

From def4085e193b0f606616721d41f77bde96b2ccab Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 14:46:18 +0000
Subject: [PATCH 03/26] refactor(shared): type ResultEnvelope.result and
 artifact helpers

``ResultEnvelope.result`` is now ``BaseExecutorResult`` instead of
``dict[str, Any]``. ``artifact_ref`` returns ``ArtifactRef`` and
``build_artifact_context`` returns ``ArtifactContext``.
``write_executor_result`` accepts either form and validates dict input
into the base model before stamping the artifact context. The wire
format is preserved by serializing under the ``_artifacts`` alias.
Executors continue to return ``dict[str, Any]`` and are migrated to the
typed subclasses in follow-up commits; the server flattens the typed
``result`` back to a dict at ``_load_stage_result`` so the existing
ref-resolution path is unaffected.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/dispatcher/base.py             |  2 +-
 src/shared/schemas/result.py              |  8 ++++++--
 src/worker/executors/utils/checkpoints.py | 24 +++++++++++++----------
 tests/worker/test_checkpoint_utils.py     | 15 +++++++++-----
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index d3c15bc3..2f7c405b 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -922,7 +922,7 @@ def _load_stage_result(self, stage_task_id: str) -> dict[str, Any]:
                 f"Result for task {stage_task_id} not found at {path}"
             )
         content = json.loads(path.read_text(encoding="utf-8"))
-        return ResultEnvelope.model_validate(content).result
+        return ResultEnvelope.model_validate(content).result.model_dump()
 
     def _dig_path(self, data: Any, parts: list[str]) -> Any:
         current = data
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index 0eea3e5c..953e7fa7 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -9,10 +9,12 @@
 from shared.utils.manifest import prepare_output_dir
 from shared.utils.time import now_iso
 
+from .executor_result import BaseExecutorResult
+
 
 class ResultEnvelope(BaseModel):
     task_id: str = Field(description="Task identifier.")
-    result: dict[str, Any] = Field(description="Result payload data.")
+    result: BaseExecutorResult = Field(description="Result payload data.")
     worker_id: str | None = Field(
         default=None, description="Worker identifier submitting the result."
     )
@@ -40,7 +42,9 @@ def write_result(base_dir: Path, envelope: ResultEnvelope) -> Path:
     return path
 
 
-def write_result_in_envelope(path: Path, task_id: str, result: dict[str, Any]) -> None:
+def write_result_in_envelope(
+    path: Path, task_id: str, result: BaseExecutorResult | dict[str, Any]
+) -> None:
     """Wrap ``result`` in a ``ResultEnvelope`` and persist it at ``path``."""
     path.parent.mkdir(parents=True, exist_ok=True)
     envelope = ResultEnvelope(task_id=task_id, result=result)
diff --git a/src/worker/executors/utils/checkpoints.py b/src/worker/executors/utils/checkpoints.py
index 60ba6053..c5c6e59f 100644
--- a/src/worker/executors/utils/checkpoints.py
+++ b/src/worker/executors/utils/checkpoints.py
@@ -11,6 +11,8 @@
 
 import requests
 
+from shared.schemas.artifact import ArtifactContext, ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.result import write_result_in_envelope
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.http import add_auth_headers
@@ -394,15 +396,15 @@ def is_cleanup_enabled() -> bool:
     return normalized not in {"0", "false", "no", "off"}
 
 
-def artifact_ref(rel_path: str) -> dict[str, str]:
-    """Build an artifact reference dict. `rel_path` is the path relative to
-    `out_dir/artifacts/`"""
-    return {"path": rel_path}
+def artifact_ref(rel_path: str) -> ArtifactRef:
+    """Build an artifact reference. ``rel_path`` is relative to
+    ``out_dir/artifacts/``."""
+    return ArtifactRef(path=rel_path)
 
 
-def build_artifact_context(spec: TaskSpecStrictBase, out_dir: Path) -> dict[str, Any]:
-    """Top-level `_artifacts` context: {base_dir, base_url}. base_url is the
-    destination origin (scheme://host[:port]) for HTTP, else None."""
+def build_artifact_context(spec: TaskSpecStrictBase, out_dir: Path) -> ArtifactContext:
+    """Top-level ``_artifacts`` context. ``base_url`` is the destination
+    origin (scheme://host[:port]) for HTTP, else ``None``."""
     base_dir = Path(out_dir).resolve().as_posix()
     base_url: str | None = None
     destination = get_http_destination(spec)
@@ -410,17 +412,19 @@ def build_artifact_context(spec: TaskSpecStrictBase, out_dir: Path) -> dict[str,
         parsed = urlparse(destination.url)
         if parsed.scheme and parsed.netloc:
             base_url = f"{parsed.scheme}://{parsed.netloc}"
-    return {"base_dir": base_dir, "base_url": base_url}
+    return ArtifactContext(base_dir=base_dir, base_url=base_url)
 
 
 def write_executor_result(
     path: Path,
     task_id: str,
     spec: TaskSpecStrictBase,
-    result: dict[str, Any],
+    result: BaseExecutorResult | dict[str, Any],
 ) -> None:
     """Stamp ``_artifacts`` onto ``result`` and persist the envelope."""
-    result["_artifacts"] = build_artifact_context(spec, path.parent)
+    if isinstance(result, dict):
+        result = BaseExecutorResult.model_validate(result)
+    result.artifacts = build_artifact_context(spec, path.parent)
     write_result_in_envelope(path, task_id, result)
 
 
diff --git a/tests/worker/test_checkpoint_utils.py b/tests/worker/test_checkpoint_utils.py
index 27816fbe..171a0c1a 100644
--- a/tests/worker/test_checkpoint_utils.py
+++ b/tests/worker/test_checkpoint_utils.py
@@ -6,6 +6,7 @@
 
 import pytest
 
+from shared.schemas.artifact import ArtifactContext, ArtifactRef
 from worker.executors.base_executor import TaskReference
 from worker.executors.utils import checkpoints
 
@@ -31,7 +32,9 @@ def _task(
 
 class TestArtifactRef:
     def test_returns_path_only(self) -> None:
-        assert checkpoints.artifact_ref("images/foo.png") == {"path": "images/foo.png"}
+        assert checkpoints.artifact_ref("images/foo.png") == ArtifactRef(
+            path="images/foo.png"
+        )
 
 
 class TestBuildArtifactContext:
@@ -39,15 +42,17 @@ def test_http_destination_strips_api_suffix(self, tmp_path: Path) -> None:
         out_dir = tmp_path / "task-1"
         out_dir.mkdir()
         ctx = checkpoints.build_artifact_context(_task().spec, out_dir)
-        assert ctx["base_dir"] == out_dir.resolve().as_posix()
-        assert ctx["base_url"] == "http://host:8010"
+        assert ctx == ArtifactContext(
+            base_dir=out_dir.resolve().as_posix(), base_url="http://host:8010"
+        )
 
     def test_local_destination_leaves_base_url_none(self, tmp_path: Path) -> None:
         out_dir = tmp_path / "task-1"
         out_dir.mkdir()
         ctx = checkpoints.build_artifact_context(_task("local").spec, out_dir)
-        assert ctx["base_dir"] == out_dir.resolve().as_posix()
-        assert ctx["base_url"] is None
+        assert ctx == ArtifactContext(
+            base_dir=out_dir.resolve().as_posix(), base_url=None
+        )
 
 
 class TestMaybeUploadArtifacts:

From 33785b6540c2aa2ef400ea3779de44e2db5e6abe Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 14:56:49 +0000
Subject: [PATCH 04/26] refactor(worker): type non-inference, non-training
 executor results

Each migrated executor now declares its own ``BaseExecutorResult``
subclass (``EchoResult``, ``APIResult``, ``SSHResult``,
``DataProfilingResult``, ``DataRetrievalResult``) and ``run()`` returns
the typed model in place of an opaque ``dict[str, Any]``. The runner
and the governance mixin accept either form during the transition
(``BaseExecutorResult | dict[str, Any]``); inference and training
executors migrate in follow-up commits. The base ``Executor.run`` return
type is widened correspondingly. ``APIResult.response_json`` carries the
``json`` wire-format alias to avoid shadowing ``BaseModel.json``.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/api_executor.py          | 60 ++++++++++++-------
 src/worker/executors/base_executor.py         |  8 ++-
 .../executors/data_profiling_executor.py      | 32 +++++-----
 .../executors/data_retrieval_executor.py      | 52 +++++++++-------
 src/worker/executors/echo_executor.py         | 21 ++++---
 src/worker/executors/mixins/governance.py     | 10 +++-
 src/worker/executors/ssh_executor.py          | 19 ++++--
 src/worker/runner.py                          | 22 ++++---
 tests/worker/test_agent_connector.py          |  6 +-
 9 files changed, 142 insertions(+), 88 deletions(-)

diff --git a/src/worker/executors/api_executor.py b/src/worker/executors/api_executor.py
index 4129ce0f..45be3269 100644
--- a/src/worker/executors/api_executor.py
+++ b/src/worker/executors/api_executor.py
@@ -5,7 +5,9 @@
 from typing import Any, ClassVar
 
 import httpx
+from pydantic import Field
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import ApiSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -16,6 +18,19 @@
 _ClientKey = tuple[str, float, bool, bool]
 
 
+class APIResult(BaseExecutorResult):
+    ok: bool
+    executor: str
+    method: str
+    url: str
+    status_code: int
+    truncated: bool = False
+    headers: dict[str, str] | None = None
+    response_json: Any = Field(default=None, alias="json")
+    usage: dict[str, Any] | None = None
+    text: str | None = None
+
+
 class APIExecutor(Executor):
     """Executor that performs a single HTTP request defined by task YAML.
 
@@ -89,7 +104,7 @@ def cleanup_after_run(self) -> None:
         """Close the connection pool when the runner deactivates this executor."""
         self.close_all_clients()
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> APIResult:
         spec = self.require_spec(task, ApiSpecStrict)
         api_cfg = spec.api or {}
         if not isinstance(api_cfg, dict):
@@ -173,17 +188,17 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             body_bytes = body_bytes[:max_body_bytes]
             truncated = True
 
-        result: dict[str, Any] = {
-            "ok": resp.is_success,
-            "executor": self.name,
-            "method": method,
-            "url": str(resp.url),
-            "status_code": resp.status_code,
-            "truncated": truncated,
-        }
+        result = APIResult(
+            ok=resp.is_success,
+            executor=self.name,
+            method=method,
+            url=str(resp.url),
+            status_code=resp.status_code,
+            truncated=truncated,
+        )
 
         if include_headers:
-            result["headers"] = dict(resp.headers)
+            result.headers = dict(resp.headers)
 
         body_text: str | None = None
         if return_body:
@@ -191,26 +206,25 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             body_text = body_bytes.decode(encoding, errors="replace")
 
         if parse_json:
-            result["json"] = resp.json()
-            if not isinstance(result["json"], dict):
+            result.response_json = resp.json()
+            if not isinstance(result.response_json, dict):
                 raise ExecutionError("Response is not a valid JSON mapping")
-            if isinstance(result["json"], dict):
-                usage = result["json"].get("usage")
-                if not isinstance(usage, dict):
-                    raise ExecutionError(
-                        "spec.api.response.parse_json is true but response JSON "
-                        f"does not contain usage info: {result['json']}"
-                    )
-                result["usage"] = usage
+            usage = result.response_json.get("usage")
+            if not isinstance(usage, dict):
+                raise ExecutionError(
+                    "spec.api.response.parse_json is true but response JSON "
+                    f"does not contain usage info: {result.response_json}"
+                )
+            result.usage = usage
             try:
-                result["text"] = result["json"]["choices"][0]["message"]["content"]
+                result.text = result.response_json["choices"][0]["message"]["content"]
             except Exception as exc:
                 raise ExecutionError(
                     "spec.api.response.parse_json is true but response JSON "
-                    f"does not contain message.content: {result['json']}"
+                    f"does not contain message.content: {result.response_json}"
                 ) from exc
         elif return_body:
-            result["text"] = body_text
+            result.text = body_text
 
         if raise_for_status and resp.is_error:
             if body_text:
diff --git a/src/worker/executors/base_executor.py b/src/worker/executors/base_executor.py
index f4f43f90..ebb58ed4 100644
--- a/src/worker/executors/base_executor.py
+++ b/src/worker/executors/base_executor.py
@@ -27,6 +27,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict:
 from pathlib import Path
 from typing import Any, TypeVar
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks import MergedChildTaskStrict
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.worker_message import WorkerHardware, WorkerTaskMessage
@@ -84,7 +85,9 @@ def prepare(self) -> None:
         return None
 
     @abstractmethod
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(
+        self, task: ExecutorTask, out_dir: Path
+    ) -> BaseExecutorResult | dict[str, Any]:
         """Execute a single task.
 
         Args:
@@ -93,7 +96,8 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             if needed.
 
         Returns:
-            A JSON-serializable dictionary summarizing the result.
+            A ``BaseExecutorResult`` subclass instance (preferred) or a
+            JSON-serializable dictionary summarizing the result.
 
         Raises:
             ExecutionError: for expected, user-facing failures.
diff --git a/src/worker/executors/data_profiling_executor.py b/src/worker/executors/data_profiling_executor.py
index bfb7678f..20b94645 100644
--- a/src/worker/executors/data_profiling_executor.py
+++ b/src/worker/executors/data_profiling_executor.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 from typing import Any
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import DataProfilingSpecStrict
 from shared.utils.json import to_json_serializable, validate_keys
 
@@ -19,19 +20,25 @@
 logger = logging.getLogger(__name__)
 
 
+class DataProfilingResult(BaseExecutorResult):
+    ok: bool = True
+    type: str = "sql"
+    template: str | None = None
+    cost_estimates: dict[str, Any] | None = None
+
+
 class DataProfilingExecutor(DataMixin, Executor):
     """Executor that estimates SQL query costs by sampling SQL template params."""
 
     name = "data_profiling"
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> DataProfilingResult:
         spec = self.require_spec(task, DataProfilingSpecStrict)
         task_id = task.task_id
         merge_children = task.merged_children or []
 
         result = self._run_single_profile(spec, task_id)
 
-        child_results: dict[str, dict[str, Any]] = {}
         for child in merge_children:
             child_id = child.task_id
             child_spec = child.spec
@@ -39,16 +46,13 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 raise ExecutionError(
                     "Merged child spec must be data_profiling for merged profiling"
                 )
-            child_results[child_id] = self._run_single_profile(child_spec, child_id)
-
-        if child_results:
-            result["children"] = child_results
+            result.children[child_id] = self._run_single_profile(child_spec, child_id)
 
         return result
 
     def _run_single_profile(
         self, spec: DataProfilingSpecStrict, task_id: str
-    ) -> dict[str, Any]:
+    ) -> DataProfilingResult:
         data_cfg = spec.data
         if not isinstance(data_cfg, dict):
             raise ExecutionError(
@@ -100,14 +104,12 @@ def _run_single_profile(
             connection_string, queries, params_rows
         )
 
-        result: dict[str, Any] = {
-            "ok": True,
-            "type": "sql",
-            "template": template_str,
-            "cost_estimates": cost_estimates,
-        }
-
-        return result
+        return DataProfilingResult(
+            ok=True,
+            type="sql",
+            template=template_str,
+            cost_estimates=cost_estimates,
+        )
 
     def _sample_template_queries(
         self,
diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index 0321f147..3775284b 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from PIL import Image
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import DataRetrievalSpecStrict
 from shared.utils.json import validate_keys
 
@@ -27,10 +28,18 @@
 logger = logging.getLogger(__name__)
 
 
+class DataRetrievalResult(BaseExecutorResult):
+    ok: bool = True
+    type: str | None = None
+    items: list[dict[str, Any]] = []
+    count: int | None = None
+    metadata: dict[str, Any] | None = None
+
+
 class DataRetrievalExecutor(DataMixin, Executor):
     name = "data_retrieval"
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> DataRetrievalResult:
         spec = self.require_spec(task, DataRetrievalSpecStrict)
         task_id = task.task_id
         with self._task_span(
@@ -72,7 +81,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
 
     def _run_sql(
         self, data_cfg: dict[str, Any], context: dict[str, Any]
-    ) -> dict[str, Any]:
+    ) -> DataRetrievalResult:
         """
         Execute SQL queries based on the provided data configuration and context.
 
@@ -144,18 +153,18 @@ def _run_sql(
                     }
                 )
 
-        return {
-            "ok": True,
-            "items": items,
-            "count": len(items),
-        }
+        return DataRetrievalResult(
+            ok=True,
+            items=items,
+            count=len(items),
+        )
 
     def _run_s3(
         self,
         data_cfg: dict[str, Any],
         context: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> DataRetrievalResult:
         validate_keys(
             data_cfg,
             "DataRetrievalExecutor.spec.data",
@@ -207,20 +216,19 @@ def _run_s3(
                     item["params"] = params_rows
                 items.append(item)
 
-        result = {
-            "ok": True,
-            "type": "s3",
-            "items": items,
-            "metadata": s3_result["metadata"],  # type: ignore
-        }
-        return result
+        return DataRetrievalResult(
+            ok=True,
+            type="s3",
+            items=items,
+            metadata=s3_result["metadata"],  # type: ignore
+        )
 
     def _run_agent(
         self,
         data_cfg: dict[str, Any],
         context: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> DataRetrievalResult:
         """Drive lumid.data's data agent for NL-driven retrieval."""
         validate_keys(
             data_cfg,
@@ -314,12 +322,12 @@ def _run_agent(
                     }
                 )
 
-        return {
-            "ok": True,
-            "type": "agent",
-            "items": items,
-            "count": len(items),
-        }
+        return DataRetrievalResult(
+            ok=True,
+            type="agent",
+            items=items,
+            count=len(items),
+        )
 
     def _load_table(self, path: Path, output_format: str) -> pd.DataFrame:
         """Load the materialized retrieval file into a DataFrame."""
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index 480938a4..884cdcf9 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import Any
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import EchoSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -14,6 +15,12 @@
 type EchoItem = str | dict[str, str]
 
 
+class EchoResult(BaseExecutorResult):
+    ok: bool = True
+    items: list[dict[str, Any]] = []
+    count: int = 0
+
+
 class EchoExecutor(DataMixin, Executor):
     name = "echo"
 
@@ -55,7 +62,7 @@ def _resolve_item(self, item: EchoItem, context: dict[str, Any]) -> Any:
                 "a string literal or a mapping"
             )
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> EchoResult:
         spec = self.require_spec(task, EchoSpecStrict)
         task_id = task.task_id.strip()
         with self._task_span(
@@ -81,18 +88,16 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 resolved = self._resolve_item(item, context)
                 self._append_outputs(merged_items, resolved)
 
-            payload: dict[str, Any] = {
-                "ok": True,
-                "items": merged_items,
-                "count": len(merged_items),
-            }
+            result = EchoResult(
+                ok=True, items=merged_items, count=len(merged_items)
+            )
             deps = self._extract_source_data_ids(spec)
             dependencies_by_task = {task_id: deps}
 
             self._dump_to_governance(
                 task_id=task_id,
-                result=payload,
+                result=result,
                 dependencies_by_task=dependencies_by_task,
             )
         maybe_upload_traces(task, out_dir, logger=logger)
-        return payload
+        return result
diff --git a/src/worker/executors/mixins/governance.py b/src/worker/executors/mixins/governance.py
index 30adfcc9..7a7650ec 100644
--- a/src/worker/executors/mixins/governance.py
+++ b/src/worker/executors/mixins/governance.py
@@ -13,6 +13,7 @@
 
 from opentelemetry.trace import Span as OTelSpan
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.governance import (
     READY_SPAN_NAME,
     TASK_SPAN_NAME,
@@ -236,17 +237,20 @@ def _extract_source_data_ids(self, spec: TaskSpecStrictBase) -> list[str]:
     def _dump_to_governance(
         self,
         task_id: str,
-        result: dict[str, Any],
+        result: BaseExecutorResult | dict[str, Any],
         dependencies_by_task: dict[str, list[str]],
     ) -> None:
         """Write parent + merged-child results and emit asset/lineage rows."""
         parent_deps = dependencies_by_task.get(task_id, [])
-        children_payload = result.get("children", {})
+        payload = (
+            result.model_dump() if isinstance(result, BaseExecutorResult) else result
+        )
+        children_payload = payload.get("children", {}) or {}
 
         collection_jobs: list[dict[str, Any]] = [
             {
                 "task_id": task_id,
-                "result": result,
+                "result": payload,
                 "deps": parent_deps,
                 "is_parent": True,
             }
diff --git a/src/worker/executors/ssh_executor.py b/src/worker/executors/ssh_executor.py
index ef6d7958..ac695e36 100644
--- a/src/worker/executors/ssh_executor.py
+++ b/src/worker/executors/ssh_executor.py
@@ -29,6 +29,7 @@
 
 import requests
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.components.resources import GPURequirements
 from shared.tasks.specs.ssh import (
     SSHInputSpec,
@@ -55,6 +56,13 @@
     TaskCancelledError,
 )
 
+
+class SSHResult(BaseExecutorResult):
+    session_id: str
+    exit_code: int
+    command: str | None = None
+    entrypoint: str | None = None
+
 try:
     import docker
     from docker import DockerClient
@@ -425,7 +433,7 @@ def teardown(self) -> None:
     # Main execution
     # ------------------------------------------------------------------ #
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> SSHResult:
         spec = self.require_spec(task, SSHSpecStrict)
         cfg = SSHConfig.from_spec(spec, self._config, self._hardware)
         access_mode = cfg.access_mode
@@ -539,16 +547,17 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 cfg.output,
                 mount_plan,
             )
-            result: dict[str, Any] = {"session_id": session_id, "exit_code": exit_code}
+            result = SSHResult(session_id=session_id, exit_code=exit_code)
             if interactive:
-                result.update(session_info)
+                for key, value in session_info.items():
+                    setattr(result, key, value)
             else:
                 # Keep as fallback — captures any output the streaming thread missed.
                 self._save_container_logs(container, out_dir)
                 if cfg.command is not None:
-                    result["command"] = cfg.command
+                    result.command = cfg.command
                 if cfg.entrypoint is not None:
-                    result["entrypoint"] = cfg.entrypoint
+                    result.entrypoint = cfg.entrypoint
             if mount_plan.copy_output_path:
                 self._copy_output_directory(
                     container,
diff --git a/src/worker/runner.py b/src/worker/runner.py
index eef7e292..f009532c 100644
--- a/src/worker/runner.py
+++ b/src/worker/runner.py
@@ -10,6 +10,7 @@
 
 import requests
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks import MergedChildTaskStrict
 from shared.tasks.specs import InferenceSpecStrict, TaskSpecStrictBase
 from shared.tasks.worker_message import (
@@ -127,16 +128,17 @@ def _write_results(
         spec: TaskSpecStrictBase,
         merged_children: list[MergedChildTaskStrict],
         out_dir: Path,
-        result: dict[str, Any] | None,
+        result: BaseExecutorResult | dict[str, Any] | None,
     ):
         if result is None:
             return
         self._write_single_result(task_id, spec, out_dir, result)
 
         child_lookup = {entry.task_id: entry for entry in merged_children}
-        children_payload = (
-            result.get("children") if isinstance(result, dict) else {}
-        ) or {}
+        if isinstance(result, BaseExecutorResult):
+            children_payload: dict[str, Any] = dict(result.children)
+        else:
+            children_payload = result.get("children") or {}
         for child_id, child_result in children_payload.items():
             child_info = child_lookup.get(child_id)
             if child_info is None:
@@ -151,7 +153,7 @@ def _write_single_result(
         task_id: str,
         spec: TaskSpecStrictBase,
         out_dir: Path,
-        payload: dict[str, Any] | None,
+        payload: BaseExecutorResult | dict[str, Any] | None,
     ):
         if payload is None:
             return
@@ -179,7 +181,10 @@ def _simulate_bandwidth_delay(self, payload_bytes: int, destination: str) -> Non
         time.sleep(delay)
 
     def _maybe_emit_http(
-        self, task_id: str, spec: TaskSpecStrictBase, result: dict[str, Any]
+        self,
+        task_id: str,
+        spec: TaskSpecStrictBase,
+        result: BaseExecutorResult | dict[str, Any],
     ) -> None:
         """Send task results to an HTTP endpoint when requested by the spec."""
         destination = get_http_destination(spec)
@@ -188,9 +193,12 @@ def _maybe_emit_http(
 
         url = destination.url
         ignore_error = destination.ignore_error
+        result_dict = (
+            result.model_dump() if isinstance(result, BaseExecutorResult) else result
+        )
         payload = {
             "task_id": task_id,
-            "result": result,
+            "result": result_dict,
             "worker_id": self.lifecycle.worker_id,
         }
         payload_size = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))
diff --git a/tests/worker/test_agent_connector.py b/tests/worker/test_agent_connector.py
index b4fd1eb0..f3b07f32 100644
--- a/tests/worker/test_agent_connector.py
+++ b/tests/worker/test_agent_connector.py
@@ -182,9 +182,9 @@ def test_renders_description_from_params_and_yields_table_item(
             )
             result = executor._run_agent(data_cfg, context, out_dir)
 
-        assert result["ok"] is True
-        assert result["count"] == 1
-        item = result["items"][0]
+        assert result.ok is True
+        assert result.count == 1
+        item = result.items[0]
         assert "fetch NVDA quotes for 2024-01-01" in item["description"]
         assert item["rows"] == 1
         assert item["run_id"] == "run-fake"

From 14b8aaea4455c26b7bbfa5f71ac543d85f5af25d Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 15:06:05 +0000
Subject: [PATCH 05/26] refactor(worker): type inference-family executor
 results

VLLM, transformers, diffusers, omni, RAG, agent, and the MP wrapper now
return typed ``BaseExecutorResult`` subclasses (``VLLMResult``,
``TransformersResult``, ``DiffusersResult``, ``OmniResult``,
``RAGResult``, ``AgentResult``). Internals keep building the result as a
dict and validate into the model at the return boundary, preserving the
wire shape. ``MPExecutor`` widens its return signature to pass through
either form from the inner executor. The transformers inference test
switches to attribute access on the typed result.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/agent_executor.py        | 16 +++++++-
 src/worker/executors/diffusers_executor.py    | 12 +++++-
 src/worker/executors/mp_executor.py           |  5 ++-
 src/worker/executors/omni_executor_base.py    | 10 ++++-
 src/worker/executors/rag_executor.py          | 38 +++++++++++--------
 src/worker/executors/transformers_executor.py | 16 +++++++-
 src/worker/executors/vllm_executor.py         | 14 +++++--
 .../test_transformers_chat_inference.py       | 11 +++---
 8 files changed, 90 insertions(+), 32 deletions(-)

diff --git a/src/worker/executors/agent_executor.py b/src/worker/executors/agent_executor.py
index c12d4003..d0262dd7 100644
--- a/src/worker/executors/agent_executor.py
+++ b/src/worker/executors/agent_executor.py
@@ -16,6 +16,8 @@
 
 from datasets import load_dataset
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import AgentSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -54,6 +56,16 @@ def _resolve_task_timeout(agent: dict[str, Any] | None) -> int:
 logger = logging.getLogger("worker.agent")
 
 
+class AgentResult(BaseExecutorResult):
+    ok: bool
+    model: str
+    items: list[dict[str, Any]] = []
+    usage: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
+    agent_output: ArtifactRef | None = None
+    batch_summary_file: ArtifactRef | None = None
+
+
 class AgentExecutor(Executor):
     """Agent executor using youtu-agent (utu) framework"""
 
@@ -223,7 +235,7 @@ def prepare_data(self, spec: AgentSpecStrict) -> None:
         else:
             raise ExecutionError(f"Unsupported spec.data.type: {dtype!r}")
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
         """Execute agent tasks using youtu-agent (utu) framework"""
         self.ensure_dir(out_dir)
 
@@ -321,7 +333,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             maybe_upload_artifacts(task, out_dir, logger=logger)
 
             logger.info(f"Agent execution completed: {len(self._tasks)} task(s)")
-            return output
+            return AgentResult.model_validate(output)
 
         except ExecutionError:
             raise
diff --git a/src/worker/executors/diffusers_executor.py b/src/worker/executors/diffusers_executor.py
index f74fb8e4..8855b1ac 100644
--- a/src/worker/executors/diffusers_executor.py
+++ b/src/worker/executors/diffusers_executor.py
@@ -14,6 +14,8 @@
 
 from PIL import Image
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import DiffusionSpecStrict
 
 from ..utils.logging import configure_hf_library_logging
@@ -45,6 +47,12 @@
 logger = logging.getLogger(__name__)
 
 
+class DiffusersResult(BaseExecutorResult):
+    ok: bool = True
+    model: str | None = None
+    images: list[ArtifactRef] = []
+
+
 class DiffusersExecutor(DataMixin, Executor):
     """Executor that runs text-to-image generation via Hugging Face Diffusers."""
 
@@ -240,7 +248,7 @@ def _encode_and_combine_prompts(
 
         return combined_pos, combined_neg, user_pos_pooled, user_neg_pooled
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> DiffusersResult:
         configure_hf_library_logging()
         spec = self.require_spec(task, DiffusionSpecStrict)
         task_id = task.task_id.strip()
@@ -250,7 +258,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             response = self._run_inner(spec, task_id, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return response
+        return DiffusersResult.model_validate(response)
 
     def _run_inner(
         self,
diff --git a/src/worker/executors/mp_executor.py b/src/worker/executors/mp_executor.py
index f2379fe2..db92eebc 100644
--- a/src/worker/executors/mp_executor.py
+++ b/src/worker/executors/mp_executor.py
@@ -21,6 +21,7 @@
 
 import psutil
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.worker_message import WorkerHardware
 from worker.config import WorkerConfig
 
@@ -439,7 +440,9 @@ def _loop() -> None:
         self._log_thread = t
         t.start()
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(
+        self, task: ExecutorTask, out_dir: Path
+    ) -> BaseExecutorResult | dict[str, Any]:
         with self._lock:
             if self._shutdown:
                 logger.info("Starting worker subprocess for %s", self.name)
diff --git a/src/worker/executors/omni_executor_base.py b/src/worker/executors/omni_executor_base.py
index 87e36f93..86021bbd 100644
--- a/src/worker/executors/omni_executor_base.py
+++ b/src/worker/executors/omni_executor_base.py
@@ -19,6 +19,7 @@
 
 import yaml
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.parsing import to_bool, to_int
 
@@ -40,6 +41,11 @@
 logger = logging.getLogger(__name__)
 
 
+class OmniResult(BaseExecutorResult):
+    ok: bool = True
+    model: str | None = None
+
+
 class OmniExecutorBase(InferenceMixin, Executor):
     """Shared base for Omni-family executors.
 
@@ -58,7 +64,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._omni_spec: tuple[Any, ...] | None = None
         self._stage_configs_tmp: Path | None = None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> OmniResult:
         spec = self.require_spec(task, self._TASK_SPEC_TYPE)
         spec_dict = spec.model_dump(by_alias=True)
         out_dir = Path(out_dir).resolve()
@@ -68,7 +74,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             result = self._run_inner(task, spec, spec_dict, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return result
+        return OmniResult.model_validate(result)
 
     @abstractmethod
     def _run_inner(
diff --git a/src/worker/executors/rag_executor.py b/src/worker/executors/rag_executor.py
index 839041bf..a1634258 100644
--- a/src/worker/executors/rag_executor.py
+++ b/src/worker/executors/rag_executor.py
@@ -16,6 +16,7 @@
 from datasets import load_dataset
 from qdrant_client import QdrantClient, models
 
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import RagSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -24,10 +25,20 @@
 logger = logging.getLogger("worker.rag")
 
 
+class RAGResult(BaseExecutorResult):
+    ok: bool = True
+    executor: str
+    qdrant: dict[str, Any]
+    embedding: dict[str, Any]
+    search: dict[str, Any]
+    queries: list[dict[str, Any]] = []
+    usage: dict[str, Any] | None = None
+
+
 class RAGExecutor(Executor):
     name = "rag"
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> RAGResult:
         start_ts = time.time()
         spec = self.require_spec(task, RagSpecStrict)
 
@@ -180,22 +191,19 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 }
             )
 
-        # Compose response
-        out: dict[str, Any] = {
-            "ok": True,
-            "executor": self.name,
-            "qdrant": {"collection": collection, "url": url},
-            "embedding": {"model": model_name},
-            "search": {"top_k": top_k},
-            "queries": results_per_query,
-            "usage": {
+        logger.info(
+            "RAG query completed queries=%d total_results=%d", len(queries), total_items
+        )
+        return RAGResult(
+            ok=True,
+            executor=self.name,
+            qdrant={"collection": collection, "url": url},
+            embedding={"model": model_name},
+            search={"top_k": top_k},
+            queries=results_per_query,
+            usage={
                 "latency_sec": round(time.time() - start_ts, 4),
                 "num_queries": len(queries),
                 "total_results": total_items,
             },
-        }
-
-        logger.info(
-            "RAG query completed queries=%d total_results=%d", len(queries), total_items
         )
-        return out
diff --git a/src/worker/executors/transformers_executor.py b/src/worker/executors/transformers_executor.py
index c4a3b586..c8a32e95 100644
--- a/src/worker/executors/transformers_executor.py
+++ b/src/worker/executors/transformers_executor.py
@@ -56,6 +56,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.governance import SpanType
 from shared.tasks.specs import (
     EmbeddingSpecStrict,
@@ -115,6 +117,16 @@
 logger = logging.getLogger(__name__)
 
 
+class TransformersResult(BaseExecutorResult):
+    ok: bool = True
+    model: str | None = None
+    items: list[dict[str, Any]] = []
+    usage: dict[str, Any] | None = None
+    count: int | None = None
+    embedding_file: ArtifactRef | None = None
+    image_group_sizes: list[int] | None = None
+
+
 class HFTransformersExecutor(InferenceMixin, Executor):
     """Executor that runs text generation via Hugging Face Transformers."""
 
@@ -384,7 +396,7 @@ def _detect_finish_reason(
             return "length"
         return None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:  # type: ignore[override]
+    def run(self, task: ExecutorTask, out_dir: Path) -> TransformersResult:
         configure_hf_library_logging()
         spec = task.spec
         if not isinstance(spec, (InferenceSpecStrict, EmbeddingSpecStrict)):
@@ -399,7 +411,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:  # type: ign
             result = self._run_inner(spec, task_id, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return result
+        return TransformersResult.model_validate(result)
 
     def _run_inner(
         self,
diff --git a/src/worker/executors/vllm_executor.py b/src/worker/executors/vllm_executor.py
index 6ad6abd1..7319ea43 100644
--- a/src/worker/executors/vllm_executor.py
+++ b/src/worker/executors/vllm_executor.py
@@ -67,6 +67,7 @@
         StructuredOutputsParams = None  # type: ignore
 
 from shared.schemas.governance import SpanType
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import InferenceSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -81,6 +82,13 @@
 logger = logging.getLogger(__name__)
 
 
+class VLLMResult(BaseExecutorResult):
+    ok: bool = True
+    model: str | None = None
+    items: list[dict[str, Any]] = []
+    usage: dict[str, Any] | None = None
+
+
 class _RawJsonSchema:
     """Tag wrapping a JSON schema so ``_build_sampling_params`` can
     ``isinstance``-dispatch raw schemas vs. named-fields pydantic kwargs."""
@@ -849,7 +857,7 @@ def _postprocess_prompts(self, parsed: InferenceEntry) -> PreparedInferenceEntry
     # --------------------------------------------------------------------- #
     # Execution
     # --------------------------------------------------------------------- #
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:  # type: ignore[override]
+    def run(self, task: ExecutorTask, out_dir: Path) -> VLLMResult:
         spec = self.require_spec(task, InferenceSpecStrict)
         task_id = task.task_id.strip()
         if not task_id:
@@ -868,7 +876,7 @@ def _run_inner(
         task: ExecutorTask,
         spec: InferenceSpecStrict,
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> VLLMResult:
         task_id = task.task_id.strip()
         merge_children = task.merged_children or []
         entries: list[PreparedInferenceEntry] = []
@@ -1196,7 +1204,7 @@ def _collect(
             dependencies_by_task=dependencies_by_task,
         )
 
-        return result
+        return VLLMResult.model_validate(result)
 
     def cleanup_after_run(self) -> None:
         if self._llm:
diff --git a/tests/worker/test_transformers_chat_inference.py b/tests/worker/test_transformers_chat_inference.py
index ef05f502..5f67fa80 100644
--- a/tests/worker/test_transformers_chat_inference.py
+++ b/tests/worker/test_transformers_chat_inference.py
@@ -116,17 +116,18 @@ def test_transformers_executor_supports_chat_prompts_and_jsonl_export(
         result = executor.run(task, tmp_path)
     mock_ensure_model.assert_called_once_with(spec)
 
-    assert [item["output"] for item in result["items"]] == [
+    assert [item["output"] for item in result.items] == [
         "first answer",
         "second answer",
     ]
-    assert [item["prompt"] for item in result["items"]] == [
+    assert [item["prompt"] for item in result.items] == [
         "<chat>hello</chat>",
         "<chat>world</chat>",
     ]
-    assert [item["metadata"]["row_id"] for item in result["items"]] == ["a", "b"]
-    assert result["usage"]["num_requests"] == 2
-    assert "latency_sec" in result["usage"]
+    assert [item["metadata"]["row_id"] for item in result.items] == ["a", "b"]
+    assert result.usage is not None
+    assert result.usage["num_requests"] == 2
+    assert "latency_sec" in result.usage
 
     exported = (tmp_path / "artifacts" / "rows.jsonl").read_text(encoding="utf-8")
     assert '"row_id": "a"' in exported

From 08baf4ddbf514e3f663ccd6d2d98f8e4341b5fd4 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 15:10:58 +0000
Subject: [PATCH 06/26] refactor(worker): type training executor results

SFT, LoRA-SFT, DPO, and PPO executors return typed
``BaseExecutorResult`` subclasses (``SFTResult``, ``LoRAResult``,
``DPOResult``, ``PPOResult``). The non-LoRA training shapes are kept
permissive (most fields optional) because the distributed-spawn paths
return a partial dict, either loaded from the subprocess IPC file or
synthesized by the parent. Artifact-bearing fields are typed as
``ArtifactRef`` so the on-disk shape is validated end-to-end.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/dpo_executor.py      | 33 +++++++++++++++------
 src/worker/executors/lora_sft_executor.py | 22 ++++++++++++--
 src/worker/executors/ppo_executor.py      | 33 +++++++++++++++------
 src/worker/executors/sft_executor.py      | 35 +++++++++++++++++------
 4 files changed, 93 insertions(+), 30 deletions(-)

diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index 7dc1e8d2..99e7920b 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -25,6 +25,8 @@
 from trl.trainer.dpo_config import DPOConfig
 from trl.trainer.dpo_trainer import DPOTrainer
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import DPOSpecStrict
 from shared.utils.manifest import scratch_dir
 
@@ -45,6 +47,19 @@
 logger = logging.getLogger("worker.dpo")
 
 
+class DPOResult(BaseExecutorResult):
+    training_successful: bool = True
+    training_time_seconds: float | None = None
+    error_message: str | None = None
+    model_name: str | None = None
+    dataset_size: int = 0
+    output_dir: str | None = None
+    checkpoints_dir: ArtifactRef | None = None
+    final_model: ArtifactRef | None = None
+    final_model_archive: ArtifactRef | None = None
+    spawned_torchrun: bool = False
+
+
 class DPOExecutor(TrainingMixin, Executor):
     """DPO training executor using TRL library."""
 
@@ -59,7 +74,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._current_trainer: DPOTrainer | None = None
         self._task_out_dir: Path | None = None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
         configure_hf_library_logging()
         logger.info("Starting DPO training task")
         spec = self.require_spec(task, DPOSpecStrict)
@@ -80,24 +95,24 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 )
                 ipc_path = scratch_dir(out_dir) / "distributed_result.json"
                 if ipc_path.exists():
-                    return self.load_json(ipc_path)
-                return {
-                    "training_successful": True,
-                    "spawned_torchrun": True,
-                    "model_name": (
+                    return DPOResult.model_validate(self.load_json(ipc_path))
+                return DPOResult(
+                    training_successful=True,
+                    spawned_torchrun=True,
+                    model_name=(
                         spec.model
                         and spec.model.source
                         and spec.model.source.identifier
                     ),
-                    "output_dir": out_dir.as_posix(),
-                }
+                    output_dir=out_dir.as_posix(),
+                )
 
             result = self._execute_training(task, out_dir)
             logger.info(
                 "DPO training task completed in %.2f seconds",
                 result.get("training_time_seconds", 0.0),
             )
-            return result
+            return DPOResult.model_validate(result)
         finally:
             self._task_out_dir = None
 
diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index f40c99ad..20f9a5f3 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -18,6 +18,8 @@
 from trl.trainer.sft_config import SFTConfig
 from trl.trainer.sft_trainer import SFTTrainer
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import LoRASFTSpecStrict
 
 from ..utils.logging import configure_hf_library_logging
@@ -48,6 +50,20 @@
 logger = logging.getLogger("worker.sft.lora")
 
 
+class LoRAResult(BaseExecutorResult):
+    task_id: str
+    training_successful: bool
+    training_time_seconds: float
+    error_message: str | None = None
+    model_name: str | None = None
+    dataset_size: int = 0
+    output_dir: str
+    checkpoints_dir: ArtifactRef
+    resume_from_path: str | None = None
+    final_lora: ArtifactRef | None = None
+    final_lora_archive: ArtifactRef | None = None
+
+
 class LoRASFTExecutor(TrainingMixin, Executor):
     """Execute LoRA-based supervised fine-tuning using TRL's SFTTrainer."""
 
@@ -59,7 +75,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._current_model: Any | None = None
         self._current_trainer: Any | None = None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
         configure_hf_library_logging()
         spec = self.require_spec(task, LoRASFTSpecStrict)
         start_time = time.time()
@@ -321,7 +337,7 @@ def _compute_loss_with_guard(
                 final_adapter_path,
                 archive_path,
             )
-            return result_payload
+            return LoRAResult.model_validate(result_payload)
 
         except Exception as exc:  # pylint: disable=broad-except
             error_msg = str(exc)
@@ -345,7 +361,7 @@ def _compute_loss_with_guard(
         }
 
         if training_successful:
-            return result
+            return LoRAResult.model_validate(result)
 
         write_executor_result(out_dir / "results.json", task.task_id, task.spec, result)
         message = error_msg or "LoRA SFT training failed"
diff --git a/src/worker/executors/ppo_executor.py b/src/worker/executors/ppo_executor.py
index 1cc47183..c9320f58 100644
--- a/src/worker/executors/ppo_executor.py
+++ b/src/worker/executors/ppo_executor.py
@@ -33,6 +33,8 @@
 from trl.trainer.ppo_config import PPOConfig
 from trl.trainer.ppo_trainer import PPOTrainer
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import PPOSpecStrict
 from shared.utils.manifest import scratch_dir
 from shared.utils.parsing import safe_float, safe_int, to_bool
@@ -54,6 +56,19 @@
 logger = logging.getLogger("worker.ppo")
 
 
+class PPOResult(BaseExecutorResult):
+    training_successful: bool = True
+    training_time_seconds: float | None = None
+    error_message: str | None = None
+    model_name: str | None = None
+    dataset_size: int = 0
+    output_dir: str | None = None
+    checkpoints_dir: ArtifactRef | None = None
+    final_model: ArtifactRef | None = None
+    final_model_archive: ArtifactRef | None = None
+    spawned_torchrun: bool = False
+
+
 class _ExternalRewardModel(torch.nn.Module):
     """Wraps a sequence classification model to score decoded PPO responses."""
 
@@ -392,7 +407,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._reward_module: _ExternalRewardModel | _RewardAdapter | None = None
         self._task_out_dir: Path | None = None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> PPOResult:
         configure_hf_library_logging()
         logger.info("Starting PPO training task")
         spec = self.require_spec(task, PPOSpecStrict)
@@ -420,14 +435,14 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             if ipc_path.exists():
                 result = self.load_json(ipc_path)
                 self._task_out_dir = None
-                return result
+                return PPOResult.model_validate(result)
             self._task_out_dir = None
-            return {
-                "training_successful": True,
-                "spawned_torchrun": True,
-                "model_name": spec.model_name,
-                "output_dir": out_dir.as_posix(),
-            }
+            return PPOResult(
+                training_successful=True,
+                spawned_torchrun=True,
+                model_name=spec.model_name,
+                output_dir=out_dir.as_posix(),
+            )
 
         start_time = time.time()
 
@@ -931,7 +946,7 @@ def build_trainer() -> _EarlyStopPPOTrainer:
 
         logger.info("PPO training task completed in %.2f seconds", training_time)
         self._task_out_dir = None
-        return results
+        return PPOResult.model_validate(results)
 
     def _ensure_jsonl_local(self, jsonl_cfg: dict[str, Any]) -> Path:
         headers_cfg = (
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index 4361c94d..4c6cc82b 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -22,6 +22,8 @@
 from trl.trainer.sft_config import SFTConfig
 from trl.trainer.sft_trainer import SFTTrainer
 
+from shared.schemas.artifact import ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.tasks.specs import SFTSpecStrict, TaskSpecStrictBase
 from shared.utils.manifest import scratch_dir
 
@@ -43,6 +45,21 @@
 logger = logging.getLogger("worker.sft")
 
 
+class SFTResult(BaseExecutorResult):
+    task_id: str | None = None
+    training_successful: bool = True
+    training_time_seconds: float | None = None
+    error_message: str | None = None
+    model_name: str | None = None
+    dataset_size: int = 0
+    output_dir: str | None = None
+    checkpoints_dir: ArtifactRef | None = None
+    resume_from_path: str | None = None
+    final_model: ArtifactRef | None = None
+    final_model_archive: ArtifactRef | None = None
+    spawned_torchrun: bool = False
+
+
 class SFTExecutor(TrainingMixin, Executor):
     name = "sft_executor"
 
@@ -55,7 +72,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self._final_model_dir: Path | None = None
         self._task_out_dir: Path | None = None
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
         configure_hf_library_logging()
         spec = self.require_spec(task, SFTSpecStrict)
         requested_gpu_count = self._requested_gpu_count(spec)
@@ -191,14 +208,14 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
                 if ipc_path.exists():
                     distributed_result = self.load_json(ipc_path)
                     self._task_out_dir = None
-                    return distributed_result
+                    return SFTResult.model_validate(distributed_result)
                 self._task_out_dir = None
-                return {
-                    "training_successful": True,
-                    "spawned_torchrun": True,
-                    "model_name": spec.model_name,
-                    "output_dir": out_dir.as_posix(),
-                }
+                return SFTResult(
+                    training_successful=True,
+                    spawned_torchrun=True,
+                    model_name=spec.model_name,
+                    output_dir=out_dir.as_posix(),
+                )
         except Exception as spawn_exc:
             logger.exception("Failed to launch distributed SFT: %s", spawn_exc)
             raise ExecutionError(
@@ -467,7 +484,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
             self._final_model_dir = None
 
             self._task_out_dir = None
-            return result_payload
+            return SFTResult.model_validate(result_payload)
 
         except Exception as exc:
             error_msg = str(exc)

From 4ad515359a6f4442ee629f68432596b9add6ca14 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 15:15:40 +0000
Subject: [PATCH 07/26] test: cover BaseExecutorResult round-trip and fix
 subclass field serialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add round-trip tests for the executor result schema: subclass payload
preservation through the base class, ``_artifacts`` alias in both
directions, recursive ``children``, and the full envelope write→read
path.

The recursive-children test exposed a wire-format bug — Pydantic
defaults to serializing fields using their declared type, so a
subclass instance assigned to a ``BaseExecutorResult`` field (or
nested inside ``children``) lost its executor-specific fields on
``model_dump_json``. Pass ``serialize_as_any=True`` at every
worker-side serialization seam (``write_result_in_envelope``,
``_maybe_emit_http``, ``_dump_to_governance``) and on the server's
stage-result load so the actual subclass shape reaches disk and
downstream consumers.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/dispatcher/base.py             |  4 +-
 src/shared/schemas/result.py              |  4 +-
 src/worker/executors/mixins/governance.py |  4 +-
 src/worker/runner.py                      |  4 +-
 tests/shared/test_executor_result.py      | 85 +++++++++++++++++++++++
 5 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 tests/shared/test_executor_result.py

diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index 2f7c405b..d201c8dc 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -922,7 +922,9 @@ def _load_stage_result(self, stage_task_id: str) -> dict[str, Any]:
                 f"Result for task {stage_task_id} not found at {path}"
             )
         content = json.loads(path.read_text(encoding="utf-8"))
-        return ResultEnvelope.model_validate(content).result.model_dump()
+        return ResultEnvelope.model_validate(content).result.model_dump(
+            serialize_as_any=True
+        )
 
     def _dig_path(self, data: Any, parts: list[str]) -> Any:
         current = data
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index 953e7fa7..eb44c67d 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -48,7 +48,9 @@ def write_result_in_envelope(
     """Wrap ``result`` in a ``ResultEnvelope`` and persist it at ``path``."""
     path.parent.mkdir(parents=True, exist_ok=True)
     envelope = ResultEnvelope(task_id=task_id, result=result)
-    atomic_write_text(path, envelope.model_dump_json(indent=2))
+    atomic_write_text(
+        path, envelope.model_dump_json(indent=2, serialize_as_any=True)
+    )
 
 
 def read_result(base_dir: Path, task_id: str) -> str:
diff --git a/src/worker/executors/mixins/governance.py b/src/worker/executors/mixins/governance.py
index 7a7650ec..c623c934 100644
--- a/src/worker/executors/mixins/governance.py
+++ b/src/worker/executors/mixins/governance.py
@@ -243,7 +243,9 @@ def _dump_to_governance(
         """Write parent + merged-child results and emit asset/lineage rows."""
         parent_deps = dependencies_by_task.get(task_id, [])
         payload = (
-            result.model_dump() if isinstance(result, BaseExecutorResult) else result
+            result.model_dump(serialize_as_any=True)
+            if isinstance(result, BaseExecutorResult)
+            else result
         )
         children_payload = payload.get("children", {}) or {}
 
diff --git a/src/worker/runner.py b/src/worker/runner.py
index f009532c..6d7b17b0 100644
--- a/src/worker/runner.py
+++ b/src/worker/runner.py
@@ -194,7 +194,9 @@ def _maybe_emit_http(
         url = destination.url
         ignore_error = destination.ignore_error
         result_dict = (
-            result.model_dump() if isinstance(result, BaseExecutorResult) else result
+            result.model_dump(serialize_as_any=True)
+            if isinstance(result, BaseExecutorResult)
+            else result
         )
         payload = {
             "task_id": task_id,
diff --git a/tests/shared/test_executor_result.py b/tests/shared/test_executor_result.py
new file mode 100644
index 00000000..0c71bdba
--- /dev/null
+++ b/tests/shared/test_executor_result.py
@@ -0,0 +1,85 @@
+"""Round-trip tests for the shared executor-result schema."""
+
+import json
+from typing import Any
+
+from pydantic import Field
+
+from shared.schemas.artifact import ArtifactContext, ArtifactRef
+from shared.schemas.executor_result import BaseExecutorResult
+
+
+class _SampleResult(BaseExecutorResult):
+    ok: bool = True
+    items: list[dict[str, Any]] = Field(default_factory=list)
+    usage: dict[str, Any] | None = None
+
+
+def test_subclass_round_trip_through_base_preserves_extra_fields() -> None:
+    """A subclass's executor-specific fields survive a JSON trip through the
+    base class via ``extra='allow'``."""
+    original = _SampleResult(
+        ok=True,
+        items=[{"output": "hello"}],
+        usage={"latency_sec": 0.5},
+        artifacts=ArtifactContext(base_dir="/tmp/t", base_url="http://h"),
+    )
+    payload = original.model_dump_json(serialize_as_any=True)
+
+    base = BaseExecutorResult.model_validate_json(payload)
+    redumped = json.loads(base.model_dump_json())
+
+    assert redumped["items"] == [{"output": "hello"}]
+    assert redumped["usage"] == {"latency_sec": 0.5}
+    assert redumped["_artifacts"] == {"base_dir": "/tmp/t", "base_url": "http://h"}
+
+
+def test_artifacts_alias_round_trips_both_directions() -> None:
+    """The wire key is ``_artifacts`` (alias); field-name input is also accepted."""
+    from_alias = BaseExecutorResult.model_validate({"_artifacts": {"base_dir": "/a"}})
+    from_field = BaseExecutorResult.model_validate({"artifacts": {"base_dir": "/a"}})
+
+    assert from_alias.artifacts == from_field.artifacts == ArtifactContext(base_dir="/a")
+
+    dumped = from_alias.model_dump()
+    assert "_artifacts" in dumped
+    assert "artifacts" not in dumped
+
+
+def test_recursive_children_round_trip() -> None:
+    """Nested ``children`` deserialize as ``BaseExecutorResult`` and re-emit
+    their extra fields when serialized with ``serialize_as_any=True``."""
+    parent = _SampleResult(
+        items=[{"output": "p"}],
+        children={
+            "c1": _SampleResult(items=[{"output": "c"}], usage={"total_tokens": 3}),
+        },
+    )
+    payload = parent.model_dump_json(serialize_as_any=True)
+    base = BaseExecutorResult.model_validate_json(payload)
+
+    assert "c1" in base.children
+    child = base.children["c1"]
+    redumped = json.loads(child.model_dump_json())
+    assert redumped["items"] == [{"output": "c"}]
+    assert redumped["usage"] == {"total_tokens": 3}
+
+
+def test_artifact_ref_is_a_typed_path_reference() -> None:
+    ref = ArtifactRef(path="lora/final")
+    assert ref.model_dump() == {"path": "lora/final"}
+
+
+def test_envelope_round_trip_preserves_subclass_payload() -> None:
+    """The production write→read path (``write_result_in_envelope`` →
+    ``ResultEnvelope.model_validate``) round-trips subclass fields."""
+    from shared.schemas.result import ResultEnvelope
+
+    inner = _SampleResult(items=[{"output": "hello"}], usage={"total_tokens": 7})
+    env = ResultEnvelope(task_id="tsk-x", result=inner)
+    raw = env.model_dump_json(serialize_as_any=True)
+
+    parsed = ResultEnvelope.model_validate_json(raw)
+    dumped = parsed.result.model_dump()
+    assert dumped["items"] == [{"output": "hello"}]
+    assert dumped["usage"] == {"total_tokens": 7}

From 4fa6a436517ac1f2db185054712a2445c02cc866 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 15:16:43 +0000
Subject: [PATCH 08/26] docs: describe typed executor result schema

Architecture's task-merging bullet now points at the typed
``BaseExecutorResult``, and EXECUTORS.md gains a Result schema section
covering the cross-cutting base fields, per-executor subclass
placement, artifact resolution, and the ``serialize_as_any`` seam.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 docs/ARCHITECTURE.md |  7 ++++---
 docs/EXECUTORS.md    | 27 +++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 08a34223..dac1b885 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -109,9 +109,10 @@ scripts/dev/            compile_protos, sync_requirements, check_env_examples
 - **Task merging.** Compatible adjacent tasks in a DAG (same `taskType`,
   model, hardware shape, and merge key) coalesce into a single dispatch.
   Merged children ride on `WorkerTaskMessage.merged_children`; the worker
-  writes per-child results into `result.children`; the dispatcher fans
-  out synthetic `TASK_SUCCEEDED` / `TASK_FAILED` events. Disable with
-  `ENABLE_TASK_MERGE=false`.
+  writes per-child results into `result.children` on the typed
+  `BaseExecutorResult` (`src/shared/schemas/executor_result.py`); the
+  dispatcher fans out synthetic `TASK_SUCCEEDED` / `TASK_FAILED` events.
+  Disable with `ENABLE_TASK_MERGE=false`.
 - **Stage stickiness** (`ENABLE_STAGE_WEIGHT_STICKINESS=true`) — the
   dispatcher pins stages that reference an upstream stage's checkpoint
   to the worker that produced it, falling back to normal selection when
diff --git a/docs/EXECUTORS.md b/docs/EXECUTORS.md
index ed02e903..959bc9c2 100644
--- a/docs/EXECUTORS.md
+++ b/docs/EXECUTORS.md
@@ -22,6 +22,33 @@ Helper utilities live in `src/worker/executors/utils/` (`artifacts`,
 `src/worker/executors/mixins/` (`data`, `governance`, `inference`,
 `training`).
 
+## Result schema
+
+Every executor's `run()` returns a typed Pydantic subclass of
+`BaseExecutorResult` (`src/shared/schemas/executor_result.py`). The base
+class carries the cross-cutting fields the rest of the runtime relies
+on:
+
+- `children: dict[str, BaseExecutorResult]` — per-child results when
+  merged tasks share a dispatch.
+- `artifacts: ArtifactContext | None` (wire key `_artifacts`) —
+  resolution context for relative artifact refs.
+
+Per-executor subclasses live next to the executor they describe — e.g.
+`VLLMResult` in `src/worker/executors/vllm_executor.py`, `LoRAResult` in
+`src/worker/executors/lora_sft_executor.py`. They add executor-specific
+fields (`items`, `usage`, `final_lora`, `command`, …).
+
+The base class is `extra="allow"`, so the server can deserialize an
+incoming envelope as `BaseExecutorResult` without knowing the concrete
+subclass; the executor-specific fields pass through. Artifact-bearing
+fields use `ArtifactRef` (`{"path": rel_path}`); relative paths resolve
+against the producer's `_artifacts` context via
+`artifact_to_source` / `_render_artifact_ref`.
+
+Serializers at the wire seam pass `serialize_as_any=True` so subclass
+fields survive the round-trip.
+
 ## Agent executor (utu / youtu-agent)
 
 `AgentExecutor` requires the following env vars to run; the executor

From 9d03ecd2a6dd925c6b63b621b14780db7d333d06 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Mon, 18 May 2026 15:43:23 +0000
Subject: [PATCH 09/26] chore: satisfy pre-commit hooks (black, isort, mypy)

Apply isort / black formatting and resolve mypy errors surfaced by
``pre-commit run --all-files``:

- ``BaseExecutorResult.artifacts`` switches to
  ``validation_alias=AliasChoices("artifacts", "_artifacts")`` +
  ``serialization_alias="_artifacts"`` so ``artifacts=...`` is the
  canonical constructor name while still accepting both keys on
  the wire.
- ``write_result_in_envelope`` validates a dict input into
  ``BaseExecutorResult`` before constructing the envelope.
- Dispatcher's skip-envelope path constructs ``BaseExecutorResult()``
  instead of an empty dict.
- ``SSHResult.command`` / ``entrypoint`` are typed ``list[str] | None``
  to match ``SSHConfig``.
- ``DiffusersExecutor`` types ``generated_images`` as
  ``list[ArtifactRef]``.
- ``DPOExecutor`` narrows the ``model_name`` resolution to ``str |
  None`` via a conditional expression.
- ``get_result`` returns ``result.model_dump(serialize_as_any=True)``
  to match the router's ``dict[str, Any]`` signature.
- Two MP-executor tests assert ``isinstance(result, dict)`` so mypy
  can subscript through the union.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/dispatcher/base.py              | 3 ++-
 src/server/routers/v1/results.py           | 4 +++-
 src/shared/schemas/executor_result.py      | 5 +++--
 src/shared/schemas/result.py               | 6 +++---
 src/worker/executors/diffusers_executor.py | 2 +-
 src/worker/executors/dpo_executor.py       | 6 +++---
 src/worker/executors/echo_executor.py      | 4 +---
 src/worker/executors/ssh_executor.py       | 5 +++--
 src/worker/executors/vllm_executor.py      | 2 +-
 tests/shared/test_executor_result.py       | 4 +++-
 tests/worker/test_connector_logging.py     | 1 +
 tests/worker/test_mp_executor_lifecycle.py | 1 +
 12 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index d201c8dc..9ec8e5d8 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -9,6 +9,7 @@
 from pydantic import BaseModel, ValidationError
 
 from shared.schemas.event import TaskEvent
+from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.result import ResultEnvelope, result_file_path, write_result
 from shared.tasks import (
     MergedChildTaskStrict,
@@ -994,7 +995,7 @@ def _evaluate_condition_skip(
             )
             skip_envelope = ResultEnvelope(
                 task_id=task_id,
-                result={},
+                result=BaseExecutorResult(),
                 metadata={
                     "skipped": True,
                     "reason": "condition_not_met",
diff --git a/src/server/routers/v1/results.py b/src/server/routers/v1/results.py
index 00086507..85667f4b 100644
--- a/src/server/routers/v1/results.py
+++ b/src/server/routers/v1/results.py
@@ -143,7 +143,9 @@ async def get_result(
             detail=f"Result file is not valid JSON: {exc}",
         ) from exc
     try:
-        return ResultEnvelope.model_validate(content).result
+        return ResultEnvelope.model_validate(content).result.model_dump(
+            serialize_as_any=True
+        )
     except ValidationError as exc:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
diff --git a/src/shared/schemas/executor_result.py b/src/shared/schemas/executor_result.py
index 5ee39721..f352f9e5 100644
--- a/src/shared/schemas/executor_result.py
+++ b/src/shared/schemas/executor_result.py
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field
 
 from .artifact import ArtifactContext
 
@@ -22,7 +22,8 @@ class BaseExecutorResult(BaseModel):
     )
     artifacts: ArtifactContext | None = Field(
         default=None,
-        alias="_artifacts",
+        validation_alias=AliasChoices("artifacts", "_artifacts"),
+        serialization_alias="_artifacts",
         description="Resolution context for relative artifact refs.",
     )
 
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index eb44c67d..6b8831b0 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -47,10 +47,10 @@ def write_result_in_envelope(
 ) -> None:
     """Wrap ``result`` in a ``ResultEnvelope`` and persist it at ``path``."""
     path.parent.mkdir(parents=True, exist_ok=True)
+    if isinstance(result, dict):
+        result = BaseExecutorResult.model_validate(result)
     envelope = ResultEnvelope(task_id=task_id, result=result)
-    atomic_write_text(
-        path, envelope.model_dump_json(indent=2, serialize_as_any=True)
-    )
+    atomic_write_text(path, envelope.model_dump_json(indent=2, serialize_as_any=True))
 
 
 def read_result(base_dir: Path, task_id: str) -> str:
diff --git a/src/worker/executors/diffusers_executor.py b/src/worker/executors/diffusers_executor.py
index 8855b1ac..b34ed6ff 100644
--- a/src/worker/executors/diffusers_executor.py
+++ b/src/worker/executors/diffusers_executor.py
@@ -344,7 +344,7 @@ def _run_inner(
         image_dir = out_dir / "artifacts" / "images"
         image_dir.mkdir(parents=True, exist_ok=True)
 
-        generated_images: list[dict[str, str]] = []
+        generated_images: list[ArtifactRef] = []
         for idx, img in enumerate(images):
             img.save(image_dir / f"image_{idx}.png", format="PNG")
             generated_images.append(artifact_ref(f"images/image_{idx}.png"))
diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index 99e7920b..9dfbe449 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -100,9 +100,9 @@ def run(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
                     training_successful=True,
                     spawned_torchrun=True,
                     model_name=(
-                        spec.model
-                        and spec.model.source
-                        and spec.model.source.identifier
+                        spec.model.source.identifier
+                        if spec.model and spec.model.source
+                        else None
                     ),
                     output_dir=out_dir.as_posix(),
                 )
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index 884cdcf9..cec85193 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -88,9 +88,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> EchoResult:
                 resolved = self._resolve_item(item, context)
                 self._append_outputs(merged_items, resolved)
 
-            result = EchoResult(
-                ok=True, items=merged_items, count=len(merged_items)
-            )
+            result = EchoResult(ok=True, items=merged_items, count=len(merged_items))
             deps = self._extract_source_data_ids(spec)
             dependencies_by_task = {task_id: deps}
 
diff --git a/src/worker/executors/ssh_executor.py b/src/worker/executors/ssh_executor.py
index ac695e36..4677c532 100644
--- a/src/worker/executors/ssh_executor.py
+++ b/src/worker/executors/ssh_executor.py
@@ -60,8 +60,9 @@
 class SSHResult(BaseExecutorResult):
     session_id: str
     exit_code: int
-    command: str | None = None
-    entrypoint: str | None = None
+    command: list[str] | None = None
+    entrypoint: list[str] | None = None
+
 
 try:
     import docker
diff --git a/src/worker/executors/vllm_executor.py b/src/worker/executors/vllm_executor.py
index 7319ea43..4e6cea06 100644
--- a/src/worker/executors/vllm_executor.py
+++ b/src/worker/executors/vllm_executor.py
@@ -66,8 +66,8 @@
         _HAS_VLLM = False
         StructuredOutputsParams = None  # type: ignore
 
-from shared.schemas.governance import SpanType
 from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.governance import SpanType
 from shared.tasks.specs import InferenceSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
diff --git a/tests/shared/test_executor_result.py b/tests/shared/test_executor_result.py
index 0c71bdba..24bb93c8 100644
--- a/tests/shared/test_executor_result.py
+++ b/tests/shared/test_executor_result.py
@@ -39,7 +39,9 @@ def test_artifacts_alias_round_trips_both_directions() -> None:
     from_alias = BaseExecutorResult.model_validate({"_artifacts": {"base_dir": "/a"}})
     from_field = BaseExecutorResult.model_validate({"artifacts": {"base_dir": "/a"}})
 
-    assert from_alias.artifacts == from_field.artifacts == ArtifactContext(base_dir="/a")
+    assert (
+        from_alias.artifacts == from_field.artifacts == ArtifactContext(base_dir="/a")
+    )
 
     dumped = from_alias.model_dump()
     assert "_artifacts" in dumped
diff --git a/tests/worker/test_connector_logging.py b/tests/worker/test_connector_logging.py
index beda187e..0caa357c 100644
--- a/tests/worker/test_connector_logging.py
+++ b/tests/worker/test_connector_logging.py
@@ -90,6 +90,7 @@ def test_connector_logs_printed_to_stderr(tmp_path: Path) -> None:
     mp.cleanup_after_run()
 
     # Verify the executor ran successfully
+    assert isinstance(result, dict)
     assert result["ok"], f"Executor failed: {result}"
     assert (
         result.get("result", {}).get("status") == "completed"
diff --git a/tests/worker/test_mp_executor_lifecycle.py b/tests/worker/test_mp_executor_lifecycle.py
index 3fe73be8..f6882cc5 100644
--- a/tests/worker/test_mp_executor_lifecycle.py
+++ b/tests/worker/test_mp_executor_lifecycle.py
@@ -54,6 +54,7 @@ def test_mp_executor_does_not_start_subprocess_until_first_run(tmp_path: Path) -
     with tempfile.TemporaryDirectory() as out_dir:
         result = mp.run(_simple_task_message(), Path(out_dir))
 
+    assert isinstance(result, dict)
     assert result["ok"] is True
     assert mp._shutdown is False
     assert mp._proc is not None

From cd5fd0c62fbf35277dab3d93cbb89093635615bb Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 15:22:46 +0000
Subject: [PATCH 10/26] refactor: tighten executor result typing end-to-end

``Executor.run`` is now strictly typed as ``-> BaseExecutorResult``
(no dict union), and every executor constructs its typed subclass
directly instead of validating a dict at the return boundary.

Schema: ``BaseExecutorResult`` is consolidated into
``shared/schemas/result.py``; ``children`` and ``ResultEnvelope.result``
use ``SerializeAsAny[...]`` so subclass-specific fields survive the
declared-type annotation without per-call ``serialize_as_any=True``.
``upstreamResults`` on ``TaskSpecStrictBase`` /
``TaskSpecTemplateBase`` is typed as ``dict[str, BaseExecutorResult]``.

Plumbing: ``write_result_in_envelope`` is folded into
``write_executor_result`` since the dict-input path is no longer
needed. ``_dump_to_governance``, ``_populate_table``, and
``_maybe_export_jsonl`` take the values they actually use (typed
result, items list) instead of mutating an opaque payload. The
dispatcher's ``_load_stage_result`` returns ``ResultEnvelope``;
``_dig_path`` walks ``BaseModel`` attributes; ``_render_artifact_ref``
reads ``ctx.base_url`` / ``ctx.base_dir`` directly.

Omni: ``OmniResult`` becomes strict (``executor``, ``mode``, ``items``,
``model``) and each concrete omni executor declares its own
``OmniText2*Result`` subclass with default ``executor`` / ``mode``
literals.

Test fixtures (``test_mp_executor_lifecycle``, ``test_connector_logging``)
define typed result subclasses so assertions use natural attribute
access; the multiprocessing boundary pickles the subclass through to
the parent.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/dispatcher/base.py                 | 60 ++++++++-------
 src/server/routers/v1/results.py              |  4 +-
 src/shared/schemas/executor_result.py         | 31 --------
 src/shared/schemas/result.py                  | 44 +++++++----
 src/shared/tasks/specs/common.py              |  5 +-
 src/worker/executors/agent_executor.py        | 64 ++++++++--------
 src/worker/executors/api_executor.py          |  4 +-
 src/worker/executors/base_executor.py         | 29 ++++----
 .../executors/data_profiling_executor.py      |  2 +-
 .../executors/data_retrieval_executor.py      |  2 +-
 src/worker/executors/diffusers_executor.py    | 20 ++---
 src/worker/executors/dpo_executor.py          | 54 +++++++-------
 src/worker/executors/echo_executor.py         |  2 +-
 src/worker/executors/lora_sft_executor.py     | 70 +++++++++---------
 src/worker/executors/mixins/data.py           | 10 +--
 src/worker/executors/mixins/governance.py     | 17 ++---
 src/worker/executors/mixins/inference.py      |  3 +-
 src/worker/executors/mp_executor.py           |  6 +-
 src/worker/executors/omni_executor_base.py    | 11 ++-
 .../executors/omni_text2audio_executor.py     | 42 ++++++-----
 .../executors/omni_text2general_executor.py   | 41 ++++++-----
 .../executors/omni_text2image_executor.py     | 28 +++----
 .../executors/omni_text2speech_executor.py    | 35 +++++----
 src/worker/executors/ppo_executor.py          | 54 +++++++-------
 src/worker/executors/rag_executor.py          |  2 +-
 src/worker/executors/sft_executor.py          | 73 +++++++++----------
 src/worker/executors/ssh_executor.py          |  2 +-
 src/worker/executors/transformers_executor.py | 37 ++++------
 src/worker/executors/utils/checkpoints.py     | 15 ++--
 src/worker/executors/vllm_executor.py         | 44 +++++------
 src/worker/runner.py                          | 18 ++---
 tests/shared/test_executor_result.py          | 28 ++-----
 tests/worker/test_connector_logging.py        | 29 +++++---
 tests/worker/test_data_mixin_lineage.py       | 19 +++--
 tests/worker/test_executor_bootstrap.py       |  5 +-
 tests/worker/test_mp_executor_lifecycle.py    | 14 +++-
 36 files changed, 443 insertions(+), 481 deletions(-)
 delete mode 100644 src/shared/schemas/executor_result.py

diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index 9ec8e5d8..2ae68c25 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -9,13 +9,18 @@
 from pydantic import BaseModel, ValidationError
 
 from shared.schemas.event import TaskEvent
-from shared.schemas.executor_result import BaseExecutorResult
-from shared.schemas.result import ResultEnvelope, result_file_path, write_result
+from shared.schemas.result import (
+    BaseExecutorResult,
+    ResultEnvelope,
+    result_file_path,
+    write_result,
+)
 from shared.tasks import (
     MergedChildTaskStrict,
     TaskEnvelope,
     TaskEnvelopeStrict,
     TaskEnvelopeTemplate,
+    TaskSpecStrict,
 )
 from shared.tasks.placeholders import PLACEHOLDER_PATTERN
 from shared.tasks.specs import (
@@ -708,7 +713,7 @@ def _resolve_stage_references(
         self, task_id: str, task: TaskEnvelopeTemplate, record: TaskRecord
     ) -> TaskEnvelopeStrict:
         context = self._build_stage_context(record)
-        resolved_task = task
+        resolved_task: TaskEnvelopeTemplate = task
         if context and task.has_placeholder():
             resolved_task = self._resolve_placeholders(task, context)
 
@@ -717,7 +722,7 @@ def _resolve_stage_references(
         )
         if upstream_results:
             existing = resolved_task.spec.upstreamResults or {}
-            merged: dict[str, Any] = {}
+            merged: dict[str, BaseExecutorResult] = {}
             if isinstance(existing, dict):
                 merged.update(copy.deepcopy(existing))
             merged.update(upstream_results)
@@ -731,7 +736,7 @@ def _resolve_stage_references(
 
         return TaskEnvelopeStrict.model_validate(resolved_task)
 
-    def _resolve_placeholders(self, value: Any, context: dict[str, Any]) -> Any:
+    def _resolve_placeholders(self, value: Any, context: dict[str, TaskRecord]) -> Any:
         if isinstance(value, str):
             exact = PLACEHOLDER_PATTERN.fullmatch(value)
             if exact:
@@ -815,7 +820,7 @@ def _stage_context_keys(record: TaskRecord) -> tuple[str, ...]:
             keys.append(record.graph_node_name)
         return tuple(keys)
 
-    def _resolve_reference(self, expr: str, context: dict[str, Any]) -> Any:
+    def _resolve_reference(self, expr: str, context: dict[str, TaskRecord]) -> Any:
         expr = expr.strip()
         if not expr:
             raise ValueError("Empty stage reference")
@@ -832,49 +837,47 @@ def _resolve_reference(self, expr: str, context: dict[str, Any]) -> Any:
             return stage_record.task_id
         if stage_record.status != "DONE":
             raise StageReferenceNotReady(f"Stage '{stage_name}' has not completed")
-        data = self._load_stage_result(stage_record.task_id)
-        value = self._dig_path(data, path.split("."))
+        envelope = self._load_stage_result(stage_record.task_id)
+        value = self._dig_path(envelope.result, path.split("."))
         if value is None:
             raise ValueError(f"Missing value for reference '{expr}'")
         # If the referenced value is an artifact ref ({path: "..."}), render
         # it as a full URL (when base_url is set) or an absolute filesystem
         # path using the producing stage's top-level _artifacts context.
-        if rendered := self._render_artifact_ref(value, data):
+        if rendered := self._render_artifact_ref(value, envelope):
             return rendered
         return value
 
     @staticmethod
-    def _render_artifact_ref(value: Any, stage_result: Any) -> str | None:
+    def _render_artifact_ref(value: Any, stage_result: ResultEnvelope) -> str | None:
         if not isinstance(value, dict):
             return None
         path_value = value.get("path")
         if not isinstance(path_value, str) or not path_value:
             return None
-        if not isinstance(stage_result, dict):
-            return None
-        ctx = stage_result.get("_artifacts")
-        if not isinstance(ctx, dict):
+        ctx = stage_result.result.artifacts
+        if ctx is None:
             return None
-        base_url = ctx.get("base_url")
-        base_dir = ctx.get("base_dir")
-        if isinstance(base_url, str) and base_url and isinstance(base_dir, str):
+        base_url = ctx.base_url
+        base_dir = ctx.base_dir
+        if base_url and base_dir:
             task_id = Path(base_dir).name
             return f"{base_url.rstrip('/')}/api/v1/results/{task_id}/files/{path_value}"
-        if isinstance(base_dir, str) and base_dir:
+        if base_dir:
             return (Path(base_dir) / "artifacts" / path_value).as_posix()
         return None
 
     def _collect_upstream_results(
         self, context: dict[str, TaskRecord], current_task_id: str
-    ) -> dict[str, Any]:
-        results: dict[str, Any] = {}
+    ) -> dict[str, BaseExecutorResult]:
+        results: dict[str, BaseExecutorResult] = {}
         for name, record in context.items():
             if not record or record.task_id == current_task_id:
                 continue
             if record.status != TaskStatus.DONE:
                 continue
             try:
-                data = self._load_stage_result(record.task_id)
+                envelope = self._load_stage_result(record.task_id)
             except StageReferenceNotReady as exc:
                 raise exc
             except Exception as exc:
@@ -885,11 +888,11 @@ def _collect_upstream_results(
                     exc,
                 )
                 continue
-            results[name] = data
+            results[name] = envelope.result
         return results
 
     def _resolve_upstream_task_ids(
-        self, record: TaskRecord, spec: Any
+        self, record: TaskRecord, spec: TaskSpecStrict
     ) -> dict[str, str] | None:
         if not isinstance(spec, SSHSpecStrict) or not spec.inputs:
             return None
@@ -916,16 +919,14 @@ def _resolve_upstream_task_ids(
             resolved[stage_name] = upstream.task_id
         return resolved or None
 
-    def _load_stage_result(self, stage_task_id: str) -> dict[str, Any]:
+    def _load_stage_result(self, stage_task_id: str) -> ResultEnvelope:
         path = result_file_path(self._results_dir, stage_task_id)
         if not path.exists():
             raise StageReferenceNotReady(
                 f"Result for task {stage_task_id} not found at {path}"
             )
         content = json.loads(path.read_text(encoding="utf-8"))
-        return ResultEnvelope.model_validate(content).result.model_dump(
-            serialize_as_any=True
-        )
+        return ResultEnvelope.model_validate(content)
 
     def _dig_path(self, data: Any, parts: list[str]) -> Any:
         current = data
@@ -949,6 +950,11 @@ def _dig_path(self, data: Any, parts: list[str]) -> Any:
                     return None
                 current = current[idx]
                 continue
+            if isinstance(current, BaseModel):
+                if not hasattr(current, part):
+                    return None
+                current = getattr(current, part)
+                continue
             return None
         return current
 
diff --git a/src/server/routers/v1/results.py b/src/server/routers/v1/results.py
index 85667f4b..99ac8236 100644
--- a/src/server/routers/v1/results.py
+++ b/src/server/routers/v1/results.py
@@ -143,9 +143,7 @@ async def get_result(
             detail=f"Result file is not valid JSON: {exc}",
         ) from exc
     try:
-        return ResultEnvelope.model_validate(content).result.model_dump(
-            serialize_as_any=True
-        )
+        return ResultEnvelope.model_validate(content).result.model_dump()
     except ValidationError as exc:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
diff --git a/src/shared/schemas/executor_result.py b/src/shared/schemas/executor_result.py
deleted file mode 100644
index f352f9e5..00000000
--- a/src/shared/schemas/executor_result.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from pydantic import AliasChoices, BaseModel, ConfigDict, Field
-
-from .artifact import ArtifactContext
-
-
-class BaseExecutorResult(BaseModel):
-    """Common shape for every executor's result payload.
-
-    ``extra="allow"`` lets the server round-trip subclass payloads through
-    this base class without losing executor-specific fields.
-    """
-
-    model_config = ConfigDict(
-        extra="allow",
-        populate_by_name=True,
-        serialize_by_alias=True,
-    )
-
-    children: dict[str, "BaseExecutorResult"] = Field(
-        default_factory=dict,
-        description="Per-child result payloads for task merging.",
-    )
-    artifacts: ArtifactContext | None = Field(
-        default=None,
-        validation_alias=AliasChoices("artifacts", "_artifacts"),
-        serialization_alias="_artifacts",
-        description="Resolution context for relative artifact refs.",
-    )
-
-
-BaseExecutorResult.model_rebuild()
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index 6b8831b0..832fa25c 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -1,20 +1,47 @@
 """Result envelope schema shared by server and worker."""
 
+# This is necessary to allow for the recursive type hint of `children` in
+# `BaseExecutorResult`.
+from __future__ import annotations
+
 from pathlib import Path
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
 
 from shared.utils.atomic import atomic_write_text
 from shared.utils.manifest import prepare_output_dir
 from shared.utils.time import now_iso
 
-from .executor_result import BaseExecutorResult
+from .artifact import ArtifactContext
+
+
+class BaseExecutorResult(BaseModel):
+    """Common shape for every executor's result payload.
+
+    ``extra="allow"`` lets the server round-trip subclass payloads through
+    this base class without losing executor-specific fields.
+    """
+
+    model_config = ConfigDict(extra="allow", serialize_by_alias=True)
+
+    children: dict[str, SerializeAsAny[BaseExecutorResult]] = Field(
+        default_factory=dict,
+        exclude_if=lambda v: not v,
+        description="Per-child result payloads for task merging.",
+    )
+    artifacts: ArtifactContext | None = Field(
+        default=None,
+        alias="_artifacts",
+        description="Resolution context for relative artifact refs.",
+    )
 
 
 class ResultEnvelope(BaseModel):
     task_id: str = Field(description="Task identifier.")
-    result: BaseExecutorResult = Field(description="Result payload data.")
+    result: SerializeAsAny[BaseExecutorResult] = Field(
+        description="Result payload data."
+    )
     worker_id: str | None = Field(
         default=None, description="Worker identifier submitting the result."
     )
@@ -42,17 +69,6 @@ def write_result(base_dir: Path, envelope: ResultEnvelope) -> Path:
     return path
 
 
-def write_result_in_envelope(
-    path: Path, task_id: str, result: BaseExecutorResult | dict[str, Any]
-) -> None:
-    """Wrap ``result`` in a ``ResultEnvelope`` and persist it at ``path``."""
-    path.parent.mkdir(parents=True, exist_ok=True)
-    if isinstance(result, dict):
-        result = BaseExecutorResult.model_validate(result)
-    envelope = ResultEnvelope(task_id=task_id, result=result)
-    atomic_write_text(path, envelope.model_dump_json(indent=2, serialize_as_any=True))
-
-
 def read_result(base_dir: Path, task_id: str) -> str:
     path = result_file_path(base_dir, task_id)
     return path.read_text(encoding="utf-8")
diff --git a/src/shared/tasks/specs/common.py b/src/shared/tasks/specs/common.py
index d6cb61db..e841e596 100644
--- a/src/shared/tasks/specs/common.py
+++ b/src/shared/tasks/specs/common.py
@@ -2,6 +2,7 @@
 
 from pydantic import Field, model_validator
 
+from ...schemas.result import BaseExecutorResult
 from .._base import StrictBaseModel, TemplateBaseModel
 from ..components import (
     AdapterConfig,
@@ -72,7 +73,7 @@ class TaskSpecStrictBase(StrictBaseModel):
     shard: ShardSpec | None = None
 
     # Server-injected stage context (reserve the user-facing key `_upstreamResults`)
-    upstreamResults: dict[str, Any] | None = Field(
+    upstreamResults: dict[str, BaseExecutorResult] | None = Field(
         default=None, alias="_upstreamResults"
     )
 
@@ -98,7 +99,7 @@ class TaskSpecTemplateBase(TemplateBaseModel):
     condition: ConditionSpec | None = None
     shard: ShardSpecTemplate | None = None
 
-    upstreamResults: dict[str, Any] | None = Field(
+    upstreamResults: dict[str, BaseExecutorResult] | None = Field(
         default=None, alias="_upstreamResults"
     )
 
diff --git a/src/worker/executors/agent_executor.py b/src/worker/executors/agent_executor.py
index d0262dd7..7cff6d0a 100644
--- a/src/worker/executors/agent_executor.py
+++ b/src/worker/executors/agent_executor.py
@@ -17,7 +17,7 @@
 from datasets import load_dataset
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import AgentSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -57,7 +57,7 @@ def _resolve_task_timeout(agent: dict[str, Any] | None) -> int:
 
 
 class AgentResult(BaseExecutorResult):
-    ok: bool
+    ok: bool = True
     model: str
     items: list[dict[str, Any]] = []
     usage: dict[str, Any] | None = None
@@ -260,32 +260,34 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         agent_config_name, self._tasks[0], out_dir, task_timeout
                     )
                 )
+                agent_output_ref = result.get("_agent_output_ref")
 
-                output: dict[str, Any] = {
-                    "ok": True,
-                    "model": agent_config_name,
-                    "items": [
+                output = AgentResult(
+                    model=agent_config_name,
+                    items=[
                         {
                             "index": 0,
                             "output": result.get("output", ""),
                             "finish_reason": "completed",
                         }
                     ],
-                    "usage": {
+                    usage={
                         "execution_time_sec": result.get("usage", {}).get(
                             "execution_time_sec", 0
                         ),
                         "num_requests": 1,
                         "agent_config": agent_config_name,
                     },
-                    "metadata": {
+                    metadata={
                         "task": self._tasks[0],
                         "execution_log": result.get("log", []),
                     },
-                }
-                agent_output_ref = result.get("_agent_output_ref")
-                if isinstance(agent_output_ref, str):
-                    output["agent_output"] = artifact_ref(agent_output_ref)
+                    agent_output=(
+                        artifact_ref(agent_output_ref)
+                        if isinstance(agent_output_ref, str)
+                        else None
+                    ),
+                )
             else:
                 # Batch execution for multiple tasks
                 results = asyncio.run(
@@ -309,51 +311,53 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         }
                     )
 
-                output = {
-                    "ok": True,
-                    "model": agent_config_name,
-                    "items": items,
-                    "usage": {
+                batch_summary_ref = results.get("_batch_summary_ref")
+                output = AgentResult(
+                    model=agent_config_name,
+                    items=items,
+                    usage={
                         "execution_time_sec": results.get("usage", {}).get(
                             "execution_time_sec", 0
                         ),
                         "num_requests": len(self._tasks),
                         "agent_config": agent_config_name,
                     },
-                    "metadata": {
+                    metadata={
                         "tasks_count": len(self._tasks),
                         "execution_log": results.get("log", []),
                         "batch_summary": results.get("batch_summary", {}),
                     },
-                }
-                batch_summary_ref = results.get("_batch_summary_ref")
-                if isinstance(batch_summary_ref, str):
-                    output["batch_summary_file"] = artifact_ref(batch_summary_ref)
+                    batch_summary_file=(
+                        artifact_ref(batch_summary_ref)
+                        if isinstance(batch_summary_ref, str)
+                        else None
+                    ),
+                )
 
             maybe_upload_artifacts(task, out_dir, logger=logger)
 
             logger.info(f"Agent execution completed: {len(self._tasks)} task(s)")
-            return AgentResult.model_validate(output)
+            return output
 
         except ExecutionError:
             raise
         except Exception as e:
             logger.exception(f"Agent execution failed: {e}")
-            error_output = {
-                "ok": False,
-                "model": agent_config_name,
-                "items": [],
-                "usage": {
+            error_output = AgentResult(
+                ok=False,
+                model=agent_config_name,
+                items=[],
+                usage={
                     "execution_time_sec": 0,
                     "num_requests": len(self._tasks),
                     "agent_config": agent_config_name,
                 },
-                "metadata": {
+                metadata={
                     "tasks_count": len(self._tasks),
                     "error": str(e),
                     "execution_log": [],
                 },
-            }
+            )
             write_executor_result(
                 out_dir / "results.json", task.task_id, task.spec, error_output
             )
diff --git a/src/worker/executors/api_executor.py b/src/worker/executors/api_executor.py
index 45be3269..a845b31e 100644
--- a/src/worker/executors/api_executor.py
+++ b/src/worker/executors/api_executor.py
@@ -7,7 +7,7 @@
 import httpx
 from pydantic import Field
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import ApiSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -19,7 +19,7 @@
 
 
 class APIResult(BaseExecutorResult):
-    ok: bool
+    ok: bool = True
     executor: str
     method: str
     url: str
diff --git a/src/worker/executors/base_executor.py b/src/worker/executors/base_executor.py
index ebb58ed4..f17a3d7f 100644
--- a/src/worker/executors/base_executor.py
+++ b/src/worker/executors/base_executor.py
@@ -27,7 +27,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> dict:
 from pathlib import Path
 from typing import Any, TypeVar
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks import MergedChildTaskStrict
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.worker_message import WorkerHardware, WorkerTaskMessage
@@ -85,9 +85,7 @@ def prepare(self) -> None:
         return None
 
     @abstractmethod
-    def run(
-        self, task: ExecutorTask, out_dir: Path
-    ) -> BaseExecutorResult | dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> BaseExecutorResult:
         """Execute a single task.
 
         Args:
@@ -96,8 +94,7 @@ def run(
             if needed.
 
         Returns:
-            A ``BaseExecutorResult`` subclass instance (preferred) or a
-            JSON-serializable dictionary summarizing the result.
+            A ``BaseExecutorResult`` subclass instance.
 
         Raises:
             ExecutionError: for expected, user-facing failures.
@@ -143,13 +140,19 @@ def load_json(path: Path) -> dict[str, Any]:
 
 
 # -------- Minimal example implementation --------
+class EchoResult(BaseExecutorResult):
+    ok: bool = True
+    executor: str
+    task_id: str
+    task_type: str
+
+
 class EchoExecutor(Executor):
     name = "echo"
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
-        return {
-            "ok": True,
-            "executor": self.name,
-            "task_id": task.task_id,
-            "task_type": task.spec.taskType,
-        }
+    def run(self, task: ExecutorTask, out_dir: Path) -> EchoResult:
+        return EchoResult(
+            executor=self.name,
+            task_id=task.task_id,
+            task_type=task.spec.taskType,
+        )
diff --git a/src/worker/executors/data_profiling_executor.py b/src/worker/executors/data_profiling_executor.py
index 20b94645..57f4249c 100644
--- a/src/worker/executors/data_profiling_executor.py
+++ b/src/worker/executors/data_profiling_executor.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from typing import Any
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import DataProfilingSpecStrict
 from shared.utils.json import to_json_serializable, validate_keys
 
diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index 3775284b..22f964e5 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from PIL import Image
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import DataRetrievalSpecStrict
 from shared.utils.json import validate_keys
 
diff --git a/src/worker/executors/diffusers_executor.py b/src/worker/executors/diffusers_executor.py
index b34ed6ff..5a82e1fd 100644
--- a/src/worker/executors/diffusers_executor.py
+++ b/src/worker/executors/diffusers_executor.py
@@ -15,7 +15,7 @@
 from PIL import Image
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import DiffusionSpecStrict
 
 from ..utils.logging import configure_hf_library_logging
@@ -255,17 +255,17 @@ def run(self, task: ExecutorTask, out_dir: Path) -> DiffusersResult:
         with self._task_span(
             task_id, task.workflow_id, out_dir, owner_id=task.owner_id
         ):
-            response = self._run_inner(spec, task_id, out_dir)
+            result = self._run_inner(spec, task_id, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return DiffusersResult.model_validate(response)
+        return result
 
     def _run_inner(
         self,
         spec: DiffusionSpecStrict,
         task_id: str,
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> DiffusersResult:
         self._ensure_pipeline(spec)
         assert self._pipe is not None
 
@@ -349,19 +349,13 @@ def _run_inner(
             img.save(image_dir / f"image_{idx}.png", format="PNG")
             generated_images.append(artifact_ref(f"images/image_{idx}.png"))
 
-        response: dict[str, Any] = {
-            "ok": True,
-            "model": self._model_name,
-            "images": generated_images,
-        }
-
+        result = DiffusersResult(model=self._model_name, images=generated_images)
         self._dump_to_governance(
             task_id=task_id,
-            result=response,
+            result=result,
             dependencies_by_task=dependencies_by_task,
         )
-
-        return response
+        return result
 
     def cleanup_after_run(self) -> None:
         if self._pipe is not None:
diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index 9dfbe449..043e0978 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -26,7 +26,7 @@
 from trl.trainer.dpo_trainer import DPOTrainer
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import DPOSpecStrict
 from shared.utils.manifest import scratch_dir
 
@@ -110,9 +110,9 @@ def run(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
             result = self._execute_training(task, out_dir)
             logger.info(
                 "DPO training task completed in %.2f seconds",
-                result.get("training_time_seconds", 0.0),
+                result.training_time_seconds or 0.0,
             )
-            return DPOResult.model_validate(result)
+            return result
         finally:
             self._task_out_dir = None
 
@@ -369,7 +369,7 @@ def _ensure_jsonl_local(jsonl_cfg: dict[str, Any]) -> Path:
 
         return dataset  # type: ignore[return-value]
 
-    def _execute_training(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:
+    def _execute_training(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
         spec = self.require_spec(task, DPOSpecStrict)
         training_config = spec.training or {}
         artifacts_dir = out_dir / "artifacts"
@@ -477,21 +477,21 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]
                     logger.warning("Failed to save model: %s", exc)
 
             training_time = time.time() - start_time
-            results: dict[str, Any] = {
-                "training_successful": True,
-                "training_time_seconds": training_time,
-                "error_message": None,
-                "model_name": self._model_name,
-                "dataset_size": len(dataset),
-                "output_dir": out_dir.as_posix(),
-                "checkpoints_dir": artifact_ref("checkpoints"),
-            }
+            result = DPOResult(
+                training_successful=True,
+                training_time_seconds=training_time,
+                error_message=None,
+                model_name=self._model_name,
+                dataset_size=len(dataset) if dataset is not None else 0,
+                output_dir=out_dir.as_posix(),
+                checkpoints_dir=artifact_ref("checkpoints"),
+            )
             if final_model_path is not None:
-                results["final_model"] = artifact_ref(
+                result.final_model = artifact_ref(
                     final_model_path.relative_to(artifacts_dir).as_posix()
                 )
             if final_archive_path is not None:
-                results["final_model_archive"] = artifact_ref(
+                result.final_model_archive = artifact_ref(
                     final_archive_path.relative_to(artifacts_dir).as_posix()
                 )
 
@@ -503,24 +503,22 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]
                 final_model_path,
                 final_archive_path,
             )
-            return results
+            return result
         except Exception as exc:
             training_time = time.time() - start_time
-            results = {
-                "training_successful": False,
-                "training_time_seconds": training_time,
-                "error_message": str(exc),
-                "model_name": self._model_name,
-                "dataset_size": len(dataset) if dataset is not None else 0,
-                "output_dir": out_dir.as_posix(),
-            }
+            result = DPOResult(
+                training_successful=False,
+                training_time_seconds=training_time,
+                error_message=str(exc),
+                model_name=self._model_name,
+                dataset_size=len(dataset) if dataset is not None else 0,
+                output_dir=out_dir.as_posix(),
+            )
             write_executor_result(
-                out_dir / "results.json", task.task_id, task.spec, results
+                out_dir / "results.json", task.task_id, task.spec, result
             )
             logger.exception("DPO training failed: %s", exc)
-            raise ExecutionError(
-                results["error_message"] or "DPO training failed"
-            ) from exc
+            raise ExecutionError(result.error_message or "DPO training failed") from exc
 
     def _spawn_distributed(
         self,
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index cec85193..ad9795ee 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Any
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import EchoSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index 20f9a5f3..55deca84 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -19,7 +19,7 @@
 from trl.trainer.sft_trainer import SFTTrainer
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import LoRASFTSpecStrict
 
 from ..utils.logging import configure_hf_library_logging
@@ -115,6 +115,8 @@ def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
         checkpoint_dir = artifacts_dir / "checkpoints"
         checkpoint_dir.mkdir(parents=True, exist_ok=True)
 
+        resume_path: Path | None = None
+        train_dataset: Dataset | None = None
         try:
             model_name = spec.model_name or "gpt2"
             self._model_name = model_name
@@ -137,7 +139,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
             resume_path = determine_resume_path(
                 spec, training_cfg, out_dir, logger=logger
             )
-            resume_str = str(resume_path) if resume_path else None
+            resume_str = resume_path.as_posix() if resume_path else None
 
             if bool(training_cfg.get("gradient_checkpointing", False)):
                 model.gradient_checkpointing_enable()
@@ -155,9 +157,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
                     "Resuming LoRA training from local checkpoint %s", resume_path
                 )
                 peft_model = PeftModel.from_pretrained(
-                    model,
-                    str(resume_path),
-                    is_trainable=True,
+                    model, resume_path.as_posix(), is_trainable=True
                 )
                 logger.info("Loaded existing LoRA adapters; continuing fine-tuning")
             else:
@@ -198,7 +198,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
                 logger.info("Initialized new LoRA adapters: %s", lora_target_modules)
 
             sft_config = SFTConfig(
-                output_dir=str(checkpoint_dir),
+                output_dir=checkpoint_dir.as_posix(),
                 num_train_epochs=float(training_cfg.get("num_train_epochs", 1.0)),
                 per_device_train_batch_size=int(training_cfg.get("batch_size", 2)),
                 gradient_accumulation_steps=int(
@@ -300,31 +300,29 @@ def _compute_loss_with_guard(
             archive_path: Path | None = None
             if bool(training_cfg.get("save_model", True)):
                 model_path = artifacts_dir / "final_lora"
-                trainer.save_model(str(model_path))
+                trainer.save_model(model_path.as_posix())
                 tokenizer.save_pretrained(model_path)
                 final_adapter_path = model_path
                 logger.info("Saved LoRA-adapted weights to %s", model_path)
 
             training_time = time.time() - start_time
-            result_payload: dict[str, Any] = {
-                "task_id": task.task_id,
-                "training_successful": training_successful,
-                "training_time_seconds": training_time,
-                "error_message": error_msg,
-                "model_name": self._model_name,
-                "dataset_size": (
-                    len(train_dataset) if "train_dataset" in locals() else 0
-                ),
-                "output_dir": out_dir.as_posix(),
-                "checkpoints_dir": artifact_ref("checkpoints"),
-                "resume_from_path": resume_str,
-            }
+            result = LoRAResult(
+                task_id=task.task_id,
+                training_successful=training_successful,
+                training_time_seconds=training_time,
+                error_message=error_msg,
+                model_name=self._model_name,
+                dataset_size=len(train_dataset) if train_dataset is not None else 0,
+                output_dir=out_dir.as_posix(),
+                checkpoints_dir=artifact_ref("checkpoints"),
+                resume_from_path=resume_str,
+            )
             if final_adapter_path is not None:
-                result_payload["final_lora"] = artifact_ref(
+                result.final_lora = artifact_ref(
                     final_adapter_path.relative_to(artifacts_dir).as_posix()
                 )
                 archive_path = archive_model_dir(final_adapter_path)
-                result_payload["final_lora_archive"] = artifact_ref(
+                result.final_lora_archive = artifact_ref(
                     archive_path.relative_to(artifacts_dir).as_posix()
                 )
                 logger.info("Prepared LoRA archive at %s", archive_path)
@@ -337,7 +335,7 @@ def _compute_loss_with_guard(
                 final_adapter_path,
                 archive_path,
             )
-            return LoRAResult.model_validate(result_payload)
+            return result
 
         except Exception as exc:  # pylint: disable=broad-except
             error_msg = str(exc)
@@ -346,22 +344,20 @@ def _compute_loss_with_guard(
 
         training_time = time.time() - start_time
 
-        result: dict[str, Any] = {
-            "task_id": task.task_id,
-            "training_successful": training_successful,
-            "training_time_seconds": training_time,
-            "error_message": error_msg,
-            "model_name": self._model_name,
-            "dataset_size": len(train_dataset) if "train_dataset" in locals() else 0,
-            "output_dir": out_dir.as_posix(),
-            "checkpoints_dir": artifact_ref("checkpoints"),
-            "resume_from_path": (
-                str(resume_path) if "resume_path" in locals() and resume_path else None
-            ),
-        }
+        result = LoRAResult(
+            task_id=task.task_id,
+            training_successful=training_successful,
+            training_time_seconds=training_time,
+            error_message=error_msg,
+            model_name=self._model_name,
+            dataset_size=len(train_dataset) if train_dataset is not None else 0,
+            output_dir=out_dir.as_posix(),
+            checkpoints_dir=artifact_ref("checkpoints"),
+            resume_from_path=resume_path.as_posix() if resume_path else None,
+        )
 
         if training_successful:
-            return LoRAResult.model_validate(result)
+            return result
 
         write_executor_result(out_dir / "results.json", task.task_id, task.spec, result)
         message = error_msg or "LoRA SFT training failed"
diff --git a/src/worker/executors/mixins/data.py b/src/worker/executors/mixins/data.py
index ee45aab7..77c2cff1 100644
--- a/src/worker/executors/mixins/data.py
+++ b/src/worker/executors/mixins/data.py
@@ -702,14 +702,11 @@ def _collect_prompts_for_spec(
         )
 
     def _populate_table(
-        self,
-        payload: dict[str, Any],
-        table_stores_list: list[pd.DataFrame],
-    ):
+        self, items: list[dict[str, Any]], table_stores_list: list[pd.DataFrame]
+    ) -> list[dict[str, Any]]:
         """
         Group row-level generation outputs back into per-table outputs.
         """
-        items = payload["items"]
         cur = 0
         grouped_items: list[dict[str, Any]] = []
         for df in table_stores_list:
@@ -724,8 +721,7 @@ def _populate_table(
                 f"Output length {len(items)} does not match "
                 f"the total number of rows {cur} in table stores."
             )
-        payload["items"] = grouped_items
-        return payload
+        return grouped_items
 
     def _maybe_apply_dataset_shard(self, dataset, spec: TaskSpecStrictBase):
         shard_cfg = spec.shard
diff --git a/src/worker/executors/mixins/governance.py b/src/worker/executors/mixins/governance.py
index c623c934..a302f595 100644
--- a/src/worker/executors/mixins/governance.py
+++ b/src/worker/executors/mixins/governance.py
@@ -13,12 +13,12 @@
 
 from opentelemetry.trace import Span as OTelSpan
 
-from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.governance import (
     READY_SPAN_NAME,
     TASK_SPAN_NAME,
     SpanType,
 )
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.time import now_iso
 
@@ -237,32 +237,25 @@ def _extract_source_data_ids(self, spec: TaskSpecStrictBase) -> list[str]:
     def _dump_to_governance(
         self,
         task_id: str,
-        result: BaseExecutorResult | dict[str, Any],
+        result: BaseExecutorResult,
         dependencies_by_task: dict[str, list[str]],
     ) -> None:
         """Write parent + merged-child results and emit asset/lineage rows."""
         parent_deps = dependencies_by_task.get(task_id, [])
-        payload = (
-            result.model_dump(serialize_as_any=True)
-            if isinstance(result, BaseExecutorResult)
-            else result
-        )
-        children_payload = payload.get("children", {}) or {}
-
         collection_jobs: list[dict[str, Any]] = [
             {
                 "task_id": task_id,
-                "result": payload,
+                "result": result.model_dump(),
                 "deps": parent_deps,
                 "is_parent": True,
             }
         ]
-        for child_id, child_result in children_payload.items():
+        for child_id, child_result in result.children.items():
             child_deps = dependencies_by_task.get(child_id, [])
             collection_jobs.append(
                 {
                     "task_id": child_id,
-                    "result": child_result,
+                    "result": child_result.model_dump(),
                     "deps": child_deps,
                     "is_parent": False,
                 }
diff --git a/src/worker/executors/mixins/inference.py b/src/worker/executors/mixins/inference.py
index b2c9d7e3..4076e901 100644
--- a/src/worker/executors/mixins/inference.py
+++ b/src/worker/executors/mixins/inference.py
@@ -228,7 +228,7 @@ def _maybe_export_jsonl(
         self,
         spec: InferenceSpecStrict,
         task_id: str,
-        result: dict[str, Any],
+        items: list[dict[str, Any]],
         out_dir: Path,
     ) -> None:
         post_cfg = (postprocess := spec.postprocess) and postprocess.jsonl_export
@@ -260,7 +260,6 @@ def _maybe_export_jsonl(
             ) from exc
         target_path.parent.mkdir(parents=True, exist_ok=True)
 
-        items = result.get("items") or []
         required_fields = post_cfg.required_fields or []
         records: list[dict[str, Any]] = []
 
diff --git a/src/worker/executors/mp_executor.py b/src/worker/executors/mp_executor.py
index db92eebc..f2b7c199 100644
--- a/src/worker/executors/mp_executor.py
+++ b/src/worker/executors/mp_executor.py
@@ -21,7 +21,7 @@
 
 import psutil
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.worker_message import WorkerHardware
 from worker.config import WorkerConfig
 
@@ -440,9 +440,7 @@ def _loop() -> None:
         self._log_thread = t
         t.start()
 
-    def run(
-        self, task: ExecutorTask, out_dir: Path
-    ) -> BaseExecutorResult | dict[str, Any]:
+    def run(self, task: ExecutorTask, out_dir: Path) -> BaseExecutorResult:
         with self._lock:
             if self._shutdown:
                 logger.info("Starting worker subprocess for %s", self.name)
diff --git a/src/worker/executors/omni_executor_base.py b/src/worker/executors/omni_executor_base.py
index 86021bbd..073d71c8 100644
--- a/src/worker/executors/omni_executor_base.py
+++ b/src/worker/executors/omni_executor_base.py
@@ -19,7 +19,7 @@
 
 import yaml
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.parsing import to_bool, to_int
 
@@ -43,7 +43,10 @@
 
 class OmniResult(BaseExecutorResult):
     ok: bool = True
-    model: str | None = None
+    executor: str
+    mode: str
+    model: str | None
+    items: list[dict[str, Any]]
 
 
 class OmniExecutorBase(InferenceMixin, Executor):
@@ -74,7 +77,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> OmniResult:
             result = self._run_inner(task, spec, spec_dict, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return OmniResult.model_validate(result)
+        return result
 
     @abstractmethod
     def _run_inner(
@@ -83,7 +86,7 @@ def _run_inner(
         spec: TaskSpecStrictBase,
         spec_dict: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> OmniResult:
         """Run the executor-specific body. ``spec`` is the concrete strict
         spec; subclasses ``assert isinstance(spec, ...)`` to narrow."""
         raise NotImplementedError
diff --git a/src/worker/executors/omni_text2audio_executor.py b/src/worker/executors/omni_text2audio_executor.py
index 952dfb28..77168cbb 100644
--- a/src/worker/executors/omni_text2audio_executor.py
+++ b/src/worker/executors/omni_text2audio_executor.py
@@ -56,16 +56,27 @@
 from shared.utils.parsing import to_float, to_int
 
 from .base_executor import ExecutionError, ExecutorTask
-from .omni_executor_base import OmniExecutorBase, extract_multimodal_output
+from .omni_executor_base import OmniExecutorBase, OmniResult, extract_multimodal_output
 from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
+EXECUTOR_NAME = "omni_text2audio"
+
+
+class OmniText2AudioResult(OmniResult):
+    executor: str = EXECUTOR_NAME
+    mode: str = "bgm"
+    audio: dict[str, Any]
+    sample_rate: int
+    num_waveforms: int
+    audio_length: float
+    storyboard: dict[str, Any] | None = None
 
 
 class OmniText2AudioExecutor(OmniExecutorBase):
     """Generate background music with Omni diffusion sampling."""
 
-    name = "omni_text2audio"
+    name = EXECUTOR_NAME
     _TASK_SPEC_TYPE = OmniText2AudioSpecStrict
 
     def prepare(self) -> None:
@@ -84,7 +95,7 @@ def _run_inner(
         spec: TaskSpecStrictBase,
         spec_dict: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> OmniText2AudioResult:
         assert isinstance(spec, OmniText2AudioSpecStrict)
         prompts = self._collect_text_inputs(spec, task.task_id)
 
@@ -202,22 +213,15 @@ def _run_inner(
         if not items:
             raise ExecutionError("omni_text2audio produced no savable waveforms.")
 
-        first = items[0]["audio"] if items else {}
-        result: dict[str, Any] = {
-            "ok": True,
-            "executor": self.name,
-            "mode": "bgm",
-            "model": self._model_name,
-            "audio": first,
-            "items": items,
-            "sample_rate": sample_rate,
-            "num_waveforms": len(items),
-            "audio_length": audio_length,
-        }
-        storyboard = spec_dict.get("storyboard")
-        if isinstance(storyboard, dict):
-            result["storyboard"] = dict(storyboard)
-        return result
+        return OmniText2AudioResult(
+            model=self._model_name,
+            audio=items[0]["audio"] if items else {},
+            items=items,
+            sample_rate=sample_rate,
+            num_waveforms=len(items),
+            audio_length=audio_length,
+            storyboard=spec_dict.get("storyboard"),
+        )
 
     # ── model ────────────────────────────────────────────────────────────
 
diff --git a/src/worker/executors/omni_text2general_executor.py b/src/worker/executors/omni_text2general_executor.py
index 5664af99..222a446f 100644
--- a/src/worker/executors/omni_text2general_executor.py
+++ b/src/worker/executors/omni_text2general_executor.py
@@ -32,10 +32,16 @@
 from shared.utils.parsing import as_list, to_bool, to_float, to_int, to_int_list
 
 from .base_executor import ExecutionError, ExecutorTask
-from .omni_executor_base import OmniExecutorBase, extract_audio_from_mm, save_audio
+from .omni_executor_base import (
+    OmniExecutorBase,
+    OmniResult,
+    extract_audio_from_mm,
+    save_audio,
+)
 from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
+EXECUTOR_NAME = "omni_text2general"
 
 _DEFAULT_SYSTEM_PROMPT = (
     "You are Qwen, a virtual human developed by the Qwen Team, "
@@ -44,10 +50,18 @@
 )
 
 
+class OmniText2GeneralResult(OmniResult):
+    executor: str = EXECUTOR_NAME
+    mode: str = "narration"
+    audio: dict[str, Any]
+    sample_rate: int
+    storyboard: dict[str, Any] | None = None
+
+
 class OmniText2GeneralExecutor(OmniExecutorBase):
     """Generate narration/speech audio using Qwen3-Omni through vllm_omni.Omni."""
 
-    name = "omni_text2general"
+    name = EXECUTOR_NAME
     _TASK_SPEC_TYPE = OmniText2GeneralSpecStrict
 
     def prepare(self) -> None:
@@ -67,7 +81,7 @@ def _run_inner(
         spec: TaskSpecStrictBase,
         spec_dict: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> OmniText2GeneralResult:
         assert isinstance(spec, OmniText2GeneralSpecStrict)
         texts = self._collect_text_inputs(spec, task.task_id)
 
@@ -177,20 +191,13 @@ def _run_inner(
                     item["text"] = text_out
                 items.append(item)
 
-        first = items[0]["audio"] if items else {}
-        result: dict[str, Any] = {
-            "ok": True,
-            "executor": self.name,
-            "mode": "narration",
-            "model": self._model_name,
-            "audio": first,
-            "items": items,
-            "sample_rate": sample_rate,
-        }
-        storyboard = spec_dict.get("storyboard")
-        if isinstance(storyboard, dict):
-            result["storyboard"] = dict(storyboard)
-        return result
+        return OmniText2GeneralResult(
+            model=self._model_name,
+            items=items,
+            audio=items[0]["audio"] if items else {},
+            sample_rate=sample_rate,
+            storyboard=spec_dict.get("storyboard"),
+        )
 
     # ── model ────────────────────────────────────────────────────────────
 
diff --git a/src/worker/executors/omni_text2image_executor.py b/src/worker/executors/omni_text2image_executor.py
index e3b1f234..8254f162 100644
--- a/src/worker/executors/omni_text2image_executor.py
+++ b/src/worker/executors/omni_text2image_executor.py
@@ -21,16 +21,23 @@
 from shared.utils.parsing import as_list
 
 from .base_executor import ExecutionError, ExecutorTask
-from .omni_executor_base import OmniExecutorBase
+from .omni_executor_base import OmniExecutorBase, OmniResult
 from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
+EXECUTOR_NAME = "omni_text2image"
+
+
+class OmniText2ImageResult(OmniResult):
+    executor: str = EXECUTOR_NAME
+    mode: str = "image"
+    image: dict[str, Any]
 
 
 class OmniText2ImageExecutor(OmniExecutorBase):
     """Generate images using vllm_omni.Omni."""
 
-    name = "omni_text2image"
+    name = EXECUTOR_NAME
     _TASK_SPEC_TYPE = OmniText2ImageSpecStrict
 
     def prepare(self) -> None:
@@ -45,7 +52,7 @@ def _run_inner(
         spec: TaskSpecStrictBase,
         spec_dict: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> OmniText2ImageResult:
         assert isinstance(spec, OmniText2ImageSpecStrict)
         prompts = self._collect_text_inputs(spec, task.task_id)
 
@@ -92,16 +99,11 @@ def _run_inner(
                     }
                 )
 
-        first = items[0]["image"] if items else {}
-        result: dict[str, Any] = {
-            "ok": True,
-            "executor": self.name,
-            "mode": "image",
-            "model": self._model_name,
-            "image": first,
-            "items": items,
-        }
-        return result
+        return OmniText2ImageResult(
+            model=self._model_name,
+            image=items[0]["image"] if items else {},
+            items=items,
+        )
 
     # ── model ────────────────────────────────────────────────────────────
 
diff --git a/src/worker/executors/omni_text2speech_executor.py b/src/worker/executors/omni_text2speech_executor.py
index 5d7be1b7..ff13753c 100644
--- a/src/worker/executors/omni_text2speech_executor.py
+++ b/src/worker/executors/omni_text2speech_executor.py
@@ -24,6 +24,7 @@
 from .base_executor import ExecutionError, ExecutorTask
 from .omni_executor_base import (
     OmniExecutorBase,
+    OmniResult,
     extract_audio_from_mm,
     extract_multimodal_output,
     save_audio,
@@ -31,12 +32,21 @@
 from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
+EXECUTOR_NAME = "omni_text2speech"
+
+
+class OmniText2SpeechResult(OmniResult):
+    executor: str = EXECUTOR_NAME
+    mode: str = "tts"
+    audio: dict[str, Any]
+    sample_rate: int
+    storyboard: dict[str, Any] | None = None
 
 
 class OmniText2SpeechExecutor(OmniExecutorBase):
     """Generate speech audio using vllm_omni.Omni."""
 
-    name = "omni_text2speech"
+    name = EXECUTOR_NAME
     _TASK_SPEC_TYPE = OmniText2SpeechSpecStrict
 
     def prepare(self) -> None:
@@ -51,7 +61,7 @@ def _run_inner(
         spec: TaskSpecStrictBase,
         spec_dict: dict[str, Any],
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> OmniText2SpeechResult:
         assert isinstance(spec, OmniText2SpeechSpecStrict)
         texts = self._collect_text_inputs(spec, task.task_id)
 
@@ -101,20 +111,13 @@ def _run_inner(
                     }
                 )
 
-        first = items[0]["audio"] if items else {}
-        result: dict[str, Any] = {
-            "ok": True,
-            "executor": self.name,
-            "mode": "tts",
-            "model": self._model_name,
-            "audio": first,
-            "items": items,
-            "sample_rate": sample_rate,
-        }
-        storyboard = spec_dict.get("storyboard")
-        if isinstance(storyboard, dict):
-            result["storyboard"] = dict(storyboard)
-        return result
+        return OmniText2SpeechResult(
+            model=self._model_name,
+            items=items,
+            audio=items[0]["audio"] if items else {},
+            sample_rate=sample_rate,
+            storyboard=spec_dict.get("storyboard"),
+        )
 
     # ── model ────────────────────────────────────────────────────────────
 
diff --git a/src/worker/executors/ppo_executor.py b/src/worker/executors/ppo_executor.py
index c9320f58..4bbae544 100644
--- a/src/worker/executors/ppo_executor.py
+++ b/src/worker/executors/ppo_executor.py
@@ -14,10 +14,7 @@
 from contextlib import contextmanager, nullcontext
 from pathlib import Path
 from types import SimpleNamespace
-from typing import TYPE_CHECKING, Any, cast
-
-if TYPE_CHECKING:
-    from deepspeed.runtime.engine import DeepSpeedEngine
+from typing import Any, cast
 
 import torch
 from datasets import Dataset
@@ -34,7 +31,7 @@
 from trl.trainer.ppo_trainer import PPOTrainer
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import PPOSpecStrict
 from shared.utils.manifest import scratch_dir
 from shared.utils.parsing import safe_float, safe_int, to_bool
@@ -433,9 +430,8 @@ def run(self, task: ExecutorTask, out_dir: Path) -> PPOResult:
             )
             ipc_path = scratch_dir(out_dir) / "distributed_result.json"
             if ipc_path.exists():
-                result = self.load_json(ipc_path)
                 self._task_out_dir = None
-                return PPOResult.model_validate(result)
+                return PPOResult.model_validate(self.load_json(ipc_path))
             self._task_out_dir = None
             return PPOResult(
                 training_successful=True,
@@ -901,37 +897,37 @@ def build_trainer() -> _EarlyStopPPOTrainer:
             logger.exception("PPO training failed: %s", exc)
             training_time = time.time() - start_time
             dataset_size = len(dataset) if "dataset" in locals() else 0  # type: ignore
-            results = {
-                "training_successful": training_successful,
-                "training_time_seconds": training_time,
-                "error_message": error_msg,
-                "model_name": self._model_name,
-                "dataset_size": dataset_size,
-                "output_dir": out_dir.as_posix(),
-            }
+            result = PPOResult(
+                training_successful=training_successful,
+                training_time_seconds=training_time,
+                error_message=error_msg,
+                model_name=self._model_name,
+                dataset_size=dataset_size,
+                output_dir=out_dir.as_posix(),
+            )
             write_executor_result(
-                out_dir / "results.json", task.task_id, task.spec, results
+                out_dir / "results.json", task.task_id, task.spec, result
             )
             self._task_out_dir = None
             raise ExecutionError(error_msg or "PPO training failed") from exc
 
         training_time = time.time() - start_time
 
-        results = {
-            "training_successful": training_successful,
-            "training_time_seconds": training_time,
-            "error_message": error_msg,
-            "model_name": self._model_name,
-            "dataset_size": len(dataset),
-            "output_dir": out_dir.as_posix(),
-            "checkpoints_dir": artifact_ref("checkpoints"),
-        }
+        result = PPOResult(
+            training_successful=training_successful,
+            training_time_seconds=training_time,
+            error_message=error_msg,
+            model_name=self._model_name,
+            dataset_size=len(dataset),
+            output_dir=out_dir.as_posix(),
+            checkpoints_dir=artifact_ref("checkpoints"),
+        )
         if final_model_path is not None:
-            results["final_model"] = artifact_ref(
+            result.final_model = artifact_ref(
                 final_model_path.relative_to(artifacts_dir).as_posix()
             )
         if final_archive_path is not None:
-            results["final_model_archive"] = artifact_ref(
+            result.final_model_archive = artifact_ref(
                 final_archive_path.relative_to(artifacts_dir).as_posix()
             )
 
@@ -946,7 +942,7 @@ def build_trainer() -> _EarlyStopPPOTrainer:
 
         logger.info("PPO training task completed in %.2f seconds", training_time)
         self._task_out_dir = None
-        return PPOResult.model_validate(results)
+        return result
 
     def _ensure_jsonl_local(self, jsonl_cfg: dict[str, Any]) -> Path:
         headers_cfg = (
@@ -1461,7 +1457,7 @@ def _wrapped_save_model(
             output_dir: str | None = None, _internal_call: bool = False
         ) -> None:
             backup_model = ppo_trainer.model
-            backup_deepspeed: DeepSpeedEngine | None = None
+            backup_deepspeed: Any = None
             ppo_trainer.model = self._resolve_model_for_save(backup_model)
             if ppo_trainer.is_deepspeed_enabled:
                 backup_deepspeed = ppo_trainer.deepspeed
diff --git a/src/worker/executors/rag_executor.py b/src/worker/executors/rag_executor.py
index a1634258..24286bcd 100644
--- a/src/worker/executors/rag_executor.py
+++ b/src/worker/executors/rag_executor.py
@@ -16,7 +16,7 @@
 from datasets import load_dataset
 from qdrant_client import QdrantClient, models
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import RagSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index 4c6cc82b..97085617 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -23,7 +23,7 @@
 from trl.trainer.sft_trainer import SFTTrainer
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import SFTSpecStrict, TaskSpecStrictBase
 from shared.utils.manifest import scratch_dir
 
@@ -206,9 +206,8 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                     )
                 ipc_path = scratch_dir(out_dir) / "distributed_result.json"
                 if ipc_path.exists():
-                    distributed_result = self.load_json(ipc_path)
                     self._task_out_dir = None
-                    return SFTResult.model_validate(distributed_result)
+                    return SFTResult.model_validate(self.load_json(ipc_path))
                 self._task_out_dir = None
                 return SFTResult(
                     training_successful=True,
@@ -222,6 +221,8 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 "Failed to launch distributed SFT subprocess"
             ) from spawn_exc
 
+        resume_path: Path | None = None
+        train_dataset: Any = None
         try:
             # Proceed with in-process training (single GPU or inside torchrun)
             model_name = spec.model_name or "gpt2"
@@ -230,7 +231,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
             resume_path = determine_resume_path(
                 spec, training_cfg, out_dir, logger=logger
             )
-            resume_str = str(resume_path) if resume_path else None
+            resume_str = resume_path.as_posix() if resume_path else None
 
             if resume_path:
                 logger.info(
@@ -308,7 +309,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 )
 
             sft_config = SFTConfig(
-                output_dir=str(checkpoint_dir),
+                output_dir=checkpoint_dir.as_posix(),
                 num_train_epochs=float(training_cfg.get("num_train_epochs", 1.0)),
                 per_device_train_batch_size=int(training_cfg.get("batch_size", 2)),
                 gradient_accumulation_steps=int(
@@ -418,7 +419,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
             final_archive_path: Path | None = None
             if bool(training_cfg.get("save_model", True)):
                 model_path = artifacts_dir / "final_model"
-                trainer.save_model(str(model_path))
+                trainer.save_model(model_path.as_posix())
                 tokenizer.save_pretrained(model_path)
                 final_model_path = model_path
                 logger.info("Saved fine-tuned model to %s", model_path)
@@ -439,28 +440,28 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 )
 
             training_time = time.time() - start_time
-            result_payload: dict[str, Any] = {
-                "task_id": task.task_id,
-                "training_successful": training_successful,
-                "training_time_seconds": training_time,
-                "error_message": error_msg,
-                "model_name": self._model_name,
-                "dataset_size": len(train_dataset),
-                "output_dir": out_dir.as_posix(),
-                "checkpoints_dir": artifact_ref("checkpoints"),
-                "resume_from_path": resume_str,
-            }
+            result = SFTResult(
+                task_id=task.task_id,
+                training_successful=training_successful,
+                training_time_seconds=training_time,
+                error_message=error_msg,
+                model_name=self._model_name,
+                dataset_size=len(train_dataset),
+                output_dir=out_dir.as_posix(),
+                checkpoints_dir=artifact_ref("checkpoints"),
+                resume_from_path=resume_str,
+            )
 
             if final_model_path is not None:
                 resolved_model_path = Path(final_model_path)
                 self._final_model_dir = (
                     resolved_model_path if resolved_model_path.exists() else None
                 )
-                result_payload["final_model"] = artifact_ref(
+                result.final_model = artifact_ref(
                     final_model_path.relative_to(artifacts_dir).as_posix()
                 )
                 if final_archive_path is not None:
-                    result_payload["final_model_archive"] = artifact_ref(
+                    result.final_model_archive = artifact_ref(
                         final_archive_path.relative_to(artifacts_dir).as_posix()
                     )
 
@@ -484,7 +485,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
             self._final_model_dir = None
 
             self._task_out_dir = None
-            return SFTResult.model_validate(result_payload)
+            return result
 
         except Exception as exc:
             error_msg = str(exc)
@@ -501,23 +502,21 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
             self._final_model_dir = None
 
         training_time = time.time() - start_time
-        result: dict[str, Any] = {
-            "task_id": task.task_id,
-            "training_successful": training_successful,
-            "training_time_seconds": training_time,
-            "error_message": error_msg,
-            "model_name": self._model_name,
-            "dataset_size": (
+        result = SFTResult(
+            task_id=task.task_id,
+            training_successful=training_successful,
+            training_time_seconds=training_time,
+            error_message=error_msg,
+            model_name=self._model_name,
+            dataset_size=(
                 len(train_dataset)
                 if "train_dataset" in locals() and train_dataset is not None
                 else 0
             ),
-            "output_dir": out_dir.as_posix(),
-            "checkpoints_dir": artifact_ref("checkpoints"),
-            "resume_from_path": (
-                str(resume_path) if "resume_path" in locals() and resume_path else None
-            ),
-        }
+            output_dir=out_dir.as_posix(),
+            checkpoints_dir=artifact_ref("checkpoints"),
+            resume_from_path=resume_path.as_posix() if resume_path else None,
+        )
         write_executor_result(out_dir / "results.json", task.task_id, task.spec, result)
         if caught_exc is not None:
             self._task_out_dir = None
@@ -619,7 +618,7 @@ def _ensure_jsonl_local(self, jsonl_cfg: dict[str, Any]) -> Path:
                     timeout=timeout,
                     logger=logger,
                 )
-                jsonl_cfg["path"] = str(resolved)
+                jsonl_cfg["path"] = resolved.as_posix()
                 return resolved
             except ExecutionError as exc:
                 last_error = exc
@@ -840,13 +839,13 @@ def _resolve_deepspeed_config(training_cfg: dict[str, Any], log) -> Any | None:
         if isinstance(cfg, dict):
             return cfg
         if isinstance(cfg, (str, Path)):
-            candidate = Path(str(cfg)).expanduser()
+            candidate = Path(cfg).expanduser()
             if candidate.exists():
                 log.info("Using DeepSpeed config at %s", candidate)
-                return str(candidate)
+                return candidate.as_posix()
             # Allow literal identifiers (e.g., 'auto') without file presence.
             log.info("Using DeepSpeed literal configuration '%s'", cfg)
-            return str(cfg)
+            return cfg if isinstance(cfg, str) else cfg.as_posix()
         raise ValueError("training.deepspeed must be a dict, path string, or falsy")
 
     @staticmethod
diff --git a/src/worker/executors/ssh_executor.py b/src/worker/executors/ssh_executor.py
index 4677c532..107e93df 100644
--- a/src/worker/executors/ssh_executor.py
+++ b/src/worker/executors/ssh_executor.py
@@ -29,7 +29,7 @@
 
 import requests
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.components.resources import GPURequirements
 from shared.tasks.specs.ssh import (
     SSHInputSpec,
diff --git a/src/worker/executors/transformers_executor.py b/src/worker/executors/transformers_executor.py
index c8a32e95..f8aa58fa 100644
--- a/src/worker/executors/transformers_executor.py
+++ b/src/worker/executors/transformers_executor.py
@@ -57,8 +57,8 @@
 from typing import TYPE_CHECKING, Any
 
 from shared.schemas.artifact import ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.governance import SpanType
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import (
     EmbeddingSpecStrict,
     InferenceSpecStrict,
@@ -411,14 +411,14 @@ def run(self, task: ExecutorTask, out_dir: Path) -> TransformersResult:
             result = self._run_inner(spec, task_id, out_dir)
         maybe_upload_artifacts(task, out_dir, logger=logger)
         maybe_upload_traces(task, out_dir, logger=logger)
-        return TransformersResult.model_validate(result)
+        return result
 
     def _run_inner(
         self,
         spec: "InferenceSpecStrict | EmbeddingSpecStrict",
         task_id: str,
         out_dir: Path,
-    ) -> dict[str, Any]:
+    ) -> TransformersResult:
         with self._span("model load", span_type=SpanType.COMPUTE):
             self._ensure_model(spec)
 
@@ -435,8 +435,6 @@ def _run_inner(
 
         assert self._model is not None
 
-        result: dict[str, Any] = {}
-
         if self._mode == "visual-embedding":
             assert self._image_processor is not None
             device = self._device or ("cuda" if torch.cuda.is_available() else "cpu")
@@ -491,15 +489,13 @@ def _run_inner(
             emb_path = artifacts_dir / "visual_embeddings.pt"
             torch.save(grouped_visual_embeddings, emb_path)
 
-            result = {
-                "ok": True,
-                "model": self._model_name,
-                "items": [],  # Embeddings are in file
-                "count": len(grouped_visual_embeddings),
-                "embedding_file": artifact_ref("visual_embeddings.pt"),
-            }
-            if image_group_sizes is not None:
-                result["image_group_sizes"] = image_group_sizes
+            result = TransformersResult(
+                model=self._model_name,
+                items=[],
+                count=len(grouped_visual_embeddings),
+                embedding_file=artifact_ref("visual_embeddings.pt"),
+                image_group_sizes=image_group_sizes,
+            )
 
             self._dump_to_governance(
                 task_id=task_id,
@@ -605,21 +601,20 @@ def _run_inner(
             prompt_tokens += int(input_len)
             completion_tokens += int(gen_part.shape[0])
 
-        result = {
-            "ok": True,
-            "model": self._model_name,
-            "items": items,
-            "usage": {
+        result = TransformersResult(
+            model=self._model_name,
+            items=items,
+            usage={
                 "prompt_tokens": int(prompt_tokens),
                 "completion_tokens": int(completion_tokens),
                 "total_tokens": int(prompt_tokens + completion_tokens),
                 "num_requests": len(self._prompts),
                 "latency_sec": latency,
             },
-        }
+        )
 
         if isinstance(spec, InferenceSpecStrict):
-            self._maybe_export_jsonl(spec, task_id, result, out_dir)
+            self._maybe_export_jsonl(spec, task_id, items, out_dir)
 
         self._dump_to_governance(
             task_id=task_id,
diff --git a/src/worker/executors/utils/checkpoints.py b/src/worker/executors/utils/checkpoints.py
index c5c6e59f..9814c976 100644
--- a/src/worker/executors/utils/checkpoints.py
+++ b/src/worker/executors/utils/checkpoints.py
@@ -12,9 +12,9 @@
 import requests
 
 from shared.schemas.artifact import ArtifactContext, ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
-from shared.schemas.result import write_result_in_envelope
+from shared.schemas.result import BaseExecutorResult, ResultEnvelope
 from shared.tasks.specs import TaskSpecStrictBase
+from shared.utils.atomic import atomic_write_text
 from shared.utils.http import add_auth_headers
 from shared.utils.parsing import parse_bool_env
 
@@ -416,16 +416,13 @@ def build_artifact_context(spec: TaskSpecStrictBase, out_dir: Path) -> ArtifactC
 
 
 def write_executor_result(
-    path: Path,
-    task_id: str,
-    spec: TaskSpecStrictBase,
-    result: BaseExecutorResult | dict[str, Any],
+    path: Path, task_id: str, spec: TaskSpecStrictBase, result: BaseExecutorResult
 ) -> None:
     """Stamp ``_artifacts`` onto ``result`` and persist the envelope."""
-    if isinstance(result, dict):
-        result = BaseExecutorResult.model_validate(result)
+    path.parent.mkdir(parents=True, exist_ok=True)
     result.artifacts = build_artifact_context(spec, path.parent)
-    write_result_in_envelope(path, task_id, result)
+    envelope = ResultEnvelope(task_id=task_id, result=result)
+    atomic_write_text(path, envelope.model_dump_json(indent=2))
 
 
 def maybe_upload_artifacts(
diff --git a/src/worker/executors/vllm_executor.py b/src/worker/executors/vllm_executor.py
index 4e6cea06..bb94b21d 100644
--- a/src/worker/executors/vllm_executor.py
+++ b/src/worker/executors/vllm_executor.py
@@ -66,8 +66,8 @@
         _HAS_VLLM = False
         StructuredOutputsParams = None  # type: ignore
 
-from shared.schemas.executor_result import BaseExecutorResult
 from shared.schemas.governance import SpanType
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import InferenceSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
@@ -1157,54 +1157,48 @@ def _collect(
                 "num_requests": len(self._batched_inputs),
             }
 
-            result: dict[str, Any] = {
-                "ok": True,
-                "model": self._model_name,
-                "items": per_task_items.get(task_id, []),
-                "usage": parent_usage,
-            }
-
-            child_results: dict[str, Any] = {}
+            items = per_task_items.get(task_id, [])
+            child_results: dict[str, VLLMResult] = {}
             for child in merge_children:
                 child_id = child.task_id.strip()
                 if not child_id:
                     continue
-                child_payload: dict[str, Any] = {
-                    "items": per_task_items.get(child_id, []),
-                }
+                child_items = per_task_items.get(child_id, [])
                 maybe_usage = usage_by_task.get(child_id)
-                if maybe_usage:
-                    child_payload["usage"] = maybe_usage
-                child_results[child_id] = child_payload
+                child_results[child_id] = VLLMResult(
+                    model=self._model_name, items=child_items, usage=maybe_usage
+                )
 
             if parent_tables := parent_entry.tables:
-                result = self._populate_table(result, parent_tables)
+                items = self._populate_table(items, parent_tables)
             if child_results:
                 for child_id, child_payload in child_results.items():
                     if (child_entry := entry_by_task_id.get(child_id)) and (
                         child_tables := child_entry.tables
                     ):
-                        child_results[child_id] = self._populate_table(
-                            child_payload, child_tables
+                        child_results[child_id].items = self._populate_table(
+                            child_payload.items, child_tables
                         )
 
-            if child_results:
-                result["children"] = child_results
+        result = VLLMResult(
+            children=cast(dict[str, BaseExecutorResult], child_results),
+            model=self._model_name,
+            items=items,
+            usage=parent_usage,
+        )
 
         with self._span(
             "JSONL export",
             span_type=SpanType.COMPUTE,
             attributes={"task_ids": task_ids},
         ):
-            self._maybe_export_jsonl(spec, task_id, result, out_dir)
+            self._maybe_export_jsonl(spec, task_id, result.items, out_dir)
 
         self._dump_to_governance(
-            task_id=task_id,
-            result=result,
-            dependencies_by_task=dependencies_by_task,
+            task_id=task_id, result=result, dependencies_by_task=dependencies_by_task
         )
 
-        return VLLMResult.model_validate(result)
+        return result
 
     def cleanup_after_run(self) -> None:
         if self._llm:
diff --git a/src/worker/runner.py b/src/worker/runner.py
index 6d7b17b0..96c2b2cc 100644
--- a/src/worker/runner.py
+++ b/src/worker/runner.py
@@ -10,7 +10,7 @@
 
 import requests
 
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks import MergedChildTaskStrict
 from shared.tasks.specs import InferenceSpecStrict, TaskSpecStrictBase
 from shared.tasks.worker_message import (
@@ -128,7 +128,7 @@ def _write_results(
         spec: TaskSpecStrictBase,
         merged_children: list[MergedChildTaskStrict],
         out_dir: Path,
-        result: BaseExecutorResult | dict[str, Any] | None,
+        result: BaseExecutorResult | None,
     ):
         if result is None:
             return
@@ -153,7 +153,7 @@ def _write_single_result(
         task_id: str,
         spec: TaskSpecStrictBase,
         out_dir: Path,
-        payload: BaseExecutorResult | dict[str, Any] | None,
+        payload: BaseExecutorResult | None,
     ):
         if payload is None:
             return
@@ -181,10 +181,7 @@ def _simulate_bandwidth_delay(self, payload_bytes: int, destination: str) -> Non
         time.sleep(delay)
 
     def _maybe_emit_http(
-        self,
-        task_id: str,
-        spec: TaskSpecStrictBase,
-        result: BaseExecutorResult | dict[str, Any],
+        self, task_id: str, spec: TaskSpecStrictBase, result: BaseExecutorResult
     ) -> None:
         """Send task results to an HTTP endpoint when requested by the spec."""
         destination = get_http_destination(spec)
@@ -193,14 +190,9 @@ def _maybe_emit_http(
 
         url = destination.url
         ignore_error = destination.ignore_error
-        result_dict = (
-            result.model_dump(serialize_as_any=True)
-            if isinstance(result, BaseExecutorResult)
-            else result
-        )
         payload = {
             "task_id": task_id,
-            "result": result_dict,
+            "result": result.model_dump(),
             "worker_id": self.lifecycle.worker_id,
         }
         payload_size = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))
diff --git a/tests/shared/test_executor_result.py b/tests/shared/test_executor_result.py
index 24bb93c8..4b5d79d3 100644
--- a/tests/shared/test_executor_result.py
+++ b/tests/shared/test_executor_result.py
@@ -6,7 +6,7 @@
 from pydantic import Field
 
 from shared.schemas.artifact import ArtifactContext, ArtifactRef
-from shared.schemas.executor_result import BaseExecutorResult
+from shared.schemas.result import BaseExecutorResult, ResultEnvelope
 
 
 class _SampleResult(BaseExecutorResult):
@@ -22,9 +22,9 @@ def test_subclass_round_trip_through_base_preserves_extra_fields() -> None:
         ok=True,
         items=[{"output": "hello"}],
         usage={"latency_sec": 0.5},
-        artifacts=ArtifactContext(base_dir="/tmp/t", base_url="http://h"),
+        _artifacts=ArtifactContext(base_dir="/tmp/t", base_url="http://h"),
     )
-    payload = original.model_dump_json(serialize_as_any=True)
+    payload = original.model_dump_json()
 
     base = BaseExecutorResult.model_validate_json(payload)
     redumped = json.loads(base.model_dump_json())
@@ -34,30 +34,16 @@ def test_subclass_round_trip_through_base_preserves_extra_fields() -> None:
     assert redumped["_artifacts"] == {"base_dir": "/tmp/t", "base_url": "http://h"}
 
 
-def test_artifacts_alias_round_trips_both_directions() -> None:
-    """The wire key is ``_artifacts`` (alias); field-name input is also accepted."""
-    from_alias = BaseExecutorResult.model_validate({"_artifacts": {"base_dir": "/a"}})
-    from_field = BaseExecutorResult.model_validate({"artifacts": {"base_dir": "/a"}})
-
-    assert (
-        from_alias.artifacts == from_field.artifacts == ArtifactContext(base_dir="/a")
-    )
-
-    dumped = from_alias.model_dump()
-    assert "_artifacts" in dumped
-    assert "artifacts" not in dumped
-
-
 def test_recursive_children_round_trip() -> None:
     """Nested ``children`` deserialize as ``BaseExecutorResult`` and re-emit
-    their extra fields when serialized with ``serialize_as_any=True``."""
+    their extra fields when serialized."""
     parent = _SampleResult(
         items=[{"output": "p"}],
         children={
             "c1": _SampleResult(items=[{"output": "c"}], usage={"total_tokens": 3}),
         },
     )
-    payload = parent.model_dump_json(serialize_as_any=True)
+    payload = parent.model_dump_json()
     base = BaseExecutorResult.model_validate_json(payload)
 
     assert "c1" in base.children
@@ -75,11 +61,9 @@ def test_artifact_ref_is_a_typed_path_reference() -> None:
 def test_envelope_round_trip_preserves_subclass_payload() -> None:
     """The production write→read path (``write_result_in_envelope`` →
     ``ResultEnvelope.model_validate``) round-trips subclass fields."""
-    from shared.schemas.result import ResultEnvelope
-
     inner = _SampleResult(items=[{"output": "hello"}], usage={"total_tokens": 7})
     env = ResultEnvelope(task_id="tsk-x", result=inner)
-    raw = env.model_dump_json(serialize_as_any=True)
+    raw = env.model_dump_json()
 
     parsed = ResultEnvelope.model_validate_json(raw)
     dumped = parsed.result.model_dump()
diff --git a/tests/worker/test_connector_logging.py b/tests/worker/test_connector_logging.py
index 0caa357c..417e8a98 100644
--- a/tests/worker/test_connector_logging.py
+++ b/tests/worker/test_connector_logging.py
@@ -1,16 +1,24 @@
 """Test that connector logs are properly redirected from worker subprocess to parent."""
 
+import logging
 import tempfile
 import time
 import uuid
 from pathlib import Path
+from typing import Any
 
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.worker_message import WorkerTaskMessage
 from tests.worker.factories import make_live_worker_config, make_worker_hardware
 from worker.executors.base_executor import Executor
 from worker.executors.mp_executor import MPExecutor
 
 
+class ConnectorLoggingResult(BaseExecutorResult):
+    ok: bool = True
+    result: dict[str, Any]
+
+
 class ConnectorLoggingExecutor(Executor):
     """Simple test executor that uses a connector and logs messages."""
 
@@ -19,9 +27,8 @@ class ConnectorLoggingExecutor(Executor):
     def prepare(self) -> None:
         pass
 
-    def run(self, task, out_dir: Path) -> dict:
+    def run(self, task, out_dir: Path) -> ConnectorLoggingResult:
         """Run a simple test that logs from different modules."""
-        import logging
 
         # Get loggers from different modules that would be used in real execution
         executor_logger = logging.getLogger("executors.test_executor")
@@ -36,13 +43,12 @@ def run(self, task, out_dir: Path) -> dict:
 
         root_logger.info("Message from root logger")
 
-        return {
-            "ok": True,
-            "result": {
+        return ConnectorLoggingResult(
+            result={
                 "status": "completed",
                 "log_test": "Messages logged from different modules",
             },
-        }
+        )
 
     def cleanup_after_run(self) -> None:
         pass
@@ -89,9 +95,8 @@ def test_connector_logs_printed_to_stderr(tmp_path: Path) -> None:
 
     mp.cleanup_after_run()
 
-    # Verify the executor ran successfully
-    assert isinstance(result, dict)
-    assert result["ok"], f"Executor failed: {result}"
-    assert (
-        result.get("result", {}).get("status") == "completed"
-    ), f"Unexpected result: {result}"
+    # Verify the executor ran successfully. The MP boundary pickles the
+    # subclass instance, so the parent receives a ``ConnectorLoggingResult``.
+    assert isinstance(result, ConnectorLoggingResult), f"Unexpected result: {result}"
+    assert result.ok is True
+    assert result.result.get("status") == "completed", f"Unexpected result: {result}"
diff --git a/tests/worker/test_data_mixin_lineage.py b/tests/worker/test_data_mixin_lineage.py
index 48892ee9..491d65a7 100644
--- a/tests/worker/test_data_mixin_lineage.py
+++ b/tests/worker/test_data_mixin_lineage.py
@@ -7,6 +7,7 @@
 
 from PIL import Image
 
+from shared.schemas.result import BaseExecutorResult
 from worker.executors.mixins.data import DataMixin
 
 
@@ -96,14 +97,16 @@ def test_dump_to_governance_with_merged_children(tmp_path: Path) -> None:
     mixin = _Mixin()
     out_dir = tmp_path / "task"
     with mixin._task_span("tsk-parent", "wfl-1", out_dir, owner_id="alice"):
-        result = {
-            "ok": True,
-            "items": [{"output": "p"}],
-            "children": {
-                "tsk-c1": {"items": [{"output": "c1"}]},
-                "tsk-c2": {"items": [{"output": "c2"}]},
-            },
-        }
+        result = BaseExecutorResult.model_validate(
+            {
+                "ok": True,
+                "items": [{"output": "p"}],
+                "children": {
+                    "tsk-c1": {"items": [{"output": "c1"}]},
+                    "tsk-c2": {"items": [{"output": "c2"}]},
+                },
+            }
+        )
         deps = {
             "tsk-parent": ["tsk-up-a"],
             "tsk-c1": ["tsk-up-b"],
diff --git a/tests/worker/test_executor_bootstrap.py b/tests/worker/test_executor_bootstrap.py
index 34f6596c..45277de9 100644
--- a/tests/worker/test_executor_bootstrap.py
+++ b/tests/worker/test_executor_bootstrap.py
@@ -10,6 +10,7 @@
 from pathlib import Path
 from typing import Any
 
+from shared.schemas.result import BaseExecutorResult
 from tests.worker.factories import make_live_worker_config, make_worker_hardware
 from worker.executors.base_executor import Executor, ExecutorTask
 from worker.main import initialize_executors
@@ -23,8 +24,8 @@ class _PassthroughExecutor(Executor):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
-    def run(self, task: ExecutorTask, out_dir: Path) -> dict[str, Any]:  # noqa: D401
-        return {"ok": True}
+    def run(self, task: ExecutorTask, out_dir: Path) -> BaseExecutorResult:
+        return BaseExecutorResult.model_validate({"ok": True})
 
 
 class TestInitializeExecutorsHardware:
diff --git a/tests/worker/test_mp_executor_lifecycle.py b/tests/worker/test_mp_executor_lifecycle.py
index f6882cc5..6cea3af4 100644
--- a/tests/worker/test_mp_executor_lifecycle.py
+++ b/tests/worker/test_mp_executor_lifecycle.py
@@ -4,6 +4,7 @@
 import uuid
 from pathlib import Path
 
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks import TaskType
 from shared.tasks.specs import EchoSpecStrict
 from shared.tasks.worker_message import WorkerTaskMessage
@@ -16,11 +17,16 @@
 from worker.executors.mp_executor import MPExecutor
 
 
+class _SimpleMPResult(BaseExecutorResult):
+    ok: bool = True
+    task_id: str
+
+
 class _SimpleMPExecutor(Executor):
     name = "simple_mp"
 
-    def run(self, task, out_dir: Path) -> dict:
-        return {"ok": True, "task_id": task.task_id}
+    def run(self, task, out_dir: Path) -> _SimpleMPResult:
+        return _SimpleMPResult(task_id=task.task_id)
 
     def cleanup_after_run(self) -> None:
         return None
@@ -54,8 +60,8 @@ def test_mp_executor_does_not_start_subprocess_until_first_run(tmp_path: Path) -
     with tempfile.TemporaryDirectory() as out_dir:
         result = mp.run(_simple_task_message(), Path(out_dir))
 
-    assert isinstance(result, dict)
-    assert result["ok"] is True
+    assert isinstance(result, _SimpleMPResult)
+    assert result.ok is True
     assert mp._shutdown is False
     assert mp._proc is not None
     assert mp._proc.is_alive()

From 75083c975d044d029e2097a6a23bddc0e0af481f Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 16:25:37 +0000
Subject: [PATCH 11/26] refactor(worker): type upstream result and artifact
 resolver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tightens the worker-side mirror of the server's ``_render_artifact_ref``
work: ``artifact_to_source`` and ``maybe_resolve_artifact_ref`` take
``context: dict[str, BaseExecutorResult] | None`` and read
``node_result.artifacts`` directly instead of walking dict shapes
defensively. The legacy "result key" fallback (where some executors
nested their payload under a ``"result"`` key) is removed — no
executor produces that shape after the result-schema refactor.
``GovernanceMixin._spec_upstream_results`` and the data-mixin
``context`` local are retyped accordingly, and ``_evaluate_expr``
grows a ``BaseModel`` branch (with sentinel-default ``getattr``) so
expressions can walk into typed upstream results.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/mixins/data.py           |  3 +-
 src/worker/executors/mixins/governance.py     |  4 +-
 src/worker/executors/utils/artifacts.py       | 35 ++++------
 src/worker/executors/utils/graph_templates.py | 20 ++++--
 tests/worker/test_artifact_utils.py           | 66 ++++++++-----------
 tests/worker/test_data_mixin_lineage.py       | 19 +++---
 6 files changed, 72 insertions(+), 75 deletions(-)

diff --git a/src/worker/executors/mixins/data.py b/src/worker/executors/mixins/data.py
index 77c2cff1..d0ee4324 100644
--- a/src/worker/executors/mixins/data.py
+++ b/src/worker/executors/mixins/data.py
@@ -15,6 +15,7 @@
 from datasets import Dataset, load_dataset
 from PIL import Image
 
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.json import safe_get
 
@@ -403,7 +404,7 @@ def _collect_prompts_for_spec(
                 metadata_raw.append(entry_meta)
         elif dtype == "list":
             items = data.get("items")
-            context: dict[str, Any] | None = None
+            context: dict[str, BaseExecutorResult] | None = None
             root_node: str | None = None
             if items is None:
                 expr = data.get("expr")
diff --git a/src/worker/executors/mixins/governance.py b/src/worker/executors/mixins/governance.py
index a302f595..44617d25 100644
--- a/src/worker/executors/mixins/governance.py
+++ b/src/worker/executors/mixins/governance.py
@@ -210,7 +210,9 @@ def _record_output(
         )
 
     @staticmethod
-    def _spec_upstream_results(spec: TaskSpecStrictBase) -> dict[str, Any]:
+    def _spec_upstream_results(
+        spec: TaskSpecStrictBase,
+    ) -> dict[str, BaseExecutorResult]:
         """Validated ``spec._upstreamResults`` (server-injected stage context)."""
         context = spec.upstreamResults or {}
         if not isinstance(context, dict):
diff --git a/src/worker/executors/utils/artifacts.py b/src/worker/executors/utils/artifacts.py
index 7f4df0be..6935bc22 100644
--- a/src/worker/executors/utils/artifacts.py
+++ b/src/worker/executors/utils/artifacts.py
@@ -6,48 +6,39 @@
 
 import requests
 
+from shared.schemas.result import BaseExecutorResult
 from shared.utils.http import auth_headers
 
 from ..base_executor import ExecutionError
 
 
 def artifact_to_source(
-    ref: dict[str, Any], context: dict[str, Any] | None, node: str | None
+    ref: dict[str, Any], context: dict[str, BaseExecutorResult] | None, node: str | None
 ) -> str:
     """Translate a `{path: ...}` artifact ref into a URL or local path."""
     rel_path = ref.get("path")
     if not isinstance(rel_path, str) or not rel_path:
         raise ExecutionError("Artifact ref must include a non-empty 'path' field")
 
-    ctx: dict[str, Any] = {}
-    if context and isinstance(node, str) and node:
-        node_payload = context.get(node)
-        if isinstance(node_payload, dict):
-            raw_ctx = node_payload.get("_artifacts")
-            if isinstance(raw_ctx, dict):
-                ctx = raw_ctx
-            if not ctx:
-                # Some executors stuff their result under a "result" key.
-                result = node_payload.get("result")
-                if isinstance(result, dict):
-                    inner = result.get("_artifacts")
-                    if isinstance(inner, dict):
-                        ctx = inner
-
-    if ctx:
-        base_url = ctx.get("base_url")
-        base_dir = ctx.get("base_dir")
+    if (
+        context
+        and node
+        and (node_result := context.get(node))
+        and (ctx := node_result.artifacts)
+    ):
+        base_url = ctx.base_url
+        base_dir = ctx.base_dir
     else:
         base_url = base_dir = None
 
     # Check local filesystem first
-    base_dir_path = Path(base_dir) if isinstance(base_dir, str) and base_dir else None
+    base_dir_path = Path(base_dir) if base_dir else None
     local_path = base_dir_path / "artifacts" / rel_path if base_dir_path else None
     if local_path is not None and local_path.is_file():
         return local_path.as_posix()
 
     # Fallback to a URL if base_url is provided
-    if isinstance(base_url, str) and base_url:
+    if base_url:
         if not base_dir_path:
             raise ExecutionError(
                 "Artifact ref with base_url requires upstream base_dir to "
@@ -66,7 +57,7 @@ def artifact_to_source(
 
 
 def maybe_resolve_artifact_ref(
-    value: Any, context: dict[str, Any] | None, node: str | None
+    value: Any, context: dict[str, BaseExecutorResult] | None, node: str | None
 ) -> Any:
     """Convert `{path: ...}` ref dicts to URL/path strings; pass others through."""
     if isinstance(value, dict) and "path" in value:
diff --git a/src/worker/executors/utils/graph_templates.py b/src/worker/executors/utils/graph_templates.py
index eb01ea1a..17958dfa 100644
--- a/src/worker/executors/utils/graph_templates.py
+++ b/src/worker/executors/utils/graph_templates.py
@@ -5,7 +5,9 @@
 from typing import Any
 
 import pandas as pd
+from pydantic import BaseModel
 
+from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.json import validate_keys
 
@@ -13,6 +15,8 @@
 from ..base_executor import ExecutionError
 from .safe_eval import safe_execute_function, safe_materialize_function
 
+_MISSING: Any = object()
+
 type MessageItem = dict[str, str]
 type Message = Sequence[MessageItem]
 type MaterializedMessage = Sequence[MessageItem]
@@ -580,17 +584,17 @@ def _format_column_line(label: str, value: str) -> str:
     return f"• {label}: {indented}"
 
 
-def _evaluate_expr(expr: str, context: dict[str, Any]) -> Any:
+def _evaluate_expr(expr: str, context: dict[str, BaseExecutorResult]) -> Any:
     if not expr:
         return None
 
     parts = expr.split(".")
     root = parts[0]
-    data = context.get(root)
-    if data is None:
+    result = context.get(root)
+    if result is None:
         return None
 
-    value: Any = data
+    value: Any = result
     for token in parts[1:]:
         if not token:
             continue
@@ -617,6 +621,14 @@ def _evaluate_expr(expr: str, context: dict[str, Any]) -> Any:
                         f"{attr} not a valid column in DataFrame for {token}."
                     )
                 value = value[attr].tolist()
+            elif isinstance(value, BaseModel):
+                resolved = getattr(value, attr, _MISSING)
+                if resolved is _MISSING:
+                    raise ExecutionError(
+                        f"{attr} not a valid attribute of {type(value).__name__} "
+                        f"for {token}."
+                    )
+                value = resolved
             else:
                 raise ExecutionError(
                     f"{attr} in {parts} is not a valid key - "
diff --git a/tests/worker/test_artifact_utils.py b/tests/worker/test_artifact_utils.py
index a30528a5..189733b2 100644
--- a/tests/worker/test_artifact_utils.py
+++ b/tests/worker/test_artifact_utils.py
@@ -8,6 +8,8 @@
 
 import pytest
 
+from shared.schemas.artifact import ArtifactContext
+from shared.schemas.result import BaseExecutorResult
 from worker.executors.base_executor import ExecutionError
 from worker.executors.utils.artifacts import (
     artifact_to_source,
@@ -16,42 +18,26 @@
 
 
 class TestArtifactToSource:
-    def test_unwrapped_upstream_yields_url(self, tmp_path: Path) -> None:
-        upstream = {
-            "_artifacts": {
-                "base_url": "http://host:8010",
-                "base_dir": (tmp_path / "producer-tid").as_posix(),
-            },
-            "result": {"images": [{"path": "a.png"}]},
-        }
+    def test_url_resolution(self, tmp_path: Path) -> None:
+        upstream = BaseExecutorResult(
+            _artifacts=ArtifactContext(
+                base_dir=(tmp_path / "producer-tid").as_posix(),
+                base_url="http://host:8010",
+            ),
+        )
         url = artifact_to_source({"path": "a.png"}, {"producer": upstream}, "producer")
         assert url == "http://host:8010/api/v1/results/producer-tid/files/a.png"
 
-    def test_envelope_wrapped_upstream_yields_url(self, tmp_path: Path) -> None:
-        """Server stores results.json as `{task_id, ..., result: {...}}`; the
-        helper must unwrap one level to find `_artifacts`."""
-        upstream = {
-            "task_id": "producer-tid",
-            "result": {
-                "_artifacts": {
-                    "base_url": "http://host:8010",
-                    "base_dir": (tmp_path / "producer-tid").as_posix(),
-                },
-            },
-        }
-        url = artifact_to_source({"path": "x.png"}, {"producer": upstream}, "producer")
-        assert url == "http://host:8010/api/v1/results/producer-tid/files/x.png"
-
     def test_local_file_takes_fast_path(self, tmp_path: Path) -> None:
         task_root = tmp_path / "producer-tid"
         (task_root / "artifacts").mkdir(parents=True)
         (task_root / "artifacts" / "a.png").write_bytes(b"\x89PNG")
-        upstream = {
-            "_artifacts": {
-                "base_url": "http://host:8010",
-                "base_dir": task_root.as_posix(),
-            }
-        }
+        upstream = BaseExecutorResult(
+            _artifacts=ArtifactContext(
+                base_dir=task_root.as_posix(),
+                base_url="http://host:8010",
+            )
+        )
         resolved = artifact_to_source(
             {"path": "a.png"}, {"producer": upstream}, "producer"
         )
@@ -59,7 +45,9 @@ def test_local_file_takes_fast_path(self, tmp_path: Path) -> None:
 
     def test_local_only_upstream_returns_local_path(self, tmp_path: Path) -> None:
         task_root = tmp_path / "producer-tid"
-        upstream = {"_artifacts": {"base_url": None, "base_dir": task_root.as_posix()}}
+        upstream = BaseExecutorResult(
+            _artifacts=ArtifactContext(base_dir=task_root.as_posix())
+        )
         resolved = artifact_to_source(
             {"path": "a.png"}, {"producer": upstream}, "producer"
         )
@@ -67,11 +55,13 @@ def test_local_only_upstream_returns_local_path(self, tmp_path: Path) -> None:
 
     def test_missing_context_raises(self) -> None:
         with pytest.raises(ExecutionError, match="_artifacts context is missing"):
-            artifact_to_source({"path": "a.png"}, {"producer": {}}, "producer")
+            artifact_to_source(
+                {"path": "a.png"}, {"producer": BaseExecutorResult()}, "producer"
+            )
 
     def test_missing_path_raises(self) -> None:
         with pytest.raises(ExecutionError, match="non-empty 'path' field"):
-            artifact_to_source({}, {"producer": {}}, "producer")
+            artifact_to_source({}, {"producer": BaseExecutorResult()}, "producer")
 
 
 class TestMaybeResolveArtifactRef:
@@ -85,12 +75,12 @@ def test_passes_through_dict_without_path(self) -> None:
         assert maybe_resolve_artifact_ref(value, None, None) is value
 
     def test_resolves_path_dict(self, tmp_path: Path) -> None:
-        upstream = {
-            "_artifacts": {
-                "base_url": "http://host:8010",
-                "base_dir": (tmp_path / "producer-tid").as_posix(),
-            }
-        }
+        upstream = BaseExecutorResult(
+            _artifacts=ArtifactContext(
+                base_url="http://host:8010",
+                base_dir=(tmp_path / "producer-tid").as_posix(),
+            )
+        )
         out = maybe_resolve_artifact_ref(
             {"path": "a.png"}, {"producer": upstream}, "producer"
         )
diff --git a/tests/worker/test_data_mixin_lineage.py b/tests/worker/test_data_mixin_lineage.py
index 491d65a7..69e8856f 100644
--- a/tests/worker/test_data_mixin_lineage.py
+++ b/tests/worker/test_data_mixin_lineage.py
@@ -146,20 +146,21 @@ def test_collect_prompts_resolves_grouped_image_artifact_refs_after_flatten(
     for name, color in (("a.png", "red"), ("b.png", "green"), ("c.png", "blue")):
         Image.new("RGB", (2, 2), color=color).save(artifacts_dir / name)
 
+    result = BaseExecutorResult.model_validate(
+        {
+            "images": [
+                [{"path": "images/a.png"}, {"path": "images/b.png"}],
+                [{"path": "images/c.png"}],
+            ],
+            "_artifacts": {"base_dir": upstream_dir.as_posix()},
+        }
+    )
     spec = cast(
         Any,
         SimpleNamespace(
             data={"type": "list", "expr": "vision.images"},
             inference={},
-            upstreamResults={
-                "vision": {
-                    "images": [
-                        [{"path": "images/a.png"}, {"path": "images/b.png"}],
-                        [{"path": "images/c.png"}],
-                    ],
-                    "_artifacts": {"base_dir": upstream_dir.as_posix()},
-                }
-            },
+            upstreamResults={"vision": result},
         ),
     )
 

From 3b2fe17b0685cbf79ebabc22af27eec1f54deedb Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 19:27:36 +0000
Subject: [PATCH 12/26] refactor(worker): inline ArtifactRef constructor at
 call sites

The ``artifact_ref(rel_path)`` helper was a one-liner around
``ArtifactRef(path=rel_path)``; now that ``ArtifactRef`` is the typed
schema callers reach for directly, the wrapper is dead weight. Drop
it and the corresponding test, and inline the constructor at every
call site across the nine executors that used it.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/agent_executor.py             | 10 +++-------
 src/worker/executors/data_retrieval_executor.py    |  9 +++------
 src/worker/executors/diffusers_executor.py         |  8 ++------
 src/worker/executors/dpo_executor.py               | 11 +++++------
 src/worker/executors/lora_sft_executor.py          | 13 ++++++-------
 src/worker/executors/omni_text2audio_executor.py   |  6 +++---
 src/worker/executors/omni_text2general_executor.py |  6 ++++--
 src/worker/executors/omni_text2image_executor.py   |  6 +++---
 src/worker/executors/omni_text2speech_executor.py  |  6 +++---
 src/worker/executors/ppo_executor.py               | 11 +++++------
 src/worker/executors/sft_executor.py               | 13 ++++++-------
 src/worker/executors/transformers_executor.py      |  8 ++------
 src/worker/executors/utils/checkpoints.py          |  8 +-------
 tests/worker/test_checkpoint_utils.py              |  9 +--------
 14 files changed, 47 insertions(+), 77 deletions(-)

diff --git a/src/worker/executors/agent_executor.py b/src/worker/executors/agent_executor.py
index 7cff6d0a..05b37aa4 100644
--- a/src/worker/executors/agent_executor.py
+++ b/src/worker/executors/agent_executor.py
@@ -21,11 +21,7 @@
 from shared.tasks.specs import AgentSpecStrict
 
 from .base_executor import ExecutionError, Executor, ExecutorTask
-from .utils.checkpoints import (
-    artifact_ref,
-    maybe_upload_artifacts,
-    write_executor_result,
-)
+from .utils.checkpoints import maybe_upload_artifacts, write_executor_result
 from .utils.graph_templates import build_prompts_from_graph_template
 
 # Add agent directory to sys.path for utu imports
@@ -283,7 +279,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         "execution_log": result.get("log", []),
                     },
                     agent_output=(
-                        artifact_ref(agent_output_ref)
+                        ArtifactRef(path=agent_output_ref)
                         if isinstance(agent_output_ref, str)
                         else None
                     ),
@@ -328,7 +324,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         "batch_summary": results.get("batch_summary", {}),
                     },
                     batch_summary_file=(
-                        artifact_ref(batch_summary_ref)
+                        ArtifactRef(path=batch_summary_ref)
                         if isinstance(batch_summary_ref, str)
                         else None
                     ),
diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index 22f964e5..1cde3031 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from PIL import Image
 
+from shared.schemas.artifact import ArtifactRef
 from shared.schemas.result import BaseExecutorResult
 from shared.tasks.specs import DataRetrievalSpecStrict
 from shared.utils.json import validate_keys
@@ -18,11 +19,7 @@
 from ..utils.serialization import serialize_dataframe
 from .base_executor import ExecutionError, Executor, ExecutorTask
 from .mixins.data import DataMixin
-from .utils.checkpoints import (
-    artifact_ref,
-    maybe_upload_artifacts,
-    maybe_upload_traces,
-)
+from .utils.checkpoints import maybe_upload_artifacts, maybe_upload_traces
 from .utils.graph_templates import _render_template, _resolve_columns
 
 logger = logging.getLogger(__name__)
@@ -346,5 +343,5 @@ def _serialize_s3_content(self, content: Any, out_dir: Path) -> Any:
             filename = f"{uuid.uuid4().hex}.png"
             file_path = images_dir / filename
             content.save(file_path, format="PNG")
-            return artifact_ref(f"s3_images/{filename}")
+            return ArtifactRef(path=f"s3_images/{filename}")
         return content
diff --git a/src/worker/executors/diffusers_executor.py b/src/worker/executors/diffusers_executor.py
index 5a82e1fd..0c782fce 100644
--- a/src/worker/executors/diffusers_executor.py
+++ b/src/worker/executors/diffusers_executor.py
@@ -21,11 +21,7 @@
 from ..utils.logging import configure_hf_library_logging
 from .base_executor import ExecutionError, Executor, ExecutorTask
 from .mixins.data import DataMixin
-from .utils.checkpoints import (
-    artifact_ref,
-    maybe_upload_artifacts,
-    maybe_upload_traces,
-)
+from .utils.checkpoints import maybe_upload_artifacts, maybe_upload_traces
 
 try:
     import torch
@@ -347,7 +343,7 @@ def _run_inner(
         generated_images: list[ArtifactRef] = []
         for idx, img in enumerate(images):
             img.save(image_dir / f"image_{idx}.png", format="PNG")
-            generated_images.append(artifact_ref(f"images/image_{idx}.png"))
+            generated_images.append(ArtifactRef(path=f"images/image_{idx}.png"))
 
         result = DiffusersResult(model=self._model_name, images=generated_images)
         self._dump_to_governance(
diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index 043e0978..a3a2da09 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -35,7 +35,6 @@
 from .mixins.training import TrainingMixin
 from .utils.checkpoints import (
     archive_model_dir,
-    artifact_ref,
     get_http_destination,
     maybe_upload_artifacts,
     write_executor_result,
@@ -484,15 +483,15 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
                 model_name=self._model_name,
                 dataset_size=len(dataset) if dataset is not None else 0,
                 output_dir=out_dir.as_posix(),
-                checkpoints_dir=artifact_ref("checkpoints"),
+                checkpoints_dir=ArtifactRef(path="checkpoints"),
             )
             if final_model_path is not None:
-                result.final_model = artifact_ref(
-                    final_model_path.relative_to(artifacts_dir).as_posix()
+                result.final_model = ArtifactRef(
+                    path=final_model_path.relative_to(artifacts_dir).as_posix()
                 )
             if final_archive_path is not None:
-                result.final_model_archive = artifact_ref(
-                    final_archive_path.relative_to(artifacts_dir).as_posix()
+                result.final_model_archive = ArtifactRef(
+                    path=final_archive_path.relative_to(artifacts_dir).as_posix()
                 )
 
             maybe_upload_artifacts(task, out_dir, logger=logger)
diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index 55deca84..c17697ae 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -28,7 +28,6 @@
 from .sft_executor import SFTExecutor
 from .utils.checkpoints import (
     archive_model_dir,
-    artifact_ref,
     determine_resume_path,
     maybe_upload_artifacts,
     write_executor_result,
@@ -314,16 +313,16 @@ def _compute_loss_with_guard(
                 model_name=self._model_name,
                 dataset_size=len(train_dataset) if train_dataset is not None else 0,
                 output_dir=out_dir.as_posix(),
-                checkpoints_dir=artifact_ref("checkpoints"),
+                checkpoints_dir=ArtifactRef(path="checkpoints"),
                 resume_from_path=resume_str,
             )
             if final_adapter_path is not None:
-                result.final_lora = artifact_ref(
-                    final_adapter_path.relative_to(artifacts_dir).as_posix()
+                result.final_lora = ArtifactRef(
+                    path=final_adapter_path.relative_to(artifacts_dir).as_posix()
                 )
                 archive_path = archive_model_dir(final_adapter_path)
-                result.final_lora_archive = artifact_ref(
-                    archive_path.relative_to(artifacts_dir).as_posix()
+                result.final_lora_archive = ArtifactRef(
+                    path=archive_path.relative_to(artifacts_dir).as_posix()
                 )
                 logger.info("Prepared LoRA archive at %s", archive_path)
 
@@ -352,7 +351,7 @@ def _compute_loss_with_guard(
             model_name=self._model_name,
             dataset_size=len(train_dataset) if train_dataset is not None else 0,
             output_dir=out_dir.as_posix(),
-            checkpoints_dir=artifact_ref("checkpoints"),
+            checkpoints_dir=ArtifactRef(path="checkpoints"),
             resume_from_path=resume_path.as_posix() if resume_path else None,
         )
 
diff --git a/src/worker/executors/omni_text2audio_executor.py b/src/worker/executors/omni_text2audio_executor.py
index 77168cbb..d0913317 100644
--- a/src/worker/executors/omni_text2audio_executor.py
+++ b/src/worker/executors/omni_text2audio_executor.py
@@ -50,6 +50,7 @@
         current_omni_platform = None
     _HAS_OMNI_PLATFORM = False
 
+from shared.schemas.artifact import ArtifactRef
 from shared.schemas.governance import SpanType
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.specs.omni import OmniText2AudioSpecStrict
@@ -57,7 +58,6 @@
 
 from .base_executor import ExecutionError, ExecutorTask
 from .omni_executor_base import OmniExecutorBase, OmniResult, extract_multimodal_output
-from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
 EXECUTOR_NAME = "omni_text2audio"
@@ -203,8 +203,8 @@ def _run_inner(
                             "prompt_index": prompt_idx,
                             "waveform_index": local_idx,
                             "prompt": prompt,
-                            "audio": artifact_ref(
-                                self.relative_to(save_path, artifacts_dir)
+                            "audio": ArtifactRef(
+                                path=self.relative_to(save_path, artifacts_dir)
                             ),
                         }
                     )
diff --git a/src/worker/executors/omni_text2general_executor.py b/src/worker/executors/omni_text2general_executor.py
index 222a446f..a80314f5 100644
--- a/src/worker/executors/omni_text2general_executor.py
+++ b/src/worker/executors/omni_text2general_executor.py
@@ -26,6 +26,7 @@
         Omni = None
     _HAS_OMNI = False
 
+from shared.schemas.artifact import ArtifactRef
 from shared.schemas.governance import SpanType
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.specs.omni import OmniText2GeneralSpecStrict
@@ -38,7 +39,6 @@
     extract_audio_from_mm,
     save_audio,
 )
-from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
 EXECUTOR_NAME = "omni_text2general"
@@ -184,7 +184,9 @@ def _run_inner(
                     "index": idx,
                     "request_id": rid,
                     "prompt": texts[idx] if idx < len(texts) else None,
-                    "audio": artifact_ref(self.relative_to(save_path, artifacts_dir)),
+                    "audio": ArtifactRef(
+                        path=self.relative_to(save_path, artifacts_dir)
+                    ),
                 }
                 text_out = text_results.get(rid)
                 if text_out:
diff --git a/src/worker/executors/omni_text2image_executor.py b/src/worker/executors/omni_text2image_executor.py
index 8254f162..068202e5 100644
--- a/src/worker/executors/omni_text2image_executor.py
+++ b/src/worker/executors/omni_text2image_executor.py
@@ -15,6 +15,7 @@
         Omni = None
     _HAS_OMNI = False
 
+from shared.schemas.artifact import ArtifactRef
 from shared.schemas.governance import SpanType
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.specs.omni import OmniText2ImageSpecStrict
@@ -22,7 +23,6 @@
 
 from .base_executor import ExecutionError, ExecutorTask
 from .omni_executor_base import OmniExecutorBase, OmniResult
-from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
 EXECUTOR_NAME = "omni_text2image"
@@ -93,8 +93,8 @@ def _run_inner(
                     {
                         "index": idx,
                         "prompt": prompt,
-                        "image": artifact_ref(
-                            self.relative_to(save_path, artifacts_dir)
+                        "image": ArtifactRef(
+                            path=self.relative_to(save_path, artifacts_dir)
                         ),
                     }
                 )
diff --git a/src/worker/executors/omni_text2speech_executor.py b/src/worker/executors/omni_text2speech_executor.py
index ff13753c..1cf5e26c 100644
--- a/src/worker/executors/omni_text2speech_executor.py
+++ b/src/worker/executors/omni_text2speech_executor.py
@@ -16,6 +16,7 @@
         Omni = None
     _HAS_OMNI = False
 
+from shared.schemas.artifact import ArtifactRef
 from shared.schemas.governance import SpanType
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.tasks.specs.omni import OmniText2SpeechSpecStrict
@@ -29,7 +30,6 @@
     extract_multimodal_output,
     save_audio,
 )
-from .utils.checkpoints import artifact_ref
 
 logger = logging.getLogger(__name__)
 EXECUTOR_NAME = "omni_text2speech"
@@ -105,8 +105,8 @@ def _run_inner(
                     {
                         "index": idx,
                         "text": text,
-                        "audio": artifact_ref(
-                            self.relative_to(save_path, artifacts_dir)
+                        "audio": ArtifactRef(
+                            path=self.relative_to(save_path, artifacts_dir)
                         ),
                     }
                 )
diff --git a/src/worker/executors/ppo_executor.py b/src/worker/executors/ppo_executor.py
index 4bbae544..be710dbb 100644
--- a/src/worker/executors/ppo_executor.py
+++ b/src/worker/executors/ppo_executor.py
@@ -41,7 +41,6 @@
 from .mixins.training import TrainingMixin
 from .utils.checkpoints import (
     archive_model_dir,
-    artifact_ref,
     get_http_destination,
     maybe_upload_artifacts,
     write_executor_result,
@@ -920,15 +919,15 @@ def build_trainer() -> _EarlyStopPPOTrainer:
             model_name=self._model_name,
             dataset_size=len(dataset),
             output_dir=out_dir.as_posix(),
-            checkpoints_dir=artifact_ref("checkpoints"),
+            checkpoints_dir=ArtifactRef(path="checkpoints"),
         )
         if final_model_path is not None:
-            result.final_model = artifact_ref(
-                final_model_path.relative_to(artifacts_dir).as_posix()
+            result.final_model = ArtifactRef(
+                path=final_model_path.relative_to(artifacts_dir).as_posix()
             )
         if final_archive_path is not None:
-            result.final_model_archive = artifact_ref(
-                final_archive_path.relative_to(artifacts_dir).as_posix()
+            result.final_model_archive = ArtifactRef(
+                path=final_archive_path.relative_to(artifacts_dir).as_posix()
             )
 
         maybe_upload_artifacts(task, out_dir, logger=logger)
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index 97085617..453b25eb 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -32,7 +32,6 @@
 from .mixins.training import TrainingMixin
 from .utils.checkpoints import (
     archive_model_dir,
-    artifact_ref,
     determine_resume_path,
     get_http_destination,
     maybe_upload_artifacts,
@@ -448,7 +447,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 model_name=self._model_name,
                 dataset_size=len(train_dataset),
                 output_dir=out_dir.as_posix(),
-                checkpoints_dir=artifact_ref("checkpoints"),
+                checkpoints_dir=ArtifactRef(path="checkpoints"),
                 resume_from_path=resume_str,
             )
 
@@ -457,12 +456,12 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 self._final_model_dir = (
                     resolved_model_path if resolved_model_path.exists() else None
                 )
-                result.final_model = artifact_ref(
-                    final_model_path.relative_to(artifacts_dir).as_posix()
+                result.final_model = ArtifactRef(
+                    path=final_model_path.relative_to(artifacts_dir).as_posix()
                 )
                 if final_archive_path is not None:
-                    result.final_model_archive = artifact_ref(
-                        final_archive_path.relative_to(artifacts_dir).as_posix()
+                    result.final_model_archive = ArtifactRef(
+                        path=final_archive_path.relative_to(artifacts_dir).as_posix()
                     )
 
             maybe_upload_artifacts(task, out_dir, logger=logger)
@@ -514,7 +513,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 else 0
             ),
             output_dir=out_dir.as_posix(),
-            checkpoints_dir=artifact_ref("checkpoints"),
+            checkpoints_dir=ArtifactRef(path="checkpoints"),
             resume_from_path=resume_path.as_posix() if resume_path else None,
         )
         write_executor_result(out_dir / "results.json", task.task_id, task.spec, result)
diff --git a/src/worker/executors/transformers_executor.py b/src/worker/executors/transformers_executor.py
index f8aa58fa..beaed479 100644
--- a/src/worker/executors/transformers_executor.py
+++ b/src/worker/executors/transformers_executor.py
@@ -68,11 +68,7 @@
 from .base_executor import ExecutionError, Executor, ExecutorTask
 from .mixins.data import InferenceEntry
 from .mixins.inference import InferenceMixin
-from .utils.checkpoints import (
-    artifact_ref,
-    maybe_upload_artifacts,
-    maybe_upload_traces,
-)
+from .utils.checkpoints import maybe_upload_artifacts, maybe_upload_traces
 
 try:
     import torch
@@ -493,7 +489,7 @@ def _run_inner(
                 model=self._model_name,
                 items=[],
                 count=len(grouped_visual_embeddings),
-                embedding_file=artifact_ref("visual_embeddings.pt"),
+                embedding_file=ArtifactRef(path="visual_embeddings.pt"),
                 image_group_sizes=image_group_sizes,
             )
 
diff --git a/src/worker/executors/utils/checkpoints.py b/src/worker/executors/utils/checkpoints.py
index 9814c976..acf7d345 100644
--- a/src/worker/executors/utils/checkpoints.py
+++ b/src/worker/executors/utils/checkpoints.py
@@ -11,7 +11,7 @@
 
 import requests
 
-from shared.schemas.artifact import ArtifactContext, ArtifactRef
+from shared.schemas.artifact import ArtifactContext
 from shared.schemas.result import BaseExecutorResult, ResultEnvelope
 from shared.tasks.specs import TaskSpecStrictBase
 from shared.utils.atomic import atomic_write_text
@@ -396,12 +396,6 @@ def is_cleanup_enabled() -> bool:
     return normalized not in {"0", "false", "no", "off"}
 
 
-def artifact_ref(rel_path: str) -> ArtifactRef:
-    """Build an artifact reference. ``rel_path`` is relative to
-    ``out_dir/artifacts/``."""
-    return ArtifactRef(path=rel_path)
-
-
 def build_artifact_context(spec: TaskSpecStrictBase, out_dir: Path) -> ArtifactContext:
     """Top-level ``_artifacts`` context. ``base_url`` is the destination
     origin (scheme://host[:port]) for HTTP, else ``None``."""
diff --git a/tests/worker/test_checkpoint_utils.py b/tests/worker/test_checkpoint_utils.py
index 171a0c1a..7c671641 100644
--- a/tests/worker/test_checkpoint_utils.py
+++ b/tests/worker/test_checkpoint_utils.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from shared.schemas.artifact import ArtifactContext, ArtifactRef
+from shared.schemas.artifact import ArtifactContext
 from worker.executors.base_executor import TaskReference
 from worker.executors.utils import checkpoints
 
@@ -30,13 +30,6 @@ def _task(
     return cast(TaskReference, SimpleNamespace(task_id="task-1", spec=spec))
 
 
-class TestArtifactRef:
-    def test_returns_path_only(self) -> None:
-        assert checkpoints.artifact_ref("images/foo.png") == ArtifactRef(
-            path="images/foo.png"
-        )
-
-
 class TestBuildArtifactContext:
     def test_http_destination_strips_api_suffix(self, tmp_path: Path) -> None:
         out_dir = tmp_path / "task-1"

From e541a6201d53e8dcb3813403c472cb257151325d Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 19:27:49 +0000
Subject: [PATCH 13/26] refactor(server): return typed BaseExecutorResult from
 get_result

``GET /results/{task_id}`` now returns the validated
``BaseExecutorResult`` directly instead of immediately flattening it
back to ``dict[str, Any]`` via ``model_dump``. FastAPI serializes the
model via Pydantic (alias and ``extra="allow"`` preserved), and the
endpoint's signature documents the actual shape the client sees.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/routers/v1/results.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/server/routers/v1/results.py b/src/server/routers/v1/results.py
index 99ac8236..e38e8d76 100644
--- a/src/server/routers/v1/results.py
+++ b/src/server/routers/v1/results.py
@@ -4,7 +4,6 @@
 import tarfile
 import tempfile
 from pathlib import Path
-from typing import Any
 
 from fastapi import (
     APIRouter,
@@ -20,6 +19,7 @@
 from pydantic import ValidationError
 
 from shared.schemas.result import (
+    BaseExecutorResult,
     ResultEnvelope,
     read_result,
     result_file_path,
@@ -115,7 +115,7 @@ async def get_result(
     principal: PrincipalContext = Depends(authenticate_connection),
     results_dir: Path = Depends(get_results_dir),
     logger: logging.Logger = Depends(get_logger),
-) -> dict[str, Any]:
+) -> BaseExecutorResult:
     task_id = (task_id or "").strip()
     if not task_id:
         raise HTTPException(
@@ -143,7 +143,7 @@ async def get_result(
             detail=f"Result file is not valid JSON: {exc}",
         ) from exc
     try:
-        return ResultEnvelope.model_validate(content).result.model_dump()
+        return ResultEnvelope.model_validate(content).result
     except ValidationError as exc:
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,

From 62cfd96f596960cd25813c33003ee205090e3459 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 19:37:08 +0000
Subject: [PATCH 14/26] refactor: unify sentinel attribute lookup in dispatcher
 and template walker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces ``_dig_path``'s ``hasattr`` + ``getattr`` double-lookup with
the same sentinel-default pattern ``_evaluate_expr`` already uses, and
unifies the constant name (``_MISSING`` → ``_SENTINEL``) across both
files so the two dynamic-attribute walkers stay aligned.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/server/dispatcher/base.py                 | 6 ++++--
 src/worker/executors/utils/graph_templates.py | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index 2ae68c25..a370fb05 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -38,6 +38,8 @@
 from ..utils.time import now_iso
 from .worker_selector import DEFAULT_WORKER_SELECTION, select_worker
 
+_SENTINEL: Any = object()
+
 
 class StageReferenceNotReady(Exception):
     """Raised when a task references a stage whose artifacts are not yet available."""
@@ -951,9 +953,9 @@ def _dig_path(self, data: Any, parts: list[str]) -> Any:
                 current = current[idx]
                 continue
             if isinstance(current, BaseModel):
-                if not hasattr(current, part):
+                current = getattr(current, part, _SENTINEL)
+                if current is _SENTINEL:
                     return None
-                current = getattr(current, part)
                 continue
             return None
         return current
diff --git a/src/worker/executors/utils/graph_templates.py b/src/worker/executors/utils/graph_templates.py
index 17958dfa..c21cd339 100644
--- a/src/worker/executors/utils/graph_templates.py
+++ b/src/worker/executors/utils/graph_templates.py
@@ -15,7 +15,7 @@
 from ..base_executor import ExecutionError
 from .safe_eval import safe_execute_function, safe_materialize_function
 
-_MISSING: Any = object()
+_SENTINEL: Any = object()
 
 type MessageItem = dict[str, str]
 type Message = Sequence[MessageItem]
@@ -622,8 +622,8 @@ def _evaluate_expr(expr: str, context: dict[str, BaseExecutorResult]) -> Any:
                     )
                 value = value[attr].tolist()
             elif isinstance(value, BaseModel):
-                resolved = getattr(value, attr, _MISSING)
-                if resolved is _MISSING:
+                resolved = getattr(value, attr, _SENTINEL)
+                if resolved is _SENTINEL:
                     raise ExecutionError(
                         f"{attr} not a valid attribute of {type(value).__name__} "
                         f"for {token}."

From 3c9835808b01c2df0e151b1bca4fd73752f3ab5a Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 20:33:19 +0000
Subject: [PATCH 15/26] refactor(sdk): port typed result and artifact schemas

Mirrors the server's ``BaseExecutorResult``, ``ArtifactContext``, and
``ArtifactRef`` Pydantic models on the SDK side so ``materialize()``
walks typed attributes instead of poking raw dicts under
``envelope.result["_artifacts"]``. ``ResultEnvelope.result`` becomes
``SerializeAsAny[BaseExecutorResult]`` to preserve subclass payloads
through the declared base type, matching the server's wire shape.

Shared ``ArtifactContext.base_url`` gains ``exclude_if=lambda v: v is
None`` so setting the field to ``None`` drops the key on serialization,
replacing the previous ``ctx.pop("base_url", None)`` dict mutation.

``tests/sdk/test_schema_compat.py`` parametrizes the three new model
pairs so server / SDK drift is caught at CI time.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 sdk/src/flowmesh/models/__init__.py   |  6 +++++-
 sdk/src/flowmesh/models/artifacts.py  | 10 ++++++++++
 sdk/src/flowmesh/models/results.py    | 19 +++++++++++++++++--
 sdk/src/flowmesh/resources/results.py |  7 +++----
 src/shared/schemas/artifact.py        |  4 +++-
 tests/sdk/test_schema_compat.py       | 10 ++++++++++
 6 files changed, 48 insertions(+), 8 deletions(-)
 create mode 100644 sdk/src/flowmesh/models/artifacts.py

diff --git a/sdk/src/flowmesh/models/__init__.py b/sdk/src/flowmesh/models/__init__.py
index e2043805..ea08aa5a 100644
--- a/sdk/src/flowmesh/models/__init__.py
+++ b/sdk/src/flowmesh/models/__init__.py
@@ -1,5 +1,6 @@
 """FlowMesh SDK data models."""
 
+from .artifacts import ArtifactContext, ArtifactRef
 from .common import (
     LogEntry,
     LogEvent,
@@ -19,7 +20,7 @@
     NodeWorkerInfo,
     WorkerRegisterResponse,
 )
-from .results import PathResponse, ResultEnvelope
+from .results import BaseExecutorResult, PathResponse, ResultEnvelope
 from .tasks import HardwareUsage, TaskInfo, TaskUsage
 from .traces import (
     ActiveWaitBreakdown,
@@ -54,7 +55,10 @@
 
 __all__ = [
     "ActiveWaitBreakdown",
+    "ArtifactContext",
+    "ArtifactRef",
     "AssetSummary",
+    "BaseExecutorResult",
     "CPUInfo",
     "CriticalPathSummary",
     "E2EBreakdown",
diff --git a/sdk/src/flowmesh/models/artifacts.py b/sdk/src/flowmesh/models/artifacts.py
new file mode 100644
index 00000000..fdbb3edb
--- /dev/null
+++ b/sdk/src/flowmesh/models/artifacts.py
@@ -0,0 +1,10 @@
+from pydantic import BaseModel, Field
+
+
+class ArtifactRef(BaseModel):
+    path: str
+
+
+class ArtifactContext(BaseModel):
+    base_dir: str
+    base_url: str | None = Field(default=None, exclude_if=lambda v: v is None)
diff --git a/sdk/src/flowmesh/models/results.py b/sdk/src/flowmesh/models/results.py
index 3368f1bc..b872d31b 100644
--- a/sdk/src/flowmesh/models/results.py
+++ b/sdk/src/flowmesh/models/results.py
@@ -1,8 +1,14 @@
 """Result-related models."""
 
+# This is necessary to allow for the recursive type hint of `children` in
+# `BaseExecutorResult`.
+from __future__ import annotations
+
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
+
+from .artifacts import ArtifactContext
 
 
 class PathResponse(BaseModel):
@@ -10,11 +16,20 @@ class PathResponse(BaseModel):
     path: str
 
 
+class BaseExecutorResult(BaseModel):
+    model_config = ConfigDict(extra="allow", serialize_by_alias=True)
+
+    children: dict[str, SerializeAsAny[BaseExecutorResult]] = Field(
+        default_factory=dict, exclude_if=lambda v: not v
+    )
+    artifacts: ArtifactContext | None = Field(default=None, alias="_artifacts")
+
+
 class ResultEnvelope(BaseModel):
     """Canonical on-disk shape of ``results.json`` (mirrors the server)."""
 
     task_id: str
-    result: dict[str, Any]
+    result: SerializeAsAny[BaseExecutorResult]
     worker_id: str | None = None
     metadata: dict[str, Any] | None = None
     received_at: str | None = Field(default=None)
diff --git a/sdk/src/flowmesh/resources/results.py b/sdk/src/flowmesh/resources/results.py
index 1f5e99cf..671aaa4b 100644
--- a/sdk/src/flowmesh/resources/results.py
+++ b/sdk/src/flowmesh/resources/results.py
@@ -198,10 +198,9 @@ def _finalize_materialize(
         return {}, json_path, extracted
 
     envelope = ResultEnvelope.model_validate_json(json_path.read_text())
-    if _wants_artifacts(sections):
-        ctx = envelope.result["_artifacts"]
-        ctx["base_dir"] = (output_dir / task_id).resolve().as_posix()
-        ctx.pop("base_url", None)
+    if _wants_artifacts(sections) and (ctx := envelope.result.artifacts):
+        ctx.base_dir = (output_dir / task_id).resolve().as_posix()
+        ctx.base_url = None
     payload = envelope.model_dump(mode="json")
     json_path.write_text(json.dumps(payload, indent=2))
     return payload, json_path, extracted
diff --git a/src/shared/schemas/artifact.py b/src/shared/schemas/artifact.py
index d9a232cc..724d8819 100644
--- a/src/shared/schemas/artifact.py
+++ b/src/shared/schemas/artifact.py
@@ -8,5 +8,7 @@ class ArtifactRef(BaseModel):
 class ArtifactContext(BaseModel):
     base_dir: str = Field(description="Producing task's output directory.")
     base_url: str | None = Field(
-        default=None, description="HTTP origin (scheme://host[:port]) for upload."
+        default=None,
+        exclude_if=lambda v: v is None,
+        description="HTTP origin (scheme://host[:port]) for upload.",
     )
diff --git a/tests/sdk/test_schema_compat.py b/tests/sdk/test_schema_compat.py
index b757060f..d4092a7e 100644
--- a/tests/sdk/test_schema_compat.py
+++ b/tests/sdk/test_schema_compat.py
@@ -8,6 +8,9 @@
 
 # SDK-side imports
 from flowmesh.models import (
+    ArtifactContext,
+    ArtifactRef,
+    BaseExecutorResult,
     CPUInfo,
     GpuInfo,
     GpuPlatformInfo,
@@ -79,6 +82,9 @@
 )
 from server.task.models import TaskInfo as SrvTaskInfo
 from server.task.models import TaskUsage as SrvTaskUsage
+from shared.schemas.artifact import ArtifactContext as SrvArtifactContext
+from shared.schemas.artifact import ArtifactRef as SrvArtifactRef
+from shared.schemas.result import BaseExecutorResult as SrvBaseExecutorResult
 from shared.schemas.result import ResultEnvelope as SrvResultEnvelope
 from shared.schemas.worker import SSHLimits as SrvSSHLimits
 from shared.tasks.task_type import TaskType as SrvTaskType
@@ -123,7 +129,11 @@
     # Common
     (SrvOkResponse, OkResponse),
     # Results
+    (SrvBaseExecutorResult, BaseExecutorResult),
     (SrvResultEnvelope, ResultEnvelope),
+    # Artifacts
+    (SrvArtifactContext, ArtifactContext),
+    (SrvArtifactRef, ArtifactRef),
 ]
 
 

From 576231282bf649b1dd0a4ca56cb1eb84997dfbeb Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 20:33:40 +0000
Subject: [PATCH 16/26] refactor(worker): construct executor results without
 post-hoc mutation

Each typed executor result (``AgentResult``, ``DataRetrievalResult``,
``DPOResult``, ``EchoResult``, ``LoRAResult``, ``PPOResult``,
``RAGResult``, ``SFTResult``) is now built with every field passed at
construction time instead of being mutated afterwards
(``result.final_model = ArtifactRef(...)``). Conditional ``ArtifactRef``
fields are folded into the constructor call as inline ternaries; the
SFT/LoRA pair stages locals first because the truthy branch also
runs ``archive_model_dir`` / sets ``self._final_model_dir``. Executors
whose result schema already pins a default (``ok=True``,
``executor=EXECUTOR_NAME``) drop the redundant kwarg at the call site.

``Runner._write_results`` now reads ``result.children`` directly: the
caller already guarantees a ``BaseExecutorResult``, so the
``isinstance`` + ``result.get("children")`` dict fallback is dead.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/agent_executor.py        | 12 +++----
 .../executors/data_retrieval_executor.py      |  3 --
 src/worker/executors/dpo_executor.py          | 22 ++++++++-----
 src/worker/executors/echo_executor.py         |  2 +-
 src/worker/executors/lora_sft_executor.py     | 22 +++++++------
 src/worker/executors/ppo_executor.py          | 20 +++++++-----
 src/worker/executors/rag_executor.py          |  7 ++--
 src/worker/executors/sft_executor.py          | 32 +++++++++++--------
 src/worker/runner.py                          |  6 +---
 9 files changed, 69 insertions(+), 57 deletions(-)

diff --git a/src/worker/executors/agent_executor.py b/src/worker/executors/agent_executor.py
index 05b37aa4..db59687b 100644
--- a/src/worker/executors/agent_executor.py
+++ b/src/worker/executors/agent_executor.py
@@ -279,9 +279,9 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         "execution_log": result.get("log", []),
                     },
                     agent_output=(
-                        ArtifactRef(path=agent_output_ref)
-                        if isinstance(agent_output_ref, str)
-                        else None
+                        None
+                        if agent_output_ref is None
+                        else ArtifactRef(path=agent_output_ref)
                     ),
                 )
             else:
@@ -324,9 +324,9 @@ def run(self, task: ExecutorTask, out_dir: Path) -> AgentResult:
                         "batch_summary": results.get("batch_summary", {}),
                     },
                     batch_summary_file=(
-                        ArtifactRef(path=batch_summary_ref)
-                        if isinstance(batch_summary_ref, str)
-                        else None
+                        None
+                        if batch_summary_ref is None
+                        else ArtifactRef(path=batch_summary_ref)
                     ),
                 )
 
diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index 1cde3031..86e75145 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -151,7 +151,6 @@ def _run_sql(
                 )
 
         return DataRetrievalResult(
-            ok=True,
             items=items,
             count=len(items),
         )
@@ -214,7 +213,6 @@ def _run_s3(
                 items.append(item)
 
         return DataRetrievalResult(
-            ok=True,
             type="s3",
             items=items,
             metadata=s3_result["metadata"],  # type: ignore
@@ -320,7 +318,6 @@ def _run_agent(
                 )
 
         return DataRetrievalResult(
-            ok=True,
             type="agent",
             items=items,
             count=len(items),
diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index a3a2da09..824cda27 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -484,15 +484,21 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
                 dataset_size=len(dataset) if dataset is not None else 0,
                 output_dir=out_dir.as_posix(),
                 checkpoints_dir=ArtifactRef(path="checkpoints"),
+                final_model=(
+                    ArtifactRef(
+                        path=final_model_path.relative_to(artifacts_dir).as_posix()
+                    )
+                    if final_model_path
+                    else None
+                ),
+                final_model_archive=(
+                    ArtifactRef(
+                        path=final_archive_path.relative_to(artifacts_dir).as_posix()
+                    )
+                    if final_archive_path
+                    else None
+                ),
             )
-            if final_model_path is not None:
-                result.final_model = ArtifactRef(
-                    path=final_model_path.relative_to(artifacts_dir).as_posix()
-                )
-            if final_archive_path is not None:
-                result.final_model_archive = ArtifactRef(
-                    path=final_archive_path.relative_to(artifacts_dir).as_posix()
-                )
 
             maybe_upload_artifacts(task, out_dir, logger=logger)
 
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index ad9795ee..52c0e25d 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -88,7 +88,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> EchoResult:
                 resolved = self._resolve_item(item, context)
                 self._append_outputs(merged_items, resolved)
 
-            result = EchoResult(ok=True, items=merged_items, count=len(merged_items))
+            result = EchoResult(items=merged_items, count=len(merged_items))
             deps = self._extract_source_data_ids(spec)
             dependencies_by_task = {task_id: deps}
 
diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index c17697ae..157bbb25 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -305,6 +305,17 @@ def _compute_loss_with_guard(
                 logger.info("Saved LoRA-adapted weights to %s", model_path)
 
             training_time = time.time() - start_time
+            final_lora: ArtifactRef | None = None
+            final_lora_archive: ArtifactRef | None = None
+            if final_adapter_path:
+                final_lora = ArtifactRef(
+                    path=final_adapter_path.relative_to(artifacts_dir).as_posix()
+                )
+                archive_path = archive_model_dir(final_adapter_path)
+                final_lora_archive = ArtifactRef(
+                    path=archive_path.relative_to(artifacts_dir).as_posix()
+                )
+                logger.info("Prepared LoRA archive at %s", archive_path)
             result = LoRAResult(
                 task_id=task.task_id,
                 training_successful=training_successful,
@@ -315,16 +326,9 @@ def _compute_loss_with_guard(
                 output_dir=out_dir.as_posix(),
                 checkpoints_dir=ArtifactRef(path="checkpoints"),
                 resume_from_path=resume_str,
+                final_lora=final_lora,
+                final_lora_archive=final_lora_archive,
             )
-            if final_adapter_path is not None:
-                result.final_lora = ArtifactRef(
-                    path=final_adapter_path.relative_to(artifacts_dir).as_posix()
-                )
-                archive_path = archive_model_dir(final_adapter_path)
-                result.final_lora_archive = ArtifactRef(
-                    path=archive_path.relative_to(artifacts_dir).as_posix()
-                )
-                logger.info("Prepared LoRA archive at %s", archive_path)
 
             maybe_upload_artifacts(task, out_dir, logger=logger)
 
diff --git a/src/worker/executors/ppo_executor.py b/src/worker/executors/ppo_executor.py
index be710dbb..cd7acedb 100644
--- a/src/worker/executors/ppo_executor.py
+++ b/src/worker/executors/ppo_executor.py
@@ -920,15 +920,19 @@ def build_trainer() -> _EarlyStopPPOTrainer:
             dataset_size=len(dataset),
             output_dir=out_dir.as_posix(),
             checkpoints_dir=ArtifactRef(path="checkpoints"),
+            final_model=(
+                ArtifactRef(path=final_model_path.relative_to(artifacts_dir).as_posix())
+                if final_model_path
+                else None
+            ),
+            final_model_archive=(
+                ArtifactRef(
+                    path=final_archive_path.relative_to(artifacts_dir).as_posix()
+                )
+                if final_archive_path
+                else None
+            ),
         )
-        if final_model_path is not None:
-            result.final_model = ArtifactRef(
-                path=final_model_path.relative_to(artifacts_dir).as_posix()
-            )
-        if final_archive_path is not None:
-            result.final_model_archive = ArtifactRef(
-                path=final_archive_path.relative_to(artifacts_dir).as_posix()
-            )
 
         maybe_upload_artifacts(task, out_dir, logger=logger)
 
diff --git a/src/worker/executors/rag_executor.py b/src/worker/executors/rag_executor.py
index 24286bcd..0b9c41ae 100644
--- a/src/worker/executors/rag_executor.py
+++ b/src/worker/executors/rag_executor.py
@@ -23,11 +23,12 @@
 from .utils.graph_templates import Message, build_prompts_from_graph_template
 
 logger = logging.getLogger("worker.rag")
+EXECUTOR_NAME = "rag"
 
 
 class RAGResult(BaseExecutorResult):
     ok: bool = True
-    executor: str
+    executor: str = EXECUTOR_NAME
     qdrant: dict[str, Any]
     embedding: dict[str, Any]
     search: dict[str, Any]
@@ -36,7 +37,7 @@ class RAGResult(BaseExecutorResult):
 
 
 class RAGExecutor(Executor):
-    name = "rag"
+    name = EXECUTOR_NAME
 
     def run(self, task: ExecutorTask, out_dir: Path) -> RAGResult:
         start_ts = time.time()
@@ -195,8 +196,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> RAGResult:
             "RAG query completed queries=%d total_results=%d", len(queries), total_items
         )
         return RAGResult(
-            ok=True,
-            executor=self.name,
             qdrant={"collection": collection, "url": url},
             embedding={"model": model_name},
             search={"top_k": top_k},
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index 453b25eb..ec194087 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -439,6 +439,23 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 )
 
             training_time = time.time() - start_time
+            final_model: ArtifactRef | None = None
+            final_model_archive: ArtifactRef | None = None
+            if final_model_path:
+                resolved_model_path = Path(final_model_path)
+                self._final_model_dir = (
+                    resolved_model_path if resolved_model_path.exists() else None
+                )
+                final_model = ArtifactRef(
+                    path=final_model_path.relative_to(artifacts_dir).as_posix()
+                )
+                final_model_archive = (
+                    ArtifactRef(
+                        path=final_archive_path.relative_to(artifacts_dir).as_posix()
+                    )
+                    if final_archive_path
+                    else None
+                )
             result = SFTResult(
                 task_id=task.task_id,
                 training_successful=training_successful,
@@ -449,21 +466,10 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                 output_dir=out_dir.as_posix(),
                 checkpoints_dir=ArtifactRef(path="checkpoints"),
                 resume_from_path=resume_str,
+                final_model=final_model,
+                final_model_archive=final_model_archive,
             )
 
-            if final_model_path is not None:
-                resolved_model_path = Path(final_model_path)
-                self._final_model_dir = (
-                    resolved_model_path if resolved_model_path.exists() else None
-                )
-                result.final_model = ArtifactRef(
-                    path=final_model_path.relative_to(artifacts_dir).as_posix()
-                )
-                if final_archive_path is not None:
-                    result.final_model_archive = ArtifactRef(
-                        path=final_archive_path.relative_to(artifacts_dir).as_posix()
-                    )
-
             maybe_upload_artifacts(task, out_dir, logger=logger)
 
             self._cleanup_local_artifacts(
diff --git a/src/worker/runner.py b/src/worker/runner.py
index 96c2b2cc..7b7086de 100644
--- a/src/worker/runner.py
+++ b/src/worker/runner.py
@@ -135,11 +135,7 @@ def _write_results(
         self._write_single_result(task_id, spec, out_dir, result)
 
         child_lookup = {entry.task_id: entry for entry in merged_children}
-        if isinstance(result, BaseExecutorResult):
-            children_payload: dict[str, Any] = dict(result.children)
-        else:
-            children_payload = result.get("children") or {}
-        for child_id, child_result in children_payload.items():
+        for child_id, child_result in result.children.items():
             child_info = child_lookup.get(child_id)
             if child_info is None:
                 continue

From f75aba81a4e76d7f4fb36b6aa2e61d9f89c1d10f Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 20:45:19 +0000
Subject: [PATCH 17/26] docs: correct result schema references
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both ARCHITECTURE.md and EXECUTORS.md pointed at
``src/shared/schemas/executor_result.py``, which was consolidated into
``src/shared/schemas/result.py`` later in the refactor. Update the
paths.

While here, drop the ``extra="allow"`` round-trip explanation and the
``SerializeAsAny`` paragraph from the EXECUTORS.md result schema
section — both are implementation justifications that already live in
``BaseExecutorResult``'s docstring / field annotations, and the
executor-author guide only needs the contract: return type, base
fields, and how artifact refs resolve.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 docs/ARCHITECTURE.md |  7 +++----
 docs/EXECUTORS.md    | 17 +++++------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index dac1b885..08a34223 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -109,10 +109,9 @@ scripts/dev/            compile_protos, sync_requirements, check_env_examples
 - **Task merging.** Compatible adjacent tasks in a DAG (same `taskType`,
   model, hardware shape, and merge key) coalesce into a single dispatch.
   Merged children ride on `WorkerTaskMessage.merged_children`; the worker
-  writes per-child results into `result.children` on the typed
-  `BaseExecutorResult` (`src/shared/schemas/executor_result.py`); the
-  dispatcher fans out synthetic `TASK_SUCCEEDED` / `TASK_FAILED` events.
-  Disable with `ENABLE_TASK_MERGE=false`.
+  writes per-child results into `result.children`; the dispatcher fans
+  out synthetic `TASK_SUCCEEDED` / `TASK_FAILED` events. Disable with
+  `ENABLE_TASK_MERGE=false`.
 - **Stage stickiness** (`ENABLE_STAGE_WEIGHT_STICKINESS=true`) — the
   dispatcher pins stages that reference an upstream stage's checkpoint
   to the worker that produced it, falling back to normal selection when
diff --git a/docs/EXECUTORS.md b/docs/EXECUTORS.md
index 959bc9c2..155efb08 100644
--- a/docs/EXECUTORS.md
+++ b/docs/EXECUTORS.md
@@ -24,10 +24,9 @@ Helper utilities live in `src/worker/executors/utils/` (`artifacts`,
 
 ## Result schema
 
-Every executor's `run()` returns a typed Pydantic subclass of
-`BaseExecutorResult` (`src/shared/schemas/executor_result.py`). The base
-class carries the cross-cutting fields the rest of the runtime relies
-on:
+Every executor's `run()` returns a subclass of `BaseExecutorResult`
+(`src/shared/schemas/result.py`). The base class carries two
+cross-cutting fields:
 
 - `children: dict[str, BaseExecutorResult]` — per-child results when
   merged tasks share a dispatch.
@@ -39,16 +38,10 @@ Per-executor subclasses live next to the executor they describe — e.g.
 `src/worker/executors/lora_sft_executor.py`. They add executor-specific
 fields (`items`, `usage`, `final_lora`, `command`, …).
 
-The base class is `extra="allow"`, so the server can deserialize an
-incoming envelope as `BaseExecutorResult` without knowing the concrete
-subclass; the executor-specific fields pass through. Artifact-bearing
-fields use `ArtifactRef` (`{"path": rel_path}`); relative paths resolve
-against the producer's `_artifacts` context via
+Artifact-bearing fields use `ArtifactRef` (`{"path": rel_path}`);
+relative paths resolve against the producer's `_artifacts` context via
 `artifact_to_source` / `_render_artifact_ref`.
 
-Serializers at the wire seam pass `serialize_as_any=True` so subclass
-fields survive the round-trip.
-
 ## Agent executor (utu / youtu-agent)
 
 `AgentExecutor` requires the following env vars to run; the executor

From 034a21a26c71a0377c15726335b3ba698bc6604f Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 21:01:04 +0000
Subject: [PATCH 18/26] fix(worker): retype omni summary fields as ArtifactRef
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The four Omni result subclasses declared their singular summary field
(``image`` / ``audio``) as ``dict[str, Any]``, but each constructor
passed ``items[0]["image" | "audio"]`` — an ``ArtifactRef`` instance —
which Pydantic v2 rejects with ``Input should be a valid dictionary``.
The validation fired on the success path (whenever ``items`` was
non-empty), so every successful Omni run would have raised
``ValidationError`` at result construction.

Retype each field as ``ArtifactRef | None`` and replace the empty-dict
fallback with ``None`` so both the populated and empty-items paths
validate cleanly.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/omni_text2audio_executor.py   | 4 ++--
 src/worker/executors/omni_text2general_executor.py | 4 ++--
 src/worker/executors/omni_text2image_executor.py   | 4 ++--
 src/worker/executors/omni_text2speech_executor.py  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/worker/executors/omni_text2audio_executor.py b/src/worker/executors/omni_text2audio_executor.py
index d0913317..ef4ff3c1 100644
--- a/src/worker/executors/omni_text2audio_executor.py
+++ b/src/worker/executors/omni_text2audio_executor.py
@@ -66,7 +66,7 @@
 class OmniText2AudioResult(OmniResult):
     executor: str = EXECUTOR_NAME
     mode: str = "bgm"
-    audio: dict[str, Any]
+    audio: ArtifactRef | None
     sample_rate: int
     num_waveforms: int
     audio_length: float
@@ -215,7 +215,7 @@ def _run_inner(
 
         return OmniText2AudioResult(
             model=self._model_name,
-            audio=items[0]["audio"] if items else {},
+            audio=items[0]["audio"] if items else None,
             items=items,
             sample_rate=sample_rate,
             num_waveforms=len(items),
diff --git a/src/worker/executors/omni_text2general_executor.py b/src/worker/executors/omni_text2general_executor.py
index a80314f5..9c48763c 100644
--- a/src/worker/executors/omni_text2general_executor.py
+++ b/src/worker/executors/omni_text2general_executor.py
@@ -53,7 +53,7 @@
 class OmniText2GeneralResult(OmniResult):
     executor: str = EXECUTOR_NAME
     mode: str = "narration"
-    audio: dict[str, Any]
+    audio: ArtifactRef | None
     sample_rate: int
     storyboard: dict[str, Any] | None = None
 
@@ -196,7 +196,7 @@ def _run_inner(
         return OmniText2GeneralResult(
             model=self._model_name,
             items=items,
-            audio=items[0]["audio"] if items else {},
+            audio=items[0]["audio"] if items else None,
             sample_rate=sample_rate,
             storyboard=spec_dict.get("storyboard"),
         )
diff --git a/src/worker/executors/omni_text2image_executor.py b/src/worker/executors/omni_text2image_executor.py
index 068202e5..f11db25b 100644
--- a/src/worker/executors/omni_text2image_executor.py
+++ b/src/worker/executors/omni_text2image_executor.py
@@ -31,7 +31,7 @@
 class OmniText2ImageResult(OmniResult):
     executor: str = EXECUTOR_NAME
     mode: str = "image"
-    image: dict[str, Any]
+    image: ArtifactRef | None
 
 
 class OmniText2ImageExecutor(OmniExecutorBase):
@@ -101,7 +101,7 @@ def _run_inner(
 
         return OmniText2ImageResult(
             model=self._model_name,
-            image=items[0]["image"] if items else {},
+            image=items[0]["image"] if items else None,
             items=items,
         )
 
diff --git a/src/worker/executors/omni_text2speech_executor.py b/src/worker/executors/omni_text2speech_executor.py
index 1cf5e26c..b37aa74a 100644
--- a/src/worker/executors/omni_text2speech_executor.py
+++ b/src/worker/executors/omni_text2speech_executor.py
@@ -38,7 +38,7 @@
 class OmniText2SpeechResult(OmniResult):
     executor: str = EXECUTOR_NAME
     mode: str = "tts"
-    audio: dict[str, Any]
+    audio: ArtifactRef | None
     sample_rate: int
     storyboard: dict[str, Any] | None = None
 
@@ -114,7 +114,7 @@ def _run_inner(
         return OmniText2SpeechResult(
             model=self._model_name,
             items=items,
-            audio=items[0]["audio"] if items else {},
+            audio=items[0]["audio"] if items else None,
             sample_rate=sample_rate,
             storyboard=spec_dict.get("storyboard"),
         )

From 210c6d2523a89c1c268ae954327af75b6702c504 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Tue, 19 May 2026 21:01:18 +0000
Subject: [PATCH 19/26] refactor(worker): align lora/sft result schemas with
 peer training executors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drops the inner ``task_id`` field from ``SFTResult`` and ``LoRAResult``
— ``ResultEnvelope`` already owns ``task_id`` at the envelope level, so
duplicating it inside the result is redundant — and loosens
``LoRAResult``'s formerly-required fields (``training_successful``,
``training_time_seconds``, ``output_dir``, ``checkpoints_dir``) to the
default-bearing shape ``DPOResult`` / ``PPOResult`` / ``SFTResult``
already use. The lax defaults are alive code: ``SFTExecutor``'s
torchrun-spawn early-return path constructs a partial ``SFTResult``
that omits ``checkpoints_dir`` and the error / timing fields.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/lora_sft_executor.py | 11 ++++-------
 src/worker/executors/sft_executor.py      |  3 ---
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index 157bbb25..4453e6cb 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -50,14 +50,13 @@
 
 
 class LoRAResult(BaseExecutorResult):
-    task_id: str
-    training_successful: bool
-    training_time_seconds: float
+    training_successful: bool = True
+    training_time_seconds: float | None = None
     error_message: str | None = None
     model_name: str | None = None
     dataset_size: int = 0
-    output_dir: str
-    checkpoints_dir: ArtifactRef
+    output_dir: str | None = None
+    checkpoints_dir: ArtifactRef | None = None
     resume_from_path: str | None = None
     final_lora: ArtifactRef | None = None
     final_lora_archive: ArtifactRef | None = None
@@ -317,7 +316,6 @@ def _compute_loss_with_guard(
                 )
                 logger.info("Prepared LoRA archive at %s", archive_path)
             result = LoRAResult(
-                task_id=task.task_id,
                 training_successful=training_successful,
                 training_time_seconds=training_time,
                 error_message=error_msg,
@@ -348,7 +346,6 @@ def _compute_loss_with_guard(
         training_time = time.time() - start_time
 
         result = LoRAResult(
-            task_id=task.task_id,
             training_successful=training_successful,
             training_time_seconds=training_time,
             error_message=error_msg,
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index ec194087..065bb122 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -45,7 +45,6 @@
 
 
 class SFTResult(BaseExecutorResult):
-    task_id: str | None = None
     training_successful: bool = True
     training_time_seconds: float | None = None
     error_message: str | None = None
@@ -457,7 +456,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                     else None
                 )
             result = SFTResult(
-                task_id=task.task_id,
                 training_successful=training_successful,
                 training_time_seconds=training_time,
                 error_message=error_msg,
@@ -508,7 +506,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
 
         training_time = time.time() - start_time
         result = SFTResult(
-            task_id=task.task_id,
             training_successful=training_successful,
             training_time_seconds=training_time,
             error_message=error_msg,

From 909a45a123743ba2b1b3dc13486b72ca878e03d4 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 12:56:55 +0000
Subject: [PATCH 20/26] fix(worker): type upstream context in graph-template
 and data-retrieval helpers

Tightens ``context: dict[str, Any]`` to ``dict[str, BaseExecutorResult]``
on ``_build_graph_template_messages``, ``_resolve_image_embedding_count``,
``_resolve_columns`` (``graph_templates.py``); ``_run_sql``, ``_run_s3``,
``_run_agent`` (``data_retrieval_executor.py``); and ``_resolve_expr_item``,
``_resolve_item`` (``echo_executor.py``).

Fixes a silent regression in ``_resolve_image_embedding_count``: after the
typed-result refactor, ``isinstance(upstream, dict)`` was always ``False``
(upstream is a ``BaseExecutorResult``), so the image-embedding broadcast
multiplier always returned ``None``. Replace the dict-isinstance + dict
``.get("count")`` with a sentinel-default ``getattr(upstream, "count",
_SENTINEL)``, which resolves declared fields on subclasses (``EchoResult``,
``DataRetrievalResult``) and ``model_extra``-stored extras on the base.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/data_retrieval_executor.py |  8 ++++----
 src/worker/executors/echo_executor.py           |  8 ++++++--
 src/worker/executors/utils/graph_templates.py   | 12 +++++++-----
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index 86e75145..bb9dfe9b 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -77,7 +77,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> DataRetrievalResult:
         return result
 
     def _run_sql(
-        self, data_cfg: dict[str, Any], context: dict[str, Any]
+        self, data_cfg: dict[str, Any], context: dict[str, BaseExecutorResult]
     ) -> DataRetrievalResult:
         """
         Execute SQL queries based on the provided data configuration and context.
@@ -85,7 +85,7 @@ def _run_sql(
         :param data_cfg: Description
         :type data_cfg: dict[str, Any]
         :param context: Description
-        :type context: dict[str, Any]
+        :type context: dict[str, BaseExecutorResult]
         :return: Description
         :rtype: dict[str, Any]
         """
@@ -158,7 +158,7 @@ def _run_sql(
     def _run_s3(
         self,
         data_cfg: dict[str, Any],
-        context: dict[str, Any],
+        context: dict[str, BaseExecutorResult],
         out_dir: Path,
     ) -> DataRetrievalResult:
         validate_keys(
@@ -221,7 +221,7 @@ def _run_s3(
     def _run_agent(
         self,
         data_cfg: dict[str, Any],
-        context: dict[str, Any],
+        context: dict[str, BaseExecutorResult],
         out_dir: Path,
     ) -> DataRetrievalResult:
         """Drive lumid.data's data agent for NL-driven retrieval."""
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index 52c0e25d..e70f1412 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -32,7 +32,9 @@ def _append_outputs(self, out_items: list[dict[str, Any]], value: Any) -> None:
         out_items.append({"output": value})
 
     @staticmethod
-    def _resolve_expr_item(item: dict[str, Any], context: dict[str, Any]) -> Any:
+    def _resolve_expr_item(
+        item: dict[str, Any], context: dict[str, BaseExecutorResult]
+    ) -> Any:
         expr = item.get("expr")
         if not expr:
             node = item.get("node")
@@ -51,7 +53,9 @@ def _resolve_expr_item(item: dict[str, Any], context: dict[str, Any]) -> Any:
             )
         return resolved
 
-    def _resolve_item(self, item: EchoItem, context: dict[str, Any]) -> Any:
+    def _resolve_item(
+        self, item: EchoItem, context: dict[str, BaseExecutorResult]
+    ) -> Any:
         if isinstance(item, str):
             return item
         elif isinstance(item, dict):
diff --git a/src/worker/executors/utils/graph_templates.py b/src/worker/executors/utils/graph_templates.py
index c21cd339..e51aad54 100644
--- a/src/worker/executors/utils/graph_templates.py
+++ b/src/worker/executors/utils/graph_templates.py
@@ -85,7 +85,7 @@ def build_prompts_from_graph_template(
 def _maybe_broadcast_image_prompts(
     prompts: Sequence[str | Message],
     data_cfg: dict[str, Any],
-    context: dict[str, Any],
+    context: dict[str, BaseExecutorResult],
 ) -> list[str | Message]:
     image_embedding = data_cfg.get("image_embedding")
     if not isinstance(image_embedding, dict):
@@ -99,7 +99,7 @@ def _maybe_broadcast_image_prompts(
 
 
 def _resolve_image_embedding_count(
-    image_embedding: dict[str, Any], context: dict[str, Any]
+    image_embedding: dict[str, Any], context: dict[str, BaseExecutorResult]
 ) -> int | None:
     node = image_embedding.get("node")
     if not isinstance(node, str):
@@ -109,15 +109,17 @@ def _resolve_image_embedding_count(
     if not isinstance(node, str) or not node:
         return None
     upstream = context.get(node)
-    if not isinstance(upstream, dict):
+    if upstream is None:
         return None
-    count = upstream.get("count")
+    count = getattr(upstream, "count", _SENTINEL)
     if isinstance(count, int) and count > 0:
         return count
     return None
 
 
-def _resolve_columns(columns_cfg: Any, context: dict[str, Any]) -> list[dict[str, Any]]:
+def _resolve_columns(
+    columns_cfg: Any, context: dict[str, BaseExecutorResult]
+) -> list[dict[str, Any]]:
     if not isinstance(columns_cfg, list):
         raise ExecutionError("graph_template.template.columns must be a list.")
 

From 0a42ae22d9d465b7fd35eb6930e07c59de686b25 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 12:57:55 +0000
Subject: [PATCH 21/26] refactor: standardize ok across executor results and
 remove training_successful
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lifts ``ok: bool = True`` to ``BaseExecutorResult`` (shared and SDK).
Drops the now-redundant declaration from ``APIResult``, ``EchoResult``,
``DataRetrievalResult``, ``OmniResult``, ``RAGResult``, and replaces
``training_successful: bool`` with the inherited ``ok`` across
``SFTResult``, ``LoRAResult``, ``DPOResult``, ``PPOResult`` — including
the corresponding constructor kwargs, local variables, and the LoRA
``if training_successful: return result`` guard.

Wire-format consequence: on-disk training result JSONs no longer carry a
``training_successful`` key; ``ok`` is emitted by every result.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 sdk/src/flowmesh/models/results.py              |  1 +
 src/shared/schemas/result.py                    |  1 +
 src/worker/executors/api_executor.py            |  1 -
 src/worker/executors/data_retrieval_executor.py |  1 -
 src/worker/executors/dpo_executor.py            |  5 +----
 src/worker/executors/echo_executor.py           |  1 -
 src/worker/executors/lora_sft_executor.py       | 13 ++++++-------
 src/worker/executors/omni_executor_base.py      |  1 -
 src/worker/executors/ppo_executor.py            | 10 ++++------
 src/worker/executors/rag_executor.py            |  1 -
 src/worker/executors/sft_executor.py            | 12 +++++-------
 11 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/sdk/src/flowmesh/models/results.py b/sdk/src/flowmesh/models/results.py
index b872d31b..8a1ae05d 100644
--- a/sdk/src/flowmesh/models/results.py
+++ b/sdk/src/flowmesh/models/results.py
@@ -19,6 +19,7 @@ class PathResponse(BaseModel):
 class BaseExecutorResult(BaseModel):
     model_config = ConfigDict(extra="allow", serialize_by_alias=True)
 
+    ok: bool = True
     children: dict[str, SerializeAsAny[BaseExecutorResult]] = Field(
         default_factory=dict, exclude_if=lambda v: not v
     )
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index 832fa25c..5a876f81 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -25,6 +25,7 @@ class BaseExecutorResult(BaseModel):
 
     model_config = ConfigDict(extra="allow", serialize_by_alias=True)
 
+    ok: bool = Field(default=True, description="Whether task execution succeeded.")
     children: dict[str, SerializeAsAny[BaseExecutorResult]] = Field(
         default_factory=dict,
         exclude_if=lambda v: not v,
diff --git a/src/worker/executors/api_executor.py b/src/worker/executors/api_executor.py
index a845b31e..bc0e9bde 100644
--- a/src/worker/executors/api_executor.py
+++ b/src/worker/executors/api_executor.py
@@ -19,7 +19,6 @@
 
 
 class APIResult(BaseExecutorResult):
-    ok: bool = True
     executor: str
     method: str
     url: str
diff --git a/src/worker/executors/data_retrieval_executor.py b/src/worker/executors/data_retrieval_executor.py
index bb9dfe9b..4347a262 100644
--- a/src/worker/executors/data_retrieval_executor.py
+++ b/src/worker/executors/data_retrieval_executor.py
@@ -26,7 +26,6 @@
 
 
 class DataRetrievalResult(BaseExecutorResult):
-    ok: bool = True
     type: str | None = None
     items: list[dict[str, Any]] = []
     count: int | None = None
diff --git a/src/worker/executors/dpo_executor.py b/src/worker/executors/dpo_executor.py
index 824cda27..b62256f2 100644
--- a/src/worker/executors/dpo_executor.py
+++ b/src/worker/executors/dpo_executor.py
@@ -47,7 +47,6 @@
 
 
 class DPOResult(BaseExecutorResult):
-    training_successful: bool = True
     training_time_seconds: float | None = None
     error_message: str | None = None
     model_name: str | None = None
@@ -96,7 +95,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
                 if ipc_path.exists():
                     return DPOResult.model_validate(self.load_json(ipc_path))
                 return DPOResult(
-                    training_successful=True,
                     spawned_torchrun=True,
                     model_name=(
                         spec.model.source.identifier
@@ -477,7 +475,6 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
 
             training_time = time.time() - start_time
             result = DPOResult(
-                training_successful=True,
                 training_time_seconds=training_time,
                 error_message=None,
                 model_name=self._model_name,
@@ -512,7 +509,7 @@ def _execute_training(self, task: ExecutorTask, out_dir: Path) -> DPOResult:
         except Exception as exc:
             training_time = time.time() - start_time
             result = DPOResult(
-                training_successful=False,
+                ok=False,
                 training_time_seconds=training_time,
                 error_message=str(exc),
                 model_name=self._model_name,
diff --git a/src/worker/executors/echo_executor.py b/src/worker/executors/echo_executor.py
index e70f1412..1a03662f 100644
--- a/src/worker/executors/echo_executor.py
+++ b/src/worker/executors/echo_executor.py
@@ -16,7 +16,6 @@
 
 
 class EchoResult(BaseExecutorResult):
-    ok: bool = True
     items: list[dict[str, Any]] = []
     count: int = 0
 
diff --git a/src/worker/executors/lora_sft_executor.py b/src/worker/executors/lora_sft_executor.py
index 4453e6cb..9e36e220 100644
--- a/src/worker/executors/lora_sft_executor.py
+++ b/src/worker/executors/lora_sft_executor.py
@@ -50,7 +50,6 @@
 
 
 class LoRAResult(BaseExecutorResult):
-    training_successful: bool = True
     training_time_seconds: float | None = None
     error_message: str | None = None
     model_name: str | None = None
@@ -77,7 +76,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> LoRAResult:
         configure_hf_library_logging()
         spec = self.require_spec(task, LoRASFTSpecStrict)
         start_time = time.time()
-        training_successful = False
+        ok = False
         error_msg: str | None = None
 
         if (
@@ -291,7 +290,7 @@ def _compute_loss_with_guard(
 
             logger.info("Starting LoRA supervised fine-tuning run")
             trainer.train()
-            training_successful = True
+            ok = True
             logger.info("LoRA SFT training completed")
 
             final_adapter_path: Path | None = None
@@ -316,7 +315,7 @@ def _compute_loss_with_guard(
                 )
                 logger.info("Prepared LoRA archive at %s", archive_path)
             result = LoRAResult(
-                training_successful=training_successful,
+                ok=ok,
                 training_time_seconds=training_time,
                 error_message=error_msg,
                 model_name=self._model_name,
@@ -340,13 +339,13 @@ def _compute_loss_with_guard(
 
         except Exception as exc:  # pylint: disable=broad-except
             error_msg = str(exc)
-            training_successful = False
+            ok = False
             logger.exception("LoRA SFT training failed: %s", exc)
 
         training_time = time.time() - start_time
 
         result = LoRAResult(
-            training_successful=training_successful,
+            ok=ok,
             training_time_seconds=training_time,
             error_message=error_msg,
             model_name=self._model_name,
@@ -356,7 +355,7 @@ def _compute_loss_with_guard(
             resume_from_path=resume_path.as_posix() if resume_path else None,
         )
 
-        if training_successful:
+        if ok:
             return result
 
         write_executor_result(out_dir / "results.json", task.task_id, task.spec, result)
diff --git a/src/worker/executors/omni_executor_base.py b/src/worker/executors/omni_executor_base.py
index 073d71c8..8fd9ec21 100644
--- a/src/worker/executors/omni_executor_base.py
+++ b/src/worker/executors/omni_executor_base.py
@@ -42,7 +42,6 @@
 
 
 class OmniResult(BaseExecutorResult):
-    ok: bool = True
     executor: str
     mode: str
     model: str | None
diff --git a/src/worker/executors/ppo_executor.py b/src/worker/executors/ppo_executor.py
index cd7acedb..4c859719 100644
--- a/src/worker/executors/ppo_executor.py
+++ b/src/worker/executors/ppo_executor.py
@@ -53,7 +53,6 @@
 
 
 class PPOResult(BaseExecutorResult):
-    training_successful: bool = True
     training_time_seconds: float | None = None
     error_message: str | None = None
     model_name: str | None = None
@@ -433,7 +432,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> PPOResult:
                 return PPOResult.model_validate(self.load_json(ipc_path))
             self._task_out_dir = None
             return PPOResult(
-                training_successful=True,
                 spawned_torchrun=True,
                 model_name=spec.model_name,
                 output_dir=out_dir.as_posix(),
@@ -863,7 +861,7 @@ def build_trainer() -> _EarlyStopPPOTrainer:
                 pass
             logger.info("PPO training completed")
 
-            training_successful = True
+            ok = True
             error_msg = None
 
             # Save model if requested
@@ -891,13 +889,13 @@ def build_trainer() -> _EarlyStopPPOTrainer:
                     logger.warning("Failed to save model: %s", exc)
 
         except Exception as exc:
-            training_successful = False
+            ok = False
             error_msg = str(exc)
             logger.exception("PPO training failed: %s", exc)
             training_time = time.time() - start_time
             dataset_size = len(dataset) if "dataset" in locals() else 0  # type: ignore
             result = PPOResult(
-                training_successful=training_successful,
+                ok=ok,
                 training_time_seconds=training_time,
                 error_message=error_msg,
                 model_name=self._model_name,
@@ -913,7 +911,7 @@ def build_trainer() -> _EarlyStopPPOTrainer:
         training_time = time.time() - start_time
 
         result = PPOResult(
-            training_successful=training_successful,
+            ok=ok,
             training_time_seconds=training_time,
             error_message=error_msg,
             model_name=self._model_name,
diff --git a/src/worker/executors/rag_executor.py b/src/worker/executors/rag_executor.py
index 0b9c41ae..06b3c705 100644
--- a/src/worker/executors/rag_executor.py
+++ b/src/worker/executors/rag_executor.py
@@ -27,7 +27,6 @@
 
 
 class RAGResult(BaseExecutorResult):
-    ok: bool = True
     executor: str = EXECUTOR_NAME
     qdrant: dict[str, Any]
     embedding: dict[str, Any]
diff --git a/src/worker/executors/sft_executor.py b/src/worker/executors/sft_executor.py
index 065bb122..030dcd81 100644
--- a/src/worker/executors/sft_executor.py
+++ b/src/worker/executors/sft_executor.py
@@ -45,7 +45,6 @@
 
 
 class SFTResult(BaseExecutorResult):
-    training_successful: bool = True
     training_time_seconds: float | None = None
     error_message: str | None = None
     model_name: str | None = None
@@ -75,7 +74,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
         spec = self.require_spec(task, SFTSpecStrict)
         requested_gpu_count = self._requested_gpu_count(spec)
         start_time = time.time()
-        training_successful = False
+        ok = False
         error_msg: str | None = None
         caught_exc: Exception | None = None
         self._task_out_dir = out_dir
@@ -208,7 +207,6 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                     return SFTResult.model_validate(self.load_json(ipc_path))
                 self._task_out_dir = None
                 return SFTResult(
-                    training_successful=True,
                     spawned_torchrun=True,
                     model_name=spec.model_name,
                     output_dir=out_dir.as_posix(),
@@ -410,7 +408,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
 
             logger.info("Starting supervised fine-tuning")
             trainer.train()
-            training_successful = True
+            ok = True
             logger.info("Training finished")
 
             final_model_path: Path | None = None
@@ -456,7 +454,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
                     else None
                 )
             result = SFTResult(
-                training_successful=training_successful,
+                ok=ok,
                 training_time_seconds=training_time,
                 error_message=error_msg,
                 model_name=self._model_name,
@@ -492,7 +490,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
 
         except Exception as exc:
             error_msg = str(exc)
-            training_successful = False
+            ok = False
             caught_exc = exc
             logger.exception("SFT training failed: %s", exc)
             trainer = None
@@ -506,7 +504,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
 
         training_time = time.time() - start_time
         result = SFTResult(
-            training_successful=training_successful,
+            ok=ok,
             training_time_seconds=training_time,
             error_message=error_msg,
             model_name=self._model_name,

From 9a24320b0a17f9a83ebbe6d62fa50dd6eeb87236 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 12:58:26 +0000
Subject: [PATCH 22/26] fix(shared): rename internal artifacts field to
 artifacts_ and lock against subclass redef
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Renames ``BaseExecutorResult.artifacts`` to ``artifacts_`` (wire alias
``_artifacts`` unchanged) and adds a ``__pydantic_init_subclass__`` hook
that rejects any subclass declaring its own ``artifacts_`` annotation.
The trailing-underscore Python name + locked field signals that this
context is internal — set by the runner via ``result.artifacts_ =
build_artifact_context(...)``, never by executor code — while freeing
executor result subclasses to declare their own ``artifacts`` field
without colliding with the base.

Updates the four attribute-access sites: ``_render_artifact_ref``
(``dispatcher/base.py``), ``artifact_to_source`` (``worker/utils/
artifacts.py``), ``write_executor_result`` (``worker/utils/
checkpoints.py``), and ``_finalize_materialize`` (``sdk/resources/
results.py``).

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 sdk/src/flowmesh/models/results.py        | 11 ++++++++++-
 sdk/src/flowmesh/resources/results.py     |  2 +-
 src/server/dispatcher/base.py             |  2 +-
 src/shared/schemas/result.py              | 11 ++++++++++-
 src/worker/executors/utils/artifacts.py   |  2 +-
 src/worker/executors/utils/checkpoints.py |  2 +-
 6 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/sdk/src/flowmesh/models/results.py b/sdk/src/flowmesh/models/results.py
index 8a1ae05d..399ea7b8 100644
--- a/sdk/src/flowmesh/models/results.py
+++ b/sdk/src/flowmesh/models/results.py
@@ -23,7 +23,16 @@ class BaseExecutorResult(BaseModel):
     children: dict[str, SerializeAsAny[BaseExecutorResult]] = Field(
         default_factory=dict, exclude_if=lambda v: not v
     )
-    artifacts: ArtifactContext | None = Field(default=None, alias="_artifacts")
+    artifacts_: ArtifactContext | None = Field(default=None, alias="_artifacts")
+
+    @classmethod
+    def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
+        super().__pydantic_init_subclass__(**kwargs)
+        if "artifacts_" in cls.__annotations__:
+            raise TypeError(
+                f"{cls.__name__} may not redefine the internal "
+                "BaseExecutorResult.artifacts_ field"
+            )
 
 
 class ResultEnvelope(BaseModel):
diff --git a/sdk/src/flowmesh/resources/results.py b/sdk/src/flowmesh/resources/results.py
index 671aaa4b..3742106b 100644
--- a/sdk/src/flowmesh/resources/results.py
+++ b/sdk/src/flowmesh/resources/results.py
@@ -198,7 +198,7 @@ def _finalize_materialize(
         return {}, json_path, extracted
 
     envelope = ResultEnvelope.model_validate_json(json_path.read_text())
-    if _wants_artifacts(sections) and (ctx := envelope.result.artifacts):
+    if _wants_artifacts(sections) and (ctx := envelope.result.artifacts_):
         ctx.base_dir = (output_dir / task_id).resolve().as_posix()
         ctx.base_url = None
     payload = envelope.model_dump(mode="json")
diff --git a/src/server/dispatcher/base.py b/src/server/dispatcher/base.py
index a370fb05..501a9b2d 100644
--- a/src/server/dispatcher/base.py
+++ b/src/server/dispatcher/base.py
@@ -857,7 +857,7 @@ def _render_artifact_ref(value: Any, stage_result: ResultEnvelope) -> str | None
         path_value = value.get("path")
         if not isinstance(path_value, str) or not path_value:
             return None
-        ctx = stage_result.result.artifacts
+        ctx = stage_result.result.artifacts_
         if ctx is None:
             return None
         base_url = ctx.base_url
diff --git a/src/shared/schemas/result.py b/src/shared/schemas/result.py
index 5a876f81..e4378f90 100644
--- a/src/shared/schemas/result.py
+++ b/src/shared/schemas/result.py
@@ -31,12 +31,21 @@ class BaseExecutorResult(BaseModel):
         exclude_if=lambda v: not v,
         description="Per-child result payloads for task merging.",
     )
-    artifacts: ArtifactContext | None = Field(
+    artifacts_: ArtifactContext | None = Field(
         default=None,
         alias="_artifacts",
         description="Resolution context for relative artifact refs.",
     )
 
+    @classmethod
+    def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
+        super().__pydantic_init_subclass__(**kwargs)
+        if "artifacts_" in cls.__annotations__:
+            raise TypeError(
+                f"{cls.__name__} may not redefine the internal "
+                "BaseExecutorResult.artifacts_ field"
+            )
+
 
 class ResultEnvelope(BaseModel):
     task_id: str = Field(description="Task identifier.")
diff --git a/src/worker/executors/utils/artifacts.py b/src/worker/executors/utils/artifacts.py
index 6935bc22..8605433b 100644
--- a/src/worker/executors/utils/artifacts.py
+++ b/src/worker/executors/utils/artifacts.py
@@ -24,7 +24,7 @@ def artifact_to_source(
         context
         and node
         and (node_result := context.get(node))
-        and (ctx := node_result.artifacts)
+        and (ctx := node_result.artifacts_)
     ):
         base_url = ctx.base_url
         base_dir = ctx.base_dir
diff --git a/src/worker/executors/utils/checkpoints.py b/src/worker/executors/utils/checkpoints.py
index acf7d345..ed57f369 100644
--- a/src/worker/executors/utils/checkpoints.py
+++ b/src/worker/executors/utils/checkpoints.py
@@ -414,7 +414,7 @@ def write_executor_result(
 ) -> None:
     """Stamp ``_artifacts`` onto ``result`` and persist the envelope."""
     path.parent.mkdir(parents=True, exist_ok=True)
-    result.artifacts = build_artifact_context(spec, path.parent)
+    result.artifacts_ = build_artifact_context(spec, path.parent)
     envelope = ResultEnvelope(task_id=task_id, result=result)
     atomic_write_text(path, envelope.model_dump_json(indent=2))
 

From bdbf74b650e9d7df88873a18cec1aef79e1012a6 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 12:58:47 +0000
Subject: [PATCH 23/26] chore: tighten SDK pydantic pin to >=2.12.3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SDK's ``pydantic>=2.0.0`` floor was too loose — ``exclude_if``
(used on ``BaseExecutorResult.children`` and ``ArtifactContext.
base_url``) was added in pydantic v2.12.0, so older 2.x versions would
silently ignore the kwarg and emit unwanted null/empty keys. Pin the
SDK floor to ``>=2.12.3``, matching the runtime workspace.

(The reviewer's request to bump to ``>=2.12.4`` is blocked upstream by
``vllm-omni 0.18 → gradio 5.50 → pydantic<=2.12.3``; the 2.12.4 change
only refines JSON Schema's ``required`` list interaction with
``exclude_if`` and doesn't affect the ``model_dump`` path FlowMesh
uses.)

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 sdk/pyproject.toml | 2 +-
 uv.lock            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index 881e7689..97ca5abc 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -13,7 +13,7 @@ license-files = ["LICENSE"]
 dependencies = [
     "httpx>=0.27.0",
     "pandas>=2.3.3",
-    "pydantic>=2.0.0",
+    "pydantic>=2.12.3",
     "pyyaml>=6.0.0",
 ]
 
diff --git a/uv.lock b/uv.lock
index ea21fb32..650e27a2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2355,7 +2355,7 @@ dependencies = [
 requires-dist = [
     { name = "httpx", specifier = ">=0.27.0" },
     { name = "pandas", specifier = ">=2.3.3" },
-    { name = "pydantic", specifier = ">=2.0.0" },
+    { name = "pydantic", specifier = ">=2.12.3" },
     { name = "pyyaml", specifier = ">=6.0.0" },
 ]
 

From 48406cf2ffe44a735e59c0f0bf69e7985c0257f9 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 12:58:56 +0000
Subject: [PATCH 24/26] docs(worker): describe typed result contract in
 base_executor docstring

Updates the module docstring's usage example and contract block to use
``BaseExecutorResult`` instead of the legacy ``-> dict`` shape. The
example now declares a ``MyResult(BaseExecutorResult)`` subclass and
returns it, matching the actual typed contract every executor follows
post-result-schema refactor. Import-line in the snippet now points at
the canonical locations: ``BaseExecutorResult`` from
``shared.schemas.result``, ``Executor`` / ``ExecutionError`` from
``worker.executors.base_executor``.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 src/worker/executors/base_executor.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/worker/executors/base_executor.py b/src/worker/executors/base_executor.py
index f17a3d7f..f5a20245 100644
--- a/src/worker/executors/base_executor.py
+++ b/src/worker/executors/base_executor.py
@@ -2,19 +2,23 @@
 Executor base class and a minimal example implementation.
 
 Usage:
-    from executor_base import Executor, ExecutionError, EchoExecutor
+    from shared.schemas.result import BaseExecutorResult
+    from worker.executors.base_executor import Executor, ExecutionError
+
+    class MyResult(BaseExecutorResult):
+        echo: str
 
     class MyExecutor(Executor):
         name = "my-executor"
-        def run(self, task: ExecutorTask, out_dir: Path) -> dict:
+        def run(self, task: ExecutorTask, out_dir: Path) -> MyResult:
             # ... your logic ...
-            return {"ok": True, "echo": task.task_id}
+            return MyResult(echo=task.task_id)
 
 Contract:
-- Implement `run(task: ExecutorTask, out_dir: Path) -> dict`. The runner
-  writes the returned dict to `out_dir/results.json` and injects the
-  top-level `_artifacts` context — executors should not write that file
-  themselves on the success path.
+- Implement `run(task: ExecutorTask, out_dir: Path) -> BaseExecutorResult`.
+  The runner writes the returned model to `out_dir/results.json` and
+  injects the top-level `_artifacts` context — executors should not write
+  that file themselves on the success path.
 - Drop generated files under `out_dir/artifacts/` (uploaded to the server
   when the task has an HTTP destination) or `scratch_dir(out_dir)` for
   local-only scratch data.

From 2ef78e03b7df6d2e1a11ebfc1ae8a6cb4db2ea56 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 13:15:54 +0000
Subject: [PATCH 25/26] chore(security): ignore CVEs with no fix versions in
 pip-audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pip-audit flagged 22 new advisories (torch, transformers, nltk, pyjwt,
joblib) — all with no fix version published. Add ``--ignore-vuln``
flags for each PYSEC ID in the worker CPU and worker GPU steps, and
extend the documented advisory table in ``docs/CODE_STYLE.md``
(renaming the column from GHSA to Advisory since PYSEC IDs now appear
alongside GHSA IDs).

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 .github/workflows/security.yml | 44 ++++++++++++++++++++++++++++++++++
 docs/CODE_STYLE.md             | 34 +++++++++++++++++++++-----
 2 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index 0859dfd0..41a753ea 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -105,6 +105,28 @@ jobs:
             --ignore-vuln GHSA-vfmq-68hx-4jfw \
             --ignore-vuln GHSA-j7w6-vpvq-j3gm \
             --ignore-vuln GHSA-98h9-4798-4q5v \
+            --ignore-vuln PYSEC-2025-189 \
+            --ignore-vuln PYSEC-2025-190 \
+            --ignore-vuln PYSEC-2025-191 \
+            --ignore-vuln PYSEC-2025-192 \
+            --ignore-vuln PYSEC-2025-193 \
+            --ignore-vuln PYSEC-2025-194 \
+            --ignore-vuln PYSEC-2025-195 \
+            --ignore-vuln PYSEC-2025-196 \
+            --ignore-vuln PYSEC-2025-197 \
+            --ignore-vuln PYSEC-2025-210 \
+            --ignore-vuln PYSEC-2026-139 \
+            --ignore-vuln PYSEC-2025-211 \
+            --ignore-vuln PYSEC-2025-212 \
+            --ignore-vuln PYSEC-2025-213 \
+            --ignore-vuln PYSEC-2025-214 \
+            --ignore-vuln PYSEC-2025-215 \
+            --ignore-vuln PYSEC-2025-216 \
+            --ignore-vuln PYSEC-2025-217 \
+            --ignore-vuln PYSEC-2025-218 \
+            --ignore-vuln PYSEC-2026-97 \
+            --ignore-vuln PYSEC-2025-183 \
+            --ignore-vuln PYSEC-2024-277 \
             -r /tmp/requirements-worker-cpu-audit.txt
       - name: Run pip-audit (worker GPU delta)  # no --strict: flashinfer-jit-cache is unauditable on PyPI
         run: |
@@ -130,4 +152,26 @@ jobs:
             --ignore-vuln GHSA-83vm-p52w-f9pw \
             --ignore-vuln GHSA-j7w6-vpvq-j3gm \
             --ignore-vuln GHSA-98h9-4798-4q5v \
+            --ignore-vuln PYSEC-2025-189 \
+            --ignore-vuln PYSEC-2025-190 \
+            --ignore-vuln PYSEC-2025-191 \
+            --ignore-vuln PYSEC-2025-192 \
+            --ignore-vuln PYSEC-2025-193 \
+            --ignore-vuln PYSEC-2025-194 \
+            --ignore-vuln PYSEC-2025-195 \
+            --ignore-vuln PYSEC-2025-196 \
+            --ignore-vuln PYSEC-2025-197 \
+            --ignore-vuln PYSEC-2025-210 \
+            --ignore-vuln PYSEC-2026-139 \
+            --ignore-vuln PYSEC-2025-211 \
+            --ignore-vuln PYSEC-2025-212 \
+            --ignore-vuln PYSEC-2025-213 \
+            --ignore-vuln PYSEC-2025-214 \
+            --ignore-vuln PYSEC-2025-215 \
+            --ignore-vuln PYSEC-2025-216 \
+            --ignore-vuln PYSEC-2025-217 \
+            --ignore-vuln PYSEC-2025-218 \
+            --ignore-vuln PYSEC-2026-97 \
+            --ignore-vuln PYSEC-2025-183 \
+            --ignore-vuln PYSEC-2024-277 \
             -r src/worker/requirements/requirements.gpu.txt
diff --git a/docs/CODE_STYLE.md b/docs/CODE_STYLE.md
index 33ed7383..c63edf27 100644
--- a/docs/CODE_STYLE.md
+++ b/docs/CODE_STYLE.md
@@ -89,13 +89,13 @@ CI runs `pip-audit` against each generated requirements file
 When pip-audit reports a new CVE, the only real fix is to bump the
 offending dep in `pyproject.toml`, then `uv lock` and `uv run
 scripts/dev/sync_requirements.py --write`. Silencing via `--ignore-vuln`
-is a last resort; every silenced GHSA needs a written upgrade-blocker.
-The currently-ignored advisories and the upgrade blocker that justifies
-each are listed below; the same list is encoded as `--ignore-vuln`
-flags in `.github/workflows/security.yml`.
+is a last resort; every silenced advisory needs a written
+upgrade-blocker. The currently-ignored advisories and the upgrade
+blocker that justifies each are listed below; the same list is encoded
+as `--ignore-vuln` flags in `.github/workflows/security.yml`.
 
-| GHSA | Package | Fix version | Why ignored |
-|------|---------|-------------|-------------|
+| Advisory | Package | Fix version | Why ignored |
+|----------|---------|-------------|-------------|
 | GHSA-69w3-r845-3855 | transformers | 5.0.0rc3 | held by vllm/vllm-omni 0.18 compatibility |
 | GHSA-pf3h-qjgv-vcpr | vllm | 0.19.0 | held by transformers 4.57 + adjacent inference deps |
 | GHSA-pq5c-rjhq-qp7p | vllm | 0.19.0 | same |
@@ -117,6 +117,28 @@ flags in `.github/workflows/security.yml`.
 | GHSA-w8v5-vhqr-4h9v | diskcache | (none) | upstream unmaintained, no fixed version published |
 | GHSA-j7w6-vpvq-j3gm | diffusers | 0.38.0 | fix requires safetensors>=0.8.0rc0 pre-release; uv lock won't pick up pre-releases without explicit opt-in |
 | GHSA-98h9-4798-4q5v | diffusers | 0.38.0 | same blocker as GHSA-j7w6-vpvq-j3gm — both fixed in 0.38.0 |
+| PYSEC-2025-189 | torch | (none) | no fix version published |
+| PYSEC-2025-190 | torch | (none) | same |
+| PYSEC-2025-191 | torch | (none) | same |
+| PYSEC-2025-192 | torch | (none) | same |
+| PYSEC-2025-193 | torch | (none) | same |
+| PYSEC-2025-194 | torch | (none) | same |
+| PYSEC-2025-195 | torch | (none) | same |
+| PYSEC-2025-196 | torch | (none) | same |
+| PYSEC-2025-197 | torch | (none) | same |
+| PYSEC-2025-210 | torch | (none) | same |
+| PYSEC-2026-139 | torch | (none) | same |
+| PYSEC-2025-211 | transformers | (none) | no fix version published; transformers also held by vllm-omni 0.18 |
+| PYSEC-2025-212 | transformers | (none) | same |
+| PYSEC-2025-213 | transformers | (none) | same |
+| PYSEC-2025-214 | transformers | (none) | same |
+| PYSEC-2025-215 | transformers | (none) | same |
+| PYSEC-2025-216 | transformers | (none) | same |
+| PYSEC-2025-217 | transformers | (none) | same |
+| PYSEC-2025-218 | transformers | (none) | same |
+| PYSEC-2026-97 | nltk | (none) | no fix version published |
+| PYSEC-2025-183 | pyjwt | (none) | no fix version published |
+| PYSEC-2024-277 | joblib | (none) | no fix version published |
 
 When a blocker lifts (e.g. transformers 5 ↔ vllm 0.19 line stabilizes),
 drop the corresponding `--ignore-vuln` flag from the workflow and the

From c99082e32b704971c0b24858c01195c6f845dea6 Mon Sep 17 00:00:00 2001
From: Noppanat Wadlom <noppanat.wad@gmail.com>
Date: Wed, 20 May 2026 13:31:08 +0000
Subject: [PATCH 26/26] chore(security): ignore vllm/gradio CVEs surfaced by
 GPU pip-audit step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PYSEC-2025-222 (vllm 0.18.0) and PYSEC-2024-274 (gradio 5.50.0) — both
with no fix version published and both blocked by the vllm-omni 0.18
pin. Add ``--ignore-vuln`` flags to the worker-GPU step and document
each in ``docs/CODE_STYLE.md``.

Signed-off-by: Noppanat Wadlom <noppanat.wad@gmail.com>
---
 .github/workflows/security.yml | 2 ++
 docs/CODE_STYLE.md             | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
index 41a753ea..d2540862 100644
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -174,4 +174,6 @@ jobs:
             --ignore-vuln PYSEC-2026-97 \
             --ignore-vuln PYSEC-2025-183 \
             --ignore-vuln PYSEC-2024-277 \
+            --ignore-vuln PYSEC-2025-222 \
+            --ignore-vuln PYSEC-2024-274 \
             -r src/worker/requirements/requirements.gpu.txt
diff --git a/docs/CODE_STYLE.md b/docs/CODE_STYLE.md
index c63edf27..394628a9 100644
--- a/docs/CODE_STYLE.md
+++ b/docs/CODE_STYLE.md
@@ -139,6 +139,8 @@ as `--ignore-vuln` flags in `.github/workflows/security.yml`.
 | PYSEC-2026-97 | nltk | (none) | no fix version published |
 | PYSEC-2025-183 | pyjwt | (none) | no fix version published |
 | PYSEC-2024-277 | joblib | (none) | no fix version published |
+| PYSEC-2025-222 | vllm | (none) | no fix version published; held by vllm-omni 0.18 pin |
+| PYSEC-2024-274 | gradio | (none) | no fix version published; vllm-omni 0.18 pins gradio==5.50 |
 
 When a blocker lifts (e.g. transformers 5 ↔ vllm 0.19 line stabilizes),
 drop the corresponding `--ignore-vuln` flag from the workflow and the