diff --git a/LIMITATIONS.md b/LIMITATIONS.md index 3708f16..a897115 100644 --- a/LIMITATIONS.md +++ b/LIMITATIONS.md @@ -16,6 +16,14 @@ The TEE-sealed signing key is generated inside the enclave and cannot be extract **Phase 2 completeness: server-side attestation** Phase 1 attests the gateway boundary. It does not attest what happens on the other side of that boundary. The `tool_transcript.hash` field in the TRACE Claim records a hash of the audit chain tip, but the tool transcript binding that ties a specific tool execution to a specific response is Phase 2 work. Phase 1 partially addresses P1.4 (transitive trust into upstream dependencies) and P4.1 (typosquatted packages added to catalog) -- both are fully closed by Phase 2. Any compliance claim that relies on server-side proof must wait for Phase 2. +**Tool server non-repudiation** +The audit chain records a `response_payload_hash` for each tool call and an `evidence_class` that indicates the assurance level of the recorded response: + +- **`tls-pinned`**: The tool server URL uses HTTPS and has a non-placeholder TLS certificate fingerprint in the catalog. The response was received over a TLS connection whose certificate was pinned at catalog-load time. A verifier can confirm the server identity against the catalog fingerprint. +- **`hash-only`**: The tool server uses HTTP, has no TLS fingerprint assigned in the catalog (dev placeholder), or TLS pinning could not be enforced on the current platform. The hash proves what the gateway received, but the server identity cannot be independently verified from the audit record alone. + +Tool servers do not sign their individual responses. A `tls-pinned` entry proves the response came from a server holding the catalog-pinned certificate but does not prevent the server itself from later denying it produced a specific response. 
For the strongest evidence currently available (short of true non-repudiation, which would require tool servers to sign their responses), configure non-placeholder TLS fingerprints for all upstream servers so all evidence is `tls-pinned`, and treat the TEE attestation as the binding authority for what the gateway recorded. + **LLM inference and model output** cMCP intercepts tool calls at the MCP protocol boundary. It does not observe or modify LLM inference, the contents of the agent's context window, or model outputs that do not produce a tool call. A model could hallucinate a response, leak sensitive context in a chat reply, or receive a poisoned tool response that influences subsequent reasoning -- none of these are visible to the gateway. cMCP controls the tool boundary, not the model boundary. diff --git a/src/cmcp_runtime/audit/chain.py b/src/cmcp_runtime/audit/chain.py index 624e63d..9fe9e9d 100644 --- a/src/cmcp_runtime/audit/chain.py +++ b/src/cmcp_runtime/audit/chain.py @@ -61,6 +61,7 @@ class AuditEntry: detail: dict[str, str | int | float] | None # optional structured detail (e.g. suspicious_call_sequence) workflow_id: str | None prev_entry_hash: str # "genesis" for first entry + evidence_class: str = field(default="hash-only") # "tls-pinned" when server TLS cert pin is enforced entry_hash: str = field(default="") # computed after construction def _canonical_body(self) -> bytes: @@ -156,6 +157,7 @@ def append( latency_us: int | None = None, request_payload_hash: str | None = None, response_payload_hash: str | None = None, + evidence_class: str = "hash-only", response_inspection_result: InspectionResult | None = None, session_sensitivity_before: str | None = None, session_sensitivity_after: str | None = None, @@ -183,6 +185,7 @@ def append( request_payload_hash=request_payload_hash, response_payload_hash=response_payload_hash, response_inspection_result=response_inspection_result, + evidence_class=evidence_class, session_sensitivity_before=session_sensitivity_before, session_sensitivity_after=session_sensitivity_after, detail=detail, diff --git 
a/src/cmcp_runtime/mcp/proxy.py b/src/cmcp_runtime/mcp/proxy.py index b6c5458..cc528d3 100644 --- a/src/cmcp_runtime/mcp/proxy.py +++ b/src/cmcp_runtime/mcp/proxy.py @@ -813,6 +813,17 @@ async def call_tool( # egress check saw (post-scan, possibly sanitized) so a verifier can match # the audited response against what the caller actually received. response_payload_hash = f"sha256:{hashlib.sha256(response_bytes).hexdigest()}" + # Evidence class: tls-pinned when the upstream server has a real cert pin in the catalog. + from cmcp_runtime.mcp import tls_pinning as _tls_mod + _fp = entry.server.tls_fingerprint if entry else "" + evidence_class = ( + "tls-pinned" + if entry + and entry.server.url.startswith("https://") + and _fp + and _fp != _tls_mod.PLACEHOLDER_FINGERPRINT + else "hash-only" + ) # INJECT-003: include injection scanner and pattern in audit detail when detected injection_detail: dict[str, str | int | float] | None = ( { @@ -834,6 +845,7 @@ async def call_tool( latency_us=latency_us, request_payload_hash=request_payload_hash, response_payload_hash=response_payload_hash, + evidence_class=evidence_class, session_sensitivity_before=sensitivity_before, session_sensitivity_after=self._session.max_sensitivity, workflow_id=workflow_id,