From f31dcab37ef8b251625687ed629001513eddfbb2 Mon Sep 17 00:00:00 2001 From: BELLO SHEHU <1739677116@qq.com> Date: Sun, 26 Apr 2026 15:40:51 +0000 Subject: [PATCH 1/3] v0.8: docs/LIFE_BINDING_SPEC.md + schemas/binding.schema.json + sanity tests (#103) Per-topic normative spec for Topic 3 (Runtime Binding) of the v0.8 asset-architecture epic (#106). Defines binding/runtime_binding.json and encodes the four locked Topic-3 decisions: - D1=C hybrid capability vocabulary (~20 core enum + 'x-' extension). - D2=C issuer-self-decided engine strictness (strict: true | false). - D4=C hybrid hard_constraints keys; runtime MUST fail-close on unknown keys, and the schema enforces this statically via patternProperties + additionalProperties: false. - D5=A AND-gate hosted-API decision (issuer half only; user half is policy/hosted_api.json from v0.6); spec defaults to allowed:false when binding omits the section. Also encodes Topic 4 D4=C three-field surface shape (supported / preferred / minimum_required), and references the tier system (#104) via an optional capability_binding.tier_floor. Sanity tests: 52 cases (4 happy-path + 48 negative) wired into batch_validate.py; full suite 20/20 green. Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- CHANGELOG.md | 17 ++ docs/LIFE_BINDING_SPEC.md | 439 +++++++++++++++++++++++++++++++++++ schemas/binding.schema.json | 232 ++++++++++++++++++ tools/batch_validate.py | 1 + tools/test_binding_schema.py | 281 ++++++++++++++++++++++ 5 files changed, 970 insertions(+) create mode 100644 docs/LIFE_BINDING_SPEC.md create mode 100644 schemas/binding.schema.json create mode 100644 tools/test_binding_schema.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0223824..03badfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,8 +39,25 @@ Sub-issues #100–#105. `compute.hosted_api_providers[]`. 
[#101] - `tools/test_genesis_schema.py` — 36 sanity-test cases (4 happy-path + 32 negative) wired into `tools/batch_validate.py`. [#101] +- `docs/LIFE_BINDING_SPEC.md` — per-topic normative spec for Topic 3 + (Runtime Binding). Defines `binding/runtime_binding.json` and + encodes the four locked Topic-3 decisions: hybrid capability + vocabulary (D1=C, ~20 core enum + `x-` extension); + issuer-self-decided engine strictness (D2=C, `strict: true | false`); hybrid + hard-constraints keys with runtime fail-close on unknown keys + (D4=C); AND-gate hosted-API decision (D5=A, issuer half only — + user half is `policy/hosted_api.json` from v0.6). [#103] +- `schemas/binding.schema.json` — JSON Schema for the binding file + format (`dlrs-life-binding/0.1`). `patternProperties` enforce both + the capability-name hybrid vocabulary and the hard-constraints + hybrid keyspace; `additionalProperties: false` makes unknown + non-`x-` keys fail validation statically (decision D4=C fail-close at schema + layer). [#103] +- `tools/test_binding_schema.py` — 52 sanity-test cases (4 happy-path + + 48 negative) wired into `tools/batch_validate.py`. [#103] [#101]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/101 +[#103]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/103 ## v0.7-vision-shift (2026-04-26) diff --git a/docs/LIFE_BINDING_SPEC.md b/docs/LIFE_BINDING_SPEC.md new file mode 100644 index 0000000..b7661dd --- /dev/null +++ b/docs/LIFE_BINDING_SPEC.md @@ -0,0 +1,439 @@ +# `.life` Runtime Binding Specification (v0.8) + +> **Status**: Normative draft, part of the `.life` Asset Architecture +> epic ([#106]). This file is the authoritative spec for how a `.life` +> package tells a runtime **which asset goes to which capability**, +> **which engines may host it**, **what is forbidden**, **what surface +> the user sees**, and **whether hosted APIs may be called**. +> Sub-issue [#103].
+> +> This document is the per-topic normative spec for **Topic 3 +> (Binding)** of the v0.8 architecture discussion. Decisions made +> during that discussion are summarised in +> [`LIFE_ASSET_ARCHITECTURE.md`](LIFE_ASSET_ARCHITECTURE.md) §4. +> When this spec and the architecture overview disagree, **this spec +> wins**. + +[#106]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/106 +[#103]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/103 + +Cross-references: + +- Schema: [`schemas/binding.schema.json`](../schemas/binding.schema.json) +- Sanity tests: [`tools/test_binding_schema.py`](../tools/test_binding_schema.py) +- Architecture overview: [`docs/LIFE_ASSET_ARCHITECTURE.md`](LIFE_ASSET_ARCHITECTURE.md) +- Genesis spec (asset provenance): [`docs/LIFE_GENESIS_SPEC.md`](LIFE_GENESIS_SPEC.md) +- Lifecycle spec (asset evolution): [`docs/LIFE_LIFECYCLE_SPEC.md`](LIFE_LIFECYCLE_SPEC.md) +- File-format spec: [`docs/LIFE_FILE_STANDARD.md`](LIFE_FILE_STANDARD.md) +- Runtime protocol: [`docs/LIFE_RUNTIME_STANDARD.md`](LIFE_RUNTIME_STANDARD.md) + +--- + +## 1. Purpose + +A `.life` package ships **assets** (Genesis, Topic 1) and tracks how +they **evolve** (Lifecycle, Topic 2). Binding is what tells a runtime +**how to actually use them**: which voice clone backs the +`voice_synthesis` capability, which embeddings store backs +`memory_recall`, which LLM is preferred for orchestration, what +prohibitions the issuer wants enforced, and what user-facing modes +are supported. + +Without a binding, a runtime would have to guess. Two compliant +runtimes loading the same `.life` would disagree about which engine +to use, and an issuer's prohibition (e.g. "no political advocacy") +would silently drift between implementations. Binding is the spec +that makes runtimes interchangeable. 
+ +### Non-goals + +- **Provider implementation.** The binding declares engine names and + version ranges; how a particular runtime resolves those to + installed providers is part of the runtime / assembly spec + (Topic 4 / [#105]). +- **User-side hosted-API consent.** The binding's + `hosted_api_preference` is the **issuer-side** half of the AND-gate + (decision D5=A); the **user-side** half lives in + `policy/hosted_api.json` (already shipped in v0.6). +- **Tier evaluation.** Binding may reference tier (`tier_floor`) but + the tier system itself is defined by the per-topic spec landed via + sub-issue [#104]. + +[#105]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/105 +[#104]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/104 + +--- + +## 2. Conformance language + +The keywords **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, +**SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, +and **OPTIONAL** are interpreted per RFC 2119. + +This spec applies to: + +- **Producers** (issuers, build tooling). MUST emit one + `binding/runtime_binding.json` per package, validating against + [`schemas/binding.schema.json`](../schemas/binding.schema.json). +- **Loaders / runtimes**. MUST evaluate the binding before any + user-facing interaction. MUST refuse the load on any of: + - unknown `schema_version`, + - `minimum_runtime_version` exceeds the runtime's own version, + - `surface.minimum_required` cannot be rendered, + - any `hard_constraints` key is unknown to the runtime + (fail-close, decision D4=C), + - any capability points at an unknown `asset_id`, + - no `engine_compatibility` entry resolves to an installed + provider AND no `fallback_capability` chain exists, + - any `engine_compatibility` entry has `engine_kind: + bundled_in_life` (forbidden in v0.8 per Topic 4 D2=B). + +--- + +## 3. 
Document layout + +The binding is a single file at the canonical path: + +``` +binding/runtime_binding.json +``` + +It MUST exist for every v0.8-compliant package. The file is JSON, +UTF-8, no BOM. The file MUST validate against +[`schemas/binding.schema.json`](../schemas/binding.schema.json) at +schema version `dlrs-life-binding/0.1`. + +--- + +## 4. Top-level fields + +| Field | Type | Required | Notes | +|---|---|---|---| +| `schema_version` | const | yes | `dlrs-life-binding/0.1`. | +| `binding_version` | semver | yes | Issuer-controlled binding revision. | +| `minimum_runtime_version` | semver-ish | yes | Lowest `dlrs-runtime` that suffices. | +| `capabilities` | object | yes | At least one capability binding (§5). | +| `orchestration` | object | no | LLM orchestration shape (§6). | +| `hard_constraints` | object | yes | Issuer prohibitions (§7); MAY be empty. | +| `surface` | object | yes | User-facing modes (§8). | +| `hosted_api_preference` | object | no | Issuer half of the AND-gate (§9). | + +The schema enforces `additionalProperties: false` at the top level — +no unknown root keys allowed. Lower levels follow the same pattern +unless explicitly noted. + +--- + +## 5. Capabilities (decision **D1 = C**, hybrid vocabulary) + +`capabilities` is a map from **capability name** to capability +binding. The schema enforces a **hybrid vocabulary**: + +- **Core enum (~20 names)**: `voice_synthesis`, `voice_recognition`, + `memory_recall`, `persona`, `knowledge_qa`, `image_recall`, + `video_recall`, `chat`, `agent_tool_use`, `planning`, + `emotion_synthesis`, `prosody_control`, `memorial_voice`, + `text_persona`, `image_avatar`, `video_avatar`, `interactive_chat`, + `moderation`, `disclosure_renderer`, `context_summary`. +- **`x-` prefix extensions**: any name matching `^x-[a-z][a-z0-9_-]*$` (`x-`, then a lowercase ASCII letter, + then any run of lowercase letters, digits, `_`, or `-`) is allowed for ecosystem extensions.
The runtime + MAY refuse `x-` capabilities it does not recognise; well-formed + `x-` capabilities at least pass schema validation. + +Anything that is neither a core name nor `x-`-prefixed MUST be +rejected at schema validation time. This is the policy lever that +keeps the core vocabulary additive (new capabilities ship in spec +versions, not in private packages). + +### 5.1 Capability binding shape + +```json +{ + "asset_id": "voice-master-v1", + "engine_compatibility": [ + { "name": "xtts-v2", "version_range": "^2.0.0", "strict": true, + "engine_kind": "user_installed" } + ], + "params": { "temperature": 0.7 }, + "fallback_capability": "voice_recognition", + "tier_floor": "VII" +} +``` + +| Field | Required | Notes | +|---|---|---| +| `asset_id` | yes | MUST refer to a manifest entry. | +| `engine_compatibility` | yes | Non-empty ordered list of engines (§5.2). | +| `params` | no | Free-form per-capability parameters fed to the chosen engine. | +| `fallback_capability` | no | Pointer to a less-strict capability (only fires when at least one engine has `strict: false`). | +| `tier_floor` | no | Roman numeral I–XII; loader SHOULD warn below this tier (definition lands via #104). | + +### 5.2 Engine entry (decision **D2 = C**, issuer-self-decided strictness) + +Each `engine_compatibility` entry names one engine that can host the +capability and declares how strict the runtime should be when +matching versions: + +- `strict: true` (**default**) — loader MUST honour `version_range` + exactly. +- `strict: false` — loader MAY accept compatible interfaces outside + the range; the issuer accepts the looser match. + +The `engine_kind` field lets the binding hint at sandboxing class +(decision Topic 4 D1=C): + +- `built_in` — ships with the runtime. +- `user_installed` — installed via OS package or `lifectl`. +- `bundled_in_life` — vendored inside the `.life` zip. 
**Loaders + MUST refuse `bundled_in_life` engines in v0.8** (decision Topic 4 + D2=B); whitelisted-issuer support is deferred to v1.0+. + +Iteration order of `engine_compatibility` is significant: loaders +MUST try entries in declared order. The first installed engine +matching the entry's name, version_range, and strictness wins. + +--- + +## 6. Orchestration (LLM shape) + +Optional but RECOMMENDED for any package that exposes `chat` or +`interactive_chat`. Loaders MAY substitute another LLM that +satisfies `minimum_llm_capabilities` if the named one is unavailable +(see §5.2 for the strict/loose pattern). + +```json +"orchestration": { + "default_llm": { "name": "llama3", "version_range": "^3.0" }, + "minimum_llm_capabilities": ["chat", "function_calling"], + "context_strategy": "rolling_window", + "max_context_tokens": 8000 +} +``` + +`max_context_tokens` is an **issuer ceiling**. Loaders MUST NOT +exceed it even if the underlying LLM permits more. + +--- + +## 7. Hard constraints (decision **D4 = C**, hybrid keyspace + fail-close) + +`hard_constraints` is a map of issuer prohibitions. The schema +enforces **hybrid keyspace**: ~30 fixed core keys (see schema's +`patternProperties` regex) plus `x-` extensions for ecosystem custom +keys. **Anything else is rejected at schema validation time** — this +is what implements decision D4=C ("runtime MUST fail-close on any +unrecognised constraint key"). The loader's runtime check is a +defence in depth: the schema rejects packages that try to ship +unknown non-`x-` keys to begin with. + +The 31 core keys cover six categories: + +- **Content**: `no_image_generation`, `no_video_generation`, + `no_voice_clone_for_third_party`, `no_political_advocacy`, + `no_religious_advocacy`, `no_unattributed_quotes`. +- **Domain**: `no_medical_advice`, `no_legal_advice`, + `no_financial_advice`, `no_explicit_sexual`, + `no_self_harm_methods`. +- **Subject**: `no_minors_likeness`, `no_deceased_likeness_outside_memorial`.
+- **Quotas**: `max_memory_horizon_days`, `max_concurrent_sessions`, + `max_session_duration_minutes`, `max_messages_per_session`, + `max_tokens_per_response`. +- **Geo / network**: `geo_restrictions`, `disallow_offline`, + `disallow_cloud`, `disallow_screen_recording`, `disallow_export`, + `disallow_copy`. +- **Workflow**: `require_disclosure_prefix`, `require_watermark`, + `require_user_age_attestation`, `require_human_in_the_loop`, + `require_audit_emit`, `require_consent_recheck_every_minutes`, + `forbidden_third_party_use`. + +Values are intentionally **left untyped** by the schema beyond +key-name validation; semantic typing per key is the runtime's job and +is out of scope for v0.8. (A future `dlrs-life-binding/0.2` MAY pin +per-key value types.) + +`x-` keys MAY take any value. Loaders that don't understand a +specific `x-` key MUST fail-close (refuse the load) — same +default-deny stance as core keys. + +--- + +## 8. Surface (decision Topic 4 **D4 = C**, three-field shape) + +`surface` declares user-facing modes the package supports. Three +required fields: + +- `supported[]` — every mode the runtime MAY pick. +- `preferred` — issuer's recommended default. Loader SHOULD honour + when its capabilities allow. +- `minimum_required` — floor mode. Loaders unable to render this + mode MUST refuse to bind. + +Mode enum (low → high in capability requirements): + +``` +text_only < chat < voice_chat < avatar_2d < avatar_3d < vr +``` + +The schema does not enforce ordering between `preferred` / +`minimum_required` / `supported`; the runtime applies these checks +at bind time. (Encoding three-way ordering in JSON Schema is +expressible but obscures the intent; loaders MUST validate.) 
+ +```json +"surface": { + "supported": ["chat", "voice_chat", "avatar_2d"], + "preferred": "voice_chat", + "minimum_required": "chat", + "ui_hints": { + "disclosure_label": "I am an AI digital life of Alice.", + "color_scheme": "auto" + } +} +``` + +`ui_hints.disclosure_label` MUST be surfaced verbatim by loaders +when present — this is the legal disclosure copy. Other ui_hints are +advisory. + +--- + +## 9. Hosted API preference (decision **D5 = A**, AND-gate, issuer half) + +`hosted_api_preference` is the **issuer-side** half of the AND-gate +that determines whether a hosted API may be called. The user-side +half lives in `policy/hosted_api.json` (v0.6). + +``` +ALLOW HOSTED CALL ⇔ binding.hosted_api_preference.allowed + AND policy/hosted_api.json permits this provider/capability +``` + +Both halves MUST allow for the hosted call to fire. Either rejecting +is sufficient. + +Fields: + +| Field | Required | Notes | +|---|---|---| +| `allowed` | yes | Master issuer-side switch. False forbids ANY hosted call regardless of user policy. | +| `preferred_for` | no | Capabilities for which the issuer recommends going hosted when permitted. | +| `must_be_local_for` | no | Capabilities for which the issuer FORBIDS hosted calls even if user policy allows. Loaders MUST honour. | +| `providers_whitelist_ref` | no | Path inside `.life` (typically `policy/hosted_api.json`) declaring acceptable providers. Loaders MUST refuse providers outside the whitelist. | + +`hosted_api_preference` MAY be omitted from the binding entirely; in +that case loaders MUST treat it as if `allowed: false` (default-deny: +no hosted calls without an explicit issuer green light). + +--- + +## 10. Decisions encoded in this spec + +| # | Decision | Schema realisation | +|---|---|---| +| **D1=C** | Hybrid capability vocabulary | `capabilities.patternProperties` core enum + `x-` extension regex; everything else rejected. 
| +| **D2=C** | Issuer-self-decided engine strictness | `engine_entry.strict` boolean (default `true`). | +| **D3 → tier system** | Replaced by tier (#104) | `capability_binding.tier_floor` references it; full definition lives in [#104]. | +| **D4=C** | Hybrid hard_constraints keys + fail-close | `hard_constraints.patternProperties` ~30 core keys + `x-` regex; `additionalProperties: false`. | +| **D5=A** | AND-gate hosted-API decision | `hosted_api_preference` (issuer half) + `policy/hosted_api.json` (user half); spec defaults to `allowed: false` when omitted. | +| Topic 4 **D4=C** | Three-field surface shape | `supported` / `preferred` / `minimum_required`. | + +--- + +## 11. Sanity tests + +`tools/test_binding_schema.py` ships **52 cases** (4 happy-path + 48 +negative) covering every required field, every conditional, every +hybrid-vocabulary boundary, and every `additionalProperties: false` +boundary. Run via: + +```bash +python tools/test_binding_schema.py +# or as part of the full suite: +python tools/batch_validate.py +``` + +--- + +## 12. What is left out (deferred) + +- **Per-key value typing** for `hard_constraints` (e.g. type + `geo_restrictions` as `string[]`) — deferred to a future schema + version. +- **Provider resolution algorithm** (how a runtime maps engine name + + version range + strict to an installed provider) — Runtime spec + ([#105]). +- **Sandbox enforcement details** for `engine_kind` — Runtime spec + ([#105]). +- **Multi-binding support** (a single `.life` shipping multiple + bindings for different runtime profiles) — out of scope for v0.8. 
+ +--- + +## Appendix A: Worked example + +A minimal `.life` binding with one voice-clone capability and one chat capability: + +```json +{ + "schema_version": "dlrs-life-binding/0.1", + "binding_version": "0.1.0", + "minimum_runtime_version": "0.1", + "capabilities": { + "voice_synthesis": { + "asset_id": "voice-master-v1", + "engine_compatibility": [ + { "name": "xtts-v2", "version_range": "^2.0.0", "strict": true, + "engine_kind": "user_installed" } + ], + "params": { "temperature": 0.7 } + }, + "chat": { + "asset_id": "persona-v1", + "engine_compatibility": [ + { "name": "ollama", "version_range": "^0.5", "strict": false } + ] + } + }, + "hard_constraints": { + "no_voice_clone_for_third_party": true, + "no_image_generation": true, + "max_concurrent_sessions": 1, + "require_disclosure_prefix": true + }, + "surface": { + "supported": ["chat", "voice_chat"], + "preferred": "voice_chat", + "minimum_required": "chat", + "ui_hints": { + "disclosure_label": "I am an AI digital life of Alice." + } + }, + "hosted_api_preference": { + "allowed": false + } +} +``` + +Loader behaviour, given this binding: + +1. Validates the JSON against the schema. (Pass.) +2. Checks `minimum_runtime_version` ≤ self. (Pass.) +3. Reads `surface.minimum_required` = `chat`; runtime supports + `chat`. (Pass.) +4. Iterates `hard_constraints`: every key is known. (Pass — fail-close + would have triggered on any unknown key.) +5. Resolves `voice_synthesis` → `voice-master-v1` → tries `xtts-v2 ^2.0.0` + (strict). If installed, binds. Else fails (no fallback declared). +6. Resolves `chat` → `persona-v1` → tries `ollama ^0.5` (loose). If + installed at any version, attempts to bind via the loose interface. +7. `hosted_api_preference.allowed: false` → all hosted calls + rejected regardless of user policy. +8. `surface.preferred = voice_chat`; runtime renders voice chat with + the disclosure label prefixed to every utterance.
+ +--- + +[#106]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/106 +[#103]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/103 +[#104]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/104 +[#105]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/105 diff --git a/schemas/binding.schema.json b/schemas/binding.schema.json new file mode 100644 index 0000000..5950eb0 --- /dev/null +++ b/schemas/binding.schema.json @@ -0,0 +1,232 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://dlrs.standard/schemas/binding.schema.json", + "title": "DLRS .life Runtime Binding", + "description": "Runtime binding for a `.life` package (v0.8 asset architecture epic, sub-issue #103). Maps each declared capability to the asset that fulfils it, the engines that may host it, and the params runtimes feed in. Also carries hard constraints (machine-checkable issuer prohibitions), surface (what user-facing modes the package supports), orchestration (LLM and context shape), and hosted-API preference (one half of the AND-gate that gives the actual decision; the other half is the user's `policy/hosted_api.json`). Lives at `binding/runtime_binding.json` inside the `.life` zip. See docs/LIFE_BINDING_SPEC.md for the normative text.", + "type": "object", + "required": [ + "schema_version", + "binding_version", + "minimum_runtime_version", + "capabilities", + "hard_constraints", + "surface" + ], + "properties": { + "schema_version": { + "type": "string", + "description": "Version of this schema. Bumped on any breaking change. Loaders MUST refuse binding documents with an unknown schema_version.", + "const": "dlrs-life-binding/0.1" + }, + "binding_version": { + "type": "string", + "description": "Issuer-controlled binding revision. SemVer; bumped when the issuer changes any field in this document. 
Lets loaders cache provider resolution by `(package_sha256, binding_version)`.", + "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+(?:-[A-Za-z0-9.-]+)?$" + }, + "minimum_runtime_version": { + "type": "string", + "description": "Lowest `dlrs-runtime` SemVer that the issuer asserts is sufficient to honour every field in this binding. Loaders running an older runtime MUST refuse to load.", + "pattern": "^[0-9]+\\.[0-9]+(?:\\.[0-9]+)?$" + }, + "capabilities": { + "type": "object", + "description": "Map from capability name to its binding. Capability names are drawn from the hybrid vocabulary (decision D1=C): ~20 fixed core enum members plus `x-`-prefixed extensions. Unknown names without the `x-` prefix MUST be rejected (the schema enforces this via `patternProperties`).", + "patternProperties": { + "^(voice_synthesis|voice_recognition|memory_recall|persona|knowledge_qa|image_recall|video_recall|chat|agent_tool_use|planning|emotion_synthesis|prosody_control|memorial_voice|text_persona|image_avatar|video_avatar|interactive_chat|moderation|disclosure_renderer|context_summary)$": { "$ref": "#/$defs/capability_binding" }, + "^x-[a-z][a-z0-9_-]*$": { "$ref": "#/$defs/capability_binding" } + }, + "additionalProperties": false, + "minProperties": 1 + }, + "orchestration": { + "$ref": "#/$defs/orchestration" + }, + "hard_constraints": { + "$ref": "#/$defs/hard_constraints" + }, + "surface": { + "$ref": "#/$defs/surface" + }, + "hosted_api_preference": { + "$ref": "#/$defs/hosted_api_preference" + } + }, + "additionalProperties": false, + "$defs": { + "capability_binding": { + "type": "object", + "description": "Binds a single capability to the asset that fulfils it plus the engines that may host that asset. 
Decision D2=C: each engine entry decides its own version-match strictness via `strict: true | false`.", + "required": ["asset_id", "engine_compatibility"], + "properties": { + "asset_id": { + "type": "string", + "description": "MUST refer to an asset declared in `manifest/manifest.json` and (post-v0.8) MUST have a sibling `genesis/.genesis.json`. Loaders MUST refuse a binding pointing at an unknown asset_id.", + "pattern": "^[a-z][a-z0-9_-]{2,127}$" + }, + "engine_compatibility": { + "type": "array", + "description": "Ordered list of engines the issuer asserts can host this asset. Loaders MUST iterate in declared order; the first installed engine matching the entry's version range and strictness wins. Empty arrays are forbidden — every capability MUST list at least one engine.", + "items": { "$ref": "#/$defs/engine_entry" }, + "minItems": 1 + }, + "params": { + "type": "object", + "description": "Free-form per-capability parameters fed to the chosen engine (e.g. voice synthesis temperature, memory recall top-k). Schemas for individual capabilities live in their respective specs; this binding only declares the values.", + "additionalProperties": true + }, + "fallback_capability": { + "type": "string", + "description": "Optional pointer to a less-strict capability the loader MAY use if no engine satisfies any `engine_compatibility` entry. The fallback target MUST itself be declared in `capabilities`. Decision D2 (loose-mode opt-in): only fires when at least one entry has `strict: false`." + }, + "tier_floor": { + "type": "string", + "description": "Optional minimum tier (Schema D Cosmic Evolution roman numeral, e.g. `VII`) below which the loader SHOULD warn before binding. Tier definitions land via #104.", + "pattern": "^(I|II|III|IV|V|VI|VII|VIII|IX|X|XI|XII)$" + } + }, + "additionalProperties": false + }, + "engine_entry": { + "type": "object", + "description": "A single engine candidate. Decision D2=C: `strict` is issuer-self-decided. 
`strict: true` (default) means the loader MUST honour the SemVer range exactly. `strict: false` opts into looser matching: the loader MAY accept any engine that exposes the same capability interface, even at a different SemVer range.", + "required": ["name", "version_range"], + "properties": { + "name": { + "type": "string", + "description": "Engine identifier. Free-form but stable (the same engine MUST always use the same name). Examples: `xtts-v2`, `tortoise-tts`, `whisper`, `qdrant-local`, `ollama-llama3`.", + "minLength": 1 + }, + "version_range": { + "type": "string", + "description": "SemVer range expression. Examples: `^2.0.0`, `>=1.5 <2`, `=1.2.3`. Loaders MUST parse with a SemVer-compliant matcher.", + "minLength": 1 + }, + "strict": { + "type": "boolean", + "description": "When true (default), the loader MUST honour `version_range` exactly. When false, the loader MAY accept compatible interfaces outside the range; the issuer accepts the looser match.", + "default": true + }, + "engine_kind": { + "type": "string", + "description": "Optional classification used by sandboxing decisions (#105 / Topic 4 D1). `built_in` = ships with the runtime; `user_installed` = installed by the user (OS package, lifectl); `bundled_in_life` = vendored inside the `.life` zip. Loaders MUST refuse `bundled_in_life` engines in v0.8 (decision Topic 4 D2=B); whitelisted issuer support is deferred to v1.0+.", + "enum": ["built_in", "user_installed", "bundled_in_life"] + } + }, + "additionalProperties": false + }, + "orchestration": { + "type": "object", + "description": "How the runtime orchestrates the underlying LLM and context window. Optional but RECOMMENDED for any package with `chat` or `interactive_chat` capability.", + "properties": { + "default_llm": { + "type": "object", + "description": "Issuer's preferred LLM for orchestration. 
Loaders MAY substitute another LLM that satisfies `minimum_llm_capabilities` if the named one is unavailable.", + "required": ["name"], + "properties": { + "name": { "type": "string", "minLength": 1 }, + "version_range": { "type": "string", "minLength": 1 } + }, + "additionalProperties": false + }, + "minimum_llm_capabilities": { + "type": "array", + "description": "Capabilities any substitute LLM MUST expose. Loaders MUST refuse to bind an LLM lacking any of these.", + "items": { + "type": "string", + "enum": ["chat", "function_calling", "structured_output", "tool_use", "vision_input", "audio_input", "long_context"] + }, + "uniqueItems": true, + "minItems": 1 + }, + "context_strategy": { + "type": "string", + "description": "How conversation history is shaped. `rolling_window` keeps the most recent N tokens. `summarised` collapses older history into a recursive summary. `hybrid` keeps a rolling window plus a top-K memory recall.", + "enum": ["rolling_window", "summarised", "hybrid"] + }, + "max_context_tokens": { + "type": "integer", + "description": "Issuer's hard ceiling on total context tokens per turn. Loaders MUST NOT exceed it even if the underlying LLM permits more.", + "minimum": 256 + } + }, + "additionalProperties": false + }, + "hard_constraints": { + "type": "object", + "description": "Machine-checkable prohibitions. Decision D4=C: hybrid keyspace — known core keys (~30) plus `x-`-prefixed custom keys. Loaders MUST fail-close on any unrecognised constraint key (never silently ignore an issuer's prohibition); the schema rejects unknown non-`x-` keys statically. 
The structure-level rule is encoded via `patternProperties` + `additionalProperties: false`.", + "patternProperties": { + "^(no_image_generation|no_video_generation|no_voice_clone_for_third_party|no_political_advocacy|no_religious_advocacy|no_unattributed_quotes|no_medical_advice|no_legal_advice|no_financial_advice|no_explicit_sexual|no_self_harm_methods|no_minors_likeness|no_deceased_likeness_outside_memorial|max_memory_horizon_days|max_concurrent_sessions|max_session_duration_minutes|max_messages_per_session|max_tokens_per_response|geo_restrictions|disallow_offline|disallow_cloud|disallow_screen_recording|disallow_export|disallow_copy|require_disclosure_prefix|require_watermark|require_user_age_attestation|require_human_in_the_loop|require_audit_emit|require_consent_recheck_every_minutes|forbidden_third_party_use)$": {}, + "^x-[a-z][a-z0-9_-]*$": {} + }, + "additionalProperties": false + }, + "surface": { + "type": "object", + "description": "User-facing modes the issuer asserts the package supports. Three fields per decision Topic 4 D4=C: `supported[]` lists every mode the runtime MAY pick, `preferred` recommends the default, `minimum_required` is the floor below which the loader MUST refuse to bind.", + "required": ["supported", "preferred", "minimum_required"], + "properties": { + "supported": { + "type": "array", + "description": "Every surface mode the issuer asserts is renderable. The runtime MUST pick from this set.", + "items": { "$ref": "#/$defs/surface_mode" }, + "minItems": 1, + "uniqueItems": true + }, + "preferred": { + "$ref": "#/$defs/surface_mode", + "description": "Issuer's preferred mode. The loader SHOULD honour it when its capabilities allow; otherwise the loader MAY downgrade to any other mode in `supported` that is at or above `minimum_required`." + }, + "minimum_required": { + "$ref": "#/$defs/surface_mode", + "description": "Floor mode. Loaders unable to render this mode MUST refuse to bind." 
+ }, + "ui_hints": { + "type": "object", + "description": "Optional rendering hints. Free-form, but loaders MUST surface `disclosure_label` verbatim if present (legal disclosure copy).", + "properties": { + "disclosure_label": { "type": "string", "minLength": 1, "maxLength": 1024 }, + "color_scheme": { "type": "string", "enum": ["light", "dark", "auto"] }, + "avatar_image_ref": { "type": "string", "minLength": 1 }, + "background_audio_ref": { "type": "string", "minLength": 1 } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "surface_mode": { + "type": "string", + "enum": ["text_only", "chat", "voice_chat", "avatar_2d", "avatar_3d", "vr"], + "description": "Ordered low → high in capability requirements: text_only < chat < voice_chat < avatar_2d < avatar_3d < vr. The schema does not enforce ordering between `preferred` / `minimum_required` (the runtime applies it at bind time)." + }, + "hosted_api_preference": { + "type": "object", + "description": "Issuer half of the AND-gate (decision D5=A). Actual hosted-API decision = `binding.hosted_api_preference.allowed` AND `policy/hosted_api.json` (loader-side opt-in). Both halves MUST allow for the hosted call to fire.", + "required": ["allowed"], + "properties": { + "allowed": { + "type": "boolean", + "description": "Issuer-side switch. False forbids ANY hosted-API call regardless of the loader's policy. True permits hosted calls, subject to the user's policy." + }, + "preferred_for": { + "type": "array", + "description": "Capability names for which the issuer recommends going to a hosted API when permitted (e.g. high-quality voice synthesis). Loaders MAY respect or ignore.", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "must_be_local_for": { + "type": "array", + "description": "Capability names for which the issuer FORBIDS hosted calls even if the loader's policy allows them. 
Loaders MUST honour this list.", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "providers_whitelist_ref": { + "type": "string", + "description": "Optional path inside `.life` (typically `policy/hosted_api.json`) referencing the issuer's whitelist of acceptable hosted providers. Loaders MUST refuse providers outside the whitelist when present.", + "minLength": 1 + } + }, + "additionalProperties": false + } + } +} diff --git a/tools/batch_validate.py b/tools/batch_validate.py index e7a57c6..dd940e9 100644 --- a/tools/batch_validate.py +++ b/tools/batch_validate.py @@ -38,6 +38,7 @@ ("test_entity_graph_schema", [sys.executable, str(TOOLS / "test_entity_graph_schema.py")]), ("test_life_package_schema", [sys.executable, str(TOOLS / "test_life_package_schema.py")]), ("test_genesis_schema", [sys.executable, str(TOOLS / "test_genesis_schema.py")]), + ("test_binding_schema", [sys.executable, str(TOOLS / "test_binding_schema.py")]), # The 'pipelines' step calls tools/test_pipelines.py, which itself # dispatches every per-pipeline test plus the v0.6 cross-cutting # tests transitively. The cross-cutting tests are also listed diff --git a/tools/test_binding_schema.py b/tools/test_binding_schema.py new file mode 100644 index 0000000..faebf4f --- /dev/null +++ b/tools/test_binding_schema.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +"""Sanity tests for ``schemas/binding.schema.json`` (v0.8 sub-issue #103). + +Exercises the v0.8 `dlrs-life-binding/0.1` shape: + +* every required top-level field +* hybrid capability-name vocabulary (core enum + ``x-`` extensions) +* engine_compatibility ``strict`` self-decision (decision D2=C) +* hard_constraints fail-close on unknown non-``x-`` keys (decision D4=C) +* surface three-field shape (decision Topic 4 D4=C) +* hosted_api_preference structure (decision D5=A AND-gate, issuer half) +* ``additionalProperties: false`` at every closed level + +Each case is a tuple ``(name, doc, expect_valid)``. 
The driver counts +mismatches and exits non-zero on any failure. +""" +from __future__ import annotations + +import json +import sys +from copy import deepcopy +from pathlib import Path + +import jsonschema + +ROOT = Path(__file__).resolve().parent.parent +SCHEMA_PATH = ROOT / "schemas" / "binding.schema.json" + + +def _good_capability() -> dict: + return { + "asset_id": "voice-master-v1", + "engine_compatibility": [ + {"name": "xtts-v2", "version_range": "^2.0.0", "strict": True, "engine_kind": "user_installed"} + ], + "params": {"temperature": 0.7}, + } + + +def _good_binding() -> dict: + return { + "schema_version": "dlrs-life-binding/0.1", + "binding_version": "0.1.0", + "minimum_runtime_version": "0.1", + "capabilities": { + "voice_synthesis": _good_capability(), + "memory_recall": { + "asset_id": "memory-atoms-v1", + "engine_compatibility": [ + {"name": "qdrant-local", "version_range": ">=1.5 <2"} + ], + }, + "x-custom_persona": { + "asset_id": "persona-v1", + "engine_compatibility": [ + {"name": "ollama", "version_range": "^0.5"} + ], + }, + }, + "orchestration": { + "default_llm": {"name": "llama3", "version_range": "^3.0"}, + "minimum_llm_capabilities": ["chat", "function_calling"], + "context_strategy": "rolling_window", + "max_context_tokens": 8000, + }, + "hard_constraints": { + "no_image_generation": True, + "no_voice_clone_for_third_party": True, + "max_memory_horizon_days": 365, + "geo_restrictions": ["CN", "EU"], + "x-tenant_only": True, + }, + "surface": { + "supported": ["chat", "voice_chat", "avatar_2d"], + "preferred": "voice_chat", + "minimum_required": "chat", + "ui_hints": { + "disclosure_label": "I am an AI digital life of Alice.", + "color_scheme": "auto", + }, + }, + "hosted_api_preference": { + "allowed": True, + "preferred_for": ["voice_synthesis"], + "must_be_local_for": ["memory_recall"], + "providers_whitelist_ref": "policy/hosted_api.json", + }, + } + + +def main() -> int: + schema = 
json.loads(SCHEMA_PATH.read_text(encoding="utf-8")) + validator = jsonschema.Draft202012Validator(schema) + + cases: list[tuple[str, dict, bool]] = [] + + # ----- happy paths ----- + cases.append(("good binding (full)", _good_binding(), True)) + + # minimal binding: only required fields + minimal = { + "schema_version": "dlrs-life-binding/0.1", + "binding_version": "0.1.0", + "minimum_runtime_version": "0.1", + "capabilities": {"chat": _good_capability()}, + "hard_constraints": {}, + "surface": {"supported": ["chat"], "preferred": "chat", "minimum_required": "chat"}, + } + cases.append(("minimal binding", minimal, True)) + + # x- capability only (legal extension namespace) + g = _good_binding(); g["capabilities"] = {"x-custom_only": _good_capability()} + cases.append(("x-only capability set", g, True)) + + # engine entry without strict (default true) is fine + g = _good_binding() + g["capabilities"]["voice_synthesis"]["engine_compatibility"][0].pop("strict") + cases.append(("engine without explicit strict (default true)", g, True)) + + # ----- top-level required missing ----- + for missing in ("schema_version", "binding_version", "minimum_runtime_version", "capabilities", "hard_constraints", "surface"): + g = _good_binding(); g.pop(missing) + cases.append((f"missing top-level {missing}", g, False)) + + # wrong schema_version + g = _good_binding(); g["schema_version"] = "dlrs-life-binding/0.2" + cases.append(("schema_version wrong", g, False)) + + # binding_version not semver + g = _good_binding(); g["binding_version"] = "v1" + cases.append(("binding_version not semver", g, False)) + + # minimum_runtime_version bad shape + g = _good_binding(); g["minimum_runtime_version"] = "latest" + cases.append(("minimum_runtime_version not semver", g, False)) + + # ----- capabilities ----- + g = _good_binding(); g["capabilities"] = {} + cases.append(("capabilities empty", g, False)) + + # unknown core capability without x- prefix -> reject (D1=C hybrid) + g = _good_binding(); 
g["capabilities"]["mind_reading"] = _good_capability() + cases.append(("unknown non-x capability rejected", g, False)) + + # x- prefix with disallowed chars + g = _good_binding(); g["capabilities"]["x-Bad_Name"] = _good_capability() + cases.append(("x- capability uppercase rejected", g, False)) + + # capability missing asset_id + g = _good_binding(); g["capabilities"]["voice_synthesis"].pop("asset_id") + cases.append(("capability missing asset_id", g, False)) + + # asset_id pattern violation + g = _good_binding(); g["capabilities"]["voice_synthesis"]["asset_id"] = "Voice" + cases.append(("asset_id uppercase rejected", g, False)) + + # capability missing engine_compatibility + g = _good_binding(); g["capabilities"]["voice_synthesis"].pop("engine_compatibility") + cases.append(("capability missing engine_compatibility", g, False)) + + # engine_compatibility empty -> reject + g = _good_binding(); g["capabilities"]["voice_synthesis"]["engine_compatibility"] = [] + cases.append(("engine_compatibility empty", g, False)) + + # engine entry missing required + g = _good_binding(); g["capabilities"]["voice_synthesis"]["engine_compatibility"][0].pop("name") + cases.append(("engine entry missing name", g, False)) + g = _good_binding(); g["capabilities"]["voice_synthesis"]["engine_compatibility"][0].pop("version_range") + cases.append(("engine entry missing version_range", g, False)) + + # engine_kind off-enum + g = _good_binding(); g["capabilities"]["voice_synthesis"]["engine_compatibility"][0]["engine_kind"] = "magic" + cases.append(("engine_kind off-enum", g, False)) + + # capability additionalProperties=false + g = _good_binding(); g["capabilities"]["voice_synthesis"]["random"] = 1 + cases.append(("capability unknown field", g, False)) + + # tier_floor pattern + g = _good_binding(); g["capabilities"]["voice_synthesis"]["tier_floor"] = "VII" + cases.append(("tier_floor VII (valid roman)", g, True)) + g = _good_binding(); g["capabilities"]["voice_synthesis"]["tier_floor"] = 
"13" + cases.append(("tier_floor not roman", g, False)) + + # ----- orchestration ----- + g = _good_binding(); g["orchestration"]["context_strategy"] = "magic" + cases.append(("orchestration.context_strategy off-enum", g, False)) + + g = _good_binding(); g["orchestration"]["minimum_llm_capabilities"] = [] + cases.append(("orchestration.minimum_llm_capabilities empty", g, False)) + + g = _good_binding(); g["orchestration"]["minimum_llm_capabilities"] = ["chat", "chat"] + cases.append(("orchestration.minimum_llm_capabilities duplicates", g, False)) + + g = _good_binding(); g["orchestration"]["max_context_tokens"] = 100 + cases.append(("orchestration.max_context_tokens below floor", g, False)) + + g = _good_binding(); g["orchestration"]["unknown"] = 1 + cases.append(("orchestration unknown field", g, False)) + + # ----- hard_constraints (decision D4=C fail-close) ----- + g = _good_binding(); g["hard_constraints"]["no_image_generation"] = False + cases.append(("hard_constraints known key with explicit false (allowed)", g, True)) + + g = _good_binding(); g["hard_constraints"]["totally_unknown_key"] = True + cases.append(("hard_constraints unknown non-x key REJECTED (fail-close)", g, False)) + + g = _good_binding(); g["hard_constraints"]["x-anything"] = "any value" + cases.append(("hard_constraints x-prefixed unknown key allowed", g, True)) + + g = _good_binding(); g["hard_constraints"]["x-Bad"] = True + cases.append(("hard_constraints x- with uppercase rejected", g, False)) + + # ----- surface (Topic 4 D4=C three fields) ----- + for field in ("supported", "preferred", "minimum_required"): + g = _good_binding(); g["surface"].pop(field) + cases.append((f"surface missing {field}", g, False)) + + g = _good_binding(); g["surface"]["supported"] = [] + cases.append(("surface.supported empty", g, False)) + + g = _good_binding(); g["surface"]["supported"] = ["chat", "chat"] + cases.append(("surface.supported duplicates", g, False)) + + g = _good_binding(); 
g["surface"]["preferred"] = "telepathy" + cases.append(("surface.preferred off-enum", g, False)) + + g = _good_binding(); g["surface"]["minimum_required"] = "telepathy" + cases.append(("surface.minimum_required off-enum", g, False)) + + g = _good_binding(); g["surface"]["ui_hints"]["color_scheme"] = "rainbow" + cases.append(("surface.ui_hints.color_scheme off-enum", g, False)) + + g = _good_binding(); g["surface"]["ui_hints"]["unknown"] = 1 + cases.append(("surface.ui_hints unknown field", g, False)) + + g = _good_binding(); g["surface"]["unknown"] = 1 + cases.append(("surface unknown field", g, False)) + + # ----- hosted_api_preference (decision D5=A) ----- + g = _good_binding(); g["hosted_api_preference"].pop("allowed") + cases.append(("hosted_api_preference missing allowed", g, False)) + + g = _good_binding(); g["hosted_api_preference"]["preferred_for"] = ["a", "a"] + cases.append(("hosted_api_preference.preferred_for duplicates", g, False)) + + g = _good_binding(); g["hosted_api_preference"]["must_be_local_for"] = [] + cases.append(("hosted_api_preference.must_be_local_for empty array (uniqueItems-only constraint)", g, True)) + + g = _good_binding(); g["hosted_api_preference"]["providers_whitelist_ref"] = "" + cases.append(("hosted_api_preference.providers_whitelist_ref empty", g, False)) + + g = _good_binding(); g["hosted_api_preference"]["unknown"] = 1 + cases.append(("hosted_api_preference unknown field", g, False)) + + # allowed:false binding (omitting preferred_for is fine) + g = _good_binding() + g["hosted_api_preference"] = {"allowed": False, "must_be_local_for": ["voice_synthesis"]} + cases.append(("hosted_api_preference allowed=false (issuer-side ban)", g, True)) + + # ----- top-level additionalProperties ----- + g = _good_binding(); g["unknown_top_level"] = 1 + cases.append(("unknown top-level field", g, False)) + + failures = 0 + for name, doc, expect_valid in cases: + errors = list(validator.iter_errors(doc)) + ok = len(errors) == 0 + if ok != 
expect_valid: + failures += 1 + print(f"FAIL {name} (got valid={ok}, expected valid={expect_valid})") + for err in errors[:3]: + print(f" - {err.message} at {list(err.path)}") + else: + print(f"OK {name}") + print(f"\nrun: {len(cases)} cases, failures: {failures}") + return 1 if failures else 0 + + +if __name__ == "__main__": + sys.exit(main()) From e01df2f2c01f37ed9493869e60be3bddcd2254d1 Mon Sep 17 00:00:00 2001 From: BELLO SHEHU <1739677116@qq.com> Date: Sun, 26 Apr 2026 15:53:53 +0000 Subject: [PATCH 2/3] fix(#103 review): jsonschema import try/except + CHANGELOG breakdown 9/43 Two findings from Devin Review on PR #111: 1. (yellow) tools/test_binding_schema.py imported jsonschema at module level and called jsonschema.Draft202012Validator directly, breaking the convention used by every other test_*_schema.py in the repo (try/except ImportError around the import + clean exit code 2). Restructured to follow the pattern: from jsonschema import Draft202012Validator inside main() with try/except, return 2 on missing dep. Also removed unused 'from copy import deepcopy' import. 2. (yellow) CHANGELOG entry breakdown was '52 sanity-test cases (4 happy-path + 48 negative)' but the test file actually has 9 happy-path + 43 negative (the additional 5 happy paths are tier_floor VII, hard_constraints known-key=false, hard_constraints x-prefixed-unknown allowed, hosted_api_preference.must_be_local_for empty array, hosted_api_preference allowed=false). Total 52 unchanged; breakdown corrected to (9 + 43). Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- CHANGELOG.md | 4 ++-- tools/test_binding_schema.py | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e9ce63..fd3c7e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,8 +76,8 @@ Sub-issues #100–#105. 
hybrid keyspace; `additionalProperties: false` makes unknown non-`x-` keys reject statically (decision D4=C fail-close at schema layer). [#103] -- `tools/test_binding_schema.py` — 52 sanity-test cases (4 happy-path - + 48 negative) wired into `tools/batch_validate.py`. [#103] +- `tools/test_binding_schema.py` — 52 sanity-test cases (9 happy-path + + 43 negative) wired into `tools/batch_validate.py`. [#103] [#101]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/101 [#102]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/102 diff --git a/tools/test_binding_schema.py b/tools/test_binding_schema.py index faebf4f..cf37db7 100644 --- a/tools/test_binding_schema.py +++ b/tools/test_binding_schema.py @@ -18,11 +18,8 @@ import json import sys -from copy import deepcopy from pathlib import Path -import jsonschema - ROOT = Path(__file__).resolve().parent.parent SCHEMA_PATH = ROOT / "schemas" / "binding.schema.json" @@ -89,8 +86,14 @@ def _good_binding() -> dict: def main() -> int: + try: + from jsonschema import Draft202012Validator + except ImportError: + print("ERROR: jsonschema not installed; run: pip install -r tools/requirements.txt") + return 2 + schema = json.loads(SCHEMA_PATH.read_text(encoding="utf-8")) - validator = jsonschema.Draft202012Validator(schema) + validator = Draft202012Validator(schema) cases: list[tuple[str, dict, bool]] = [] From 3e788277b86df87b5c06cf4e3e36efacf912b3a5 Mon Sep 17 00:00:00 2001 From: BELLO SHEHU <1739677116@qq.com> Date: Sun, 26 Apr 2026 16:02:20 +0000 Subject: [PATCH 3/3] address PR #111 review: providers_whitelist_ref path-traversal + spec count - schemas/binding.schema.json: add ^(?!/)(?!.*\.\.).+$ pattern on providers_whitelist_ref to match life-package contents[].path and lifecycle.mutation_log_ref. Defence in depth at schema layer. - tools/test_binding_schema.py: 3 new negatives (absolute path, ../etc traversal, embedded ..). Total 52 -> 55 (9 + 46). 
- docs/LIFE_BINDING_SPEC.md: fix sanity-test count from 4+48 to 9+46 (was 9+43; +3 from this fix). - CHANGELOG: bump to 55 with note on the bump source. Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++-- docs/LIFE_BINDING_SPEC.md | 2 +- registry/index.html | 2 +- schemas/binding.schema.json | 5 +++-- tools/test_binding_schema.py | 12 ++++++++++++ 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ef5ea..58dbc98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -79,8 +79,11 @@ Sub-issues #100–#105. hybrid keyspace; `additionalProperties: false` makes unknown non-`x-` keys reject statically (decision D4=C fail-close at schema layer). [#103] -- `tools/test_binding_schema.py` — 52 sanity-test cases (9 happy-path - + 43 negative) wired into `tools/batch_validate.py`. [#103] +- `tools/test_binding_schema.py` — 55 sanity-test cases (9 happy-path + + 46 negative) wired into `tools/batch_validate.py`. The 55 includes + three new negatives covering `providers_whitelist_ref` path-traversal + rejection (absolute paths and `..` segments), tightening the + schema-layer defence in depth introduced after PR #111 review. [#103] [#101]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/101 [#102]: https://github.com/Digital-Life-Repository-Standard/DLRS/issues/102 diff --git a/docs/LIFE_BINDING_SPEC.md b/docs/LIFE_BINDING_SPEC.md index b7661dd..fd89d41 100644 --- a/docs/LIFE_BINDING_SPEC.md +++ b/docs/LIFE_BINDING_SPEC.md @@ -341,7 +341,7 @@ no hosted calls without an explicit issuer green light). ## 11. Sanity tests -`tools/test_binding_schema.py` ships **52 cases** (4 happy-path + 48 +`tools/test_binding_schema.py` ships **55 cases** (9 happy-path + 46 negative) covering every required field, every conditional, every hybrid-vocabulary boundary, and every `additionalProperties: false` boundary. 
Run via: diff --git a/registry/index.html b/registry/index.html index 935adf3..7cf0e12 100644 --- a/registry/index.html +++ b/registry/index.html @@ -45,7 +45,7 @@

DLRS public registry

diff --git a/schemas/binding.schema.json b/schemas/binding.schema.json index 5950eb0..786b7e9 100644 --- a/schemas/binding.schema.json +++ b/schemas/binding.schema.json @@ -222,8 +222,9 @@ }, "providers_whitelist_ref": { "type": "string", - "description": "Optional path inside `.life` (typically `policy/hosted_api.json`) referencing the issuer's whitelist of acceptable hosted providers. Loaders MUST refuse providers outside the whitelist when present.", - "minLength": 1 + "description": "Optional path inside `.life` (typically `policy/hosted_api.json`) referencing the issuer's whitelist of acceptable hosted providers. Loaders MUST refuse providers outside the whitelist when present. Path is relative to the `.life` root; absolute paths and `..` segments are rejected at the schema layer (cross-schema convention with `life-package.schema.json::contents[].path` and `lifecycle.schema.json::mutation_log_ref`).", + "minLength": 1, + "pattern": "^(?!/)(?!.*\\.\\.).+$" } }, "additionalProperties": false diff --git a/tools/test_binding_schema.py b/tools/test_binding_schema.py index cf37db7..ca4192e 100644 --- a/tools/test_binding_schema.py +++ b/tools/test_binding_schema.py @@ -253,6 +253,18 @@ def main() -> int: g = _good_binding(); g["hosted_api_preference"]["providers_whitelist_ref"] = "" cases.append(("hosted_api_preference.providers_whitelist_ref empty", g, False)) + # Cross-schema convention: path-inside-.life fields reject absolute paths + # and `..` segments at the schema layer (matches life-package.schema.json + # contents[].path and lifecycle.schema.json mutation_log_ref). 
+ g = _good_binding(); g["hosted_api_preference"]["providers_whitelist_ref"] = "/etc/passwd" + cases.append(("hosted_api_preference.providers_whitelist_ref absolute path", g, False)) + + g = _good_binding(); g["hosted_api_preference"]["providers_whitelist_ref"] = "../etc/passwd" + cases.append(("hosted_api_preference.providers_whitelist_ref parent-dir traversal", g, False)) + + g = _good_binding(); g["hosted_api_preference"]["providers_whitelist_ref"] = "policy/../etc/passwd" + cases.append(("hosted_api_preference.providers_whitelist_ref embedded `..` segment", g, False)) + g = _good_binding(); g["hosted_api_preference"]["unknown"] = 1 cases.append(("hosted_api_preference unknown field", g, False))