diff --git a/README.md b/README.md index 0428203b..61f8e976 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ For verification: ```bash [...]$ model_signing verify bert-base-uncased \ - --signature model.sig \ + --signature claims.jsonl \ --trust_config client_trust_config.json --identity "$identity" --identity_provider "$oidc_provider" @@ -160,7 +160,7 @@ All signing methods support changing the signature name and location via the `--signature` flag: ```bash -[...]$ model_signing sign bert-base-uncased --signature model.sig +[...]$ model_signing sign bert-base-uncased --signature claims.jsonl ``` Consult the help for a list of all flags (`model_signing --help`, or directly @@ -171,7 +171,7 @@ model we use ```bash [...]$ model_signing verify bert-base-uncased \ - --signature model.sig \ + --signature claims.jsonl \ --identity "$identity" \ --identity_provider "$oidc_provider" ``` @@ -234,7 +234,7 @@ With a PKCS #11 URI describing the private key, we can use the following for signing: ```bash -[...]$ model_signing sign pkcs11-key --signature model.sig \ +[...]$ model_signing sign pkcs11-key --signature claims.jsonl \ --pkcs11_uri "pkcs11:..." /path/to/your/model ``` @@ -242,7 +242,7 @@ For signature verification it is necessary to retrieve the public key from the PKCS #11 device and store it in a file in PEM format. We can then use: ```bash -[...]$ model_signing verify key --signature model.sig\ +[...]$ model_signing verify key --signature claims.jsonl\ --public_key key.pub /path/to/your/model ``` @@ -333,7 +333,7 @@ The simplest way to generate a signature using Sigstore is: ```python import model_signing -model_signing.signing.sign("bert-base-uncased", "model.sig") +model_signing.signing.sign("bert-base-uncased", "claims.jsonl") ``` This will run the same OIDC flow as when signing with Sigstore from the CLI. 
diff --git a/docs/demo.ipynb b/docs/demo.ipynb index 9167d14f..b9d852ac 100644 --- a/docs/demo.ipynb +++ b/docs/demo.ipynb @@ -541,7 +541,7 @@ "id": "L2zQrDPnBDcu" }, "source": [ - "By default, the signature is in `model.sig`. First, we can look at its size:" + "By default, the signature is in `claims.jsonl`. First, we can look at its size:" ] }, { @@ -559,12 +559,12 @@ "output_type": "stream", "name": "stdout", "text": [ - "-rw-r--r-- 1 root root 11345 Oct 10 18:00 model.sig\n" + "-rw-r--r-- 1 root root 11345 Oct 10 18:00 claims.jsonl\n" ] } ], "source": [ - "!ls -l model.sig" + "!ls -l claims.jsonl" ] }, { @@ -597,7 +597,7 @@ } ], "source": [ - "!model_signing verify bert-base-uncased --signature model.sig --identity \"$identity\" --identity_provider \"$oidc_provider\"" + "!model_signing verify bert-base-uncased --signature claims.jsonl --identity \"$identity\" --identity_provider \"$oidc_provider\"" ] }, { @@ -785,7 +785,7 @@ } ], "source": [ - "!model_signing verify resnet-50 --signature model.sig --identity \"$identity\" --identity_provider \"$oidc_provider\"" + "!model_signing verify resnet-50 --signature claims.jsonl --identity \"$identity\" --identity_provider \"$oidc_provider\"" ] }, { @@ -818,7 +818,7 @@ } ], "source": [ - "!model_signing verify bert-base-uncased --signature model.sig --identity \"FAKE_IDENTITY\" --identity_provider \"$oidc_provider\"" + "!model_signing verify bert-base-uncased --signature claims.jsonl --identity \"FAKE_IDENTITY\" --identity_provider \"$oidc_provider\"" ] }, { @@ -853,7 +853,7 @@ } ], "source": [ - "!model_signing verify bert-base-uncased --signature model.sig --identity \"$identity\" --identity_provider \"FAKE_PROVIDER\"" + "!model_signing verify bert-base-uncased --signature claims.jsonl --identity \"$identity\" --identity_provider \"FAKE_PROVIDER\"" ] }, { diff --git a/docs/model_signing_format.md b/docs/model_signing_format.md index f0da38c0..3ec7959d 100644 --- a/docs/model_signing_format.md +++ 
b/docs/model_signing_format.md @@ -71,7 +71,7 @@ transparency log. Below is an example of the Sigstore bundle showing each of the layers described above. ```bash -$ cat model.sig | jq . +$ cat claims.jsonl | jq . { "mediaType": "application/vnd.dev.sigstore.bundle.v0.3+json", "verificationMaterial": { @@ -127,7 +127,7 @@ $ cat model.sig | jq . } } -$ cat model.sig | jq .dsseEnvelope.payload -r | base64 -d | jq . +$ cat claims.jsonl | jq .dsseEnvelope.payload -r | base64 -d | jq . { "_type": "https://in-toto.io/Statement/v1", "subject": [ diff --git a/src/model_signing/_cli.py b/src/model_signing/_cli.py index 2d90f135..243b021a 100644 --- a/src/model_signing/_cli.py +++ b/src/model_signing/_cli.py @@ -53,8 +53,11 @@ def set_attribute(self, key, value): "--signature", type=pathlib.Path, metavar="SIGNATURE_PATH", - default=pathlib.Path("model.sig"), - help="Location of the signature file to generate. Defaults to `model.sig`.", + default=pathlib.Path("claims.jsonl"), + help=( + "Location of the signature file to generate. " + "Defaults to `claims.jsonl`." 
+ ), ) diff --git a/src/model_signing/_signing/sign_sigstore.py b/src/model_signing/_signing/sign_sigstore.py index 080f98ab..bb336841 100644 --- a/src/model_signing/_signing/sign_sigstore.py +++ b/src/model_signing/_signing/sign_sigstore.py @@ -51,13 +51,24 @@ def __init__(self, bundle: sigstore_models.Bundle): @override def write(self, path: pathlib.Path) -> None: - path.write_text(self.bundle.to_json(), encoding="utf-8") + # Convert to compact JSON (single line) for JSONL format + # by removing newlines from the bundle's JSON output + bundle_json = self.bundle.to_json().replace("\n", "") + + # Append to file if it exists (for accumulating attestations) + # Otherwise create new file + mode = "a" if path.exists() else "w" + with path.open(mode, encoding="utf-8") as f: + f.write(bundle_json + "\n") @classmethod @override def read(cls, path: pathlib.Path) -> Self: content = path.read_text(encoding="utf-8") - return cls(sigstore_models.Bundle.from_json(content)) + # Handle JSONL format: read the last line (most recent attestation) + lines = content.strip().split("\n") + last_line = lines[-1] + return cls(sigstore_models.Bundle.from_json(last_line)) class Signer(signing.Signer): diff --git a/src/model_signing/_signing/sign_sigstore_pb.py b/src/model_signing/_signing/sign_sigstore_pb.py index 5a1fcc0d..282934bf 100644 --- a/src/model_signing/_signing/sign_sigstore_pb.py +++ b/src/model_signing/_signing/sign_sigstore_pb.py @@ -105,13 +105,24 @@ def __init__(self, bundle: bundle_pb.Bundle): @override def write(self, path: pathlib.Path) -> None: - path.write_text(self.bundle.to_json(), encoding="utf-8") + # Convert to compact JSON (single line) for JSONL format + # by removing newlines from the bundle's JSON output + bundle_json = self.bundle.to_json().replace("\n", "") + + # Append to file if it exists (for accumulating attestations) + # Otherwise create new file + mode = "a" if path.exists() else "w" + with path.open(mode, encoding="utf-8") as f: + f.write(bundle_json + 
"\n") @classmethod @override def read(cls, path: pathlib.Path) -> Self: content = path.read_text(encoding="utf-8") - parsed_dict = json.loads(content) + # Handle JSONL format: read the last line (most recent attestation) + lines = content.strip().split("\n") + last_line = lines[-1] + parsed_dict = json.loads(last_line) # adjust parsed_dict due to previous usage of protobufs if "tlogEntries" not in parsed_dict["verificationMaterial"]: diff --git a/src/model_signing/_signing/signing.py b/src/model_signing/_signing/signing.py index 13681ac5..6a774fea 100644 --- a/src/model_signing/_signing/signing.py +++ b/src/model_signing/_signing/signing.py @@ -216,7 +216,7 @@ class Payload: "hash_type": "sha256", "allow_symlinks": true "ignore_paths": [ - "model.sig", + "claims.jsonl", ".git", ".gitattributes", ".github", diff --git a/tests/_signing/sigstore_test.py b/tests/_signing/sigstore_test.py index 68dbd26f..d628a618 100644 --- a/tests/_signing/sigstore_test.py +++ b/tests/_signing/sigstore_test.py @@ -332,3 +332,45 @@ def test_verify_not_intoto_statement( with pytest.raises(ValueError, match="Expected in-toto .* payload"): self._verify_dsse_signature(signature_path) + + def test_append_to_existing_claims_jsonl( + self, sample_model_folder, mocked_sigstore, tmp_path + ): + """Test that signing appends to existing claims.jsonl file. + + This implements the unified bundle layout from issue #587, where + attestations accumulate in a single claims.jsonl file as the model + moves through its lifecycle. 
+ """ + serializer = file.Serializer( + self._file_hasher_factory, allow_symlinks=True + ) + manifest = serializer.serialize(sample_model_folder) + signature_path = tmp_path / "claims.jsonl" + + # First signing - should create the file + self._sign_manifest(manifest, signature_path, sigstore.Signer) + + # Verify file exists and has one line + assert signature_path.exists() + lines = signature_path.read_text(encoding="utf-8").strip().split("\n") + assert len(lines) == 1 + # Verify it's valid JSON + first_bundle = json.loads(lines[0]) + assert "_type" in first_bundle + + # Second signing - should append to the file + self._sign_manifest(manifest, signature_path, sigstore.Signer) + + # Verify file now has two lines + lines = signature_path.read_text(encoding="utf-8").strip().split("\n") + assert len(lines) == 2 + + # Verify both lines are valid JSON bundles + first_bundle = json.loads(lines[0]) + second_bundle = json.loads(lines[1]) + assert "_type" in first_bundle + assert "_type" in second_bundle + + # Both bundles should be independently valid + # (We can't fully verify with mocked sigstore, but structure is valid) diff --git a/tests/api_test.py b/tests/api_test.py index fda2857e..6a9fa90f 100644 --- a/tests/api_test.py +++ b/tests/api_test.py @@ -59,14 +59,20 @@ def populate_tmpdir(tmp_path: Path) -> Path: def get_signed_files(modelsig: Path) -> list[str]: with open(modelsig, "r") as file: - signature = json.load(file) + content = file.read().strip() + # Handle JSONL format: read last line (most recent attestation) + lines = content.split("\n") + signature = json.loads(lines[-1]) payload = json.loads(b64decode(signature["dsseEnvelope"]["payload"])) return [entry["name"] for entry in payload["predicate"]["resources"]] def get_ignore_paths(modelsig: Path) -> list[str]: with open(modelsig, "r") as file: - signature = json.load(file) + content = file.read().strip() + # Handle JSONL format: read last line (most recent attestation) + lines = content.split("\n") + 
signature = json.loads(lines[-1]) payload = json.loads(b64decode(signature["dsseEnvelope"]["payload"])) ignore_paths = payload["predicate"]["serialization"]["ignore_paths"] ignore_paths.sort() @@ -87,7 +93,10 @@ def check_ignore_paths( def get_model_name(modelsig: Path) -> str: with open(modelsig, "r") as file: - signature = json.load(file) + content = file.read().strip() + # Handle JSONL format: read last line (most recent attestation) + lines = content.split("\n") + signature = json.loads(lines[-1]) payload = json.loads(b64decode(signature["dsseEnvelope"]["payload"])) return payload["subject"][0]["name"]