From 94262bdbc92c5f1c6690ee84e98f4abbbd3b3da3 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 25 May 2026 09:12:11 +0500 Subject: [PATCH 01/57] test: #80: refactor e2e test to make it cleaner --- tests/c2pa/e2e_test.py | 74 +++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/tests/c2pa/e2e_test.py b/tests/c2pa/e2e_test.py index 1dfe9d5..b15e8b6 100644 --- a/tests/c2pa/e2e_test.py +++ b/tests/c2pa/e2e_test.py @@ -1,5 +1,4 @@ import json -import os import shutil import subprocess from pathlib import Path @@ -27,36 +26,58 @@ def get_test_file_full_path(filename: str) -> Path: path = FIXTURES_DIR / filename + if not path.exists(): raise FileNotFoundError(f"Fixture not found: {path}") + return path -def copy_test_file(source_path: str, destination_path: Path) -> None: +def copy_test_file( + source_path: str, + destination_path: Path, +) -> None: source_full_path = get_test_file_full_path(source_path) - destination_path.parent.mkdir(parents=True, exist_ok=True) - shutil.copyfile(source_full_path, destination_path) + + destination_path.parent.mkdir( + parents=True, + exist_ok=True, + ) + + shutil.copyfile( + source_full_path, + destination_path, + ) def has_c2patool() -> bool: return shutil.which("c2patool") is not None -def _c2pa_json_report(asset_path: str) -> dict: +def _validate_using_c2patool_and_return_json_report(asset_path: Path) -> dict: """ Return c2patool's JSON report. If parsing fails, raise with stdout/stderr for debugging. """ c2patool_launch_command = ["c2patool", asset_path, "-d"] - cp2atool_result = subprocess.run(c2patool_launch_command, capture_output=True, text=True) - evaluation_result = cp2atool_result + cp2atool_result = subprocess.run( + c2patool_launch_command, + # If set to False (by default), 'stdout' and 'stderr' outputs + # will not be available via '.stderr' and '.stdout', correspondingly. 
+ capture_output=True, + # If set to False (by default), a byte stream will be + # returned instead of a string. + text=True, + ) + if cp2atool_result.returncode == 0: - return json.loads(cp2atool_result.stdout or "{}") + return json.loads(cp2atool_result.stdout) + pytest.fail( "c2patool failed or did not output JSON.\n" - f"args={evaluation_result.args if evaluation_result else None}\n" - f"stdout={evaluation_result.stdout if evaluation_result else None}\n" - f"stderr={evaluation_result.stderr if evaluation_result else None}" + f"args={cp2atool_result.args if cp2atool_result else None}\n" + f"stdout={cp2atool_result.stdout if cp2atool_result else None}\n" + f"stderr={cp2atool_result.stderr if cp2atool_result else None}" ) @@ -64,35 +85,30 @@ def _c2pa_json_report(asset_path: str) -> dict: def test_e2e_signing_with_c2patool_validation(tmp_path): if not has_c2patool(): pytest.skip("c2patool not available") + if not sign_file: pytest.skip("sign_file function not available yet") - os.environ["C2PA_BACKEND"] = "tool" - for content_type in C2PA_ContentTypes: input_file = tmp_path / f"in.{content_type.name}" output_file = tmp_path / f"out.{content_type.name}" for test_file in test_files_by_extension[content_type.name]: - copy_test_file(f"./{test_file}", input_file) + copy_test_file( + test_file, + input_file, + ) - try: - sign_file( - input_path=input_file, - output_path=output_file, - ) - except NotImplementedError: - pytest.xfail("sign_file function not implemented yet") + sign_file( + input_path=input_file, + output_path=output_file, + ) - data = _c2pa_json_report(str(output_file)) - assert "manifests" in data or "manifest" in data + report = _validate_using_c2patool_and_return_json_report(output_file) + assert "manifests" in report - manifests = data.get("manifests") + manifests = report.get("manifests") assert manifests, "no manifests in output" - if isinstance(manifests, dict): - manifests_list = list(manifests.values()) - else: - manifests_list = manifests - + 
manifests_list = list(manifests.values()) assert manifests_list, "empty manifests list after normalization" From fb379a551f324b245d061f261ecab436c4da58cf Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 25 May 2026 10:39:12 +0500 Subject: [PATCH 02/57] test: #80: add a test to verify that specified exclusion value matches size of serialized app11 segments --- c2pie/interface.py | 3 ++ tests/c2pa/interface_test.py | 55 ++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/c2pie/interface.py b/c2pie/interface.py index 58f2b65..a613afd 100644 --- a/c2pie/interface.py +++ b/c2pie/interface.py @@ -82,6 +82,9 @@ def c2pie_GenerateManifestStore( tsa_url: str | None, require_tsa: bool, tsa_log_dir: str | None, + tsa_url: str | None = None, + require_tsa: bool = False, + tsa_log_dir: str | None = None, previous_manifest_boxes: list[Manifest] | None = None, ) -> ManifestStore: """ diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index d06cb70..244bb34 100644 --- a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest.mock import patch from c2pie.c2pa.manifest_store import ManifestStore from c2pie.interface import ( @@ -75,9 +76,6 @@ def test_generate_manifest_returns_manifest_store(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, - tsa_url=None, - require_tsa=False, - tsa_log_dir=None, ) assert isinstance(manifest_store, ManifestStore) @@ -95,9 +93,6 @@ def test_generate_manifest_contains_one_manifest(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, - tsa_url=None, - require_tsa=False, - tsa_log_dir=None, ) assert len(manifest_store.manifests) == 1 @@ -115,9 +110,6 @@ def test_generate_manifest_label_follows_urn_c2pa_format(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, - tsa_url=None, - 
require_tsa=False, - tsa_log_dir=None, ) label = manifest_store.manifests[0].get_manifest_label() @@ -142,9 +134,6 @@ def test_emplace_manifest_returns_bytes_with_jpeg_signature(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, - tsa_url=None, - require_tsa=False, - tsa_log_dir=None, ) result = c2pie_EmplaceManifest( @@ -156,3 +145,45 @@ def test_emplace_manifest_returns_bytes_with_jpeg_signature(): assert isinstance(result, bytes) assert result[:2] == b"\xff\xd8" + + +def test_calculated_exclusion_covers_the_full_app11(): + with open(KEY_FILEPATH, "rb") as f: + key = f.read() + with open(CERT_FILEPATH, "rb") as f: + cert = f.read() + with open("tests/test_files/test_image.jpg", "rb") as f: + jpeg_bytes = f.read() + + assertions = [ + c2pie_GenerateHashDataAssertion( + cai_offset=2, + hashed_data=b"\x00" * 32, + ), + ] + + manifest_store = c2pie_GenerateManifestStore( + assertions=assertions, + private_key=key, + certificate_chain=cert, + file_name=Path("test.jpg").name, + ) + + # 2 bytes - jpeg marker + # 2 bytes - segment lenght + # 2 bytes - CI + # 2 bytes - EN + # 4 bytes - Z + serialized_manifest_store_lenght = len(manifest_store.serialize()) + 2 + 2 + 2 + 2 + 4 + + with patch("c2pie.c2pa.manifest_store.ManifestStore.set_hash_data_length_for_all") as mock_func: + c2pie_EmplaceManifest( + format_type=C2PA_ContentTypes.jpg, + content_bytes=jpeg_bytes, + c2pa_offset=2, + manifest_store=manifest_store, + ) + + last_call = mock_func.call_args + + assert serialized_manifest_store_lenght == last_call.args[0] From 99d5d15d9794c375ad3ed32fe072a4fbd9f79b67 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 25 May 2026 15:05:17 +0500 Subject: [PATCH 03/57] refactor: #80: refactor c2pa storage data hash exclusion calculation flow --- c2pie/c2pa/assertion.py | 23 +++++------ c2pie/interface.py | 39 +++++++++---------- .../assertions/data_hash_assertion_test.py | 6 +-- 
tests/c2pa/interface_test.py | 15 +++++++ 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index fc57807..e540926 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -71,7 +71,7 @@ def __init__( exclusions: list[dict[str, int]] = [ { "start": cai_offset, - "length": 65535, + "length": 0, }, ] @@ -79,11 +79,10 @@ def __init__( exclusions.extend(additional_exclusions) schema: dict[str, Any] = { - "name": "jumbf manifest", "exclusions": exclusions, "alg": "sha256", "hash": hashed_data, - "pad": [], + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", } super().__init__(C2PA_AssertionTypes.data_hash, schema) @@ -91,23 +90,19 @@ def set_hash_data_length( self, length: int, ) -> None: - if self.schema.get("name") != "jumbf manifest": - raise ValueError("c2pa.hash.data: jumbf manifest is missing") + exclusions = self.schema["exclusions"] + previous_exclusion_lenght = len(cbor_to_bytes(exclusions)) - exclusions = self.schema.get("exclusions", []) + self.schema["exclusions"][0]["length"] = length + current_exclusion_lenght = len(cbor_to_bytes(exclusions)) - if not exclusions: - raise ValueError("c2pa.hash.data: exclusions are missing") + difference = current_exclusion_lenght - previous_exclusion_lenght - exclusions[0]["length"] = int(length) + self.schema["pad"] = self.schema["pad"][difference:] payload = self.get_payload_from_schema() + if self.content_boxes: - self.content_boxes[0] = ContentBox( - box_type=get_assertion_content_box_type(self.type), - payload=payload, - ) - else: self.content_boxes = [ ContentBox( box_type=get_assertion_content_box_type(self.type), diff --git a/c2pie/interface.py b/c2pie/interface.py index a613afd..b1a9a00 100644 --- a/c2pie/interface.py +++ b/c2pie/interface.py @@ -10,7 +10,6 @@ from c2pie.c2pa.assertion_store import AssertionStore from c2pie.c2pa.claim import Claim from c2pie.c2pa.claim_signature import ClaimSignature -from 
c2pie.c2pa.config import RETRY_SIGNATURE from c2pie.c2pa.manifest import Manifest from c2pie.c2pa.manifest_store import ManifestStore from c2pie.c2pa_injection.jpg_injection import JpgSegmentApp11Storage @@ -82,9 +81,6 @@ def c2pie_GenerateManifestStore( tsa_url: str | None, require_tsa: bool, tsa_log_dir: str | None, - tsa_url: str | None = None, - require_tsa: bool = False, - tsa_log_dir: str | None = None, previous_manifest_boxes: list[Manifest] | None = None, ) -> ManifestStore: """ @@ -126,28 +122,29 @@ def c2pie_EmplaceManifest( manifest_store: ManifestStore, ) -> bytes: if format_type == C2PA_ContentTypes.jpg or format_type == C2PA_ContentTypes.jpeg: - assumed_hash_data_len = 0 - final_length = -1 - tail = b"" + serialized_manifest_store = manifest_store.serialize() - for _ in range(RETRY_SIGNATURE): - manifest_store.set_hash_data_length_for_all(assumed_hash_data_len) + app11_storage = JpgSegmentApp11Storage( + app11_segment_box_length=manifest_store.get_length(), + app11_segment_box_type=manifest_store.get_type(), + payload=serialized_manifest_store, + ) - payload = manifest_store.serialize() - storage = JpgSegmentApp11Storage( - app11_segment_box_length=manifest_store.get_length(), - app11_segment_box_type=manifest_store.get_type(), - payload=payload, - ) + app11_storage.serialize() - tail = storage.serialize() - total_len = len(tail) + serialized_app11_storage_lenght = app11_storage.get_serialized_length() - if total_len == final_length: - break + manifest_store.set_hash_data_length_for_all(serialized_app11_storage_lenght) - final_length = total_len - assumed_hash_data_len = total_len + serialized_manifest_store = manifest_store.serialize() + + app11_storage = JpgSegmentApp11Storage( + app11_segment_box_length=manifest_store.get_length(), + app11_segment_box_type=manifest_store.get_type(), + payload=serialized_manifest_store, + ) + + tail = app11_storage.serialize() return content_bytes[:c2pa_offset] + tail + content_bytes[c2pa_offset:] diff --git 
a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 20ef37d..2bcea07 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -38,12 +38,12 @@ def test_hash_data_assertion_schema_alg_is_sha256(): assert data_hash_assertion.schema["alg"] == "sha256" -def test_hash_data_assertion_schema_pad_is_empty(): +def test_hash_data_assertion_schema_pad_is_16_bytes_lenght(): data_hash_assertion = HashDataAssertion( cai_offset=CAI_OFFSET, hashed_data=HASHED_DATA, ) - assert data_hash_assertion.schema["pad"] == [] + assert data_hash_assertion.schema["pad"] == b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" def test_hash_data_assertion_has_correct_offset(): @@ -59,7 +59,7 @@ def test_hash_data_assertion_default_exclusion_length(): cai_offset=CAI_OFFSET, hashed_data=HASHED_DATA, ) - assert data_hash_assertion.schema["exclusions"][0]["length"] == 65535 + assert data_hash_assertion.schema["exclusions"][0]["length"] == 0 def test_hash_data_assertion_has_correct_hash(): diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index 244bb34..2014e54 100644 --- a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -76,6 +76,9 @@ def test_generate_manifest_returns_manifest_store(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, + tsa_url=None, + require_tsa=False, + tsa_log_dir=None, ) assert isinstance(manifest_store, ManifestStore) @@ -93,6 +96,9 @@ def test_generate_manifest_contains_one_manifest(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, + tsa_url=None, + require_tsa=False, + tsa_log_dir=None, ) assert len(manifest_store.manifests) == 1 @@ -110,6 +116,9 @@ def test_generate_manifest_label_follows_urn_c2pa_format(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, + tsa_url=None, + require_tsa=False, + tsa_log_dir=None, ) label = 
manifest_store.manifests[0].get_manifest_label() @@ -134,6 +143,9 @@ def test_emplace_manifest_returns_bytes_with_jpeg_signature(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, + tsa_url=None, + require_tsa=False, + tsa_log_dir=None, ) result = c2pie_EmplaceManifest( @@ -167,6 +179,9 @@ def test_calculated_exclusion_covers_the_full_app11(): private_key=key, certificate_chain=cert, file_name=Path("test.jpg").name, + tsa_url=None, + require_tsa=False, + tsa_log_dir=None, ) # 2 bytes - jpeg marker From 904cd63855baf4d70ffa94aec1d4dff415a04de8 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 26 May 2026 13:41:58 +0500 Subject: [PATCH 04/57] test: #80: modify exclusion cover test so that it covers pdf case --- tests/c2pa/interface_test.py | 54 +++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index 2014e54..65caee3 100644 --- a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -1,6 +1,8 @@ from pathlib import Path from unittest.mock import patch +import pytest + from c2pie.c2pa.manifest_store import ManifestStore from c2pie.interface import ( c2pie_EmplaceManifest, @@ -159,13 +161,27 @@ def test_emplace_manifest_returns_bytes_with_jpeg_signature(): assert result[:2] == b"\xff\xd8" -def test_calculated_exclusion_covers_the_full_app11(): +FIXTURES_FOLDER_PATH = Path(__file__).parent.parent / "test_files" + +test_cases = [ + Path(FIXTURES_FOLDER_PATH / "test_image.jpg"), + Path(FIXTURES_FOLDER_PATH / "test_doc.pdf"), +] + + +@pytest.mark.parametrize( + "file", + test_cases, + ids=lambda x: x.suffix[1:], +) +def test_calculated_exclusion_covers_the_full_storage(file): with open(KEY_FILEPATH, "rb") as f: key = f.read() with open(CERT_FILEPATH, "rb") as f: cert = f.read() - with open("tests/test_files/test_image.jpg", "rb") as f: - jpeg_bytes = f.read() + + with 
open(file, "rb") as f: + raw_bytes = f.read() assertions = [ c2pie_GenerateHashDataAssertion( @@ -178,27 +194,39 @@ def test_calculated_exclusion_covers_the_full_app11(): assertions=assertions, private_key=key, certificate_chain=cert, - file_name=Path("test.jpg").name, + file_name=file.name, tsa_url=None, require_tsa=False, tsa_log_dir=None, ) - # 2 bytes - jpeg marker - # 2 bytes - segment lenght - # 2 bytes - CI - # 2 bytes - EN - # 4 bytes - Z - serialized_manifest_store_lenght = len(manifest_store.serialize()) + 2 + 2 + 2 + 2 + 4 + file_extension = C2PA_ContentTypes(file.suffix) + + if file_extension == C2PA_ContentTypes.jpeg or file_extension == C2PA_ContentTypes.jpg: + """ + Expected length of serialized data in JPEG/JPG format consists + of APP11 segment header + payload (serialized ManifestStore). + + More info about APP11 segment you can see here: docs/JPG-structure-overview.md + """ + expected_serialized_lenght = 2 + 2 + 2 + 2 + 4 + len(manifest_store.serialize()) + elif file_extension == C2PA_ContentTypes.pdf: + """ + Expected length of serialized data in PDF format consists + of boby (serialized ManifestStore) + updated cross-ref table and trailer. 
+ + More info about PDF Incremental Update you can see here: docs/PDF-structure-overview.md + """ + expected_serialized_lenght = 7115 with patch("c2pie.c2pa.manifest_store.ManifestStore.set_hash_data_length_for_all") as mock_func: c2pie_EmplaceManifest( - format_type=C2PA_ContentTypes.jpg, - content_bytes=jpeg_bytes, + format_type=file_extension, + content_bytes=raw_bytes, c2pa_offset=2, manifest_store=manifest_store, ) last_call = mock_func.call_args - assert serialized_manifest_store_lenght == last_call.args[0] + assert expected_serialized_lenght == last_call.args[0] From 03ca72871642eb6c2f81d6001234ff86708e1d27 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 26 May 2026 13:42:34 +0500 Subject: [PATCH 05/57] refactor: #80: refactor c2pa storage data hash exclusion calculation flow --- c2pie/c2pa_injection/pdf_injection.py | 234 ++++++++++++++------------ 1 file changed, 127 insertions(+), 107 deletions(-) diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index 6a9fc41..6ed964c 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -4,7 +4,6 @@ from pypdf import PdfWriter -from c2pie.c2pa.config import RETRY_SIGNATURE from c2pie.c2pa.manifest_store import ManifestStore @@ -66,7 +65,7 @@ def _xref_entry(offset: int) -> bytes: def emplace_manifest_into_pdf( initial_content: bytes, - manifests: ManifestStore, + manifest_store: ManifestStore, *, author: str | None = None, ) -> bytes: @@ -80,6 +79,7 @@ def emplace_manifest_into_pdf( except ValueError: initial_content = _read_pdf_using_pypdf(initial_content=initial_content) info = _scan_pdf_to_get_its_data(initial_content) + initial_length_of_file = len(initial_content) pointer_on_previous_xref = info.startxref starting_value = info.max_obj + 1 @@ -89,114 +89,134 @@ def emplace_manifest_into_pdf( author_info_required = bool(author) - assumed_hash_data_len = 0 - last = -1 - for _ in 
range(RETRY_SIGNATURE): - manifests.set_hash_data_length_for_all(assumed_hash_data_len) - store = manifests.serialize() - length_of_c2pa_manifest = len(store) - - object_1 = ( - f"{starting_value} 0 obj\n".encode("ascii") - + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {length_of_c2pa_manifest} >>\n".encode("ascii") - + b"stream\n" - + store - + b"\nendstream\nendobj\n" - ) - object_2 = ( - f"{starting_value + 1} 0 obj\n".encode("ascii") - + ( - f"<< /Type /Filespec /AFRelationship /C2PA_Manifest " - f"/F ({fname}) /UF ({fname}) /Desc (C2PA Manifest Store) " - f"/Subtype {subtype} /EF << /F {starting_value} 0 R >> >>\n" - ).encode("ascii") - + b"endobj\n" - ) - object_3 = ( - f"{starting_value + 2} 0 obj\n".encode("ascii") - + f"<< /Type /Names /Names [ ({fname}) {starting_value + 1} 0 R ] >>\n".encode("ascii") - + b"endobj\n" - ) - object_4 = ( - f"{starting_value + 3} 0 obj\n".encode("ascii") - + f"<< /Type /Names /EmbeddedFiles {starting_value + 2} 0 R >>\n".encode("ascii") - + b"endobj\n" - ) - object_5 = ( - f"{starting_value + 4} 0 obj\n".encode("ascii") - + ( - f"<< /Type /Catalog /Pages {info.pages_ref} /Names " - f"{starting_value + 3} 0 R /AF [ {starting_value + 1} 0 R ] >>\n" - ).encode("ascii") + serialized_manifest_store = manifest_store.serialize() + + serialized_manifest_store_lenght = len(serialized_manifest_store) + + object_1 = ( + f"{starting_value} 0 obj\n".encode("ascii") + + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_lenght} >>\n".encode("ascii") + + b"stream\n" + + serialized_manifest_store + + b"\nendstream\nendobj\n" + ) + object_2 = ( + f"{starting_value + 1} 0 obj\n".encode("ascii") + + ( + f"<< /Type /Filespec /AFRelationship /C2PA_Manifest " + f"/F ({fname}) /UF ({fname}) /Desc (C2PA Manifest Store) " + f"/Subtype {subtype} /EF << /F {starting_value} 0 R >> >>\n" + ).encode("ascii") + + b"endobj\n" + ) + object_3 = ( + f"{starting_value + 2} 0 obj\n".encode("ascii") + + f"<< /Type /Names 
/Names [ ({fname}) {starting_value + 1} 0 R ] >>\n".encode("ascii") + + b"endobj\n" + ) + object_4 = ( + f"{starting_value + 3} 0 obj\n".encode("ascii") + + f"<< /Type /Names /EmbeddedFiles {starting_value + 2} 0 R >>\n".encode("ascii") + + b"endobj\n" + ) + object_5 = ( + f"{starting_value + 4} 0 obj\n".encode("ascii") + + ( + f"<< /Type /Catalog /Pages {info.pages_ref} /Names " + f"{starting_value + 3} 0 R /AF [ {starting_value + 1} 0 R ] >>\n" + ).encode("ascii") + + b"endobj\n" + ) + + if author_info_required: + author_s = author.replace(")", r"\)") if author else "" + object_6 = ( + f"{starting_value + 5} 0 obj\n".encode("ascii") + + f"<< /Author ({author_s}) >>\n".encode("ascii") + b"endobj\n" ) + else: + object_6 = b"" + + sep = b"\n" + offset_of_object_1 = initial_length_of_file + len(sep) + offset_of_object_2 = offset_of_object_1 + len(object_1) + offset_of_object_3 = offset_of_object_2 + len(object_2) + offset_of_object_4 = offset_of_object_3 + len(object_3) + offset_of_object_5 = offset_of_object_4 + len(object_4) + + if author_info_required: + offset_of_object_6 = offset_of_object_5 + len(object_5) + xref_pos = offset_of_object_6 + len(object_6) + else: + xref_pos = offset_of_object_5 + len(object_5) + + count = 5 + (1 if author_info_required else 0) + xref = b"xref\n" + f"{starting_value} {count}\n".encode("ascii") + xref += ( + _xref_entry(offset_of_object_1) + + _xref_entry(offset_of_object_2) + + _xref_entry(offset_of_object_3) + + _xref_entry(offset_of_object_4) + + _xref_entry(offset_of_object_5) + ) - if author_info_required: - author_s = author.replace(")", r"\)") if author else "" - object_6 = ( - f"{starting_value + 5} 0 obj\n".encode("ascii") - + f"<< /Author ({author_s}) >>\n".encode("ascii") - + b"endobj\n" - ) - else: - object_6 = b"" - - sep = b"\n" - offset_of_object_1 = initial_length_of_file + len(sep) - offset_of_object_2 = offset_of_object_1 + len(object_1) - offset_of_object_3 = offset_of_object_2 + len(object_2) - 
offset_of_object_4 = offset_of_object_3 + len(object_3) - offset_of_object_5 = offset_of_object_4 + len(object_4) - if author_info_required: - offset_of_object_6 = offset_of_object_5 + len(object_5) - xref_pos = offset_of_object_6 + len(object_6) - else: - xref_pos = offset_of_object_5 + len(object_5) - - count = 5 + (1 if author_info_required else 0) - xref = b"xref\n" + f"{starting_value} {count}\n".encode("ascii") - xref += ( - _xref_entry(offset_of_object_1) - + _xref_entry(offset_of_object_2) - + _xref_entry(offset_of_object_3) - + _xref_entry(offset_of_object_4) - + _xref_entry(offset_of_object_5) - ) - if author_info_required: - xref += _xref_entry(offset_of_object_6) - - size_val = starting_value + count - trailer = ( - b"trailer\n<< " - + f"/Size {size_val} ".encode("ascii") - + f"/Root {starting_value + 4} 0 R ".encode("ascii") - + f"/Prev {pointer_on_previous_xref} ".encode("ascii") - ) - if author_info_required: - trailer += f"/Info {starting_value + 5} 0 R ".encode("ascii") - trailer += b">>\n" - - tail = ( - sep - + object_1 - + object_2 - + object_3 - + object_4 - + object_5 - + object_6 - + xref - + trailer - + b"startxref\n" - + str(xref_pos).encode("ascii") - + b"\n%%EOF\n" - ) + if author_info_required: + xref += _xref_entry(offset_of_object_6) + + size_val = starting_value + count + trailer = ( + b"trailer\n<< " + + f"/Size {size_val} ".encode("ascii") + + f"/Root {starting_value + 4} 0 R ".encode("ascii") + + f"/Prev {pointer_on_previous_xref} ".encode("ascii") + ) - total_len = len(tail) - if total_len == last: - return initial_content + tail - last = total_len - assumed_hash_data_len = total_len + if author_info_required: + trailer += f"/Info {starting_value + 5} 0 R ".encode("ascii") + + trailer += b">>\n" + + tail = ( + sep + + object_1 + + object_2 + + object_3 + + object_4 + + object_5 + + object_6 + + xref + + trailer + + b"startxref\n" + + str(xref_pos).encode("ascii") + + b"\n%%EOF\n" + ) + + 
manifest_store.set_hash_data_length_for_all(len(tail)) + + serialized_manifest_store = manifest_store.serialize() + + object_1 = ( + f"{starting_value} 0 obj\n".encode("ascii") + + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_lenght} >>\n".encode("ascii") + + b"stream\n" + + serialized_manifest_store + + b"\nendstream\nendobj\n" + ) + + tail = ( + sep + + object_1 + + object_2 + + object_3 + + object_4 + + object_5 + + object_6 + + xref + + trailer + + b"startxref\n" + + str(xref_pos).encode("ascii") + + b"\n%%EOF\n" + ) - manifests.set_hash_data_length_for_all(assumed_hash_data_len) - store = manifests.serialize() return initial_content + tail From c751c4ac2542b95c6fd382e91f98d5741ce53f94 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 26 May 2026 13:43:21 +0500 Subject: [PATCH 06/57] chore: #80: rename pdf test file so that filename more readability --- tests/c2pa/e2e_test.py | 2 +- .../{test_doc2.pdf => test_broken_doc.pdf} | Bin 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/test_files/{test_doc2.pdf => test_broken_doc.pdf} (100%) diff --git a/tests/c2pa/e2e_test.py b/tests/c2pa/e2e_test.py index b15e8b6..2e1ba56 100644 --- a/tests/c2pa/e2e_test.py +++ b/tests/c2pa/e2e_test.py @@ -13,7 +13,7 @@ test_files_by_extension = { "pdf": [ "test_doc.pdf", - "test_doc2.pdf", + "test_broken_doc.pdf", ], "jpg": [ "test_image.jpg", diff --git a/tests/test_files/test_doc2.pdf b/tests/test_files/test_broken_doc.pdf similarity index 100% rename from tests/test_files/test_doc2.pdf rename to tests/test_files/test_broken_doc.pdf From bb26494f9f1b4567095c10d3759521ece7778ac4 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Wed, 27 May 2026 09:14:15 +0500 Subject: [PATCH 07/57] refactor: #80: move logic for adding exclusions to C2PA structure into separate method --- c2pie/c2pa/assertion.py | 69 ++++++++++----- 
c2pie/c2pa/assertion_store.py | 9 +- c2pie/c2pa/manifest.py | 8 +- c2pie/c2pa/manifest_store.py | 8 +- c2pie/c2pa_injection/pdf_injection.py | 6 +- c2pie/interface.py | 17 ++-- c2pie/jumbf_boxes/super_box.py | 6 +- c2pie/signing.py | 1 - tests/c2pa/assertions/assertion_test.py | 10 ++- .../assertions/data_hash_assertion_test.py | 85 +++++-------------- tests/c2pa/interface_test.py | 8 +- tests/c2pa/manifest_store_test.py | 3 +- 12 files changed, 122 insertions(+), 108 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index e540926..00ad5cc 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -37,7 +37,12 @@ def __init__( if not content_boxes: payload = self.get_payload_from_schema() box_type_hex = get_assertion_content_box_type(self.type) - content_boxes = [ContentBox(box_type=box_type_hex, payload=payload)] + content_boxes = [ + ContentBox( + box_type=box_type_hex, + payload=payload, + ) + ] super().__init__( content_type=get_assertion_content_type(self.type), @@ -64,16 +69,10 @@ class HashDataAssertion(Assertion): def __init__( self, - cai_offset: int, hashed_data: bytes, additional_exclusions: list[dict[str, int]] | None = None, ): - exclusions: list[dict[str, int]] = [ - { - "start": cai_offset, - "length": 0, - }, - ] + exclusions: list[dict[str, int]] = [] if additional_exclusions: exclusions.extend(additional_exclusions) @@ -82,33 +81,63 @@ def __init__( "exclusions": exclusions, "alg": "sha256", "hash": hashed_data, - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"" + + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", } - super().__init__(C2PA_AssertionTypes.data_hash, schema) - def set_hash_data_length( + super().__init__( + 
C2PA_AssertionTypes.data_hash, + schema, + ) + + def add_full_c2pa_structure_exclusion( self, + offset: int, length: int, ) -> None: exclusions = self.schema["exclusions"] previous_exclusion_lenght = len(cbor_to_bytes(exclusions)) - self.schema["exclusions"][0]["length"] = length + self.schema["exclusions"].extend( + [ + { + "start": offset, + "length": length, + }, + ] + ) + current_exclusion_lenght = len(cbor_to_bytes(exclusions)) difference = current_exclusion_lenght - previous_exclusion_lenght - self.schema["pad"] = self.schema["pad"][difference:] + """ + Important! If the Data Hash Assertion is less than 24 bytes or greater than + 255 bytes, the size of the cbor header will change during conversion to cbor + and will occupy less than 2 bytes or more than 2 bytes, correspondingly. + """ + + schema_length = len(cbor_to_bytes(self.schema)) + additional_byte = 0 # 2 byte CBOR header case + + if schema_length < 24: # 1 byte CBOR header case + additional_byte = -1 + elif schema_length > 255: # 3 byte CBOR header case + additional_byte = 1 + + self.schema["pad"] = self.schema["pad"][difference + additional_byte :] payload = self.get_payload_from_schema() - if self.content_boxes: - self.content_boxes = [ - ContentBox( - box_type=get_assertion_content_box_type(self.type), - payload=payload, - ) - ] + self.content_boxes = [ + ContentBox( + box_type=get_assertion_content_box_type(self.type), + payload=payload, + ) + ] self.sync_payload() diff --git a/c2pie/c2pa/assertion_store.py b/c2pie/c2pa/assertion_store.py index 8108049..d5b3189 100644 --- a/c2pie/c2pa/assertion_store.py +++ b/c2pie/c2pa/assertion_store.py @@ -20,11 +20,16 @@ def __init__( def get_assertions(self) -> list: return self.assertions - def set_hash_data_length( + def add_full_c2pa_structure_exclusion( self, + offset: int, length: int, ) -> None: for assertion in self.assertions: if assertion.type == C2PA_AssertionTypes.data_hash: - assertion.set_hash_data_length(length) + 
assertion.add_full_c2pa_structure_exclusion( + offset, + length, + ) + self.sync_payload() diff --git a/c2pie/c2pa/manifest.py b/c2pie/c2pa/manifest.py index 224f68f..cc7beb8 100644 --- a/c2pie/c2pa/manifest.py +++ b/c2pie/c2pa/manifest.py @@ -44,8 +44,9 @@ def get_assertions(self): return self.assertion_store.get_assertions() return - def set_hash_data_length( + def add_full_c2pa_structure_exclusion( self, + offset: int, length: int, ): """ @@ -53,7 +54,10 @@ def set_hash_data_length( and ClaimSignature (COSE Sign1 detached over Claim CBOR). """ if self.assertion_store and self.claim and self.claim_signature: - self.assertion_store.set_hash_data_length(length) + self.assertion_store.add_full_c2pa_structure_exclusion( + offset, + length, + ) self.claim.set_assertion_store(self.assertion_store) self.claim_signature.set_claim(self.claim) diff --git a/c2pie/c2pa/manifest_store.py b/c2pie/c2pa/manifest_store.py index 1377ca7..bf0bf03 100644 --- a/c2pie/c2pa/manifest_store.py +++ b/c2pie/c2pa/manifest_store.py @@ -25,11 +25,15 @@ def __init__( def sync_payload(self): super().sync_payload() - def set_hash_data_length_for_all( + def add_full_c2pa_structure_exclusion( self, + offset: int, length: int, ) -> None: - self.manifests[-1].set_hash_data_length(length) + self.manifests[-1].add_full_c2pa_structure_exclusion( + offset, + length, + ) super().sync_payload() diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index 6ed964c..0375d08 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -66,6 +66,7 @@ def _xref_entry(offset: int) -> bytes: def emplace_manifest_into_pdf( initial_content: bytes, manifest_store: ManifestStore, + c2pa_offset: int, *, author: str | None = None, ) -> bytes: @@ -192,7 +193,10 @@ def emplace_manifest_into_pdf( + b"\n%%EOF\n" ) - manifest_store.set_hash_data_length_for_all(len(tail)) + manifest_store.add_full_c2pa_structure_exclusion( + c2pa_offset, + len(tail), + 
) serialized_manifest_store = manifest_store.serialize() diff --git a/c2pie/interface.py b/c2pie/interface.py index b1a9a00..9490516 100644 --- a/c2pie/interface.py +++ b/c2pie/interface.py @@ -24,13 +24,9 @@ def c2pie_GenerateAssertion(assertion_type: C2PA_AssertionTypes, assertion_schem def c2pie_GenerateHashDataAssertion( - cai_offset: int, hashed_data: bytes, ) -> HashDataAssertion: - return HashDataAssertion( - cai_offset, - hashed_data, - ) + return HashDataAssertion(hashed_data) def c2pie_GenerateActionsAssertion( @@ -134,7 +130,10 @@ def c2pie_EmplaceManifest( serialized_app11_storage_lenght = app11_storage.get_serialized_length() - manifest_store.set_hash_data_length_for_all(serialized_app11_storage_lenght) + manifest_store.add_full_c2pa_structure_exclusion( + c2pa_offset, + serialized_app11_storage_lenght, + ) serialized_manifest_store = manifest_store.serialize() @@ -149,6 +148,10 @@ def c2pie_EmplaceManifest( return content_bytes[:c2pa_offset] + tail + content_bytes[c2pa_offset:] if format_type == C2PA_ContentTypes.pdf: - return emplace_manifest_into_pdf(content_bytes, manifest_store) + return emplace_manifest_into_pdf( + content_bytes, + manifest_store, + c2pa_offset, + ) raise ValueError(f"Unsupported content type {format_type}!") diff --git a/c2pie/jumbf_boxes/super_box.py b/c2pie/jumbf_boxes/super_box.py index 5a039bb..e20e4bf 100644 --- a/c2pie/jumbf_boxes/super_box.py +++ b/c2pie/jumbf_boxes/super_box.py @@ -15,7 +15,11 @@ def __init__( label: str = "", content_boxes: list | None = None, ): - self.description_box = DescriptionBox(content_type=content_type, label=label) + self.description_box = DescriptionBox( + content_type=content_type, + label=label, + ) + self.content_boxes = [] if content_boxes is None else content_boxes payload = self.description_box.serialize() + self.serialize_content_boxes() diff --git a/c2pie/signing.py b/c2pie/signing.py index 2537772..37ae4cc 100644 --- a/c2pie/signing.py +++ b/c2pie/signing.py @@ -145,7 +145,6 @@ def 
sign_file( assertions = [] hash_data_assertion = c2pie_GenerateHashDataAssertion( - cai_offset=cai_offset, hashed_data=hashlib.sha256(raw_bytes).digest(), ) diff --git a/tests/c2pa/assertions/assertion_test.py b/tests/c2pa/assertions/assertion_test.py index b41122a..bb77c73 100644 --- a/tests/c2pa/assertions/assertion_test.py +++ b/tests/c2pa/assertions/assertion_test.py @@ -79,8 +79,14 @@ def test_assertion_content_boxes_not_empty(): # noqa: F811 def test_additional_extensions_adding_for_hash_data_assertions(): - additional_exclusion = {"some_extension": 343} + additional_exclusion = { + "start": 100, + "length": 1000, + } + data_hash_assertion = HashDataAssertion( - cai_offset=124, hashed_data=b"", additional_exclusions=[additional_exclusion] + hashed_data=b"", + additional_exclusions=[additional_exclusion], ) + assert additional_exclusion in data_hash_assertion.schema["exclusions"] diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 2bcea07..0f85dd5 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -7,75 +7,38 @@ def test_hash_data_assertion_has_correct_type(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.type == C2PA_AssertionTypes.data_hash def test_hash_data_assertion_content_type_is_cbor(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.get_content_type() == jumbf_content_types["cbor"] def test_hash_data_assertion_label(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.get_label() == 
"c2pa.hash.data" def test_hash_data_assertion_schema_alg_is_sha256(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.schema["alg"] == "sha256" def test_hash_data_assertion_schema_pad_is_16_bytes_lenght(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.schema["pad"] == b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" -def test_hash_data_assertion_has_correct_offset(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) - assert data_hash_assertion.schema["exclusions"][0]["start"] == CAI_OFFSET - - -def test_hash_data_assertion_default_exclusion_length(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) - assert data_hash_assertion.schema["exclusions"][0]["length"] == 0 - - def test_hash_data_assertion_has_correct_hash(): expected_hashed_data = b"\xab" * 32 - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=expected_hashed_data, - ) + data_hash_assertion = HashDataAssertion(hashed_data=expected_hashed_data) assert data_hash_assertion.schema["hash"] == expected_hashed_data def test_hash_data_assertion_serializes_as_cbor(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) expected_payload = cbor_to_bytes(data_hash_assertion.schema) assert len(data_hash_assertion.content_boxes) == 1 assert data_hash_assertion.content_boxes[0].payload == expected_payload @@ -88,38 +51,34 @@ def test_hash_data_assertion_with_additional_exclusions(): "length": 200, }, ] + data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, 
hashed_data=HASHED_DATA, additional_exclusions=additional, ) + exclusions = data_hash_assertion.schema["exclusions"] - assert len(exclusions) == 2 - assert exclusions[1] == {"start": 100, "length": 200} + assert len(exclusions) == 1 + assert exclusions[0] == { + "start": 100, + "length": 200, + } -def test_hash_data_assertion_without_additional_exclusions_has_one_exclusion(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) - assert len(data_hash_assertion.schema["exclusions"]) == 1 + +def test_hash_data_assertion_without_additional_exclusions_has_not_exclusions(): + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + assert len(data_hash_assertion.schema["exclusions"]) == 0 def test_set_hash_data_length_updates_exclusion(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) - data_hash_assertion.set_hash_data_length(200) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + data_hash_assertion.add_full_c2pa_structure_exclusion(CAI_OFFSET, 200) assert data_hash_assertion.schema["exclusions"][0]["length"] == 200 def test_set_hash_data_length_updates_content_box_payload(): - data_hash_assertion = HashDataAssertion( - cai_offset=CAI_OFFSET, - hashed_data=HASHED_DATA, - ) - data_hash_assertion.set_hash_data_length(200) + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + data_hash_assertion.add_full_c2pa_structure_exclusion(CAI_OFFSET, 200) expected_payload = cbor_to_bytes(data_hash_assertion.schema) assert data_hash_assertion.content_boxes[0].payload == expected_payload diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index 65caee3..e06fe60 100644 --- a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -43,7 +43,6 @@ def test_generate_hash_data_assertion_returns_hash_data_assertion_instance(): from c2pie.c2pa.assertion import HashDataAssertion hash_data_assertion = 
c2pie_GenerateHashDataAssertion( - cai_offset=2, hashed_data=b"\x00" * 32, ) assert isinstance(hash_data_assertion, HashDataAssertion) @@ -136,7 +135,7 @@ def test_emplace_manifest_returns_bytes_with_jpeg_signature(): jpeg_bytes = f.read() assertions = [ - c2pie_GenerateHashDataAssertion(cai_offset=2, hashed_data=b"\x00" * 32), + c2pie_GenerateHashDataAssertion(hashed_data=b"\x00" * 32), c2pie_GenerateActionsAssertion(action="c2pa.created"), ] @@ -185,7 +184,6 @@ def test_calculated_exclusion_covers_the_full_storage(file): assertions = [ c2pie_GenerateHashDataAssertion( - cai_offset=2, hashed_data=b"\x00" * 32, ), ] @@ -219,7 +217,7 @@ def test_calculated_exclusion_covers_the_full_storage(file): """ expected_serialized_lenght = 7115 - with patch("c2pie.c2pa.manifest_store.ManifestStore.set_hash_data_length_for_all") as mock_func: + with patch("c2pie.c2pa.manifest_store.ManifestStore.add_full_c2pa_structure_exclusion") as mock_func: c2pie_EmplaceManifest( format_type=file_extension, content_bytes=raw_bytes, @@ -229,4 +227,4 @@ def test_calculated_exclusion_covers_the_full_storage(file): last_call = mock_func.call_args - assert expected_serialized_lenght == last_call.args[0] + assert expected_serialized_lenght == last_call.args[1] diff --git a/tests/c2pa/manifest_store_test.py b/tests/c2pa/manifest_store_test.py index b25ea0a..feeb715 100644 --- a/tests/c2pa/manifest_store_test.py +++ b/tests/c2pa/manifest_store_test.py @@ -77,7 +77,6 @@ def test_manifest_store_with_previous_manifests_serializes_without_error(): def test_manifest_store_set_hash_data_length_only_affects_new_manifests(): data_hash_assertion = HashDataAssertion( - 0, b"\x00\x00\x00", ) @@ -115,7 +114,7 @@ def test_manifest_store_set_hash_data_length_only_affects_new_manifests(): ] ) - manifest_store.set_hash_data_length_for_all(1024) + manifest_store.add_full_c2pa_structure_exclusion(2, 1024) previous_box = manifest_store.content_boxes[0] assert previous_box == previous_manifest From 
e0cfcc75b50a177d2892b2a3421e13d9de0018fd Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Wed, 27 May 2026 15:18:40 +0500 Subject: [PATCH 08/57] test: #80: add test to align serialized COSE_Sign1 --- tests/c2pa/claim_signature_test.py | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index 078bbfa..c1b0098 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -1,3 +1,5 @@ +import cbor2 + from c2pie.c2pa.assertion import Assertion from c2pie.c2pa.assertion_store import AssertionStore from c2pie.c2pa.claim import Claim @@ -57,3 +59,49 @@ def test_create_claim_signature_with_non_empty_claim(): assert claim_signature.claim is not None # noqa: B015 assert claim_signature.content_boxes[0].get_type() == b"cbor".hex() # noqa: B015 + + +def test_serialization_cose_sign1_is_performed_with_alignment(): + claim_signature = ClaimSignature.__new__(ClaimSignature) + claim_signature.serialized_length = 0 + + cose_sign1 = [ + "protected_header", + { + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + }, + "payload", + "signature", + ] + + serialized_cose_sign1_cbor_1 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + assert claim_signature.serialized_length != 0 + assert cbor2.loads(serialized_cose_sign1_cbor_1).tag == 18 + assert cbor2.loads(serialized_cose_sign1_cbor_1).value[1]["pad"] == cose_sign1[1]["pad"] + + cose_sign1 = [ + "protected_header", + { + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + }, + "payload", + "signature2", + ] + + serialized_cose_sign1_cbor_2 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + assert len(serialized_cose_sign1_cbor_1) == len(serialized_cose_sign1_cbor_2) + + cose_sign1 = [ + "protected_header", + { + "pad": 
b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + }, + "", + "signature", + ] + + serialized_cose_sign1_cbor_3 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + assert len(serialized_cose_sign1_cbor_1) == len(serialized_cose_sign1_cbor_3) From f7ad081822e093c375d6cc6cbda06f08df604b48 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Wed, 27 May 2026 15:19:37 +0500 Subject: [PATCH 09/57] feat: #80: add alignment logic for serialized COSE_Sign1 --- c2pie/c2pa/claim_signature.py | 41 ++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 8492362..5477483 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -64,6 +64,8 @@ def __init__( self.require_tsa = require_tsa self.tsa_log_dir = tsa_log_dir + self.serialized_length = 0 + content_boxes = self._generate_payload() super().__init__( @@ -129,10 +131,45 @@ def _generate_unprotected_header(self, serialized_sig_structure: bytes) -> bytes }, ], }, + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", } return unprotected_header + def serialize_cose_sign1_tagged_with_alignment( + self, + cose_sign1: list, + ) -> bytes: + """ + Takes a COSE_Sign1 as an array containing protected_header, unprotected_header, + payload, and signature, and returns a serialized COSE_Sign1_Tagged structure. + """ + cose_sign1_tagged_cbor = cbor2.dumps( + cbor2.CBORTag(18, cose_sign1), + canonical=True, + ) + + """ + The length of a TSA token can be variable. To ensure that a new token does not exceed + the exclusion boundary for the C2PA structure, we need to align the length of + the Claim Signature using the pad field, similar to the Data Hash Assertion. 
+ """ + if self.serialized_length == 0: + self.serialized_length = len(cose_sign1_tagged_cbor) + elif self.serialized_length != len(cose_sign1_tagged_cbor): + difference = self.serialized_length - len(cose_sign1_tagged_cbor) + + if difference > len(cose_sign1[1]["pad"]): + raise ValueError("Difference in length exceeds the predefined pad") + + cose_sign1[1]["pad"] = b"\x00" * (len(cose_sign1[1]["pad"]) + difference) + cose_sign1_tagged_cbor = cbor2.dumps( + cbor2.CBORTag(18, cose_sign1), + canonical=True, + ) + + return cose_sign1_tagged_cbor + def _create_cose_sign1_tagged(self) -> bytes: """ COSE_Sign1 = [ @@ -179,4 +216,6 @@ def _create_cose_sign1_tagged(self) -> bytes: cose_sign1 = [serialized_protected_header, unprotected_header, None, signature] - return cbor2.dumps(cbor2.CBORTag(18, cose_sign1), canonical=True) + cose_sign1_tagged_cbor = self.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + return cose_sign1_tagged_cbor From 836a96cf1dfcfa26e665e93db0522d3bf14b5f5d Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Wed, 27 May 2026 15:26:42 +0500 Subject: [PATCH 10/57] test: #80: update to Data Hash Assertion test, as pad size has been updated --- tests/c2pa/assertions/data_hash_assertion_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 0f85dd5..f89cd3d 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -26,9 +26,9 @@ def test_hash_data_assertion_schema_alg_is_sha256(): assert data_hash_assertion.schema["alg"] == "sha256" -def test_hash_data_assertion_schema_pad_is_16_bytes_lenght(): +def test_hash_data_assertion_schema_pad_is_64_bytes_lenght(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - assert data_hash_assertion.schema["pad"] == 
b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + assert data_hash_assertion.schema["pad"] == b"\x00" * 64 def test_hash_data_assertion_has_correct_hash(): From 00a5a80e44a4fbdaef7217e270eaeecea494567f Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Wed, 27 May 2026 15:48:00 +0500 Subject: [PATCH 11/57] test: #80: update exclusion coverage test to reflect the changes made to the Data Hash Assertion constructor --- tests/c2pa/interface_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index e06fe60..29a417f 100644 --- a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -215,7 +215,7 @@ def test_calculated_exclusion_covers_the_full_storage(file): More info about PDF Incremental Update you can see here: docs/PDF-structure-overview.md """ - expected_serialized_lenght = 7115 + expected_serialized_lenght = 7148 with patch("c2pie.c2pa.manifest_store.ManifestStore.add_full_c2pa_structure_exclusion") as mock_func: c2pie_EmplaceManifest( From a08dccc67a246b972f96836e12dc65904eeb90ac Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Wed, 27 May 2026 10:50:54 +0000 Subject: [PATCH 12/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f04f8f3..e9a49b8 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-71.13%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
-[![coverage](https://img.shields.io/badge/units_coverage-79.65%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-91.80%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-71.18%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-85.02%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.15%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 1323202817e15ff1e2a154582009457f4497ace5 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 09:43:52 +0500 Subject: [PATCH 13/57] feat: #80: add handling for CBOR boundary violation cases for TSA tokens --- c2pie/c2pa/claim_signature.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 5477483..2fb09ba 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -140,20 +140,14 @@ def serialize_cose_sign1_tagged_with_alignment( self, cose_sign1: list, ) -> bytes: - """ - Takes a COSE_Sign1 as an array containing protected_header, unprotected_header, - payload, and signature, and returns a serialized COSE_Sign1_Tagged structure. - """ cose_sign1_tagged_cbor = cbor2.dumps( cbor2.CBORTag(18, cose_sign1), canonical=True, ) - """ - The length of a TSA token can be variable. To ensure that a new token does not exceed - the exclusion boundary for the C2PA structure, we need to align the length of - the Claim Signature using the pad field, similar to the Data Hash Assertion. - """ + # The length of a TSA token can be variable. To ensure that a new token does not exceed + # the exclusion boundary for the C2PA structure, we need to align the length of + # the Claim Signature using the pad field, similar to the Data Hash Assertion. 
if self.serialized_length == 0: self.serialized_length = len(cose_sign1_tagged_cbor) elif self.serialized_length != len(cose_sign1_tagged_cbor): @@ -162,7 +156,16 @@ def serialize_cose_sign1_tagged_with_alignment( if difference > len(cose_sign1[1]["pad"]): raise ValueError("Difference in length exceeds the predefined pad") - cose_sign1[1]["pad"] = b"\x00" * (len(cose_sign1[1]["pad"]) + difference) + updated_pad_length = len(cose_sign1[1]["pad"]) + difference + + # If a CBOR overflow is not handled, the extra length byte that + # would be added in this case will not be taken into account. + if updated_pad_length > 23: + difference += 1 + elif updated_pad_length > 255: + difference += 2 + + cose_sign1[1]["pad"] = b"\x00" * updated_pad_length cose_sign1_tagged_cbor = cbor2.dumps( cbor2.CBORTag(18, cose_sign1), canonical=True, From 31025d98b4a0270f7af886017aedbc81ddcee8a6 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 09:44:45 +0500 Subject: [PATCH 14/57] refactor: #80: rework logic for handling CBOR boundary violations for Data Hash Assertion --- c2pie/c2pa/assertion.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 00ad5cc..10de072 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -112,23 +112,22 @@ def add_full_c2pa_structure_exclusion( current_exclusion_lenght = len(cbor_to_bytes(exclusions)) - difference = current_exclusion_lenght - previous_exclusion_lenght + difference = previous_exclusion_lenght - current_exclusion_lenght - """ - Important! If the Data Hash Assertion is less than 24 bytes or greater than - 255 bytes, the size of the cbor header will change during conversion to cbor - and will occupy less than 2 bytes or more than 2 bytes, correspondingly. 
- """ + if difference > len(self.schema["pad"]): + raise ValueError("Difference in length exceeds the predefined pad") - schema_length = len(cbor_to_bytes(self.schema)) - additional_byte = 0 # 2 byte CBOR header case + # Important! If the pad is less than 24 bytes the size of the cbor header + # will change during conversion to cbor and will occupy less than 2 bytes. + additional_byte = 0 + updated_pad_length = len(self.schema["pad"]) + difference - if schema_length < 24: # 1 byte CBOR header case - additional_byte = -1 - elif schema_length > 255: # 3 byte CBOR header case - additional_byte = 1 + # If a CBOR overflow is not handled, the extra length byte that + # would be added in this case will not be taken into account. + if updated_pad_length < 24: + additional_byte -= 1 - self.schema["pad"] = self.schema["pad"][difference + additional_byte :] + self.schema["pad"] = b"\x00" * updated_pad_length payload = self.get_payload_from_schema() From c62e2cb1e078e10595ae91090ec129c96d8c4948 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 10:15:36 +0500 Subject: [PATCH 15/57] chore: #80: rename serialized_length to serialized_cose_sign1_length so that make it more readability --- c2pie/c2pa/claim_signature.py | 6 +++++- tests/c2pa/claim_signature_test.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 2fb09ba..c4d9c47 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -64,7 +64,7 @@ def __init__( self.require_tsa = require_tsa self.tsa_log_dir = tsa_log_dir - self.serialized_length = 0 + self.serialized_cose_sign1_length = 0 content_boxes = self._generate_payload() @@ -152,6 +152,10 @@ def serialize_cose_sign1_tagged_with_alignment( self.serialized_length = len(cose_sign1_tagged_cbor) elif self.serialized_length != len(cose_sign1_tagged_cbor): difference = self.serialized_length - 
len(cose_sign1_tagged_cbor) + if self.serialized_cose_sign1_length == 0: + self.serialized_cose_sign1_length = len(cose_sign1_tagged_cbor) + elif self.serialized_cose_sign1_length != len(cose_sign1_tagged_cbor): + difference = self.serialized_cose_sign1_length - len(cose_sign1_tagged_cbor) if difference > len(cose_sign1[1]["pad"]): raise ValueError("Difference in length exceeds the predefined pad") diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index c1b0098..c90c20f 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -63,7 +63,7 @@ def test_create_claim_signature_with_non_empty_claim(): def test_serialization_cose_sign1_is_performed_with_alignment(): claim_signature = ClaimSignature.__new__(ClaimSignature) - claim_signature.serialized_length = 0 + claim_signature.serialized_cose_sign1_length = 0 cose_sign1 = [ "protected_header", @@ -76,7 +76,7 @@ def test_serialization_cose_sign1_is_performed_with_alignment(): serialized_cose_sign1_cbor_1 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) - assert claim_signature.serialized_length != 0 + assert claim_signature.serialized_cose_sign1_length != 0 assert cbor2.loads(serialized_cose_sign1_cbor_1).tag == 18 assert cbor2.loads(serialized_cose_sign1_cbor_1).value[1]["pad"] == cose_sign1[1]["pad"] From 0ef4d7b1cdc9eb3ea5a13a89336fca69a7415ee7 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 10:16:25 +0500 Subject: [PATCH 16/57] fix: #80: correct of a typo in the condition --- c2pie/c2pa/assertion.py | 2 +- c2pie/c2pa/claim_signature.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 10de072..4463e76 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -114,7 +114,7 @@ def add_full_c2pa_structure_exclusion( difference = previous_exclusion_lenght - 
current_exclusion_lenght - if difference > len(self.schema["pad"]): + if -difference > len(self.schema["pad"]): raise ValueError("Difference in length exceeds the predefined pad") # Important! If the pad is less than 24 bytes the size of the cbor header diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index c4d9c47..6c4657a 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -148,16 +148,12 @@ def serialize_cose_sign1_tagged_with_alignment( # The length of a TSA token can be variable. To ensure that a new token does not exceed # the exclusion boundary for the C2PA structure, we need to align the length of # the Claim Signature using the pad field, similar to the Data Hash Assertion. - if self.serialized_length == 0: - self.serialized_length = len(cose_sign1_tagged_cbor) - elif self.serialized_length != len(cose_sign1_tagged_cbor): - difference = self.serialized_length - len(cose_sign1_tagged_cbor) if self.serialized_cose_sign1_length == 0: self.serialized_cose_sign1_length = len(cose_sign1_tagged_cbor) elif self.serialized_cose_sign1_length != len(cose_sign1_tagged_cbor): difference = self.serialized_cose_sign1_length - len(cose_sign1_tagged_cbor) - if difference > len(cose_sign1[1]["pad"]): + if -difference > len(cose_sign1[1]["pad"]): raise ValueError("Difference in length exceeds the predefined pad") updated_pad_length = len(cose_sign1[1]["pad"]) + difference From ab941f2941c3f50620f93afc5f3da532225ff2ba Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 10:17:34 +0500 Subject: [PATCH 17/57] test: #80: add a test that checks for a ValueError exception if the difference exceeds the length of the pad --- .../assertions/data_hash_assertion_test.py | 23 +++++++++++++++++-- tests/c2pa/claim_signature_test.py | 18 +++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py 
b/tests/c2pa/assertions/data_hash_assertion_test.py index f89cd3d..ae62e31 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -1,3 +1,5 @@ +import pytest + from c2pie.c2pa.assertion import HashDataAssertion from c2pie.utils.assertion_schemas import C2PA_AssertionTypes, cbor_to_bytes from c2pie.utils.content_types import jumbf_content_types @@ -73,12 +75,29 @@ def test_hash_data_assertion_without_additional_exclusions_has_not_exclusions(): def test_set_hash_data_length_updates_exclusion(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - data_hash_assertion.add_full_c2pa_structure_exclusion(CAI_OFFSET, 200) + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + 200, + ) assert data_hash_assertion.schema["exclusions"][0]["length"] == 200 def test_set_hash_data_length_updates_content_box_payload(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - data_hash_assertion.add_full_c2pa_structure_exclusion(CAI_OFFSET, 200) + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + 200, + ) expected_payload = cbor_to_bytes(data_hash_assertion.schema) assert data_hash_assertion.content_boxes[0].payload == expected_payload + + +def test_align_hash_data_with_large_difference_causes_error(): + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + data_hash_assertion.schema["pad"] = b"\x00" + + with pytest.raises(ValueError, match="Difference in length exceeds the predefined pad"): + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + 200, + ) diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index c90c20f..5382bad 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -1,4 +1,5 @@ import cbor2 +import pytest from c2pie.c2pa.assertion import Assertion from c2pie.c2pa.assertion_store import AssertionStore @@ -105,3 +106,20 @@ def 
test_serialization_cose_sign1_is_performed_with_alignment(): serialized_cose_sign1_cbor_3 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) assert len(serialized_cose_sign1_cbor_1) == len(serialized_cose_sign1_cbor_3) + + +def test_align_cose_sign1_with_large_difference_causes_error(): + claim_signature = ClaimSignature.__new__(ClaimSignature) + claim_signature.serialized_cose_sign1_length = 1 + + cose_sign1 = [ + "protected_header", + { + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + }, + "payload", + "signature", + ] + + with pytest.raises(ValueError, match="Difference in length exceeds the predefined pad"): + claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) From 0edc64898741f7c6122d92d84d3aa431c87b9888 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Thu, 28 May 2026 05:21:26 +0000 Subject: [PATCH 18/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e9a49b8..5edda78 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-71.18%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-85.02%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.15%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
+[![coverage](https://img.shields.io/badge/e2e_coverage-70.75%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-85.07%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.17%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From e9a364d80068b94a3e71c4dc47427366269ada6e Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 10:26:20 +0500 Subject: [PATCH 19/57] docs: #80: add a clarifying comment for the additional_exclusions parameter in the Data Hash Assertion --- c2pie/c2pa/assertion.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 4463e76..e81bd66 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -70,12 +70,14 @@ class HashDataAssertion(Assertion): def __init__( self, hashed_data: bytes, - additional_exclusions: list[dict[str, int]] | None = None, + # TODO: Need to add handling for exclusions during + # hash calculation in order to use additional_exclusions + # additional_exclusions: list[dict[str, int]] | None = None, ): exclusions: list[dict[str, int]] = [] - if additional_exclusions: - exclusions.extend(additional_exclusions) + # if additional_exclusions: + # exclusions.extend(additional_exclusions) schema: dict[str, Any] = { "exclusions": exclusions, From b3f25df7111206019555529a79697a2721d409de Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 10:30:01 +0500 Subject: [PATCH 20/57] test: #80: add a test to verify the CBOR tag in COSE_Sign1_Tagged --- tests/c2pa/claim_signature_test.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index 5382bad..e26ae35 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -78,7 +78,6 @@ def test_serialization_cose_sign1_is_performed_with_alignment(): serialized_cose_sign1_cbor_1 = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) assert claim_signature.serialized_cose_sign1_length != 0 - assert cbor2.loads(serialized_cose_sign1_cbor_1).tag == 18 
assert cbor2.loads(serialized_cose_sign1_cbor_1).value[1]["pad"] == cose_sign1[1]["pad"] cose_sign1 = [ @@ -123,3 +122,21 @@ def test_align_cose_sign1_with_large_difference_causes_error(): with pytest.raises(ValueError, match="Difference in length exceeds the predefined pad"): claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + +def test_check(): + claim_signature = ClaimSignature.__new__(ClaimSignature) + claim_signature.serialized_cose_sign1_length = 0 + + cose_sign1 = [ + "protected_header", + { + "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + }, + "payload", + "signature", + ] + + serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + assert cbor2.loads(serialized_cose_sign1_cbor).tag == 18 From 3c342ae37afddaa7a94ba3e42dece80211dec44d Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 11:38:50 +0500 Subject: [PATCH 21/57] fix: #80: fix for an error related to the pad not changing when the CBOR boundary is violated --- c2pie/c2pa/assertion.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index e81bd66..2e06dcf 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -119,15 +119,14 @@ def add_full_c2pa_structure_exclusion( if -difference > len(self.schema["pad"]): raise ValueError("Difference in length exceeds the predefined pad") - # Important! If the pad is less than 24 bytes the size of the cbor header + # If the pad is less than 24 bytes the size of the cbor header # will change during conversion to cbor and will occupy less than 2 bytes. - additional_byte = 0 updated_pad_length = len(self.schema["pad"]) + difference # If a CBOR overflow is not handled, the extra length byte that # would be added in this case will not be taken into account. 
if updated_pad_length < 24: - additional_byte -= 1 + updated_pad_length -= 1 self.schema["pad"] = b"\x00" * updated_pad_length From ef985d710a9c85c891e0ee2da97abc76a394b5b5 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 11:40:20 +0500 Subject: [PATCH 22/57] test: #80: move the test for checking additional_exclusions to the appropriate file; comment them out --- tests/c2pa/assertions/assertion_test.py | 13 ----- .../assertions/data_hash_assertion_test.py | 53 ++++++++++++------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/c2pa/assertions/assertion_test.py b/tests/c2pa/assertions/assertion_test.py index bb77c73..823d443 100644 --- a/tests/c2pa/assertions/assertion_test.py +++ b/tests/c2pa/assertions/assertion_test.py @@ -77,16 +77,3 @@ def test_assertion_content_boxes_not_empty(): # noqa: F811 actions_assertion = Assertion(C2PA_AssertionTypes.actions, {}) assert len(actions_assertion.content_boxes) != 0 - -def test_additional_extensions_adding_for_hash_data_assertions(): - additional_exclusion = { - "start": 100, - "length": 1000, - } - - data_hash_assertion = HashDataAssertion( - hashed_data=b"", - additional_exclusions=[additional_exclusion], - ) - - assert additional_exclusion in data_hash_assertion.schema["exclusions"] diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index ae62e31..2b44b25 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -46,26 +46,39 @@ def test_hash_data_assertion_serializes_as_cbor(): assert data_hash_assertion.content_boxes[0].payload == expected_payload -def test_hash_data_assertion_with_additional_exclusions(): - additional = [ - { - "start": 100, - "length": 200, - }, - ] - - data_hash_assertion = HashDataAssertion( - hashed_data=HASHED_DATA, - additional_exclusions=additional, - ) - - exclusions = 
data_hash_assertion.schema["exclusions"] - - assert len(exclusions) == 1 - assert exclusions[0] == { - "start": 100, - "length": 200, - } +# def test_hash_data_assertion_with_additional_exclusions(): +# additional = [ +# { +# "start": 100, +# "length": 200, +# }, +# ] + +# data_hash_assertion = HashDataAssertion( +# hashed_data=HASHED_DATA, +# additional_exclusions=additional, +# ) + +# exclusions = data_hash_assertion.schema["exclusions"] + +# assert len(exclusions) == 1 +# assert exclusions[0] == { +# "start": 100, +# "length": 200, +# } + +# def test_additional_extensions_adding_for_hash_data_assertions(): +# additional_exclusion = { +# "start": 100, +# "length": 1000, +# } + +# data_hash_assertion = HashDataAssertion( +# hashed_data=b"", +# additional_exclusions=[additional_exclusion], +# ) + +# assert additional_exclusion in data_hash_assertion.schema["exclusions"] def test_hash_data_assertion_without_additional_exclusions_has_not_exclusions(): From 36ec0592cf13f100d6dd05d7521a30bd92dc3752 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Thu, 28 May 2026 06:43:15 +0000 Subject: [PATCH 23/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5edda78..9a9626d 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-70.75%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
-[![coverage](https://img.shields.io/badge/units_coverage-85.07%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.17%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-70.76%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-85.02%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.15%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 0b0d6786ff7fc609aa8f82410c061da5747d10c2 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 13:45:52 +0500 Subject: [PATCH 24/57] docs: #80: add a comment explaining how JPG_SEGMENT_MAX_PAYLOAD_LENGTH was determined --- c2pie/c2pa_injection/jpg_injection.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/c2pie/c2pa_injection/jpg_injection.py b/c2pie/c2pa_injection/jpg_injection.py index 957ede1..933d9c8 100644 --- a/c2pie/c2pa_injection/jpg_injection.py +++ b/c2pie/c2pa_injection/jpg_injection.py @@ -1,3 +1,11 @@ +# JPG_SEGMENT_MAX_PAYLOAD_LENGTH = +# 65535 (max segment length) +# - 2 (bytes of length) +# - 2 (bytes of CI) +# - 2 (bytes of EN) +# - 4 (bytes of Z) +# - 4 (bytes of LBox) +# - 4 (bytes of TBox) JPG_SEGMENT_MAX_PAYLOAD_LENGTH = 65517 From 6cdb5ac6bb5ee1af156c187dd24e12dd508bcfff Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 13:47:45 +0500 Subject: [PATCH 25/57] refactor: #80: move logic for emplacing the APP11 storage bytes into a separate method; --- c2pie/c2pa_injection/jpg_injection.py | 35 ++++++++++++++++++++++++ c2pie/interface.py | 38 +++++---------------------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/c2pie/c2pa_injection/jpg_injection.py b/c2pie/c2pa_injection/jpg_injection.py index 933d9c8..a25182b 100644 --- a/c2pie/c2pa_injection/jpg_injection.py +++ b/c2pie/c2pa_injection/jpg_injection.py @@ -1,3 +1,5 @@ +from c2pie.c2pa.manifest_store import ManifestStore + # JPG_SEGMENT_MAX_PAYLOAD_LENGTH = # 65535 (max segment length) # - 2 (bytes of length) @@ -106,3 +108,36 @@ def serialize(self): self.serialized_length = len(serialized_storage_data) return serialized_storage_data + + +def create_and_serialize_app11_storage( + manifest_store: ManifestStore, +) -> bytes: + serialized_manifest_store = manifest_store.serialize() + + app11_storage = JpgSegmentApp11Storage( + 
app11_segment_box_length=manifest_store.get_length(), + app11_segment_box_type=manifest_store.get_type(), + payload=serialized_manifest_store, + ) + + return app11_storage.serialize() + + +def emplace_manifest_into_jpeg( + content_bytes: bytes, + manifest_store: ManifestStore, + c2pa_offset: int, +) -> bytes: + serialized_app11_storage = create_and_serialize_app11_storage(manifest_store) + + serialized_app11_storage_lenght = len(serialized_app11_storage) + + manifest_store.add_full_c2pa_structure_exclusion( + c2pa_offset, + serialized_app11_storage_lenght, + ) + + tail = create_and_serialize_app11_storage(manifest_store) + + return content_bytes[:c2pa_offset] + tail + content_bytes[c2pa_offset:] diff --git a/c2pie/interface.py b/c2pie/interface.py index 9490516..ac1394f 100644 --- a/c2pie/interface.py +++ b/c2pie/interface.py @@ -12,7 +12,7 @@ from c2pie.c2pa.claim_signature import ClaimSignature from c2pie.c2pa.manifest import Manifest from c2pie.c2pa.manifest_store import ManifestStore -from c2pie.c2pa_injection.jpg_injection import JpgSegmentApp11Storage +from c2pie.c2pa_injection.jpg_injection import emplace_manifest_into_jpeg from c2pie.c2pa_injection.pdf_injection import emplace_manifest_into_pdf from c2pie.jumbf_boxes.box import Box from c2pie.utils.assertion_schemas import C2PA_AssertionTypes @@ -118,40 +118,16 @@ def c2pie_EmplaceManifest( manifest_store: ManifestStore, ) -> bytes: if format_type == C2PA_ContentTypes.jpg or format_type == C2PA_ContentTypes.jpeg: - serialized_manifest_store = manifest_store.serialize() - - app11_storage = JpgSegmentApp11Storage( - app11_segment_box_length=manifest_store.get_length(), - app11_segment_box_type=manifest_store.get_type(), - payload=serialized_manifest_store, - ) - - app11_storage.serialize() - - serialized_app11_storage_lenght = app11_storage.get_serialized_length() - - manifest_store.add_full_c2pa_structure_exclusion( + return emplace_manifest_into_jpeg( + content_bytes, + manifest_store, c2pa_offset, - 
serialized_app11_storage_lenght, ) - - serialized_manifest_store = manifest_store.serialize() - - app11_storage = JpgSegmentApp11Storage( - app11_segment_box_length=manifest_store.get_length(), - app11_segment_box_type=manifest_store.get_type(), - payload=serialized_manifest_store, - ) - - tail = app11_storage.serialize() - - return content_bytes[:c2pa_offset] + tail + content_bytes[c2pa_offset:] - - if format_type == C2PA_ContentTypes.pdf: + elif format_type == C2PA_ContentTypes.pdf: return emplace_manifest_into_pdf( content_bytes, manifest_store, c2pa_offset, ) - - raise ValueError(f"Unsupported content type {format_type}!") + else: + raise ValueError(f"Unsupported content type {format_type}!") From c9497920224265bcce5304dd9b667aff14b470d9 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Thu, 28 May 2026 08:52:21 +0000 Subject: [PATCH 26/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9a9626d..100acee 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-70.76%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-85.02%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.15%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
+[![coverage](https://img.shields.io/badge/e2e_coverage-70.75%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-84.96%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.07%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 849f3a6b3464013b40cbf3b1492387cc695cb783 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 14:20:31 +0500 Subject: [PATCH 27/57] refactor: #80: minimize pad size for unprotected header, since the difference is no more than 3 bytes --- c2pie/c2pa/claim_signature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 6c4657a..93a0a5d 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -131,7 +131,7 @@ def _generate_unprotected_header(self, serialized_sig_structure: bytes) -> bytes }, ], }, - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", } return unprotected_header From 9d3ea986e2828ebb932031fbbe8e23129b02802e Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 14:45:30 +0500 Subject: [PATCH 28/57] test: #80: update tests after changing pad length --- tests/c2pa/claim_signature_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index e26ae35..a87e636 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -69,7 +69,7 @@ def test_serialization_cose_sign1_is_performed_with_alignment(): cose_sign1 = [ "protected_header", { - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", }, "payload", "signature", @@ -83,7 +83,7 @@ def test_serialization_cose_sign1_is_performed_with_alignment(): cose_sign1 = [ "protected_header", { - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", }, "payload", "signature2", @@ -96,7 +96,7 @@ def test_serialization_cose_sign1_is_performed_with_alignment(): cose_sign1 = [ "protected_header", { - 
"pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", }, "", "signature", @@ -114,7 +114,7 @@ def test_align_cose_sign1_with_large_difference_causes_error(): cose_sign1 = [ "protected_header", { - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", }, "payload", "signature", @@ -131,7 +131,7 @@ def test_check(): cose_sign1 = [ "protected_header", { - "pad": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00\x00\x00\x00", }, "payload", "signature", From 09217a07cc8f006e41ea8f0ddd8e6cef76d20ebc Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 14:47:35 +0500 Subject: [PATCH 29/57] fix: #80: correction of a typo in the calculation of the additional length byte in the CBOR header --- c2pie/c2pa/claim_signature.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 93a0a5d..a31d521 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -161,9 +161,9 @@ def serialize_cose_sign1_tagged_with_alignment( # If a CBOR overflow is not handled, the extra length byte that # would be added in this case will not be taken into account. 
if updated_pad_length > 23: - difference += 1 + updated_pad_length += 1 elif updated_pad_length > 255: - difference += 2 + updated_pad_length += 2 cose_sign1[1]["pad"] = b"\x00" * updated_pad_length cose_sign1_tagged_cbor = cbor2.dumps( From 5f71f19c5467742d494bc9fc77f149216ea43d6c Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 16:34:52 +0500 Subject: [PATCH 30/57] refactor: #80: refactoring --- c2pie/c2pa/assertion.py | 6 +----- c2pie/c2pa/claim_signature.py | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 2e06dcf..64aa91e 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -83,11 +83,7 @@ def __init__( "exclusions": exclusions, "alg": "sha256", "hash": hashed_data, - "pad": b"" - + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "pad": b"\x00" * 64, } super().__init__( diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index a31d521..580f3be 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -131,7 +131,7 @@ def _generate_unprotected_header(self, serialized_sig_structure: bytes) -> bytes }, ], }, - "pad": b"\x00\x00\x00\x00", + "pad": b"\x00" * 4, } return unprotected_header @@ -162,8 +162,6 @@ def serialize_cose_sign1_tagged_with_alignment( # would be added in this case will not be taken into account. 
if updated_pad_length > 23: updated_pad_length += 1 - elif updated_pad_length > 255: - updated_pad_length += 2 cose_sign1[1]["pad"] = b"\x00" * updated_pad_length cose_sign1_tagged_cbor = cbor2.dumps( From 55793d34acc18c011c535c46ddc82445e383f261 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 16:35:34 +0500 Subject: [PATCH 31/57] test: #80: add tests to verify the calculation of the CBOR length in bytes --- .../assertions/data_hash_assertion_test.py | 39 ++++++++++++++++ tests/c2pa/claim_signature_test.py | 44 ++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 2b44b25..765b0f5 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -114,3 +114,42 @@ def test_align_hash_data_with_large_difference_causes_error(): CAI_OFFSET, 200, ) + + +def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + + # Example empty list (exclusions) serialized in CBOR: + # [] + # + # has 1 bytes in length + + # We need to set the exclusion so that the difference + # is less than 24 bytes: + # current pad - x <= 23 + # ~ 64 > x >= 41 (64 - 23) + + # Example not empty list (exclusions) serialized in CBOR: + # [ + # { + # "start": 2, + # "length": "", + # } + # ] + # + # has 17 bytes in length + + # not empty list - empty list = difference + # x - 1 = 41 + # x = 42 + # 42 - 17 - 1 (CBOR header additional byte, because pad > 23) = 24 + fake_payload = b"\x00" * 24 + + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + fake_payload, + ) + + # current pad - difference - additional_byte = aligned pad + # 64 - 41 = 23 - 1 (additional_byte) = 22 + assert len(data_hash_assertion.schema["pad"]) == 22 diff --git 
a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index a87e636..eddf75b 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -124,7 +124,7 @@ def test_align_cose_sign1_with_large_difference_causes_error(): claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) -def test_check(): +def test_cose_sign1_tagged_tag_value_is_18(): claim_signature = ClaimSignature.__new__(ClaimSignature) claim_signature.serialized_cose_sign1_length = 0 @@ -140,3 +140,45 @@ def test_check(): serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) assert cbor2.loads(serialized_cose_sign1_cbor).tag == 18 + + +def test_exceed_cbor_limit_add_1_bytes_to_lenght(): + claim_signature = ClaimSignature.__new__(ClaimSignature) + + # We must ensure that the difference is such + # that the pad size is greater than 23 bytes. + + # Current length of cose_sign1 serialized in CBOR is 47 bytes. + cose_sign1 = [ + "protected_header", + { + "pad": b"\x00" * 4, + }, + "payload", + "signature", + ] + + # cose_sign1 CBOR encoded + CBOR limit - current pad + # ~ 47 + 24 - 4 + claim_signature.serialized_cose_sign1_length = 67 + + serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + assert len(cbor2.loads(serialized_cose_sign1_cbor).value[1]["pad"]) == 25 + + # # We must ensure that the difference is such + # # that the pad size is greater than 255 bytes. + + # # Current length of cose_sign1 serialized in CBOR is 108 bytes. 
+ # cose_sign1 = [ + # "protected_header", + # { + # "pad": b"\x00" * 64, + # }, + # "payload", + # "signature", + # ] + + # serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) + + # assert len(cbor2.loads(serialized_cose_sign1_cbor).value[1]["pad"]) == 42 From 8e316a66c878d619ea9b6d24e2e97d98ba003941 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 16:37:10 +0500 Subject: [PATCH 32/57] docs: #80: add clarifying comments --- tests/c2pa/assertions/data_hash_assertion_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 765b0f5..e8bdf9b 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -126,7 +126,7 @@ def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): # We need to set the exclusion so that the difference # is less than 24 bytes: - # current pad - x <= 23 + # current pad - x (difference) <= 23 # ~ 64 > x >= 41 (64 - 23) # Example not empty list (exclusions) serialized in CBOR: @@ -141,7 +141,7 @@ def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): # not empty list - empty list = difference # x - 1 = 41 - # x = 42 + # x (not empty list) = 42 # 42 - 17 - 1 (CBOR header additional byte, because pad > 23) = 24 fake_payload = b"\x00" * 24 From dd14037852b6268e4a227e6a2eb20103d86aa849 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Thu, 28 May 2026 11:39:51 +0000 Subject: [PATCH 33/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 100acee..a6fd4c1 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ 
[![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-70.75%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-84.96%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.07%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-70.90%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-85.24%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.36%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 375074ac03d8620f80e5baed819a5dba0cfa99d7 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 16:41:23 +0500 Subject: [PATCH 34/57] fix: #80: formatting corrections --- tests/c2pa/assertions/assertion_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/c2pa/assertions/assertion_test.py b/tests/c2pa/assertions/assertion_test.py index 823d443..2de9004 100644 --- a/tests/c2pa/assertions/assertion_test.py +++ b/tests/c2pa/assertions/assertion_test.py @@ -76,4 +76,3 @@ def test_serialize_cbor_assertion(): def test_assertion_content_boxes_not_empty(): # noqa: F811 actions_assertion = Assertion(C2PA_AssertionTypes.actions, {}) assert len(actions_assertion.content_boxes) != 0 - From 778febfb2bb318f7be9d665aac0bfb26859d61d2 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 16:45:02 +0500 Subject: [PATCH 35/57] fix: #80: formatting corrections --- tests/c2pa/assertions/assertion_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/c2pa/assertions/assertion_test.py b/tests/c2pa/assertions/assertion_test.py index 2de9004..66d2795 100644 --- a/tests/c2pa/assertions/assertion_test.py +++ b/tests/c2pa/assertions/assertion_test.py @@ -1,4 +1,4 @@ -from c2pie.c2pa.assertion import Assertion, HashDataAssertion +from c2pie.c2pa.assertion import Assertion from c2pie.utils.assertion_schemas import C2PA_AssertionTypes, cbor_to_bytes, json_to_bytes from c2pie.utils.content_types import jumbf_content_types From 6517e4b26ca0dc26b55d4f10c9d5475c3b3b10cc Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 17:14:33 +0500 Subject: [PATCH 36/57] test: #80: add a stress test to verify the signature --- tests/c2pa/e2e_test.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/c2pa/e2e_test.py b/tests/c2pa/e2e_test.py 
index 2e1ba56..1c3058b 100644 --- a/tests/c2pa/e2e_test.py +++ b/tests/c2pa/e2e_test.py @@ -112,3 +112,39 @@ def test_e2e_signing_with_c2patool_validation(tmp_path): manifests_list = list(manifests.values()) assert manifests_list, "empty manifests list after normalization" + + +@pytest.mark.e2e +@pytest.mark.parametrize( + "iteration", + range(30), +) +def test_e2e_signature_stability( + iteration, + tmp_path, +): + if not has_c2patool(): + pytest.skip("c2patool not available") + + if not sign_file: + pytest.skip("sign_file function not available yet") + + for content_type in C2PA_ContentTypes: + input_file = tmp_path / f"in.{content_type.name}" + output_file = tmp_path / f"out.{content_type.name}" + + for test_file in test_files_by_extension[content_type.name]: + copy_test_file( + test_file, + input_file, + ) + + sign_file( + input_path=input_file, + output_path=output_file, + ) + + report = _validate_using_c2patool_and_return_json_report(output_file) + validation_state = report.get("validation_state") + + assert validation_state == "Valid" From 97cd1a4902850be99d1ebc271e14a66691fec2f7 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Thu, 28 May 2026 17:17:25 +0500 Subject: [PATCH 37/57] fix: #80: fix an error in the calculation of incorrect exclusions in broken pdf case --- c2pie/c2pa_injection/pdf_injection.py | 23 ++++++++++++++++++----- c2pie/signing.py | 2 ++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index 0375d08..dd8167e 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -63,6 +63,23 @@ def _xref_entry(offset: int) -> bytes: return f"{offset:010d} 00000 n \n".encode("ascii") +def prepare_pdf_bytes(content: bytes) -> bytes: + """ + Returns the PDF bytes ready for signing repaired via pypdf if the raw + bytes lack a parseable structure. 
+ + Must be called before hashing so that the hash and cai_offset are + computed against the same byte sequence that will be written to disk. + """ + try: + _scan_pdf_to_get_its_data(content) + return content + except ValueError: + repaired = _read_pdf_using_pypdf(content) + _scan_pdf_to_get_its_data(repaired) + return repaired + + def emplace_manifest_into_pdf( initial_content: bytes, manifest_store: ManifestStore, @@ -75,11 +92,7 @@ def emplace_manifest_into_pdf( - Exception c2pa.hash.data: start == len(initial_content), length == length of the entire tail (see C2PA 2.2). - Sign the claim, build the jumbf store, place it as EmbeddedFile, write xref/trailer correctly. """ - try: - info = _scan_pdf_to_get_its_data(initial_content) - except ValueError: - initial_content = _read_pdf_using_pypdf(initial_content=initial_content) - info = _scan_pdf_to_get_its_data(initial_content) + info = _scan_pdf_to_get_its_data(initial_content) initial_length_of_file = len(initial_content) pointer_on_previous_xref = info.startxref diff --git a/c2pie/signing.py b/c2pie/signing.py index 37ae4cc..1ed0df1 100644 --- a/c2pie/signing.py +++ b/c2pie/signing.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Literal +from c2pie.c2pa_injection.pdf_injection import prepare_pdf_bytes from c2pie.c2pa_parsing.jumbf_parsing import extract_manifest_boxes, get_active_manifest_uuid from c2pie.c2pa_parsing.manifest_extractor import extract_manifest_store_bytes from c2pie.interface import ( @@ -138,6 +139,7 @@ def sign_file( file_type: C2PA_ContentTypes = _get_content_type_by_filepath(input_path) if file_type.name == "pdf": + raw_bytes = prepare_pdf_bytes(raw_bytes) cai_offset = len(raw_bytes) else: cai_offset = 2 From ffd70bf6833eb18f4c5b6af3bdd9c99b2a03cdf1 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Thu, 28 May 2026 12:21:58 +0000 Subject: [PATCH 38/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/README.md b/README.md index a6fd4c1..e04328e 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-70.90%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-85.24%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.36%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-71.08%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-84.82%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.41%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 1055767c4a880324c325403d412b867eeedb68e4 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Fri, 29 May 2026 08:21:58 +0500 Subject: [PATCH 39/57] chore: #80: remove unused file --- c2pie/c2pa/config.py | 1 - 1 file changed, 1 deletion(-) delete mode 100644 c2pie/c2pa/config.py diff --git a/c2pie/c2pa/config.py b/c2pie/c2pa/config.py deleted file mode 100644 index f2b0abc..0000000 --- a/c2pie/c2pa/config.py +++ /dev/null @@ -1 +0,0 @@ -RETRY_SIGNATURE = 8 # max retries, might be terminated earliers From ed82a61f453ed5bff74fa126035b8f0dd6479317 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Fri, 29 May 2026 03:26:08 +0000 Subject: [PATCH 40/57] docs(readme): bring test coverage score up to date --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e04328e..a700715 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-71.08%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-84.82%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/full_coverage-92.41%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-71.15%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
+[![coverage](https://img.shields.io/badge/units_coverage-84.91%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/full_coverage-92.51%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/)
From 218b1a31ff09eed5f02218e1e05000d08a93e86b Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:50:18 +0500 Subject: [PATCH 41/57] docs: #80: add clarifying comment about additional byte of length for serialized exclusions array --- c2pie/c2pa/assertion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 64aa91e..ce2205a 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -108,6 +108,9 @@ def add_full_c2pa_structure_exclusion( ] ) + # NOTE: If the number of exclusions exceeds 23, an additional length byte + # will be added to the CBOR header of serialized exclusions array. This byte + # is included in the recalculation of the serialized exclusions. current_exclusion_lenght = len(cbor_to_bytes(exclusions)) difference = previous_exclusion_lenght - current_exclusion_lenght From 388f10ac5123aa99688986aa01da30fe1194fd44 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:51:29 +0500 Subject: [PATCH 42/57] test: #80: add test to check that the redundant length byte was removed from pad when exclusions array length exceeds 23 --- .../c2pa/assertions/data_hash_assertion_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index e8bdf9b..f126d55 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -153,3 +153,20 @@ def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): # current pad - difference - additional_byte = aligned pad # 64 - 41 = 23 - 1 (additional_byte) = 22 assert len(data_hash_assertion.schema["pad"]) == 22 + + +def test_data_hash_assertion_exclusions_more_then_23(): + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + data_hash_assertion.schema = {
+ "exclusions": [{"start": 0, "length": 0}] * 23, + "alg": "sha256", + "hash": b"\x00\x00\x00", + "pad": b"\x00" * 64, + } + + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + 0, + ) + + assert len(data_hash_assertion.schema["pad"]) == 47 From 743808f7021fa4473540d54fd81ca2013831f122 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 13:03:29 +0500 Subject: [PATCH 43/57] test: #80: change hash in the test schema override for Data Hash Assertion --- tests/c2pa/assertions/data_hash_assertion_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index f126d55..8f511a7 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -160,7 +160,7 @@ def test_data_hash_assertion_exclusions_more_then_23(): data_hash_assertion.schema = { "exclusions": [{"start": 0, "length": 0}] * 23, "alg": "sha256", - "hash": b"\x00\x00\x00", + "hash": HASHED_DATA, "pad": b"\x00" * 64, } From 363151ebe2960a6db261d48af1a478a791f0d841 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:16:04 +0500 Subject: [PATCH 44/57] fix: #80: fix for error caused by incorrect length specified in the pdf object --- c2pie/c2pa_injection/pdf_injection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index dd8167e..10c9423 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -212,6 +212,7 @@ def emplace_manifest_into_pdf( ) serialized_manifest_store = manifest_store.serialize() + serialized_manifest_store_lenght = len(serialized_manifest_store) object_1 = ( f"{starting_value} 0 obj\n".encode("ascii") From 5538e429a2cdb021f6f4689196b5e9afdedc1a6f Mon Sep 
17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:53:14 +0500 Subject: [PATCH 45/57] docs: #80: add clarifying comments for value stored in pad field in Data Hash Assertion and unprotected header --- c2pie/c2pa/assertion.py | 3 +++ c2pie/c2pa/claim_signature.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index ce2205a..85e6206 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -83,6 +83,9 @@ def __init__( "exclusions": exclusions, "alg": "sha256", "hash": hashed_data, + # The specification recommends setting the pad to at least 16 bytes. We use 64 bytes + # to allow for some extra space before the 23-byte limit is exceeded, since otherwise + # the CBOR header of the pad field would be reduced by 1 byte. "pad": b"\x00" * 64, } diff --git a/c2pie/c2pa/claim_signature.py b/c2pie/c2pa/claim_signature.py index 580f3be..eb72b68 100644 --- a/c2pie/c2pa/claim_signature.py +++ b/c2pie/c2pa/claim_signature.py @@ -131,7 +131,10 @@ def _generate_unprotected_header(self, serialized_sig_structure: bytes) -> bytes }, ], }, - "pad": b"\x00" * 4, + # The specification recommends setting the pad to at least 16 bytes. NOTE(review): the pad + # below is 8 bytes, not 64 bytes as in the Data Hash Assertion; confirm the intended size. A pad + # of up to 23 bytes has a 1-byte CBOR header; a longer pad needs an additional length byte.
+ "pad": b"\x00" * 8, } return unprotected_header From fb6cbe138d8a2342e7dd1c7091551f7d1b326d56 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:54:13 +0500 Subject: [PATCH 46/57] test: #80: update test after changing value of pad field in unprotected header --- tests/c2pa/claim_signature_test.py | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index eddf75b..0ce23ff 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -148,37 +148,20 @@ def test_exceed_cbor_limit_add_1_bytes_to_lenght(): # We must ensure that the difference is such # that the pad size is greater than 23 bytes. - # Current length of cose_sign1 serialized in CBOR is 47 bytes. + # Current length of cose_sign1 serialized in CBOR is 50 bytes. cose_sign1 = [ "protected_header", { - "pad": b"\x00" * 4, + "pad": b"\x00" * 8, }, "payload", "signature", ] - # cose_sign1 CBOR encoded + CBOR limit - current pad - # ~ 47 + 24 - 4 + # cose_sign1 CBOR encoded + CBOR limit - current pad + 1 (COSE tag) + # ~ 50 + 24 - 8 + 1 claim_signature.serialized_cose_sign1_length = 67 serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) assert len(cbor2.loads(serialized_cose_sign1_cbor).value[1]["pad"]) == 25 - - # # We must ensure that the difference is such - # # that the pad size is greater than 255 bytes. - - # # Current length of cose_sign1 serialized in CBOR is 108 bytes. 
- # cose_sign1 = [ - # "protected_header", - # { - # "pad": b"\x00" * 64, - # }, - # "payload", - # "signature", - # ] - - # serialized_cose_sign1_cbor = claim_signature.serialize_cose_sign1_tagged_with_alignment(cose_sign1) - - # assert len(cbor2.loads(serialized_cose_sign1_cbor).value[1]["pad"]) == 42 From c5c0a10a900ab00ce227b67afac6de8e3faee9a1 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:56:17 +0500 Subject: [PATCH 47/57] chore: #80: fix a typo --- c2pie/c2pa/assertion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 85e6206..9303457 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -100,7 +100,7 @@ def add_full_c2pa_structure_exclusion( length: int, ) -> None: exclusions = self.schema["exclusions"] - previous_exclusion_lenght = len(cbor_to_bytes(exclusions)) + previous_exclusion_length = len(cbor_to_bytes(exclusions)) self.schema["exclusions"].extend( [ @@ -114,9 +114,9 @@ def add_full_c2pa_structure_exclusion( # NOTE: If the number of exclusions exceeds 23, an additional length byte # will be added to the CBOR header of serialized exclusions array. This byte # is included in the recalculation of the serialized exclusions. 
- current_exclusion_lenght = len(cbor_to_bytes(exclusions)) + current_exclusion_length = len(cbor_to_bytes(exclusions)) - difference = previous_exclusion_lenght - current_exclusion_lenght + difference = previous_exclusion_length - current_exclusion_length if -difference > len(self.schema["pad"]): raise ValueError("Difference in length exceeds the predefined pad") From f6f130fbc13e336a83bb9ad2ad6a16ee02e72362 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:59:23 +0500 Subject: [PATCH 48/57] docs: #80: add number of issue to todo comment for additional_exclusions --- c2pie/c2pa/assertion.py | 2 +- c2pie/interface.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 9303457..9dd3268 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -70,7 +70,7 @@ class HashDataAssertion(Assertion): def __init__( self, hashed_data: bytes, - # TODO: Need to add handling for exclusions during + # TODO: #91: Need to add handling for exclusions during # hash calculation in order to use additional_exclusions # additional_exclusions: list[dict[str, int]] | None = None, ): diff --git a/c2pie/interface.py b/c2pie/interface.py index ac1394f..029e257 100644 --- a/c2pie/interface.py +++ b/c2pie/interface.py @@ -73,7 +73,7 @@ def c2pie_GenerateManifestStore( private_key: bytes, certificate_chain: bytes, file_name: str, - # TODO: #66 : move that variables to configfile + # TODO: #66: move that variables to configfile tsa_url: str | None, require_tsa: bool, tsa_log_dir: str | None, From ef646b48e36c90743c4058b67436abb70586bc1e Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:09:38 +0500 Subject: [PATCH 49/57] test: #80: add test to check that pad calculation was performed correctly --- .../assertions/data_hash_assertion_test.py | 18 ++++++++++++------ 1 file changed,
12 insertions(+), 6 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 8f511a7..ec3206a 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -157,12 +157,7 @@ def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): def test_data_hash_assertion_exclusions_more_then_23(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - data_hash_assertion.schema = { - "exclusions": [{"start": 0, "length": 0}] * 23, - "alg": "sha256", - "hash": HASHED_DATA, - "pad": b"\x00" * 64, - } + data_hash_assertion.schema["exclusions"] = [{"start": 0, "length": 0}] * 23 data_hash_assertion.add_full_c2pa_structure_exclusion( CAI_OFFSET, @@ -170,3 +165,14 @@ def test_data_hash_assertion_exclusions_more_then_23(): ) assert len(data_hash_assertion.schema["pad"]) == 47 + + +def test_calculation_of_pad_inside_data_hash_assertion_was_performed_correctly(): + data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) + + data_hash_assertion.add_full_c2pa_structure_exclusion( + CAI_OFFSET, + 0, + ) + + assert len(data_hash_assertion.schema["pad"]) == 48 From 3a3a0b65496ed87313b5c5084c838cbbe4edeff4 Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Mon, 1 Jun 2026 11:14:02 +0000 Subject: [PATCH 50/57] docs(readme): bring test coverage score up to date --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a700715..728db56 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) 
-[![coverage](https://img.shields.io/badge/e2e_coverage-71.15%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) -[![coverage](https://img.shields.io/badge/units_coverage-84.91%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-71.18%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-84.92%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![coverage](https://img.shields.io/badge/full_coverage-92.51%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/) From d3ea1d8093c81d01968dbcf15ba3188629377804 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 2 Jun 2026 08:51:24 +0500 Subject: [PATCH 51/57] fix: #80: remove redundant call of _scan_pdf_to_get_its_data() --- c2pie/c2pa_injection/pdf_injection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index 10c9423..4f9ccf7 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -76,7 +76,6 @@ def prepare_pdf_bytes(content: bytes) -> bytes: return content except ValueError: repaired = _read_pdf_using_pypdf(content) - _scan_pdf_to_get_its_data(repaired) return repaired From e7941d1874d226f5fa39270ee7f56b9acfc9b049 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 2 Jun 2026 08:53:45 +0500 Subject: [PATCH 52/57] fix: #80: fix a typo --- 
c2pie/c2pa_injection/jpg_injection.py | 4 ++-- c2pie/c2pa_injection/pdf_injection.py | 8 ++++---- tests/c2pa/assertions/data_hash_assertion_test.py | 4 ++-- tests/c2pa/claim_signature_test.py | 2 +- tests/c2pa/interface_test.py | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/c2pie/c2pa_injection/jpg_injection.py b/c2pie/c2pa_injection/jpg_injection.py index a25182b..fc1f755 100644 --- a/c2pie/c2pa_injection/jpg_injection.py +++ b/c2pie/c2pa_injection/jpg_injection.py @@ -131,11 +131,11 @@ def emplace_manifest_into_jpeg( ) -> bytes: serialized_app11_storage = create_and_serialize_app11_storage(manifest_store) - serialized_app11_storage_lenght = len(serialized_app11_storage) + serialized_app11_storage_length = len(serialized_app11_storage) manifest_store.add_full_c2pa_structure_exclusion( c2pa_offset, - serialized_app11_storage_lenght, + serialized_app11_storage_length, ) tail = create_and_serialize_app11_storage(manifest_store) diff --git a/c2pie/c2pa_injection/pdf_injection.py b/c2pie/c2pa_injection/pdf_injection.py index 4f9ccf7..6af43a9 100644 --- a/c2pie/c2pa_injection/pdf_injection.py +++ b/c2pie/c2pa_injection/pdf_injection.py @@ -104,11 +104,11 @@ def emplace_manifest_into_pdf( serialized_manifest_store = manifest_store.serialize() - serialized_manifest_store_lenght = len(serialized_manifest_store) + serialized_manifest_store_length = len(serialized_manifest_store) object_1 = ( f"{starting_value} 0 obj\n".encode("ascii") - + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_lenght} >>\n".encode("ascii") + + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_length} >>\n".encode("ascii") + b"stream\n" + serialized_manifest_store + b"\nendstream\nendobj\n" @@ -211,11 +211,11 @@ def emplace_manifest_into_pdf( ) serialized_manifest_store = manifest_store.serialize() - serialized_manifest_store_lenght = len(serialized_manifest_store) + serialized_manifest_store_length = 
len(serialized_manifest_store) object_1 = ( f"{starting_value} 0 obj\n".encode("ascii") - + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_lenght} >>\n".encode("ascii") + + f"<< /Type /EmbeddedFile /Subtype {subtype} /Length {serialized_manifest_store_length} >>\n".encode("ascii") + b"stream\n" + serialized_manifest_store + b"\nendstream\nendobj\n" diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index ec3206a..cdbbfa6 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -28,7 +28,7 @@ def test_hash_data_assertion_schema_alg_is_sha256(): assert data_hash_assertion.schema["alg"] == "sha256" -def test_hash_data_assertion_schema_pad_is_64_bytes_lenght(): +def test_hash_data_assertion_schema_pad_is_64_bytes_length(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert data_hash_assertion.schema["pad"] == b"\x00" * 64 @@ -116,7 +116,7 @@ def test_align_hash_data_with_large_difference_causes_error(): ) -def test_exceed_cbor_23_bytes_limit_add_1_byte_to_lenght(): +def test_exceed_cbor_23_bytes_limit_add_1_byte_to_length(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) # Example empty list (exclusions) serialized in CBOR: diff --git a/tests/c2pa/claim_signature_test.py b/tests/c2pa/claim_signature_test.py index 0ce23ff..955666f 100644 --- a/tests/c2pa/claim_signature_test.py +++ b/tests/c2pa/claim_signature_test.py @@ -142,7 +142,7 @@ def test_cose_sign1_tagged_tag_value_is_18(): assert cbor2.loads(serialized_cose_sign1_cbor).tag == 18 -def test_exceed_cbor_limit_add_1_bytes_to_lenght(): +def test_exceed_cbor_limit_add_1_bytes_to_length(): claim_signature = ClaimSignature.__new__(ClaimSignature) # We must ensure that the difference is such diff --git a/tests/c2pa/interface_test.py b/tests/c2pa/interface_test.py index 29a417f..e124aba 100644 --- 
a/tests/c2pa/interface_test.py +++ b/tests/c2pa/interface_test.py @@ -207,7 +207,7 @@ def test_calculated_exclusion_covers_the_full_storage(file): More info about APP11 segment you can see here: docs/JPG-structure-overview.md """ - expected_serialized_lenght = 2 + 2 + 2 + 2 + 4 + len(manifest_store.serialize()) + expected_serialized_length = 2 + 2 + 2 + 2 + 4 + len(manifest_store.serialize()) elif file_extension == C2PA_ContentTypes.pdf: """ Expected length of serialized data in PDF format consists @@ -215,7 +215,7 @@ def test_calculated_exclusion_covers_the_full_storage(file): More info about PDF Incremental Update you can see here: docs/PDF-structure-overview.md """ - expected_serialized_lenght = 7148 + expected_serialized_length = 7148 with patch("c2pie.c2pa.manifest_store.ManifestStore.add_full_c2pa_structure_exclusion") as mock_func: c2pie_EmplaceManifest( @@ -227,4 +227,4 @@ def test_calculated_exclusion_covers_the_full_storage(file): last_call = mock_func.call_args - assert expected_serialized_lenght == last_call.args[1] + assert expected_serialized_length == last_call.args[1] From 4b5e6c102f3b96a3d7c62e7633d95b018f38b68d Mon Sep 17 00:00:00 2001 From: Workflow Action Date: Tue, 2 Jun 2026 03:58:11 +0000 Subject: [PATCH 53/57] docs(readme): bring test coverage score up to date --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 728db56..599fb38 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ [![Linting](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml/badge.svg?branch=develop)](https://github.com/TourmalineCore/c2pie/actions/workflows/lint-on-pull-request.yml) [![c2pa](https://img.shields.io/badge/c2pa-v1.4-seagreen.svg)](https://c2pa.org/) -[![coverage](https://img.shields.io/badge/e2e_coverage-71.18%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) 
-[![coverage](https://img.shields.io/badge/units_coverage-84.92%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/e2e_coverage-71.15%25-yellow)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) +[![coverage](https://img.shields.io/badge/units_coverage-85.01%25-olivedrab)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![coverage](https://img.shields.io/badge/full_coverage-92.51%25-forestgreen)](https://github.com/TourmalineCore/c2pie/actions/workflows/calculate-tests-coverage-on-pull-request.yml) [![latest](https://img.shields.io/pypi/v/c2pie?label=latest&colorB=fc8021)](https://pypi.org/project/c2pie/) From 727c1815597703a0dde4c4d71b972a4e02fe22e0 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov <106321977+Infindery@users.noreply.github.com> Date: Tue, 2 Jun 2026 09:04:58 +0500 Subject: [PATCH 54/57] refactor: #80: change variable name from FIXTURES_DIR to TEST_FILES_DIR --- tests/c2pa/e2e_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/c2pa/e2e_test.py b/tests/c2pa/e2e_test.py index 1c3058b..f7d9ead 100644 --- a/tests/c2pa/e2e_test.py +++ b/tests/c2pa/e2e_test.py @@ -8,7 +8,7 @@ from c2pie.signing import sign_file from c2pie.utils.content_types import C2PA_ContentTypes -FIXTURES_DIR = Path(__file__).parent.parent / "test_files" +TEST_FILES_DIR = Path(__file__).parent.parent / "test_files" test_files_by_extension = { "pdf": [ @@ -25,8 +25,7 @@ def get_test_file_full_path(filename: str) -> Path: - path = FIXTURES_DIR / filename - + path = TEST_FILES_DIR / filename if not path.exists(): raise FileNotFoundError(f"Fixture not found: {path}") From 0816a36a7532be4a89eae86e411b1b44dc49ae08 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov Date: Wed, 3 Jun 2026 08:25:36 +0500 Subject: [PATCH 55/57] refactor: #80: remove 
of the unused additional_exclusions parameter and related tests --- c2pie/c2pa/assertion.py | 6 ---- .../assertions/data_hash_assertion_test.py | 35 ------------------- 2 files changed, 41 deletions(-) diff --git a/c2pie/c2pa/assertion.py b/c2pie/c2pa/assertion.py index 9dd3268..76bad2e 100644 --- a/c2pie/c2pa/assertion.py +++ b/c2pie/c2pa/assertion.py @@ -70,15 +70,9 @@ class HashDataAssertion(Assertion): def __init__( self, hashed_data: bytes, - # TODO: #91: Need to add handling for exclusions during - # hash calculation in order to use additional_exclusions - # additional_exclusions: list[dict[str, int]] | None = None, ): exclusions: list[dict[str, int]] = [] - # if additional_exclusions: - # exclusions.extend(additional_exclusions) - schema: dict[str, Any] = { "exclusions": exclusions, "alg": "sha256", diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index cdbbfa6..1a63557 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -46,41 +46,6 @@ def test_hash_data_assertion_serializes_as_cbor(): assert data_hash_assertion.content_boxes[0].payload == expected_payload -# def test_hash_data_assertion_with_additional_exclusions(): -# additional = [ -# { -# "start": 100, -# "length": 200, -# }, -# ] - -# data_hash_assertion = HashDataAssertion( -# hashed_data=HASHED_DATA, -# additional_exclusions=additional, -# ) - -# exclusions = data_hash_assertion.schema["exclusions"] - -# assert len(exclusions) == 1 -# assert exclusions[0] == { -# "start": 100, -# "length": 200, -# } - -# def test_additional_extensions_adding_for_hash_data_assertions(): -# additional_exclusion = { -# "start": 100, -# "length": 1000, -# } - -# data_hash_assertion = HashDataAssertion( -# hashed_data=b"", -# additional_exclusions=[additional_exclusion], -# ) - -# assert additional_exclusion in data_hash_assertion.schema["exclusions"] - - def 
test_hash_data_assertion_without_additional_exclusions_has_not_exclusions(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) assert len(data_hash_assertion.schema["exclusions"]) == 0 From 00d1f3cd87849e07b1c832e4d9eec02ca86256b7 Mon Sep 17 00:00:00 2001 From: Artem Sheptunov Date: Wed, 3 Jun 2026 10:44:00 +0500 Subject: [PATCH 56/57] docs: #80: refactor clarifying test comment --- .../assertions/data_hash_assertion_test.py | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 1a63557..0bceac6 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -84,39 +84,25 @@ def test_align_hash_data_with_large_difference_causes_error(): def test_exceed_cbor_23_bytes_limit_add_1_byte_to_length(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - # Example empty list (exclusions) serialized in CBOR: - # [] - # - # has 1 bytes in length - - # We need to set the exclusion so that the difference - # is less than 24 bytes: - # current pad - x (difference) <= 23 - # ~ 64 > x >= 41 (64 - 23) - - # Example not empty list (exclusions) serialized in CBOR: - # [ - # { - # "start": 2, - # "length": "", - # } - # ] + # Empty list (default value) of exclusions serialized + # in CBOR has 1 bytes in length. + + # Following list of exclusions serialized in CBOR: + # [{ "start": 2, "length": "", }] # # has 17 bytes in length - # not empty list - empty list = difference - # x - 1 = 41 - # x (not empty list) = 42 - # 42 - 17 - 1 (CBOR header additional byte, because pad > 23) = 24 - fake_payload = b"\x00" * 24 + # We need to set the exclusion size so that the difference + # is greater than 41 bytes (64 - 23 = 41). 
+ + fake_payload = b"\x00" * 24 # 41 - 17 = 24 data_hash_assertion.add_full_c2pa_structure_exclusion( CAI_OFFSET, fake_payload, ) - # current pad - difference - additional_byte = aligned pad - # 64 - 41 = 23 - 1 (additional_byte) = 22 + # Don`t forget about additional byte assert len(data_hash_assertion.schema["pad"]) == 22 From 4e64b601d33f4b650ada70e833291f9a973eeffc Mon Sep 17 00:00:00 2001 From: Artem Sheptunov Date: Wed, 3 Jun 2026 11:37:45 +0500 Subject: [PATCH 57/57] refactor: #80: formatting changes --- tests/c2pa/assertions/data_hash_assertion_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/c2pa/assertions/data_hash_assertion_test.py b/tests/c2pa/assertions/data_hash_assertion_test.py index 0bceac6..b903819 100644 --- a/tests/c2pa/assertions/data_hash_assertion_test.py +++ b/tests/c2pa/assertions/data_hash_assertion_test.py @@ -84,7 +84,7 @@ def test_align_hash_data_with_large_difference_causes_error(): def test_exceed_cbor_23_bytes_limit_add_1_byte_to_length(): data_hash_assertion = HashDataAssertion(hashed_data=HASHED_DATA) - # Empty list (default value) of exclusions serialized + # Empty list (default value) of exclusions serialized # in CBOR has 1 bytes in length. # Following list of exclusions serialized in CBOR: @@ -92,10 +92,10 @@ def test_exceed_cbor_23_bytes_limit_add_1_byte_to_length(): # # has 17 bytes in length - # We need to set the exclusion size so that the difference + # We need to set the exclusion size so that the difference # is greater than 41 bytes (64 - 23 = 41). - fake_payload = b"\x00" * 24 # 41 - 17 = 24 + fake_payload = b"\x00" * 24 # 41 - 17 = 24 data_hash_assertion.add_full_c2pa_structure_exclusion( CAI_OFFSET,