diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b0867f..9bc97c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,8 @@ jobs: steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -72,6 +74,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Install Rust 1.88 uses: dtolnay/rust-toolchain@1.88 @@ -104,6 +108,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -122,6 +128,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -243,6 +251,8 @@ jobs: - "signatures,encryption" steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Install Rust uses: dtolnay/rust-toolchain@stable diff --git a/.github/workflows/spec-check.yml b/.github/workflows/spec-check.yml new file mode 100644 index 0000000..18ef098 --- /dev/null +++ b/.github/workflows/spec-check.yml @@ -0,0 +1,32 @@ +name: Spec Staleness Check + +on: + schedule: + - cron: '0 9 * * 1' # Weekly, Monday 9am UTC + workflow_dispatch: {} + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Check for upstream spec updates + run: | + cd spec + git fetch origin main + LOCAL=$(git rev-parse HEAD) + REMOTE=$(git rev-parse origin/main) + if [ "$LOCAL" != "$REMOTE" ]; then + BEHIND=$(git rev-list --count HEAD..origin/main) + echo "::warning::Spec submodule is $BEHIND commits behind upstream" + gh issue create --title "Spec submodule is $BEHIND commits behind" \ + --body "The spec submodule is pinned at \`$LOCAL\` but upstream is at \`$REMOTE\` ($BEHIND commits behind).\n\nRun:\n\`\`\`\ngit submodule update --remote spec\n\`\`\`" \ + --label "dependencies" || true + else + echo "Spec submodule is up to date" + fi + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..00e4df6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "spec"] + path = spec + url = https://github.com/Entrolution/codex-file-format-spec.git diff --git a/cdx-core/Cargo.toml b/cdx-core/Cargo.toml index 213e072..5e342d8 100644 --- a/cdx-core/Cargo.toml +++ b/cdx-core/Cargo.toml @@ -110,6 +110,7 @@ tempfile = "3.14" pretty_assertions = "1.4" proptest = "1.5" criterion = { version = "0.8", features = ["html_reports"] } +jsonschema = "0.28" [[bench]] name = "document" diff --git a/cdx-core/tests/conformance.rs b/cdx-core/tests/conformance.rs index 32dd6b9..8ec6f6f 100644 --- a/cdx-core/tests/conformance.rs +++ b/cdx-core/tests/conformance.rs @@ -91,108 +91,9 @@ fn mixed_mark_array_deserializes() { assert_eq!(marks[2], Mark::Italic); } -#[test] -fn extension_mark_serializes_without_wrapper() { - use cdx_core::content::ExtensionMark; - - let mark = Mark::Extension(ExtensionMark::citation("smith2023")); - let json = serde_json::to_string(&mark).unwrap(); - let val: serde_json::Value = serde_json::from_str(&json).unwrap(); - - // Type should be "semantic:citation", not "extension" - assert_eq!(val["type"], "semantic:citation"); - assert_eq!(val["refs"], serde_json::json!(["smith2023"])); - - // Should NOT have wrapper fields - assert!(val.get("namespace").is_none()); - assert!(val.get("markType").is_none()); - // Should NOT have old singular "ref" - assert!(val.get("ref").is_none()); -} - -#[test] -fn extension_mark_deserializes_new_format() { - let json = r#"{"type":"semantic:citation","refs":["smith2023"]}"#; - let mark: Mark = serde_json::from_str(json).unwrap(); - - if let Mark::Extension(ext) = &mark { - assert_eq!(ext.namespace, "semantic"); - assert_eq!(ext.mark_type, "citation"); - assert_eq!( - ext.get_string_array_attribute("refs"), - Some(vec!["smith2023"]) - ); - } else { - panic!("Expected Extension mark, got {mark:?}"); - } -} - -#[test] -fn extension_mark_deserializes_old_format() { - // Backward compat: old "extension" wrapper format - let json = r#"{"type":"extension","namespace":"semantic","markType":"citation","attributes":{"ref":"smith2023"}}"#; - let mark: Mark = serde_json::from_str(json).unwrap(); - - if let Mark::Extension(ext) = &mark { - assert_eq!(ext.namespace, "semantic"); - assert_eq!(ext.mark_type, "citation"); - // Old format preserves "ref" as-is in attributes; use get_citation_refs for compat - assert_eq!(ext.get_citation_refs(), Some(vec!["smith2023"])); - } else { - panic!("Expected Extension mark, got {mark:?}"); - } -} - -#[test] -fn citation_mark_backward_compat_singular_ref() { - // Old format with singular "ref" string - let json = r#"{"type":"semantic:citation","ref":"smith2023"}"#; - let mark: Mark = serde_json::from_str(json).unwrap(); - - if let Mark::Extension(ext) = &mark { - assert_eq!(ext.get_citation_refs(), Some(vec!["smith2023"])); - } else { - panic!("Expected Extension mark, got {mark:?}"); - } -} - -#[test] -fn citation_mark_multi_refs_roundtrip() { - use cdx_core::content::ExtensionMark; - - let refs = vec!["smith2023".into(), "jones2024".into()]; - let mark = Mark::Extension(ExtensionMark::multi_citation(&refs)); - - let json = serde_json::to_string(&mark).unwrap(); - let val: serde_json::Value = serde_json::from_str(&json).unwrap(); - assert_eq!(val["refs"], serde_json::json!(["smith2023", "jones2024"])); - - // Round-trip - let parsed: Mark = serde_json::from_str(&json).unwrap(); - if let Mark::Extension(ext) = &parsed { - assert_eq!( - ext.get_string_array_attribute("refs"), - Some(vec!["smith2023", "jones2024"]) - ); - } else { - panic!("Expected Extension mark"); - } -} - -#[test] -fn citation_mark_normalize_attrs() { - use cdx_core::content::ExtensionMark; - - let mut ext = ExtensionMark::new("semantic", "citation") - .with_attributes(serde_json::json!({"ref": "smith2023"})); - ext.normalize_citation_attrs(); - - assert_eq!( - ext.get_string_array_attribute("refs"), - Some(vec!["smith2023"]) - ); - assert!(ext.get_string_attribute("ref").is_none()); -} +// NOTE: Extension mark serialization, deserialization, and backward-compat +// tests for citations and glossary have been consolidated into +// tests/spec_compliance.rs (mark_schema_validation + backward_compatibility). #[test] fn math_mark_uses_source_field() { @@ -326,32 +227,7 @@ fn spec_example_text_with_bold_string_mark() { assert_eq!(output_val, spec_val); } -#[test] -fn spec_example_text_with_citation_mark() { - // Spec: extension marks use "namespace:markType" as type, attributes flattened - let spec_json = r#"{"value":"important claim","marks":[{"type":"semantic:citation","refs":["smith2023"]}]}"#; - - let text: Text = serde_json::from_str(spec_json).unwrap(); - assert_eq!(text.value, "important claim"); - assert_eq!(text.marks.len(), 1); - - if let Mark::Extension(ext) = &text.marks[0] { - assert_eq!(ext.namespace, "semantic"); - assert_eq!(ext.mark_type, "citation"); - assert_eq!( - ext.get_string_array_attribute("refs"), - Some(vec!["smith2023"]) - ); - } else { - panic!("Expected Extension mark"); - } - - // Re-serialize matches spec format - let output = serde_json::to_string(&text).unwrap(); - let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); - let spec_val: serde_json::Value = serde_json::from_str(spec_json).unwrap(); - assert_eq!(output_val, spec_val); -} +// NOTE: spec_example_text_with_citation_mark moved to spec_compliance.rs #[test] fn spec_example_extension_block_academic_theorem() { diff --git a/cdx-core/tests/extension_schema.rs b/cdx-core/tests/extension_schema.rs deleted file mode 100644 index 2b49245..0000000 --- a/cdx-core/tests/extension_schema.rs +++ /dev/null @@ -1,338 +0,0 @@ -//! Extension attribute schema tests. -//! -//! Table-driven tests verifying field names and types for every `ExtensionMark` -//! convenience constructor. These tests prevent field-level spec divergences by -//! asserting the exact attribute schema each constructor produces. - -use cdx_core::content::{ExtensionMark, Mark, Text}; -use serde_json::Value; - -// ===== Helper functions ===== - -/// Serialize an `ExtensionMark` via a `Text` node and return the mark's JSON value. -fn mark_to_json(mark: &ExtensionMark) -> Value { - let text = Text::with_marks("test", vec![Mark::Extension(mark.clone())]); - let json = serde_json::to_value(&text).unwrap(); - // marks is an array; grab the first mark - json["marks"][0].clone() -} - -/// Assert that a JSON object contains a key with a string value. -fn assert_string_field(val: &Value, key: &str, context: &str) { - let field = &val[key]; - assert!( - field.is_string(), - "{context}: expected '{key}' to be a string, got {field}" - ); -} - -/// Assert that a JSON object contains a key with a string array value. -fn assert_string_array_field(val: &Value, key: &str, min_len: usize, context: &str) { - let field = &val[key]; - assert!( - field.is_array(), - "{context}: expected '{key}' to be an array, got {field}" - ); - let arr = field.as_array().unwrap(); - assert!( - arr.len() >= min_len, - "{context}: expected '{key}' array to have >= {min_len} items, got {}", - arr.len() - ); - for (i, item) in arr.iter().enumerate() { - assert!( - item.is_string(), - "{context}: expected '{key}[{i}]' to be a string, got {item}" - ); - } -} - -/// Assert that a JSON object does NOT contain a key. -fn assert_field_absent(val: &Value, key: &str, context: &str) { - assert!( - val.get(key).is_none(), - "{context}: expected '{key}' to be absent, but found {}", - val[key] - ); -} - -// ===== Citation constructors ===== - -#[test] -fn schema_citation_emits_refs_array() { - let mark = ExtensionMark::citation("smith2023"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "semantic:citation", "type field"); - assert_string_array_field(&json, "refs", 1, "citation"); - assert_eq!(json["refs"][0], "smith2023"); - assert_field_absent(&json, "ref", "citation must not emit legacy 'ref'"); -} - -#[test] -fn schema_citation_with_page_emits_refs_array_and_locator() { - let mark = ExtensionMark::citation_with_page("smith2023", "42-45"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "semantic:citation"); - assert_string_array_field(&json, "refs", 1, "citation_with_page"); - assert_string_field(&json, "locator", "citation_with_page"); - assert_string_field(&json, "locatorType", "citation_with_page"); - assert_eq!(json["locator"], "42-45"); - assert_eq!(json["locatorType"], "page"); - assert_field_absent( - &json, - "ref", - "citation_with_page must not emit legacy 'ref'", - ); -} - -#[test] -fn schema_multi_citation_emits_refs_array_with_multiple_items() { - let refs = vec!["smith2023".to_string(), "jones2024".to_string()]; - let mark = ExtensionMark::multi_citation(&refs); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "semantic:citation"); - assert_string_array_field(&json, "refs", 2, "multi_citation"); - assert_eq!(json["refs"][0], "smith2023"); - assert_eq!(json["refs"][1], "jones2024"); - assert_field_absent(&json, "ref", "multi_citation must not emit legacy 'ref'"); -} - -// ===== Entity link ===== - -#[test] -fn schema_entity_emits_uri_and_entity_type() { - let mark = ExtensionMark::entity("https://example.org/entity/1", "person"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "semantic:entity"); - assert_string_field(&json, "uri", "entity"); - assert_string_field(&json, "entityType", "entity"); - assert_eq!(json["uri"], "https://example.org/entity/1"); - assert_eq!(json["entityType"], "person"); -} - -// ===== Glossary ===== - -#[test] -fn schema_glossary_emits_ref() { - let mark = ExtensionMark::glossary("ai"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "semantic:glossary"); - assert_string_field(&json, "ref", "glossary"); - assert_eq!(json["ref"], "ai"); - assert_field_absent(&json, "termId", "glossary must not emit legacy 'termId'"); -} - -// ===== Index ===== - -#[test] -fn schema_index_emits_term() { - let mark = ExtensionMark::index("machine learning"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "presentation:index"); - assert_string_field(&json, "term", "index"); - assert_eq!(json["term"], "machine learning"); -} - -#[test] -fn schema_index_with_subterm_emits_term_and_subterm() { - let mark = ExtensionMark::index_with_subterm("algorithms", "quicksort"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "presentation:index"); - assert_string_field(&json, "term", "index_with_subterm"); - assert_string_field(&json, "subterm", "index_with_subterm"); - assert_eq!(json["term"], "algorithms"); - assert_eq!(json["subterm"], "quicksort"); -} - -// ===== Academic: equation references ===== - -#[test] -fn schema_equation_ref_emits_target() { - let mark = ExtensionMark::equation_ref("#eq-pythagoras"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:equation-ref"); - assert_string_field(&json, "target", "equation_ref"); - assert_eq!(json["target"], "#eq-pythagoras"); -} - -#[test] -fn schema_equation_ref_formatted_emits_target_and_format() { - let mark = ExtensionMark::equation_ref_formatted("#eq-1", "Eq. ({number})"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:equation-ref"); - assert_string_field(&json, "target", "equation_ref_formatted"); - assert_string_field(&json, "format", "equation_ref_formatted"); - assert_eq!(json["target"], "#eq-1"); - assert_eq!(json["format"], "Eq. ({number})"); -} - -// ===== Academic: algorithm references ===== - -#[test] -fn schema_algorithm_ref_emits_target() { - let mark = ExtensionMark::algorithm_ref("#alg-quicksort"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:algorithm-ref"); - assert_string_field(&json, "target", "algorithm_ref"); - assert_eq!(json["target"], "#alg-quicksort"); -} - -#[test] -fn schema_algorithm_ref_line_emits_target_and_line() { - let mark = ExtensionMark::algorithm_ref_line("#alg-quicksort", "5"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:algorithm-ref"); - assert_string_field(&json, "target", "algorithm_ref_line"); - assert_string_field(&json, "line", "algorithm_ref_line"); - assert_eq!(json["target"], "#alg-quicksort"); - assert_eq!(json["line"], "5"); -} - -#[test] -fn schema_algorithm_ref_formatted_emits_target_and_format() { - let mark = ExtensionMark::algorithm_ref_formatted("#alg-1", "Algorithm {number}"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:algorithm-ref"); - assert_string_field(&json, "target", "algorithm_ref_formatted"); - assert_string_field(&json, "format", "algorithm_ref_formatted"); -} - -#[test] -fn schema_algorithm_ref_line_formatted_emits_all_fields() { - let mark = ExtensionMark::algorithm_ref_line_formatted("#alg-1", "5", "Alg. {number}, L{line}"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:algorithm-ref"); - assert_string_field(&json, "target", "algorithm_ref_line_formatted"); - assert_string_field(&json, "line", "algorithm_ref_line_formatted"); - assert_string_field(&json, "format", "algorithm_ref_line_formatted"); - assert_eq!(json["target"], "#alg-1"); - assert_eq!(json["line"], "5"); - assert_eq!(json["format"], "Alg. {number}, L{line}"); -} - -// ===== Academic: theorem references ===== - -#[test] -fn schema_theorem_ref_emits_target() { - let mark = ExtensionMark::theorem_ref("#thm-pythagoras"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:theorem-ref"); - assert_string_field(&json, "target", "theorem_ref"); - assert_eq!(json["target"], "#thm-pythagoras"); -} - -#[test] -fn schema_theorem_ref_formatted_emits_target_and_format() { - let mark = ExtensionMark::theorem_ref_formatted("#thm-1", "Theorem {number}"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "academic:theorem-ref"); - assert_string_field(&json, "target", "theorem_ref_formatted"); - assert_string_field(&json, "format", "theorem_ref_formatted"); - assert_eq!(json["target"], "#thm-1"); - assert_eq!(json["format"], "Theorem {number}"); -} - -// ===== Collaboration: highlight ===== - -#[test] -fn schema_highlight_emits_color() { - let mark = ExtensionMark::highlight("yellow"); - let json = mark_to_json(&mark); - - assert_eq!(json["type"], "collaboration:highlight"); - assert_string_field(&json, "color", "highlight"); - assert_eq!(json["color"], "yellow"); -} - -// ===== Citation backward-compatibility deserialization ===== - -#[test] -fn schema_citation_deserializes_from_new_refs_format() { - let json_str = - r#"{"value":"cited text","marks":[{"type":"semantic:citation","refs":["smith2023"]}]}"#; - let text: Text = serde_json::from_str(json_str).unwrap(); - let mark = &text.marks[0]; - if let cdx_core::content::Mark::Extension(ext) = mark { - assert_eq!( - ext.get_string_array_attribute("refs"), - Some(vec!["smith2023"]) - ); - } else { - panic!("Expected extension mark"); - } -} - -#[test] -fn schema_citation_deserializes_from_legacy_ref_format() { - let json_str = - r#"{"value":"cited text","marks":[{"type":"semantic:citation","ref":"smith2023"}]}"#; - let text: Text = serde_json::from_str(json_str).unwrap(); - let mark = &text.marks[0]; - if let cdx_core::content::Mark::Extension(ext) = mark { - // Legacy "ref" should be accessible via get_citation_refs() - let refs = ext.get_citation_refs().expect("should have citation refs"); - assert_eq!(refs, vec!["smith2023"]); - } else { - panic!("Expected extension mark"); - } -} - -// ===== Citation struct schema tests ===== - -#[test] -fn schema_citation_struct_emits_refs_not_ref() { - use cdx_core::extensions::Citation; - - let cite = Citation::new("smith2023"); - let json = serde_json::to_value(&cite).unwrap(); - - assert!(json.get("refs").is_some(), "Citation must emit 'refs'"); - assert!( - json.get("ref").is_none(), - "Citation must not emit legacy 'ref'" - ); - assert!(json["refs"].is_array()); - assert_eq!(json["refs"][0], "smith2023"); -} - -#[test] -fn schema_citation_struct_accepts_both_ref_and_refs() { - use cdx_core::extensions::Citation; - - // New format: "refs" array - let new_json = r#"{"refs":["smith2023","jones2024"]}"#; - let cite: Citation = serde_json::from_str(new_json).unwrap(); - assert_eq!(cite.refs, vec!["smith2023", "jones2024"]); - - // Legacy format: "ref" string - let old_json = r#"{"ref":"smith2023"}"#; - let cite: Citation = serde_json::from_str(old_json).unwrap(); - assert_eq!(cite.refs, vec!["smith2023"]); -} - -#[test] -fn schema_citation_struct_multi_ref_roundtrip() { - use cdx_core::extensions::Citation; - - let cite = Citation::multi(vec!["a".into(), "b".into(), "c".into()]).with_page("10"); - let json = serde_json::to_string(&cite).unwrap(); - let parsed: Citation = serde_json::from_str(&json).unwrap(); - - assert_eq!(parsed.refs, vec!["a", "b", "c"]); - assert_eq!(parsed.locator, Some("10".to_string())); -} diff --git a/cdx-core/tests/spec_compliance.rs b/cdx-core/tests/spec_compliance.rs new file mode 100644 index 0000000..995c832 --- /dev/null +++ b/cdx-core/tests/spec_compliance.rs @@ -0,0 +1,462 @@ +//! Spec compliance tests. +//! +//! These tests validate cdx-core against the external Codex file format +//! specification (pinned as a git submodule at `spec/`). Three modules: +//! +//! - **`mark_schema_validation`**: Serialized mark output validated against spec JSON schemas +//! - **`backward_compatibility`**: Legacy format deserialization and migration helpers +//! - **`example_deserialization`**: Spec example documents deserialized and roundtripped + +use std::path::{Path, PathBuf}; + +use cdx_core::content::{ExtensionMark, Mark, Text}; +use serde_json::Value; + +// ============================================================================ +// Helpers +// ============================================================================ + +/// Path to the spec submodule root. Panics with a helpful message if missing. +fn spec_dir() -> PathBuf { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("../spec"); + assert!( + dir.join("schemas").exists(), + "Spec submodule not found. Run: git submodule update --init" + ); + dir +} + +/// Load a `$defs` entry from a spec schema file and return it as a standalone schema. +fn load_schema_def(schema_file: &str, def_name: &str) -> Value { + let path = spec_dir().join("schemas").join(schema_file); + let content = std::fs::read_to_string(&path) + .unwrap_or_else(|e| panic!("Failed to read {}: {e}", path.display())); + let schema: Value = serde_json::from_str(&content) + .unwrap_or_else(|e| panic!("Failed to parse {}: {e}", path.display())); + schema["$defs"][def_name].clone() +} + +/// Serialize an `ExtensionMark` to its JSON representation via a `Text` node, +/// then strip the namespace prefix from the `type` field so it matches the +/// spec's mark-level schema (which uses e.g. `"citation"` not `"semantic:citation"`). +fn mark_to_json_for_schema(mark: &ExtensionMark) -> Value { + let text = Text::with_marks("test", vec![Mark::Extension(mark.clone())]); + let json = serde_json::to_value(&text).unwrap(); + let mut mark_json = json["marks"][0].clone(); + + // Strip namespace prefix: "semantic:citation" → "citation" + if let Some(type_val) = mark_json.get("type").and_then(Value::as_str) { + if let Some((_ns, mt)) = type_val.split_once(':') { + mark_json["type"] = Value::String(mt.to_string()); + } + } + + mark_json +} + +/// Validate a JSON value against a schema, returning a list of error messages. +fn validate_against_schema(instance: &Value, schema: &Value) -> Vec { + let validator = jsonschema::validator_for(schema).expect("Failed to compile JSON schema"); + validator + .iter_errors(instance) + .map(|e| format!("{e} at {}", e.instance_path)) + .collect() +} + +// ============================================================================ +// Module: mark_schema_validation +// ============================================================================ + +mod mark_schema_validation { + use super::*; + + fn assert_valid_against_spec(mark: &ExtensionMark, schema_file: &str, def_name: &str) { + let schema = load_schema_def(schema_file, def_name); + assert!( + schema.is_object(), + "Schema definition '{def_name}' not found in {schema_file}" + ); + let mark_json = mark_to_json_for_schema(mark); + let errors = validate_against_schema(&mark_json, &schema); + assert!( + errors.is_empty(), + "Mark failed schema validation against {schema_file}#/$defs/{def_name}:\n \ + Mark JSON: {mark_json}\n Errors:\n {}", + errors.join("\n ") + ); + } + + // ----- Semantic marks ----- + + #[test] + fn citation_validates_against_spec() { + let mark = ExtensionMark::citation("smith2023"); + assert_valid_against_spec(&mark, "semantic.schema.json", "citationMark"); + } + + #[test] + fn citation_with_page_validates_against_spec() { + // Note: cdx-core emits "locatorType" which is not in the spec schema, + // but the schema allows additional properties so this still validates. + let mark = ExtensionMark::citation_with_page("smith2023", "42-45"); + assert_valid_against_spec(&mark, "semantic.schema.json", "citationMark"); + } + + #[test] + fn multi_citation_validates_against_spec() { + let refs = vec!["smith2023".to_string(), "jones2024".to_string()]; + let mark = ExtensionMark::multi_citation(&refs); + assert_valid_against_spec(&mark, "semantic.schema.json", "citationMark"); + } + + #[test] + fn glossary_validates_against_spec() { + let mark = ExtensionMark::glossary("ai"); + assert_valid_against_spec(&mark, "semantic.schema.json", "glossaryMark"); + } + + #[test] + fn entity_validates_against_spec() { + // Schema entityType enum uses PascalCase: "Person", "Organization", etc. + let mark = ExtensionMark::entity("https://www.wikidata.org/wiki/Q7251", "Person"); + assert_valid_against_spec(&mark, "semantic.schema.json", "entityMark"); + } + + // ----- Academic marks ----- + + #[test] + fn equation_ref_validates_against_spec() { + let mark = ExtensionMark::equation_ref("#eq-pythagoras"); + assert_valid_against_spec(&mark, "academic.schema.json", "equationRefMark"); + } + + #[test] + fn algorithm_ref_validates_against_spec() { + let mark = ExtensionMark::algorithm_ref("#alg-quicksort"); + assert_valid_against_spec(&mark, "academic.schema.json", "algorithmRefMark"); + } + + #[test] + fn theorem_ref_validates_against_spec() { + let mark = ExtensionMark::theorem_ref("#thm-pythagoras"); + assert_valid_against_spec(&mark, "academic.schema.json", "theoremRefMark"); + } + + // ----- Presentation marks ----- + + #[test] + fn index_validates_against_spec() { + let mark = ExtensionMark::index("machine learning"); + assert_valid_against_spec(&mark, "presentation.schema.json", "indexMark"); + } + + #[test] + fn index_with_subterm_validates_against_spec() { + let mark = ExtensionMark::index_with_subterm("algorithms", "quicksort"); + assert_valid_against_spec(&mark, "presentation.schema.json", "indexMark"); + } +} + +// ============================================================================ +// Module: backward_compatibility +// ============================================================================ + +mod backward_compatibility { + use super::*; + + // ----- Citation backward compat ----- + + #[test] + fn citation_deserializes_from_new_refs_format() { + let json_str = + r#"{"value":"cited text","marks":[{"type":"semantic:citation","refs":["smith2023"]}]}"#; + let text: Text = serde_json::from_str(json_str).unwrap(); + let mark = &text.marks[0]; + if let Mark::Extension(ext) = mark { + assert_eq!( + ext.get_string_array_attribute("refs"), + Some(vec!["smith2023"]) + ); + } else { + panic!("Expected extension mark"); + } + } + + #[test] + fn citation_deserializes_from_legacy_ref_format() { + let json_str = + r#"{"value":"cited text","marks":[{"type":"semantic:citation","ref":"smith2023"}]}"#; + let text: Text = serde_json::from_str(json_str).unwrap(); + let mark = &text.marks[0]; + if let Mark::Extension(ext) = mark { + let refs = ext.get_citation_refs().expect("should have citation refs"); + assert_eq!(refs, vec!["smith2023"]); + } else { + panic!("Expected extension mark"); + } + } + + #[test] + fn citation_multi_refs_roundtrip() { + let refs = vec!["smith2023".into(), "jones2024".into()]; + let mark = Mark::Extension(ExtensionMark::multi_citation(&refs)); + + let json = serde_json::to_string(&mark).unwrap(); + let val: Value = serde_json::from_str(&json).unwrap(); + assert_eq!(val["refs"], serde_json::json!(["smith2023", "jones2024"])); + + let parsed: Mark = serde_json::from_str(&json).unwrap(); + if let Mark::Extension(ext) = &parsed { + assert_eq!( + ext.get_string_array_attribute("refs"), + Some(vec!["smith2023", "jones2024"]) + ); + } else { + panic!("Expected Extension mark"); + } + } + + #[test] + fn citation_normalize_attrs_rewrites_ref_to_refs() { + let mut ext = ExtensionMark::new("semantic", "citation") + .with_attributes(serde_json::json!({"ref": "smith2023"})); + ext.normalize_citation_attrs(); + assert_eq!( + ext.get_string_array_attribute("refs"), + Some(vec!["smith2023"]) + ); + assert!(ext.get_string_attribute("ref").is_none()); + } + + #[test] + fn citation_struct_emits_refs_not_ref() { + use cdx_core::extensions::Citation; + + let cite = Citation::new("smith2023"); + let json = serde_json::to_value(&cite).unwrap(); + assert!(json.get("refs").is_some(), "Citation must emit 'refs'"); + assert!(json.get("ref").is_none(), "Citation must not emit 'ref'"); + assert!(json["refs"].is_array()); + assert_eq!(json["refs"][0], "smith2023"); + } + + #[test] + fn citation_struct_accepts_both_ref_and_refs() { + use cdx_core::extensions::Citation; + + let new_json = r#"{"refs":["smith2023","jones2024"]}"#; + let cite: Citation = serde_json::from_str(new_json).unwrap(); + assert_eq!(cite.refs, vec!["smith2023", "jones2024"]); + + let old_json = r#"{"ref":"smith2023"}"#; + let cite: Citation = serde_json::from_str(old_json).unwrap(); + assert_eq!(cite.refs, vec!["smith2023"]); + } + + #[test] + fn citation_struct_multi_ref_roundtrip() { + use cdx_core::extensions::Citation; + + let cite = Citation::multi(vec!["a".into(), "b".into(), "c".into()]).with_page("10"); + let json = serde_json::to_string(&cite).unwrap(); + let parsed: Citation = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.refs, vec!["a", "b", "c"]); + assert_eq!(parsed.locator, Some("10".to_string())); + } + + // ----- Glossary backward compat ----- + + #[test] + fn glossary_deserializes_from_new_ref_format() { + let json_str = r#"{"value":"term","marks":[{"type":"semantic:glossary","ref":"ai"}]}"#; + let text: Text = serde_json::from_str(json_str).unwrap(); + if let Mark::Extension(ext) = &text.marks[0] { + assert_eq!(ext.get_glossary_ref(), Some("ai")); + } else { + panic!("Expected extension mark"); + } + } + + #[test] + fn glossary_deserializes_from_legacy_term_id_format() { + let json_str = r#"{"value":"term","marks":[{"type":"semantic:glossary","termId":"ai"}]}"#; + let text: Text = serde_json::from_str(json_str).unwrap(); + if let Mark::Extension(ext) = &text.marks[0] { + assert_eq!(ext.get_glossary_ref(), Some("ai")); + } else { + panic!("Expected extension mark"); + } + } + + #[test] + fn glossary_normalize_attrs_rewrites_term_id_to_ref() { + let mut ext = ExtensionMark::new("semantic", "glossary") + .with_attributes(serde_json::json!({"termId": "ai"})); + ext.normalize_glossary_attrs(); + assert_eq!(ext.get_string_attribute("ref"), Some("ai")); + assert!(ext.get_string_attribute("termId").is_none()); + } + + #[test] + fn glossary_ref_struct_emits_ref_not_term_id() { + use cdx_core::extensions::GlossaryRef; + + let gref = GlossaryRef::new("ai"); + let json = serde_json::to_value(&gref).unwrap(); + assert!(json.get("ref").is_some(), "GlossaryRef must emit 'ref'"); + assert!( + json.get("termId").is_none(), + "GlossaryRef must not emit 'termId'" + ); + assert_eq!(json["ref"], "ai"); + } + + #[test] + fn glossary_ref_struct_accepts_both_term_id_and_ref() { + use cdx_core::extensions::GlossaryRef; + + let new_json = r#"{"ref":"ai"}"#; + let gref: GlossaryRef = serde_json::from_str(new_json).unwrap(); + assert_eq!(gref.term_id, "ai"); + + let old_json = r#"{"termId":"ai"}"#; + let gref: GlossaryRef = serde_json::from_str(old_json).unwrap(); + assert_eq!(gref.term_id, "ai"); + } + + // ----- Extension mark format backward compat ----- + + #[test] + fn extension_mark_deserializes_old_wrapper_format() { + let json = r#"{"type":"extension","namespace":"semantic","markType":"citation","attributes":{"ref":"smith2023"}}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + if let Mark::Extension(ext) = &mark { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "citation"); + assert_eq!(ext.get_citation_refs(), Some(vec!["smith2023"])); + } else { + panic!("Expected Extension mark, got {mark:?}"); + } + } + + #[test] + fn extension_mark_deserializes_non_namespaced_type() { + // Spec examples use non-namespaced types for marks (e.g., "citation" not "semantic:citation") + let json = r#"{"type":"citation","refs":["smith2023"]}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + if let Mark::Extension(ext) = &mark { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "citation"); + assert_eq!( + ext.get_string_array_attribute("refs"), + Some(vec!["smith2023"]) + ); + } else { + panic!("Expected Extension mark, got {mark:?}"); + } + } + + #[test] + fn glossary_mark_deserializes_non_namespaced_type() { + let json = r#"{"type":"glossary","ref":"turing-machine"}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + if let Mark::Extension(ext) = &mark { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "glossary"); + assert_eq!(ext.get_glossary_ref(), Some("turing-machine")); + } else { + panic!("Expected Extension mark, got {mark:?}"); + } + } +} + +// ============================================================================ +// Module: example_deserialization +// ============================================================================ + +mod example_deserialization { + use super::*; + use cdx_core::content::Content; + + /// Deserialize a spec example content document and verify it roundtrips. + fn assert_example_roundtrips(example_name: &str) { + let path = spec_dir() + .join("examples") + .join(example_name) + .join("content/document.json"); + let raw = std::fs::read_to_string(&path) + .unwrap_or_else(|e| panic!("Failed to read {}: {e}", path.display())); + + // Step 1: Deserialize spec JSON into cdx-core Content + let content: Content = serde_json::from_str(&raw) + .unwrap_or_else(|e| panic!("{example_name}: failed to deserialize: {e}")); + assert!( + !content.is_empty(), + "{example_name}: deserialized content should not be empty" + ); + + // Step 2: Serialize back to JSON and re-deserialize + let serialized = serde_json::to_string(&content) + .unwrap_or_else(|e| panic!("{example_name}: failed to serialize: {e}")); + let roundtripped: Content = serde_json::from_str(&serialized) + .unwrap_or_else(|e| panic!("{example_name}: failed to re-deserialize: {e}")); + + // Step 3: Compare the two Content values (structural equality) + assert_eq!( + content, roundtripped, + "{example_name}: roundtrip produced different Content" + ); + } + + #[test] + fn simple_document() { + assert_example_roundtrips("simple-document"); + } + + #[test] + fn semantic_document() { + assert_example_roundtrips("semantic-document"); + } + + #[test] + fn academic_document() { + assert_example_roundtrips("academic-document"); + } + + #[test] + fn collaboration_document() { + assert_example_roundtrips("collaboration-document"); + } + + #[test] + #[ignore = "spec uses block-level content in tableCell; cdx-core expects inline text"] + fn presentation_document() { + assert_example_roundtrips("presentation-document"); + } + + #[test] + fn forms_document() { + assert_example_roundtrips("forms-document"); + } + + #[test] + fn phantoms_document() { + assert_example_roundtrips("phantoms-document"); + } + + #[test] + fn legal_document() { + assert_example_roundtrips("legal-document"); + } + + #[test] + fn signed_document() { + assert_example_roundtrips("signed-document"); + } + + #[test] + #[ignore = "spec uses block-level content in tableCell; cdx-core expects inline text"] + fn comprehensive_document() { + assert_example_roundtrips("comprehensive-document"); + } +} diff --git a/spec b/spec new file mode 160000 index 0000000..dc01039 --- /dev/null +++ b/spec @@ -0,0 +1 @@ +Subproject commit dc01039a6561292e082d435d61cf3b654a01a271