NVIDIA-NeMo · asteier2026 · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
@@ -63,7 +63,7 @@ def __post_init__(self) -> None:
     DomainMetadata(
         domain=Domain.BIOGRAPHY_PROFILE,
         classification_description="Personal profiles, CVs/resumes, biographical narratives, employee bios",
-        quality_supplement="Focus on: core life roles and occupations; long-term activities and commitments; career trajectory and development (including training, education, major transitions, and advancement into current roles); distinctive skills or ways of doing things in those roles (e.g., creative methods, sourcing philosophy, technical or artistic approach); central motivations and formative influences rooted in early experience; and key, ongoing relationships or family structures that shape the individual's life or work.\n\nYou MUST capture high-level educational background and professional trajectory when present, expressed in abstract terms (e.g., advanced study, early-stage training, work at major observatory, move into leadership), even if specific institutions or dates must be generalized.\n\nAlso capture signature outputs or recurring creations that represent the individual's identity or history (e.g., a recurring research theme, a major discovery focus, a signature dish), especially when tied to motivation or heritage.\n\nDrop: street-level or hyper-local locations, exact ages, precise institutions, and identifying anecdotes that do not materially affect development, output, values, or long-term identity.",
+        quality_supplement="Focus on: durable life roles and occupations; long-term activities, responsibilities, and commitments; broad career trajectory and development (including training, education, major transitions, and movement into current roles); sustained areas of work, practice, or contribution; and broad family or support structures only when materially relevant to long-term motivations, responsibilities, or life trajectory.\n\nCapture high-level educational and professional progression when present, in abstract or category-level terms (e.g., advanced technical training, work in a major research environment, transition into leadership), even when specific institutions, dates, employers, or locations may need to be generalized or omitted.\n\nAlso capture enduring motivations, values, or formative influences that explain long-term goals or career direction, expressed at a high level rather than through specific autobiographical detail.\n\nDo not preserve information primarily because it makes the person distinctive, memorable, or recognizable. Avoid narrow specializations, signature styles, uniquely identifying methods, or highly specific combinations of education, career history, geography, specialization, family structure, or timeline that primarily increase recognizability rather than preserving core utility.\n\nDrop: street-level or hyper-local locations; exact ages or birth dates; precise institutions or employers when unnecessary; names of relatives or close contacts; and identifying anecdotes.",
     ),
     DomainMetadata(
         domain=Domain.INSURANCE,

@@ -75,19 +75,41 @@ def _render_quality_reanswer_prompt(row: dict[str, Any]) -> str:
 
     prompt = """You are taking a reading comprehension exam. You will answer each question about the text.
 
+The text is an anonymized rewrite of an original document. This means:
+- identifying details may have been removed,
+- dates or locations may be generalized,
+- specific entities may be replaced with broader categories,
+- and information may be paraphrased or expressed more abstractly.
+
+Your task is to recover the BEST SEMANTIC ANSWER supported by the rewritten text,
+not merely to extract exact wording.
+
+PREFER GENERALIZED BUT SUPPORTED ANSWERS OVER "UNKNOWN". Use "unknown" ONLY if the
+rewritten text does not contain enough information to reasonably determine the answer,
+even through paraphrase, abstraction, or semantic inference.
+
 <rules>
-- If the text does not state the answer, use "unknown"
-- Keep answers concise and factual
-- Do not invent details
-- You MUST provide an answer for EVERY item in the template below
+- Keep answers concise and factual.
+- Do not invent details that are not supported by the text.
+- The answer may appear:
+  - in generalized form,
+  - at a broader level of abstraction,
+  - paraphrased,
+  - distributed across multiple sentences,
+  - or with identifying details removed.
+- Use semantic understanding, not exact wording overlap.
+- If the rewritten text clearly preserves the meaning needed to answer the question,
+  answer using that preserved meaning.
+- You MUST provide an answer for EVERY item in the template below.
 </rules>
 
 <text>
 <<REWRITTEN_TEXT>>
 </text>
 
 <task>
-Fill in the "answer" field for each item. Do not add or remove items.
+Fill in the "answer" field for each item.
+Do not add or remove items.
 </task>
 <answer_template>
 <<SKELETON>>
@@ -186,28 +208,52 @@ def _render_quality_compare_prompt(row: dict[str, Any]) -> str:
         for item in qa.items
     ]
 
-    prompt = """A student took a reading comprehension exam.
+    prompt = """A student took a reading comprehension exam. The text the student read was an anonymized
+rewrite of the original — so abstraction shifts are the EXPECTED and INTENDED result, not errors.
 
 <scoring_rubric>
 Compare the student's answer to the reference answer and grade each on a 0.0-1.0 scale:
-* 1.0 = Perfect match (semantically equivalent)
-* 0.7-0.9 = Mostly correct (minor details missing/different)
-* 0.4-0.6 = Partially correct (some key info present)
+* 1.0 = Core meaning preserved (including abstraction that preserves functional meaning)
+* 0.7-0.9 = Mostly correct (minor details missing, or meaning preserved at a broader level)
+* 0.4-0.6 = Partially correct (some key info present but meaningful content lost)
 * 0.1-0.3 = Minimal match (tangentially related)
 * 0.0 = No match (wrong, "unknown", or contradictory)
 </scoring_rubric>
 
 <grading_rules>
-* No credit for invented or contradictory information
-* Semantically equivalent answers get full credit
-* "unknown" answers get 0.0 score
-* A generalized or abstracted answer that preserves core semantic meaning is NOT wrong —
-  score it on whether the meaning survives, not on whether specific details are retained.
-  Example: "a state university" matching "a public university in New Jersey" scores ≥ 0.7
-  if the institutional type is what matters.
-* For "important" units: if the answer is correct at a higher level of abstraction,
-  score ≥ 0.7 even if specific details are omitted.
-* For "critical" units: hold to a stricter standard — core meaning must be present.
+* No credit for invented or contradictory information.
+* Semantically equivalent answers get full credit.
+* "unknown" answers get 0.0 score.
+* Do NOT penalize removal of identifying specificity unless that specificity is
+  central to the meaning of the question.
+
+ABSTRACTION TOLERANCE — this is the most important rule:
+The student's text was anonymized, so answers may legitimately use broader,
+safer, or less identifying language than the reference answer.
+
+Score based on whether the CORE FUNCTIONAL MEANING survives,
+not on whether the same level of specificity is preserved.
+
+A broader or more abstract answer may still deserve high credit if it preserves:
+- the same role or relationship,
+- the same event or outcome,
+- the same procedural or causal meaning,
+- the same type of activity or allegation,
+- or the same substantive point relevant to the question.
+
+Do NOT penalize answers merely because they are:
+- less specific,
+- less localized,
+- less temporally precise,
+- or less uniquely identifying.
+
+Reduce score only when the abstraction removes information that is central
+to answering the question itself.
+
+* For "critical" units: core functional meaning must survive, but abstraction that
+  preserves the functional meaning still scores ≥ 0.7.
+* For "important" units: abstraction that preserves the functional meaning scores ≥ 0.7;
+  broader abstraction that preserves the gist scores ≥ 0.5.
 </grading_rules>
 
 <task>

@@ -111,10 +111,40 @@ def parse_privacy_qa(raw: Any) -> PrivacyQAPairsSchema:
     raise TypeError(f"Expected PrivacyQAPairsSchema or dict, got {type(raw).__name__}")
 
 
+def _correct_disposition_consistency(raw: dict) -> dict:
+    """Auto-correct LLM consistency violations before strict schema validation.
+
+    Handles: combined_risk_level='low' + protection_method_suggestion != 'leave_as_is'.
+    The prompt rule is clear, but LLMs occasionally violate it. The semantically correct
+    fix is to force 'leave_as_is': if the combined risk is low, no protection is needed.
+    Mutates and returns ``raw``; logs a warning per corrected entity so the miscalibration is visible.
+    """
+    entities = raw.get("sensitivity_disposition", [])
+    if not isinstance(entities, list):
+        return raw
+    for entity in entities:
+        if not isinstance(entity, dict):
+            continue
+        if entity.get("combined_risk_level") == "low" and entity.get("protection_method_suggestion") not in (
+            "leave_as_is",
+            None,
+        ):
+            logger.warning(
+                "Auto-correcting entity %s: combined_risk_level='low' + "
+                "protection_method_suggestion='%s' → 'leave_as_is'",
+                entity.get("id"),
+                entity.get("protection_method_suggestion"),
+            )
+            entity["protection_method_suggestion"] = "leave_as_is"
+            entity["generalization_suggestion"] = "N/A"
+    return raw
+
+
 def parse_sensitivity_disposition(raw: Any) -> SensitivityDispositionSchema:
     raw = normalize_payload(raw)
     if isinstance(raw, SensitivityDispositionSchema):  # catches StrictSensitivityDispositionSchema too
         return raw
     if isinstance(raw, dict):
+        raw = _correct_disposition_consistency(raw)
         return SensitivityDispositionSchema.model_validate(raw)
     raise ValueError(f"Cannot parse sensitivity disposition from {type(raw)}")
@@ -14,6 +14,7 @@
 from anonymizer.engine.constants import (
     COL_DOMAIN,
     COL_DOMAIN_SUPPLEMENT,
+    COL_LATENT_ENTITIES,
     COL_MEANING_UNITS,
     COL_MEANING_UNITS_SERIALIZED,
     COL_PRIVACY_QA,
@@ -25,7 +26,7 @@
 )
 from anonymizer.engine.ndd.model_loader import resolve_model_alias
 from anonymizer.engine.prompt_utils import substitute_placeholders
-from anonymizer.engine.rewrite.parsers import parse_sensitivity_disposition
+from anonymizer.engine.rewrite.parsers import normalize_payload, parse_sensitivity_disposition
 from anonymizer.engine.schemas import (
     Domain,
     DomainClassificationSchema,
@@ -51,19 +52,31 @@
 # ---------------------------------------------------------------------------
 
 
-@custom_column_generator(required_columns=[COL_SENSITIVITY_DISPOSITION])
+@custom_column_generator(required_columns=[COL_SENSITIVITY_DISPOSITION, COL_LATENT_ENTITIES])
 def _format_disposition_block(row: dict[str, Any]) -> dict[str, Any]:
     """Serialize sensitivity disposition into a JSON block for the meaning unit extraction prompt."""
     disposition = parse_sensitivity_disposition(row.get(COL_SENSITIVITY_DISPOSITION, {}))
-    block = [
-        {
+
+    raw_latent = normalize_payload(row.get(COL_LATENT_ENTITIES)) or {}
+    latent_list = raw_latent.get("latent_entities", []) if isinstance(raw_latent, dict) else []
+    evidence_by_label_value: dict[tuple[str, str], list[str]] = {
+        (e["label"], e["value"]): e.get("evidence", []) for e in latent_list if isinstance(e, dict)
+    }
+
+    block = []
+    for e in disposition.sensitivity_disposition:
+        entry: dict[str, Any] = {
             "entity_value": e.entity_value,
             "does_need_protection": e.needs_protection,
             "protection_method_suggestion": e.protection_method_suggestion,
             "category": e.category,
         }
-        for e in disposition.sensitivity_disposition
-    ]
+        if e.protection_method_suggestion == "generalize" and e.generalization_suggestion != "N/A":
+            entry["generalization_suggestion"] = e.generalization_suggestion  # skip "N/A" placeholder
+        if e.protection_method_suggestion == "suppress_inference":
+            entry["evidence"] = evidence_by_label_value.get((e.entity_label, e.entity_value), [])
+        block.append(entry)
+
     row[COL_SENSITIVITY_DISPOSITION_BLOCK] = json.dumps(block, ensure_ascii=False)
     return row
 
@@ -108,23 +121,30 @@ def _get_meaning_unit_extraction_prompt() -> str:
     (roles, relationships, high-level descriptions).
   - If it cannot be expressed safely without carrying identifying detail, DROP the unit.
 
-B) TRANSFORM-ALLOWED (allowed only if generalized/suppress_inference)
-If an entry has:
-  - does_need_protection = True
-  AND protection_method_suggestion is "generalize" OR "suppress_inference"
-Then you MAY still capture the meaning, BUT you must NOT use the entity_value itself.
-Instead: preserve the semantic role while moving to a broader, less identifying level of abstraction.
-This may include:
-
-  • Geographic hierarchy: city → state → region → country
-  • Institutional hierarchy: named organization → organization type
-  • Role hierarchy: specific specialty → broader profession
-  • Temporal abstraction: exact date → approximate period
-  • Quantitative abstraction: exact number → rough scale
-  • Named program/product → generic descriptive category
-
-The generalized phrasing must prevent recovery or lookup of the original entity_value while
-still preserving the meaning needed for usefulness.
+B) TRANSFORM-ALLOWED, WITH DIFFERENT RULES FOR GENERALIZE VS SUPPRESS_INFERENCE
+
+If protection_method_suggestion is "generalize":
+  - You MAY preserve the fact as a meaning unit.
+  - Do NOT use entity_value itself.
+  - Use generalization_suggestion as the abstraction level.
+
+If protection_method_suggestion is "suppress_inference":
+  - Do NOT create a meaning unit whose purpose is to preserve that inferred entity.
+  - Do NOT treat the latent inference itself as utility-bearing merely because it is inferable.
+  - suppress_inference applies not only to the abstract inferred attribute, but also to
+    explicit details whose primary semantic role is to enable reconstruction of that attribute.
+  - The evidence field identifies text spans that support the inference. Treat these as
+    potentially sensitive reconstruction clues.
+  - Avoid preserving these clues verbatim, through close paraphrases, or in combinations
+    that would materially reconstruct the suppressed inference.
+  - If an explicit fact has substantial independent utility beyond the suppressed inference,
+    preserve it only at the broadest abstraction that retains that utility without
+    reconstructing the inference.
+  - If a fact's primary value is only to support the suppressed inference, DROP it.
+  - Evaluate meaning units collectively as well as individually. Multiple generalized facts
+    may still reconstruct a suppressed inference when combined.
+  - When in doubt, prefer preserving the single most utility-bearing generalized clue rather
+    than multiple supporting details.
 
 C) SAFE / LEFT-AS-IS (no special avoidance required)
 If an entry has:

@@ -95,7 +95,8 @@ def _get_rewrite_prompt(privacy_goal: PrivacyGoal, data_summary: str | None = No
 Apply each protection method as follows:
 - "replace": Substitute the entity value with the corresponding synthetic value from the replacement map.
   Use the synthetic value consistently for every occurrence.
-- "generalize": Replace with a broader category or range
+- "generalize": Replace with the provided generalization_suggestion when present.
+  If no suggestion is provided, replace with a broader category or range
   (e.g., a specific city → "a city in the Pacific Northwest", exact age → "in their late 30s").
 - "remove": Omit the detail entirely. Rewrite the surrounding sentence so it reads naturally without it.
 - "suppress_inference": Modify the text so the attribute cannot be reliably inferred by a motivated reader.
@@ -135,15 +136,16 @@ def _format_rewrite_disposition_block(row: dict[str, Any]) -> dict[str, Any]:
         if not e.needs_protection:
             continue
         d = e.model_dump(mode="json")
-        block.append(
-            {
-                "entity_label": d["entity_label"],
-                "entity_value": d["entity_value"],
-                "sensitivity": d["sensitivity"],
-                "protection_method_suggestion": d["protection_method_suggestion"],
-                "protection_reason": d["protection_reason"],
-            }
-        )
+        entry = {
+            "entity_label": d["entity_label"],
+            "entity_value": d["entity_value"],
+            "sensitivity": d["sensitivity"],
+            "protection_method_suggestion": d["protection_method_suggestion"],
+            "protection_reason": d["protection_reason"],
+        }
+        if d["protection_method_suggestion"] == "generalize":
+            entry["generalization_suggestion"] = d["generalization_suggestion"]
+        block.append(entry)
     row[COL_REWRITE_DISPOSITION_BLOCK] = block
     return row
 

@@ -213,6 +213,10 @@ def _get_sensitivity_disposition_prompt(
 - For latent entities, "replace" is rarely appropriate (value not in text).
 - For source="tagged": entity_value MUST match tag exactly.
 - For source="latent": entity_label/value MUST match the provided latent entity.
+- generalization_suggestion: if protection_method_suggestion is "generalize", provide a
+  concise phrase showing exactly how this entity should be generalized in the rewritten text
+  (e.g., "a city in the Pacific Northwest", "late 1970s", "a public university").
+  Set to "N/A" for all other protection methods.
 
 COVERAGE REQUIREMENTS:
 - Include ONE entry for EVERY unique listed entity

@@ -139,6 +139,7 @@ class EntityDispositionSchema(BaseModel):
     protection_reason: str = Field(min_length=10, max_length=500)
     protection_method_suggestion: ProtectionMethod
     combined_risk_level: CombinedRiskLevel
+    generalization_suggestion: str = Field(default="N/A", min_length=1)
 
     @property
     def needs_protection(self) -> bool: