diff --git a/docs/code-review.md b/docs/code-review.md index 6f3ef7518..fe05e074a 100644 --- a/docs/code-review.md +++ b/docs/code-review.md @@ -33,7 +33,7 @@ Unlike the pass/fail categories, code review is scored with **Precision / Recall
No results available yet. Check back soon!
{% endif %} +## Experiment Leaderboard + +Compares review-knowledge configurations for the same model (see the Baseline Leaderboard above for the plain agent): + +- **Inline knowledge (pre-#8700)** — the review checklists BCApps shipped inline before adopting BCQuality, injected as custom instructions. +- **BCQuality (live skills)** — the agent dynamically consumes the live BCQuality skill tree. + +{% assign experiment_rows = site.data.code-review.aggregate | where_exp: "agg", "agg.experiment" %} +{% if experiment_rows and experiment_rows.size > 0 %} +| Variant | +Agent | +Model | +Micro F1 (95% CI) | +Macro F1 (95% CI) | +Precision | +Recall | +Avg Time | +Ver | +
|---|---|---|---|---|---|---|---|---|
| + {%- if agg.experiment.bcquality -%}BCQuality (live skills) + {%- elsif agg.experiment.custom_instructions -%}Inline knowledge (pre-#8700) + {%- else -%}Other{%- endif -%} + | +{{ agg.agent_name }} | +{{ agg.model }} | +{{ agg.f1 | times: 100.0 | round: 1 }}%{% if agg.f1_ci_low %} ({{ agg.f1_ci_low | times: 100.0 | round: 1 }}-{{ agg.f1_ci_high | times: 100.0 | round: 1 }}%){% endif %} | +{{ agg.macro_f1 | times: 100.0 | round: 1 }}%{% if agg.macro_f1_ci_low %} ({{ agg.macro_f1_ci_low | times: 100.0 | round: 1 }}-{{ agg.macro_f1_ci_high | times: 100.0 | round: 1 }}%){% endif %} | +{{ agg.precision | times: 100.0 | round: 1 }}% | +{{ agg.recall | times: 100.0 | round: 1 }}% | +{{ agg.average_duration | round: 1 }}s | +{{ agg.benchmark_version }} | +
No experiment results available yet. Check back soon!
+{% endif %} + ## How metrics are computed - **Precision** — of the comments the agent generated, the fraction that matched an expected finding. Penalizes noisy reviews. diff --git a/src/bcbench/agent/copilot/agent.py b/src/bcbench/agent/copilot/agent.py index a7879cacc..1a3d9899f 100644 --- a/src/bcbench/agent/copilot/agent.py +++ b/src/bcbench/agent/copilot/agent.py @@ -10,8 +10,10 @@ from bcbench.agent.copilot.metrics import parse_metrics from bcbench.agent.shared import build_al_lsp_plugin, build_mcp_config, build_prompt, parse_tool_usage_from_hooks +from bcbench.agent.shared.codereview_bcquality import parse_bcquality_config, prepare_bcquality_workspace from bcbench.config import get_config from bcbench.dataset import BaseDatasetEntry +from bcbench.evaluate.codereview import REVIEW_OUTPUT_FILE from bcbench.exceptions import AgentError, AgentTimeoutError from bcbench.logger import get_logger from bcbench.operations import setup_agent_skills, setup_custom_agent, setup_hooks, setup_instructions_from_config @@ -41,22 +43,49 @@ def run_copilot_agent( logger.info(f"Running GitHub Copilot CLI on: {entry.instance_id}") - prompt: str = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp) mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp, container_name=container_name) lsp_plugin_dir: Path | None = build_al_lsp_plugin(entry, category, repo_path, AgentType.COPILOT, al_lsp=al_lsp, container_name=container_name) - instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) - skills_enabled: bool = setup_agent_skills(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) - custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) - tool_log_path: Path = setup_hooks(repo_path, AgentType.COPILOT, output_dir) - config = ExperimentConfiguration( - mcp_servers=mcp_server_names, - al_lsp_enabled=lsp_plugin_dir is not None, - custom_instructions=instructions_enabled, - skills_enabled=skills_enabled, - custom_agent=custom_agent, - ) - - logger.info(f"Executing Copilot CLI in directory: {repo_path}") + + bcquality_config = parse_bcquality_config(copilot_config) + bcquality_live: bool = category == EvaluationCategory.CODE_REVIEW and bcquality_config is not None and bcquality_config.enabled + + if bcquality_live: + assert bcquality_config is not None + # Live BCQuality consumption: clone+filter BCQuality and route the agent through skills/entry.md. + # The filtered clone (not the repo) becomes the Copilot CLI working directory; the repo under + # review is granted via --add-dir. No static instruction/skill/agent injection in this mode. + bootstrap_template: str = copilot_config["prompt"]["bcquality-bootstrap-template"] + bcquality_root, prompt = prepare_bcquality_workspace(bcquality_config, output_dir / "bcquality-clone", repo_path, REVIEW_OUTPUT_FILE, bootstrap_template) + work_dir: Path = bcquality_root + instructions_enabled: bool = False + skills_enabled: bool = False + custom_agent: str | None = None + # Copilot reads hooks from the CWD's .github/hooks, so install them into the clone to keep tool-usage metrics. + tool_log_path: Path = setup_hooks(bcquality_root, AgentType.COPILOT, output_dir) + config = ExperimentConfiguration( + mcp_servers=mcp_server_names, + al_lsp_enabled=lsp_plugin_dir is not None, + custom_instructions=False, + skills_enabled=False, + custom_agent=None, + bcquality=True, + ) + else: + prompt = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp) + work_dir = repo_path + instructions_enabled = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) + skills_enabled = setup_agent_skills(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) + custom_agent = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT) + tool_log_path = setup_hooks(repo_path, AgentType.COPILOT, output_dir) + config = ExperimentConfiguration( + mcp_servers=mcp_server_names, + al_lsp_enabled=lsp_plugin_dir is not None, + custom_instructions=instructions_enabled, + skills_enabled=skills_enabled, + custom_agent=custom_agent, + ) + + logger.info(f"Executing Copilot CLI in directory: {work_dir}") logger.debug(f"Using prompt:\n{prompt}") # Prefer copilot.exe over copilot.bat/copilot.cmd shims on Windows: the .bat shim invokes PowerShell, @@ -83,12 +112,15 @@ def run_copilot_agent( cmd_args.append(f"--plugin-dir={lsp_plugin_dir}") if custom_agent: cmd_args.append(f"--agent={custom_agent}") + if bcquality_live: + # Grant the agent access to the repo under review (it lives outside the BCQuality CWD). + cmd_args.extend(["--add-dir", str(repo_path)]) logger.debug(f"Copilot command args: {cmd_args}") result = subprocess.run( cmd_args, - cwd=str(repo_path), + cwd=str(work_dir), env={ **os.environ, "GITHUB_COPILOT_PROMPT_MODE_REPO_HOOKS": "true", diff --git a/src/bcbench/agent/shared/codereview_bcquality.py b/src/bcbench/agent/shared/codereview_bcquality.py new file mode 100644 index 000000000..100cb14ab --- /dev/null +++ b/src/bcbench/agent/shared/codereview_bcquality.py @@ -0,0 +1,309 @@ +"""Live BCQuality consumption for the code-review category. + +Faithfully replicates how microsoft/BCApps consumes microsoft/BCQuality today +(BCApps#8700): clone BCQuality at a pinned SHA, filter the clone to the allowed +layers/knowledge, make the filtered clone the agent working directory, and route +the agent through skills/entry.md with a task-context document. + +This module only provides the building blocks (config parsing, clone, filter, +task-context, bootstrap prompt). Wiring into the copilot agent lives separately so +the bug-fix / test-generation categories are unaffected. + +SECURITY: the clone becomes the agent CWD, so its skill/knowledge files are read +before the diff. `ref` MUST be a reviewed full commit SHA and `repo` MUST be an +http(s) URL pointing at a trusted source. +""" + +from __future__ import annotations + +import json +import re +import shutil +import subprocess +from dataclasses import asdict, dataclass, field +from pathlib import Path + +from jinja2 import Template + +from bcbench.logger import get_logger + +logger = get_logger(__name__) + +__all__ = [ + "BCQualityConfig", + "FilterReport", + "RemovedEntry", + "build_bootstrap_prompt", + "build_task_context", + "clone_bcquality", + "filter_clone", + "glob_match", + "parse_bcquality_config", + "prepare_bcquality_workspace", + "write_task_context", +] + +_LAYERS: tuple[str, ...] = ("microsoft", "community", "custom") +_KNOWN_LAYERS: frozenset[str] = frozenset(_LAYERS) +_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +_TASK_CONTEXT_DIMENSIONS: tuple[str, ...] = ("technologies", "countries", "application-area", "bc-version") +_TASK_CONTEXT_FILENAME = "_task-context.json" +_FILTER_REPORT_FILENAME = "_filter-report.json" + + +@dataclass(frozen=True) +class BCQualityConfig: + enabled: bool + repo: str + ref: str + enabled_layers: tuple[str, ...] + disabled_skills: tuple[str, ...] + knowledge_allow: tuple[str, ...] + knowledge_deny: tuple[str, ...] + goal: str + inputs_available: tuple[str, ...] + task_context: dict[str, tuple[str, ...]] + + @classmethod + def from_agent_config(cls, agent_config: dict) -> BCQualityConfig | None: + raw = agent_config.get("bcquality") + if not isinstance(raw, dict): + return None + + knowledge = raw.get("knowledge") or {} + task_context_raw = raw.get("task-context") or {} + task_context = {dim: _as_str_tuple(task_context_raw.get(dim)) for dim in _TASK_CONTEXT_DIMENSIONS if dim in task_context_raw} + + config = cls( + enabled=bool(raw.get("enabled", False)), + repo=str(raw.get("repo", "")).strip(), + ref=str(raw.get("ref", "")).strip(), + enabled_layers=_as_str_tuple(raw.get("enabled-layers")) or ("microsoft",), + disabled_skills=_as_str_tuple(raw.get("disabled-skills")), + knowledge_allow=_as_str_tuple(knowledge.get("allow")), + knowledge_deny=_as_str_tuple(knowledge.get("deny")), + goal=str(task_context_raw.get("goal", "")).strip(), + inputs_available=_as_str_tuple(task_context_raw.get("inputs-available")), + task_context=task_context, + ) + config.validate() + return config + + def validate(self) -> None: + unknown = [layer for layer in self.enabled_layers if layer not in _KNOWN_LAYERS] + if unknown: + raise ValueError(f"Unknown bcquality enabled-layers value(s): {unknown}. Allowed: {sorted(_KNOWN_LAYERS)}.") + if not self.enabled: + return + if not _SHA_RE.match(self.ref): + raise ValueError(f"bcquality.ref must be a full 40-character commit SHA when enabled (got {self.ref!r}). Pin to a reviewed SHA for security.") + if not re.match(r"^https?://", self.repo): + raise ValueError(f"bcquality.repo must be an http(s) URL (got {self.repo!r}).") + + +@dataclass(frozen=True) +class RemovedEntry: + path: str + kind: str # "knowledge" | "skill" + reason: str + + +@dataclass +class FilterReport: + bcquality_root: str + enabled_layers: list[str] + disabled_skills: list[str] + knowledge_allow: list[str] + knowledge_deny: list[str] + removed: list[RemovedEntry] = field(default_factory=list) + + @property + def removed_count(self) -> int: + return len(self.removed) + + def to_dict(self) -> dict: + data = asdict(self) + data["removed_count"] = self.removed_count + return data + + +def parse_bcquality_config(agent_config: dict) -> BCQualityConfig | None: + return BCQualityConfig.from_agent_config(agent_config) + + +def _as_str_tuple(value: object) -> tuple[str, ...]: + if value is None: + return () + if isinstance(value, str): + return (value,) + if isinstance(value, (list, tuple)): + return tuple(str(item).strip() for item in value if str(item).strip()) + return (str(value).strip(),) + + +def _glob_to_regex(pattern: str) -> str: + p = pattern.replace("\\", "/").strip() + parts: list[str] = ["^"] + i = 0 + while i < len(p): + c = p[i] + if c == "*": + if i + 1 < len(p) and p[i + 1] == "*": + parts.append(".*") + i += 2 + if i < len(p) and p[i] == "/": + i += 1 + continue + parts.append("[^/]*") + elif c == "?": + parts.append("[^/]") + else: + parts.append(re.escape(c)) + i += 1 + parts.append("$") + return "".join(parts) + + +def glob_match(path: str, pattern: str) -> bool: + normalized = pattern.replace("\\", "/").strip() + if not normalized: + return False + return re.match(_glob_to_regex(normalized), path) is not None + + +def _run_git(args: list[str], cwd: Path) -> None: + result = subprocess.run(["git", *args], cwd=cwd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, check=False) + if result.returncode != 0: + raise RuntimeError(f"git {' '.join(args)} failed (exit {result.returncode}): {result.stderr.strip()}") + + +def clone_bcquality(config: BCQualityConfig, dest: Path) -> Path: + """Shallow-clone BCQuality at the pinned SHA into dest (overwriting if present).""" + config.validate() + if dest.exists(): + shutil.rmtree(dest) + dest.mkdir(parents=True) + + logger.info(f"Cloning BCQuality {config.repo}@{config.ref} into {dest}") + _run_git(["init", "--quiet"], cwd=dest) + _run_git(["remote", "add", "origin", config.repo], cwd=dest) + _run_git(["fetch", "--quiet", "--depth", "1", "origin", config.ref], cwd=dest) + _run_git(["checkout", "--quiet", "--detach", "FETCH_HEAD"], cwd=dest) + return dest + + +def _is_within(target: Path, root: Path) -> bool: + try: + target.relative_to(root) + return True + except ValueError: + return False + + +def filter_clone(root: Path, config: BCQualityConfig, report_path: Path | None = None) -> FilterReport: + """Prune a BCQuality clone per enabled-layers + knowledge allow/deny globs. + + Mirrors BCApps tools/BCQuality/scripts/Invoke-BCQualityFilter.ps1. Meta-skills + under the top-level /skills/ are never removed. + """ + if not root.is_dir(): + raise FileNotFoundError(f"BCQuality root not found: {root}") + report_path = report_path or (root / _FILTER_REPORT_FILENAME) + root_resolved = root.resolve() + removed: list[RemovedEntry] = [] + + for layer in _LAYERS: + kb_root = root / layer / "knowledge" + if not kb_root.is_dir(): + continue + for md in sorted(kb_root.rglob("*.md")): + rel = md.relative_to(root).as_posix() + reason: str | None = None + if layer not in config.enabled_layers: + reason = "layer-disabled" + elif config.knowledge_allow and not any(glob_match(rel, pat) for pat in config.knowledge_allow): + reason = "allow-list-miss" + if reason is None and config.knowledge_deny and any(glob_match(rel, pat) for pat in config.knowledge_deny): + reason = "deny-list-hit" + if reason: + md.unlink() + removed.append(RemovedEntry(path=rel, kind="knowledge", reason=reason)) + + for layer in _LAYERS: + skills_root = root / layer / "skills" + if not skills_root.is_dir(): + continue + if layer not in config.enabled_layers: + for md in sorted(skills_root.rglob("*.md")): + rel = md.relative_to(root).as_posix() + md.unlink() + removed.append(RemovedEntry(path=rel, kind="skill", reason="layer-disabled")) + continue + for disabled in config.disabled_skills: + normalized = disabled.replace("\\", "/").strip() + if not normalized or not normalized.startswith(f"{layer}/"): + continue + target = (root / normalized).resolve() + if not _is_within(target, root_resolved): + logger.warning(f"Skipping unsafe disabled-skill path '{normalized}' (escapes BCQuality root).") + continue + if target.is_file(): + target.unlink() + removed.append(RemovedEntry(path=normalized, kind="skill", reason="configuration")) + + report = FilterReport( + bcquality_root=str(root), + enabled_layers=list(config.enabled_layers), + disabled_skills=list(config.disabled_skills), + knowledge_allow=list(config.knowledge_allow), + knowledge_deny=list(config.knowledge_deny), + removed=removed, + ) + report_path.write_text(json.dumps(report.to_dict(), indent=2), encoding="utf-8") + logger.info(f"BCQuality filter: removed {report.removed_count} file(s). Report: {report_path}") + return report + + +def build_task_context(config: BCQualityConfig) -> dict: + context: dict[str, object] = { + "goal": config.goal, + "inputs-available": list(config.inputs_available), + "enabled-layers": list(config.enabled_layers), + "disabled-skills": list(config.disabled_skills), + } + for dim in _TASK_CONTEXT_DIMENSIONS: + if dim in config.task_context: + context[dim] = list(config.task_context[dim]) + return context + + +def write_task_context(root: Path, context: dict) -> Path: + path = root / _TASK_CONTEXT_FILENAME + path.write_text(json.dumps(context, indent=2), encoding="utf-8") + logger.info(f"Task context written to {path}") + return path + + +def build_bootstrap_prompt(template: str, repo_path: Path, task_context_filename: str, review_output_file: str) -> str: + return Template(template).render( + repo=repo_path.as_posix(), + task_context_filename=task_context_filename, + review_output_file=review_output_file, + ) + + +def prepare_bcquality_workspace(config: BCQualityConfig, clone_dest: Path, repo_path: Path, review_output_file: str, bootstrap_template: str) -> tuple[Path, str]: + """Clone + filter BCQuality, write task-context, and render the bootstrap prompt. + + Returns: + Tuple of (filtered BCQuality clone root, bootstrap prompt string). + """ + clone_bcquality(config, clone_dest) + entry_skill = clone_dest / "skills" / "entry.md" + if not entry_skill.exists(): + raise FileNotFoundError(f"BCQuality clone at {clone_dest} is missing skills/entry.md; check bcquality repo and ref.") + filter_clone(clone_dest, config) + context = build_task_context(config) + context_path = write_task_context(clone_dest, context) + prompt = build_bootstrap_prompt(bootstrap_template, repo_path, context_path.name, review_output_file) + return clone_dest, prompt diff --git a/src/bcbench/agent/shared/config.yaml b/src/bcbench/agent/shared/config.yaml index b5dd27f64..0816bd9b8 100644 --- a/src/bcbench/agent/shared/config.yaml +++ b/src/bcbench/agent/shared/config.yaml @@ -66,6 +66,48 @@ prompt: If there are no findings, write an empty array. Write only valid JSON to review.json, with no surrounding markdown or commentary. + # Bootstrap prompt for the live-BCQuality code-review arm (bcquality.enabled: true). + # Rendered with Jinja2; variables: repo, task_context_filename, review_output_file. + bcquality-bootstrap-template: | + TASK: + Review the uncommitted working-tree changes in the Business Central (AL) repository at {{repo}}. Review only the uncommitted working-tree changes (git diff HEAD); do not compare commits such as HEAD~1..HEAD or origin/main. + + Use git to analyze the changes: + - git -C "{{repo}}" diff HEAD to see all working-tree changes + - git -C "{{repo}}" diff HEAD --| ` column +headers, so field captions are the only accessible labels. + +**A layout table where editable fields keep their visible captions is NOT a +violation.** For example, a grid where fields do not have `ShowCaption = false` +simply renders as a layout table with each field labeled by its own caption — +this is a valid, accessible pattern. DO NOT flag a grid as a violation merely +because it does not meet the data table heuristic. + +A non-editable field with `ShowCaption = false` is acceptable in a layout +table ONLY when the field is **standalone content** — it displays a value +that is meaningful on its own (e.g., a status message, a description) and +is NOT intended to label or be labeled by another field in the grid. + +Good — layout table with standalone content field: +```al +grid(InfoGrid) +{ + GridLayout = Columns; + group(LeftColumn) + { + field(Address; Rec.Address) + { + // ShowCaption defaults to true — field has its own label + } + field(City; Rec.City) + { + } + } + group(RightColumn) + { + field(StatusMessage; StatusText) + { + Editable = false; + ShowCaption = false; // OK — standalone content, not labeling another field + } + } +} +``` + +ANTI-PATTERN — THE ACCIDENTAL MIX: +The most common accessibility bug in grid layouts is partially following the +data table conventions. This happens when a developer arranges fields with +tabular intent (one field serves as a label or row header for another) but +the grid does NOT satisfy all the data table heuristic conditions. The +client falls back to layout table rendering, and the tabular relationships +between fields are lost — screen readers cannot associate a "header" field +with its corresponding "value" field. + +There are two ways this manifests: + +1. **Hidden captions on editable fields in a non-data-table grid.** + The field has `ShowCaption = false` but there are no ` | ` headers to + compensate. The field has no accessible label at all. + +2. **Fields used as labels for other fields.** + One field (e.g., "Statement Period") is intended to serve as a row header + for another field (e.g., "Statement Balance"), but since it renders as a + layout table, there is no programmatic association between them. A screen + reader will announce each field independently with no relationship. + +Flag a grid as an accessibility issue when ANY of these are true: +- An editable field has `ShowCaption = false` and the grid does NOT meet + ALL data table conditions +- Fields are arranged so that one field is clearly intended to label or + describe another field (tabular data intent), but the grid does NOT meet + ALL data table conditions +- A grid is **nested inside another grid**. Nested grids are not a supported + pattern. Even if an inner grid independently meets the data table heuristic, + the outer grid fails because its groups contain non-field children (the + inner grids). Always flag nested grids as a violation. + +Bad — loose field in grid forces layout table, but captions are hidden: +```al +grid(DataGrid) +{ + GridLayout = Columns; + field(Name; Rec.Name) // Field directly in grid — not in a group + { + ShowCaption = false; // No table header AND no caption — inaccessible + } + group(Column2) + { + ShowCaption = false; + field(Balance; Rec.Balance) + { + ShowCaption = false; // Same problem + } + } +} +``` + +Bad — non-field child in group breaks data table heuristic, captionless fields lose labels: +```al +grid(MixedGrid) +{ + GridLayout = Columns; + group(Names) + { + ShowCaption = false; + field(Name; Rec.Name) + { + ShowCaption = false; // Intended as data table column + } + group(SubGroup) // Nested group — not a field, breaks heuristic + { + field(Alias; Rec.Alias) + { + ShowCaption = false; + } + } + } + group(Amounts) + { + ShowCaption = false; + field(Balance; Rec.Balance) + { + ShowCaption = false; // Falls back to layout table — no label at all + } + } +} +``` + +Bad — fields with tabular intent but heuristic fails due to a field keeping its caption: +```al +grid(StatementGrid) +{ + GridLayout = Columns; + group(Periods) + { + ShowCaption = false; + field(StatementPeriod; Rec."Statement Period") + { + Editable = false; + ShowCaption = false; // Developer intends this as a row header for Balance + } + } + group(Balances) + { + ShowCaption = false; + field(StatementBalance; Rec."Statement Balance") + { + Editable = false; + ShowCaption = false; // Intended to be "labeled by" StatementPeriod + } + field(DueDate; Rec."Due Date") + { + // ShowCaption defaults to true — this one field with a visible + // caption causes the entire grid to fall back to layout table. + // Now StatementPeriod and StatementBalance lose their tabular + // relationship and have no accessible labels. + } + } +} +``` + +GENERAL GUIDANCE: +- **Minimize use of grid and fixed layouts.** Simple groups and fields reflow + better and produce correct semantic markup automatically. +- If you need forced column layout, prefer simple groups over grid unless you + truly need data-table semantics. +- When reviewing a grid or fixed layout, first check: does it meet ALL data + table conditions? If yes, `ShowCaption = false` is correct. If no, ask: is + the developer arranging fields with tabular intent (one field labels + another)? If so, the grid must be fixed to meet data table conditions. + Otherwise, ensure editable fields keep their captions and only standalone + content fields hide theirs. + +## STYLE PROPERTY — COSMETIC VS SEMANTIC STYLES + +The `Style` property on page fields controls text formatting. Some style +values are purely cosmetic (visual formatting only), while others carry +semantic meaning that is conveyed through color. For accessibility, assume +that the style is completely invisible to the user — the meaning must be +fully determinable from the field caption, value, or adjacent fields. + +COSMETIC STYLES (always safe — DO NOT flag these): +These styles change visual appearance but do not convey semantic meaning. +They NEVER require additional context and must NOT be reported as findings: +- None, Standard +- StandardAccent (Blue) +- Strong (Bold), StrongAccent (Blue + Bold) +- Attention (Red + Italic), AttentionAccent (Blue + Italic) +- Subordinate (Grey) + +This applies whether the cosmetic style is set via `Style` or via a +`StyleExpr` Text variable. If the resolved style is cosmetic, it is safe. + +SEMANTIC STYLES (require additional context — flag ONLY these three): +Only the following three styles carry semantic meaning through color: +- **Favorable** (Bold + Green) — implies a positive outcome +- **Unfavorable** (Bold + Italic + Red) — implies a negative outcome +- **Ambiguous** (Yellow) — implies an uncertain or mixed outcome + +EXCEPTION — CUE TILES (fields inside a `cuegroup`): +Fields inside a `cuegroup` render as cue tiles. The client automatically +provides an accessible label for semantic +styles on cue tiles (e.g., "Favorable", "Unfavorable"), so semantic styles +in a `cuegroup` do NOT need additional context and can be ignored for this +analysis. + +RULE: When a semantic style (Favorable, Unfavorable, Ambiguous) is used, +the semantic meaning MUST be independently determinable without seeing the +color. At least one of these conditions must be true: +1. The **field caption** matches the semantic meaning (e.g., caption is + "Error" with Style = Unfavorable, or "Profit" with Style = Favorable) +2. The **field value** communicates the meaning (e.g., value is "Success!" + with Favorable, or a negative number with Unfavorable, or "Something + went wrong" with Unfavorable) +3. An **adjacent field** provides a textual representation of the semantic + meaning (e.g., a separate "Status" column reads "High" / "Medium" / + "Low" alongside a percentage field styled with Favorable / Ambiguous / + Unfavorable) + +This rule applies equally whether `Style` is set to a literal value or to +a variable that evaluates to a semantic style at runtime. + +NOTE ON `StyleExpr`: In AL, `StyleExpr` serves two distinct purposes +depending on its type: +- **Boolean**: When `StyleExpr` is a Boolean (or Boolean expression), it + controls whether the `Style` property is applied. In this case, analyze + the `Style` property value — `StyleExpr` itself can be ignored. +- **Text**: When `StyleExpr` is a Text variable (e.g., `StyleExpr = StatusStyle` + where `StatusStyle` is declared as `Text`), the variable contains the style + name at runtime (e.g., `StatusStyle := 'Favorable'`). In this case, there + may be no `Style` property at all — the `StyleExpr` variable IS the style. + Trace the variable assignments in OnAfterGetRecord or OnAfterGetCurrRecord + to determine which semantic styles may be applied, then apply the same + rules as for a literal `Style` value. + +Good — field value communicates the semantic meaning: +```al +field(ProfitMargin; Rec."Profit Margin") +{ + // Positive values show as green, negative as red. + // The sign of the number (+/-) independently conveys the meaning. + Style = Favorable; + StyleExpr = IsProfitable; // Boolean — toggles whether Style is applied +} +field(OverdueAmount; Rec."Overdue Amount") +{ + // Caption "Overdue Amount" already implies unfavorable. + Style = Unfavorable; +} +``` + +Good — StyleExpr as Text variable with values that match field meaning: +```al +field(Status; Rec.Status) +{ + // Status is an Option: Open, In Progress, Completed, Overdue. + // The option text values themselves communicate the meaning. + StyleExpr = StatusStyle; // Text — contains 'Favorable', 'Unfavorable', etc. +} +// In OnAfterGetRecord: +// case Rec.Status of +// Rec.Status::Open: StatusStyle := 'Standard'; +// Rec.Status::Completed: StatusStyle := 'Favorable'; +// Rec.Status::Overdue: StatusStyle := 'Unfavorable'; +// end; +``` + +Good — adjacent field provides semantic context: +```al +// In a grid/repeater with columns: +field(Confidence; Rec."Confidence %") +{ + StyleExpr = ConfidenceStyle; // Text — 'Favorable'/'Ambiguous'/'Unfavorable' +} +field(ConfidenceLevel; Rec."Confidence Level") +{ + // This adjacent column shows "High", "Medium", or "Low" — + // providing the textual meaning that the color alone cannot. +} +``` + +Bad — semantic style with no independent way to determine meaning: +```al +field(Confidence; Rec."Confidence %") +{ + // StyleExpr is 'Favorable' above 90%, 'Ambiguous' 70-90%, 'Unfavorable' below 70%. + // But the caption ("Confidence") and value ("85%") do not tell the user + // whether 85% is good or bad. Only the color communicates the threshold. + StyleExpr = ConfidenceStyle; // Text variable +} +``` + +Bad — semantic style used for purely cosmetic purposes: +```al +field(CompanyName; Rec."Company Name") +{ + Style = Favorable; // Green text for aesthetics — misleading, implies + // the company name is a positive value +} +``` + +COMMON ACCEPTABLE PATTERNS — DO NOT flag these: +- A **balance or amount** field styled Favorable for positive values and + Unfavorable for negative values. The sign (+/-) of the number conveys + the meaning independently. +- A field whose **caption already implies the semantic meaning**: "Overdue + Amount" with Unfavorable, "Profit" with Favorable, "Error Count" with + Unfavorable. The caption tells the user what the value means. +- An **Option or Enum** field where the option text values communicate the + state (e.g., "Open", "Completed", "Overdue") and the style matches + the text (e.g., Favorable for "Completed", Unfavorable for "Overdue"). +- A `StyleExpr` Text variable that resolves to a **cosmetic style** (e.g., + 'Attention', 'Strong'). Cosmetic styles are always safe regardless of + context. + +## JAVASCRIPT CONTROL ADD-INS + +When a developer builds a JavaScript control add-in, they bypass the +Business Central framework's built-in accessibility support and take full +responsibility for the accessibility of the rendered HTML, JavaScript, and +CSS. Review changes to control add-in implementation files for WCAG 2.1 AA +compliance and general accessibility best practices. + +NOTE TO REVIEWER: Automated review of control add-in code is inherently +non-exhaustive. Many accessibility issues (keyboard flow, screen reader +announcements, dynamic behavior) require manual testing. + +WHEN TO FLAG FOR MANUAL REVIEW: +If a control add-in diff contains changes that affect UI rendering, ALWAYS +include a finding recommending a manual accessibility review. UI changes +include modifications to: +- HTML templates or DOM manipulation (createElement, innerHTML, appendChild, + JSX/TSX markup, template literals producing HTML) +- CSS or SCSS files (any change to styling, layout, colors, visibility) +- Event handlers for user interaction (click, keydown, focus, blur) +- ARIA attributes or roles +- Dynamic visibility or content updates + +If no specific accessibility issues are found but UI-rendering changes exist, +output a single finding with severity "Low" recommending a manual review. +Do NOT output an empty array when UI-rendering changes are present — the empty array rule applies only when there are no issues and no UI-rendering changes. + +Do NOT flag for manual review if the only changes are to pure business +logic, data processing, API calls, or other non-rendering code that does +not touch the DOM or styling. + +When reporting issues in control add-in code, include a note that a manual accessibility +review is recommended for any control add-in that renders a UI. + +KEY AREAS TO CHECK: + +1. **ARIA and semantic HTML** + - Interactive elements must have accessible names (aria-label, + aria-labelledby, or visible text content) + - Use semantic HTML elements where possible (` |
|---|