diff --git a/tests/unit/test_audit_command.py b/tests/unit/test_audit_command.py index 8c92c90b..d5b3216f 100644 --- a/tests/unit/test_audit_command.py +++ b/tests/unit/test_audit_command.py @@ -6,9 +6,8 @@ import pytest from click.testing import CliRunner -from apm_cli.commands.audit import audit, _scan_single_file, _apply_strip -from apm_cli.security.content_scanner import ContentScanner - +from apm_cli.commands.audit import _apply_strip, _scan_single_file, audit +from apm_cli.security.content_scanner import ContentScanner, ScanFinding # ── Fixtures ──────────────────────────────────────────────────────── @@ -31,7 +30,7 @@ def warning_file(tmp_path): """A file containing zero-width spaces (warning-level).""" p = tmp_path / "warning.md" p.write_text( - "Hello\u200Bworld\nSecond\u200Dline\n", + "Hello\u200bworld\nSecond\u200dline\n", encoding="utf-8", ) return p @@ -43,7 +42,7 @@ def critical_file(tmp_path): p = tmp_path / "critical.md" # U+E0001 = LANGUAGE TAG, U+E0068 = TAG LATIN SMALL LETTER H p.write_text( - "Normal text\U000E0001\U000E0068\U000E0065\U000E006Cmore text\n", + "Normal text\U000e0001\U000e0068\U000e0065\U000e006cmore text\n", encoding="utf-8", ) return p @@ -54,7 +53,7 @@ def mixed_file(tmp_path): """A file with both critical and warning characters.""" p = tmp_path / "mixed.md" p.write_text( - "line one\u200B\nline two\U000E0041\n", + "line one\u200b\nline two\U000e0041\n", encoding="utf-8", ) return p @@ -64,7 +63,7 @@ def mixed_file(tmp_path): def info_only_file(tmp_path): """A file with only info-level findings (NBSP).""" p = tmp_path / "info.md" - p.write_text("Hello\u00A0world\n", encoding="utf-8") + p.write_text("Hello\u00a0world\n", encoding="utf-8") return p @@ -91,7 +90,7 @@ def lockfile_project(tmp_path): "Clean prompt content\n", encoding="utf-8" ) (tmp_path / ".github" / "instructions" / "guide.md").write_text( - "Guide with hidden\u200Bchar\n", encoding="utf-8" + "Guide with hidden\u200bchar\n", encoding="utf-8" ) return tmp_path @@ -114,7 +113,7 @@ def lockfile_project_critical(tmp_path): (tmp_path / ".github" / "prompts").mkdir(parents=True) (tmp_path / ".github" / "prompts" / "evil.md").write_text( - "Looks normal\U000E0001hidden\n", encoding="utf-8" + "Looks normal\U000e0001hidden\n", encoding="utf-8" ) return tmp_path @@ -138,7 +137,7 @@ def lockfile_project_with_dir(tmp_path): skill_dir = tmp_path / ".github" / "skills" / "my-skill" skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text( - "skill with hidden\u200Bchar\n", encoding="utf-8" + "skill with hidden\u200bchar\n", encoding="utf-8" ) (skill_dir / "helper.md").write_text("clean helper\n", encoding="utf-8") @@ -182,7 +181,7 @@ def test_directory_errors(self, runner, tmp_path): def test_verbose_shows_info(self, runner, tmp_path): """--verbose includes info-level findings.""" p = tmp_path / "info.md" - p.write_text("Hello\u00A0world\n", encoding="utf-8") # NBSP = info + p.write_text("Hello\u00a0world\n", encoding="utf-8") # NBSP = info result = runner.invoke(audit, ["--file", str(p), "--verbose"]) # Info findings should appear in verbose output assert "U+00A0" in result.output @@ -220,7 +219,10 @@ def test_warning_findings_exit_two(self, runner, lockfile_project, monkeypatch): assert result.exit_code == 2 def test_critical_findings_exit_one( - self, runner, lockfile_project_critical, monkeypatch, + self, + runner, + lockfile_project_critical, + monkeypatch, ): monkeypatch.chdir(lockfile_project_critical) result = runner.invoke(audit, []) @@ -240,7 +242,10 @@ def test_package_filter_not_found(self, runner, lockfile_project, monkeypatch): assert "not found" in result.output.lower() def test_dir_entries_scanned_recursively( - self, runner, lockfile_project_with_dir, monkeypatch, + self, + runner, + lockfile_project_with_dir, + monkeypatch, ): """Skill directories recorded in deployed_files should be scanned.""" monkeypatch.chdir(lockfile_project_with_dir) @@ -248,6 +253,64 @@ def test_dir_entries_scanned_recursively( # SKILL.md has a zero-width char → warning → exit 2 assert result.exit_code == 2 + def test_corrupt_lockfile_skipped(self, runner, tmp_path, monkeypatch): + """A corrupt/invalid lockfile should result in a clean exit.""" + (tmp_path / "apm.lock.yaml").write_text( + "invalid: yaml: [broken", encoding="utf-8" + ) + monkeypatch.chdir(tmp_path) + result = runner.invoke(audit, []) + assert result.exit_code == 0 + + def test_missing_deployed_file_skipped(self, runner, tmp_path, monkeypatch): + """Files listed in the lockfile but absent on disk are silently skipped.""" + lock_content = textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: https://github.com/test/test-pkg + resolved_ref: main + resolved_commit: abc123 + deployed_files: + - .github/prompts/missing.md + """) + (tmp_path / "apm.lock.yaml").write_text(lock_content, encoding="utf-8") + # Do NOT create the deployed file — it should be skipped + monkeypatch.chdir(tmp_path) + result = runner.invoke(audit, []) + assert result.exit_code == 0 + + def test_symlink_skipped_in_dir_scan(self, runner, tmp_path, monkeypatch): + """Symlinked files inside skill directories are skipped.""" + lock_content = textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: https://github.com/test/skill-pkg + resolved_ref: main + resolved_commit: abc123 + deployed_files: + - .github/skills/my-skill/ + """) + (tmp_path / "apm.lock.yaml").write_text(lock_content, encoding="utf-8") + + skill_dir = tmp_path / ".github" / "skills" / "my-skill" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text("clean content\n", encoding="utf-8") + + # Create a symlink pointing outside the package tree + external = tmp_path / "external.md" + external.write_text("external\u200bcontent\n", encoding="utf-8") + try: + (skill_dir / "link.md").symlink_to(external) + except (OSError, NotImplementedError): + pytest.skip("Symlinks not supported on this platform") + + monkeypatch.chdir(tmp_path) + result = runner.invoke(audit, []) + # Only SKILL.md scanned (symlink skipped) → clean → exit 0 + assert result.exit_code == 0 + def test_path_traversal_rejected(self, runner, tmp_path, monkeypatch): """Paths with .. should be rejected to prevent lockfile attacks.""" lock_content = textwrap.dedent("""\ @@ -278,8 +341,8 @@ def test_strip_removes_warnings(self, runner, warning_file): assert result.exit_code == 0 # File should now be clean content = warning_file.read_text(encoding="utf-8") - assert "\u200B" not in content - assert "\u200D" not in content + assert "\u200b" not in content + assert "\u200d" not in content def test_strip_preserves_critical(self, runner, critical_file): result = runner.invoke(audit, ["--file", str(critical_file), "--strip"]) @@ -287,14 +350,14 @@ def test_strip_preserves_critical(self, runner, critical_file): assert result.exit_code == 1 content = critical_file.read_text(encoding="utf-8") # Critical tag chars should still be present - assert "\U000E0001" in content + assert "\U000e0001" in content def test_strip_mixed_removes_warnings_keeps_critical(self, runner, mixed_file): result = runner.invoke(audit, ["--file", str(mixed_file), "--strip"]) assert result.exit_code == 1 # critical still present content = mixed_file.read_text(encoding="utf-8") - assert "\u200B" not in content # warning stripped - assert "\U000E0041" in content # critical preserved + assert "\u200b" not in content # warning stripped + assert "\U000e0041" in content # critical preserved def test_strip_clean_file_noop(self, runner, clean_file): original = clean_file.read_text(encoding="utf-8") @@ -309,10 +372,64 @@ def test_strip_lockfile_mode(self, runner, lockfile_project, monkeypatch): # The warning char in guide.md should be stripped guide = lockfile_project / ".github" / "instructions" / "guide.md" content = guide.read_text(encoding="utf-8") - assert "\u200B" not in content + assert "\u200b" not in content + + def test_info_and_warning_shows_combined_note(self, runner, tmp_path): + """_render_summary notes info findings when warnings are also present.""" + # NBSP (info) + zero-width space (warning) in the same file + p = tmp_path / "mixed_info.md" + p.write_text("Hello\u00a0world\u200bbye\n", encoding="utf-8") + result = runner.invoke(audit, ["--file", str(p)]) + # Warning is highest severity → exit 2; info line is shown + assert result.exit_code == 2 + # The summary should mention the info-level finding + assert "info" in result.output.lower() + def test_verbose_shows_info_on_warning_file(self, runner, tmp_path): + """--verbose shows info findings even when warnings are present.""" + p = tmp_path / "both.md" + p.write_text("Hello\u00a0world\u200bbye\n", encoding="utf-8") + result = runner.invoke(audit, ["--file", str(p), "--verbose"]) + assert "U+00A0" in result.output # info finding + assert "U+200B" in result.output # warning finding + + +# ── _apply_strip edge case tests ───────────────────────────────── + + +class TestApplyStripEdgeCases: + """Edge cases for _apply_strip not covered by TestApplyStrip.""" + + def test_skips_nonexistent_absolute_path(self, tmp_path): + """An absolute path that no longer exists is silently skipped.""" + ghost = str(tmp_path / "ghost.md") + findings_by_file = { + ghost: [ + ScanFinding( + ghost, + 1, + 5, + repr("\u200b"), + "U+200B", + "warning", + "zero-width", + "Zero-width space", + ) + ] + } + modified = _apply_strip(findings_by_file, tmp_path) + assert modified == 0 -# ── _scan_single_file helper tests ─────────────────────────────── + def test_write_error_handled_gracefully(self, runner, warning_file): + """If writing the cleaned file fails, _apply_strip warns but does not crash.""" + # Make the file read-only to trigger a PermissionError on write + warning_file.chmod(0o444) + try: + result = runner.invoke(audit, ["--file", str(warning_file), "--strip"]) + # Should not raise; exit code 0 (no critical findings) + assert result.exit_code == 0 + finally: + warning_file.chmod(0o644) class TestScanSingleFile: @@ -352,7 +469,7 @@ def test_rejects_path_outside_root(self, tmp_path): """_apply_strip must not write files outside project root.""" evil_path = tmp_path / "outside" / "evil.md" evil_path.parent.mkdir(parents=True) - evil_path.write_text("Hello\u200Bworld\n", encoding="utf-8") + evil_path.write_text("Hello\u200bworld\n", encoding="utf-8") findings = ContentScanner.scan_file(evil_path) # Use a relative path that tries to escape diff --git a/tests/unit/test_content_scanner.py b/tests/unit/test_content_scanner.py index 23876d3b..ef598855 100644 --- a/tests/unit/test_content_scanner.py +++ b/tests/unit/test_content_scanner.py @@ -29,7 +29,7 @@ def test_whitespace_only_returns_empty(self): def test_tag_character_detected_as_critical(self): """U+E0001 (language tag) must be flagged as critical.""" - content = f"Hello \U000E0001 world" + content = f"Hello \U000e0001 world" findings = ContentScanner.scan_text(content, filename="test.md") assert len(findings) == 1 assert findings[0].severity == "critical" @@ -61,7 +61,7 @@ def test_tag_cancel_detected(self): def test_bidi_lro_detected(self): """U+202D (LRO) left-to-right override.""" - content = f"normal \u202D overridden" + content = f"normal \u202d overridden" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "critical" @@ -69,7 +69,7 @@ def test_bidi_lro_detected(self): def test_bidi_rlo_detected(self): """U+202E (RLO) right-to-left override.""" - content = f"normal \u202E reversed" + content = f"normal \u202e reversed" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "critical" @@ -86,7 +86,7 @@ def test_bidi_isolates_detected(self): def test_zero_width_space_detected(self): """U+200B zero-width space.""" - content = f"hello\u200Bworld" + content = f"hello\u200bworld" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "warning" @@ -94,7 +94,7 @@ def test_zero_width_space_detected(self): def test_zwj_detected(self): """U+200D zero-width joiner.""" - content = f"hello\u200Dworld" + content = f"hello\u200dworld" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "warning" @@ -102,7 +102,7 @@ def test_zwj_detected(self): def test_zwnj_detected(self): """U+200C zero-width non-joiner.""" - content = f"hello\u200Cworld" + content = f"hello\u200cworld" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "warning" @@ -116,7 +116,7 @@ def test_word_joiner_detected(self): def test_soft_hyphen_detected(self): """U+00AD soft hyphen.""" - content = f"hel\u00ADlo" + content = f"hel\u00adlo" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "warning" @@ -126,7 +126,7 @@ def test_soft_hyphen_detected(self): def test_nbsp_detected_as_info(self): """U+00A0 non-breaking space.""" - content = f"hello\u00A0world" + content = f"hello\u00a0world" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "info" @@ -150,7 +150,7 @@ def test_ideographic_space(self): def test_bom_at_start_is_info(self): """BOM (U+FEFF) at file start is standard — info severity.""" - content = "\uFEFF# My Document" + content = "\ufeff# My Document" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "info" @@ -160,7 +160,7 @@ def test_bom_at_start_is_info(self): def test_bom_mid_file_is_warning(self): """BOM in the middle of a file is suspicious.""" - content = "line one\n\uFEFFline two" + content = "line one\n\ufeffline two" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].severity == "warning" @@ -172,14 +172,14 @@ def test_bom_mid_file_is_warning(self): def test_line_column_accuracy(self): """Findings report correct 1-based line and column numbers.""" # Place a zero-width space at line 3, col 6 - content = "line1\nline2\nline3\u200Brest" + content = "line1\nline2\nline3\u200brest" findings = ContentScanner.scan_text(content) assert len(findings) == 1 assert findings[0].line == 3 assert findings[0].column == 6 def test_multiple_findings_on_same_line(self): - content = f"a\u200Bb\u200Cc" + content = f"a\u200bb\u200cc" findings = ContentScanner.scan_text(content) assert len(findings) == 2 assert findings[0].column == 2 @@ -189,7 +189,7 @@ def test_multiple_findings_on_same_line(self): def test_mixed_severities(self): """Content with chars from all severity levels.""" - content = f"\u00A0visible\u200Btext\u202Ehidden" + content = f"\u00a0visible\u200btext\u202ehidden" findings = ContentScanner.scan_text(content) severities = {f.severity for f in findings} assert severities == {"info", "warning", "critical"} @@ -212,7 +212,7 @@ def test_scan_clean_file(self, tmp_path): def test_scan_file_with_findings(self, tmp_path): f = tmp_path / "suspicious.md" - f.write_text(f"hello\u200Bworld", encoding="utf-8") + f.write_text(f"hello\u200bworld", encoding="utf-8") findings = ContentScanner.scan_file(f) assert len(findings) == 1 assert findings[0].file == str(f) @@ -238,7 +238,7 @@ def test_latin1_file_returns_empty(self, tmp_path): def test_bom_plus_critical_detected(self, tmp_path): """Files with BOM and critical chars should report both.""" f = tmp_path / "bom_critical.md" - f.write_text("\ufeff" + "tag\U000E0041char\n", encoding="utf-8") + f.write_text("\ufeff" + "tag\U000e0041char\n", encoding="utf-8") findings = ContentScanner.scan_file(f) severities = {fnd.severity for fnd in findings} assert "critical" in severities @@ -274,14 +274,59 @@ def test_mixed(self): assert result == {"critical": 2, "warning": 1, "info": 1} +class TestClassify: + """Tests for ContentScanner.classify() — combined has_critical + summarize.""" + + def test_empty_returns_false_and_zero_counts(self): + has_crit, counts = ContentScanner.classify([]) + assert has_crit is False + assert counts == {"critical": 0, "warning": 0, "info": 0} + + def test_critical_finding_sets_flag(self): + findings = [ + ScanFinding("f", 1, 1, "", "U+E0001", "critical", "tag-character", "") + ] + has_crit, counts = ContentScanner.classify(findings) + assert has_crit is True + assert counts["critical"] == 1 + + def test_warning_only_does_not_set_flag(self): + findings = [ScanFinding("f", 1, 1, "", "U+200B", "warning", "zero-width", "")] + has_crit, counts = ContentScanner.classify(findings) + assert has_crit is False + assert counts["warning"] == 1 + assert counts["critical"] == 0 + + def test_mixed_findings_comprehensive(self): + findings = [ + ScanFinding("f", 1, 1, "", "U+E0041", "critical", "tag-character", ""), + ScanFinding("f", 1, 2, "", "U+200B", "warning", "zero-width", ""), + ScanFinding("f", 1, 3, "", "U+00A0", "info", "unusual-whitespace", ""), + ] + has_crit, counts = ContentScanner.classify(findings) + assert has_crit is True + assert counts == {"critical": 1, "warning": 1, "info": 1} + + def test_multiple_critical_all_counted(self): + findings = [ + ScanFinding( + "f", 1, i, "", f"U+E{0x0041 + i:04X}", "critical", "tag-character", "" + ) + for i in range(3) + ] + has_crit, counts = ContentScanner.classify(findings) + assert has_crit is True + assert counts["critical"] == 3 + + class TestStripNonCritical: def test_strips_zero_width_chars(self): - content = f"hello\u200Bworld" + content = f"hello\u200bworld" result = ContentScanner.strip_non_critical(content) assert result == "helloworld" def test_strips_nbsp(self): - content = f"hello\u00A0world" + content = f"hello\u00a0world" result = ContentScanner.strip_non_critical(content) assert result == "helloworld" @@ -293,12 +338,12 @@ def test_preserves_critical_chars(self): assert tag in result def test_strips_leading_bom(self): - content = f"\uFEFF# Title" + content = f"\ufeff# Title" result = ContentScanner.strip_non_critical(content) assert result == "# Title" def test_strips_mid_file_bom(self): - content = f"line1\n\uFEFFline2" + content = f"line1\n\ufeffline2" result = ContentScanner.strip_non_critical(content) assert result == "line1\nline2" @@ -308,6 +353,6 @@ def test_clean_content_unchanged(self): assert result == content def test_strips_soft_hyphen(self): - content = f"hel\u00ADlo" + content = f"hel\u00adlo" result = ContentScanner.strip_non_critical(content) assert result == "hello"