Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion tests/test_check_duplicate_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ def test_expand_paths_returns_json_files(tmp_path):
missing = tmp_path / 'missing.json'
with pytest.raises(Exception, match='Could not find file or directory'):
check_module.expand_paths([str(missing)])

with pytest.raises(Exception, match='Could not find file or directory'):
Comment on lines +94 to +95
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the .txt path case, the test matches the same 'Could not find file or directory' message used for missing paths. That message is a bit misleading for an existing-but-unsupported file, and matching on it makes the intent (unsupported extension rejection) less clear. Consider either asserting only that an exception is raised for non-JSON files, or (longer-term) updating expand_paths to raise a distinct exception/message for unsupported extensions.

Suggested change
with pytest.raises(Exception, match='Could not find file or directory'):
with pytest.raises(Exception):

Copilot uses AI. Check for mistakes.
check_module.expand_paths([str(ignored)])


def test_main_raises_on_invalid_json(tmp_path):
bad = tmp_path / 'bad.json'
bad.write_text('{not valid json', encoding='utf-8')

with pytest.raises(json.JSONDecodeError):
check_module.main([str(bad)])


def test_main_reports_duplicates(sample_payloads, tmp_path, capsys):
Expand All @@ -103,4 +114,29 @@ def test_main_reports_duplicates(sample_payloads, tmp_path, capsys):

assert check_module.main([str(file_a), str(file_b)]) == 1
captured = capsys.readouterr().out
assert 'Found duplicate entries' in captured
assert 'Found duplicate entries (ignoring keys: `evaluation_id`, `retrieved_timestamp`)' in captured
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The assertion for the duplicate warning hard-codes the ignored key list. Since the CLI message is derived from check_module.IGNORE_KEYS, consider building the expected substring from check_module.IGNORE_KEYS (sorted) to keep the test in sync if the ignore set changes.

Suggested change
assert 'Found duplicate entries (ignoring keys: `evaluation_id`, `retrieved_timestamp`)' in captured
ignored_keys = ', '.join(
f'`{key}`' for key in sorted(check_module.IGNORE_KEYS)
)
assert f'Found duplicate entries (ignoring keys: {ignored_keys})' in captured

Copilot uses AI. Check for mistakes.


def test_main_reports_no_duplicates(sample_payloads, tmp_path, capsys):
payload = sample_payloads[0]
file_a = tmp_path / 'a.json'
file_c = tmp_path / 'c.json'
write_json(file_a, payload)


payload_a = clone_payload(payload)
payload_a['evaluation_id'] = 'eval-c'
payload_a['retrieved_timestamp'] = '2024-01-03'
if (
isinstance(payload_a.get('evaluation_results'), list)
and payload_a['evaluation_results']
):
payload_a['evaluation_results'][0]['score_details']['score'] = (
payload_a['evaluation_results'][0]['score_details']['score'] + 0.001
)

write_json(file_c, payload_a)

Comment on lines +126 to +139
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In test_main_reports_no_duplicates, the variable payload_a is actually the modified payload written to file_c, which is a bit confusing when reading the test. Renaming it to something like payload_c (and cleaning up the extra blank lines/trailing whitespace in this block) would improve readability and keep formatting consistent with the rest of the tests.

Suggested change
payload_a = clone_payload(payload)
payload_a['evaluation_id'] = 'eval-c'
payload_a['retrieved_timestamp'] = '2024-01-03'
if (
isinstance(payload_a.get('evaluation_results'), list)
and payload_a['evaluation_results']
):
payload_a['evaluation_results'][0]['score_details']['score'] = (
payload_a['evaluation_results'][0]['score_details']['score'] + 0.001
)
write_json(file_c, payload_a)
payload_c = clone_payload(payload)
payload_c['evaluation_id'] = 'eval-c'
payload_c['retrieved_timestamp'] = '2024-01-03'
if (
isinstance(payload_c.get('evaluation_results'), list)
and payload_c['evaluation_results']
):
payload_c['evaluation_results'][0]['score_details']['score'] = (
payload_c['evaluation_results'][0]['score_details']['score'] + 0.001
)
write_json(file_c, payload_c)

Copilot uses AI. Check for mistakes.
assert check_module.main([str(file_a), str(file_c)]) == 0
captured = capsys.readouterr().out
assert 'No duplicates found.' in captured