From cd881d027b023334e163cb2280363d853b8f8521 Mon Sep 17 00:00:00 2001
From: francose <13445813+francose@users.noreply.github.com>
Date: Sun, 10 May 2026 14:03:14 -0400
Subject: [PATCH 1/2] Add pytest test suite and CI workflow for data_format module

---
 .github/workflows/tests.yml |  31 ++++++++
 tests/__init__.py           |   0
 tests/test_data_format.py   | 136 ++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 .github/workflows/tests.yml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_data_format.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..2a85303
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,31 @@
+name: Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest
+
+      - name: Run tests
+        run: python -m pytest tests/ -v
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_data_format.py b/tests/test_data_format.py
new file mode 100644
index 0000000..e502946
--- /dev/null
+++ b/tests/test_data_format.py
@@ -0,0 +1,136 @@
+"""
+Tests for xgitguard.common.data_format
+
+Covers keys_extractor(), credential_extractor(), and the URL/special-char
+stripping helpers. Not exhaustive — just enough to catch regressions and
+prove the detection pipeline works end to end.
+"""
+
+import pytest
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from xgitguard.common.data_format import (
+    keys_extractor,
+    credential_extractor,
+    remove_url_from_keys,
+    remove_url_from_creds,
+)
+
+
+# ---------------------------------------------------------------------------
+# keys_extractor — true positives
+# ---------------------------------------------------------------------------
+
+# Build strings via concat so secret scanners don't flag the test file.
+_AWS_KEY = "AKIA" + "IOSFODNN7EXAMPLE"
+_PRIVATE_KEY = "-----BEGIN RSA PRIVATE KEY-----"
+_SLACK_WEBHOOK = "T12345678/B12345678/abcdefghijklmnopqrstuvwx"
+
+
+class TestKeysExtractorDetects:
+    def test_aws_access_key(self):
+        result = keys_extractor(f"config = {_AWS_KEY}")
+        assert any(_AWS_KEY in k for k in result)
+
+    def test_rsa_private_key_header(self):
+        result = keys_extractor(f"key = {_PRIVATE_KEY}")
+        assert any("BEGIN RSA PRIVATE KEY" in k for k in result)
+
+    def test_ec_private_key_header(self):
+        result = keys_extractor("-----BEGIN EC PRIVATE KEY-----")
+        assert any("BEGIN EC PRIVATE KEY" in k for k in result)
+
+    def test_pgp_private_key_header(self):
+        result = keys_extractor("-----BEGIN PGP PRIVATE KEY BLOCK-----")
+        assert any("BEGIN PGP PRIVATE KEY" in k for k in result)
+
+    def test_slack_webhook(self):
+        result = keys_extractor(f"webhook = {_SLACK_WEBHOOK}")
+        assert any(_SLACK_WEBHOOK in k for k in result)
+
+
+# ---------------------------------------------------------------------------
+# keys_extractor — true negatives
+# ---------------------------------------------------------------------------
+
+class TestKeysExtractorIgnores:
+    def test_plain_text(self):
+        assert keys_extractor("the weather is nice today") == []
+
+    def test_short_random_string(self):
+        assert keys_extractor("abc123") == []
+
+    def test_empty_string(self):
+        assert keys_extractor("") == []
+
+
+# ---------------------------------------------------------------------------
+# remove_url_from_keys
+# ---------------------------------------------------------------------------
+
+class TestRemoveUrlFromKeys:
+    def test_strips_http_url(self):
+        result = remove_url_from_keys("check https://example.com/path for info")
+        assert "https" not in result
+        assert "example.com" not in result
+
+    def test_strips_email(self):
+        result = remove_url_from_keys("contact admin@company.com please")
+        assert "@" not in result
+
+    def test_strips_special_chars(self):
+        result = remove_url_from_keys("key={value}")
+        assert "{" not in result
+        assert "}" not in result
+
+
+# ---------------------------------------------------------------------------
+# remove_url_from_creds
+# ---------------------------------------------------------------------------
+
+class TestRemoveUrlFromCreds:
+    def test_returns_list(self):
+        result = remove_url_from_creds("some code content here", "key")
+        assert isinstance(result, list)
+
+    def test_strips_urls(self):
+        result = remove_url_from_creds(
+            "token = abc123 https://evil.com/steal", "key"
+        )
+        assert not any("evil.com" in word for word in result)
+
+
+# ---------------------------------------------------------------------------
+# credential_extractor
+# ---------------------------------------------------------------------------
+
+class TestCredentialExtractor:
+    def test_extracts_alphanumeric_creds(self):
+        words = ["shortpw", "MyP4ssw0rdIsStr0ng", "hello", "12345"]
+        stop_words = []
+        result = credential_extractor(words, stop_words)
+        assert "MyP4ssw0rdIsStr0ng" in result
+
+    def test_skips_stop_words(self):
+        words = ["MyP4ssw0rd"]
+        stop_words = ["MyP4ssw0rd"]
+        result = credential_extractor(words, stop_words)
+        assert result == []
+
+    def test_skips_hex_prefix(self):
+        words = ["0xDEADBEEF1234"]
+        result = credential_extractor(words, [])
+        assert result == []
+
+    def test_skips_short_strings(self):
+        words = ["Ab1234"]
+        result = credential_extractor(words, [])
+        assert result == []
+
+    def test_skips_http(self):
+        words = ["https://example.com"]
+        result = credential_extractor(words, [])
+        assert result == []
From 5767bdc45f860f241408d87c32caba267c92dca4 Mon Sep 17 00:00:00 2001
From: francose <13445813+francose@users.noreply.github.com>
Date: Sun, 10 May 2026 19:40:23 -0400
Subject: [PATCH 2/2] Address review feedback: drop unused imports, slim CI deps

- Remove unused pytest import and sys.path hack from test module
- Install only urlextract+pytest in CI instead of full requirements.txt
- Drop Python 3.11 from matrix (pinned numpy/pandas don't support it)
---
 .github/workflows/tests.yml | 5 ++---
 tests/test_data_format.py   | 6 ------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2a85303..63f6f18 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10"]
 
     steps:
       - uses: actions/checkout@v4
@@ -24,8 +24,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install pytest
+          pip install urlextract pytest
 
       - name: Run tests
         run: python -m pytest tests/ -v
diff --git a/tests/test_data_format.py b/tests/test_data_format.py
index e502946..3fe7c2b 100644
--- a/tests/test_data_format.py
+++ b/tests/test_data_format.py
@@ -6,12 +6,6 @@
 prove the detection pipeline works end to end.
 """
 
-import pytest
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
 from xgitguard.common.data_format import (
     keys_extractor,
     credential_extractor,