From cd2d9cfae28422fcd03f5a0c8ca506bf08d72ced Mon Sep 17 00:00:00 2001 From: Osaro Ochuko Adade Date: Thu, 3 Apr 2025 11:16:23 +0100 Subject: [PATCH 1/4] Changed "Know Limitations..." to "Known Limitations..." #7 --- change-log/v0.1.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change-log/v0.1.0.md b/change-log/v0.1.0.md index 871434f..f8cce94 100644 --- a/change-log/v0.1.0.md +++ b/change-log/v0.1.0.md @@ -70,7 +70,7 @@ - [Adding a license to a repository](https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/adding-a-license-to-a-repository) - [The MIT License](https://opensource.org/license/mit) -### Know Limitations +### Known Limitations - Large files may experience performance degradation. - Date pattern matching is syntactic only; does not validate for semantic correctness (e.g. will match Feb 30) From f50f32e092910231a524cd2b3d1e8df122a9e162 Mon Sep 17 00:00:00 2001 From: Osaro Ochuko Adade Date: Sat, 5 Apr 2025 17:25:45 +0100 Subject: [PATCH 2/4] Add PR test workflow and update plan.md #9 #10 --- .github/workflows/test-pr.yml | 28 ++++++++++++++++++++++++++++ planning/plan.md | 4 ++-- 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test-pr.yml diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml new file mode 100644 index 0000000..1fbcf17 --- /dev/null +++ b/.github/workflows/test-pr.yml @@ -0,0 +1,28 @@ +name: PR Test Workflow + +on: + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # Install the package in development mode + pip install -e ".[dev]" + + - name: Run tests + run: pytest --cov=pattern_seek \ No newline at end of file diff --git a/planning/plan.md b/planning/plan.md index 57799ad..2cb6b8c 100644 --- a/planning/plan.md +++ b/planning/plan.md @@ -18,8 +18,8 @@ In order of priority, going from simplest to most complex: - [ ] **Context Display**: Show surrounding lines of text around matches for better understanding of results. - [ ] **Configurable Output Formats**: Support for colored terminal output, JSON, CSV, or markdown for easier integration into other tools/workflows. - [ ] **Search History & Caching**: Remember previous searches and cache results for frequently searched files. -- [ ] **Regular Expression Support**: Allow customer regex pattern search beyond the built in ones. -- [ ] **File Type Filtering**: Limit searches to specifi file types or exclude certain files. +- [ ] **Regular Expression Support**: Allow custom regex pattern search beyond the built in ones. +- [ ] **File Type Filtering**: Limit searches to specific file types or exclude certain files. - [ ] **Syntax Highlighting**: Colorize code or highlighted matched patterns for better readability. - [ ] **Batch Processing**: Run multiple search queries at once and combine results. - [ ] **Vector Database Integration**: Store embeddings in a vector database for faster semantic search on large datasets/files. From 00a02cc8feaa3fdf9d580da4e7aa00152add3b4a Mon Sep 17 00:00:00 2001 From: Osaro Ochuko Adade Date: Sat, 5 Apr 2025 17:25:52 +0100 Subject: [PATCH 3/4] Add PR test workflow and update plan.md #9 #10 --- pyproject.toml | 1 + src/pattern_seek/cli.py | 64 +++++++- src/pattern_seek/transform/__init__.py | 4 + src/pattern_seek/transform/common.py | 32 ++++ src/pattern_seek/transform/csv_transform.py | 77 ++++++++++ src/pattern_seek/transform/output.py | 61 ++++++++ tests/transform/__init__.py | 0 tests/transform/test_common.py | 42 ++++++ tests/transform/test_csv_transform.py | 158 ++++++++++++++++++++ tests/transform/test_integration.py | 137 +++++++++++++++++ tests/transform/test_output.py | 115 ++++++++++++++ 11 files changed, 689 insertions(+), 2 deletions(-) create mode 100644 src/pattern_seek/transform/__init__.py create mode 100644 src/pattern_seek/transform/common.py create mode 100644 src/pattern_seek/transform/csv_transform.py create mode 100644 src/pattern_seek/transform/output.py create mode 100644 tests/transform/__init__.py create mode 100644 tests/transform/test_common.py create mode 100644 tests/transform/test_csv_transform.py create mode 100644 tests/transform/test_integration.py create mode 100644 tests/transform/test_output.py diff --git a/pyproject.toml b/pyproject.toml index e917288..c72d8cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "click>=8.0.0", # CLI lib: https://click.palletsprojects.com/en/stable/ "colorama>=0.4.4", # produce colored terminal text: https://pypi.org/project/colorama/ "regex>=2021.4.4", # handle regular expressions: https://pypi.org/project/regex/ + "tabulate>=0.9.0" # pretty-print tabular data http://pypi.org/project/tabulate/ ] [project.optional-dependencies] diff --git a/src/pattern_seek/cli.py b/src/pattern_seek/cli.py index 19d6aa8..538b388 100644 --- a/src/pattern_seek/cli.py +++ b/src/pattern_seek/cli.py @@ -5,6 +5,11 @@ from pattern_seek.core import search_files from pattern_seek.output import print_matches +from pattern_seek.transform import ( + transform_csv, + print_csv_matches, + save_csv_matches +) @click.command() @click.argument('paths', nargs=-1, required=True) @@ -41,6 +46,31 @@ is_flag=True, help='Disable colored output' ) +@click.option( + '--transform', '-TT', + type=click.Choice(['csv',]), + help='Transform structured file formats (e.g., CSV) based on query' +) +@click.option( + '--query', '-q', + type=str, + help='Search query for transform mode' +) +@click.option( + '--column', '-col', + type=str, + help='Column name to search in for transform mode' +) +@click.option( + '--matchword', '-m', + is_flag=True, + help='Match whole words only for transform mode' +) +@click.option( + '--save', '-s', + is_flag=True, + help='Save transformed results to a new file' +) def main( paths: List[str], pattern: List[str], @@ -48,7 +78,12 @@ def main( case_sensitive: bool, whole_word: bool, context: int, - no_color: bool + no_color: bool, + transform: Optional[str], + query: Optional[str], + column: Optional[str], + matchword: bool, + save: bool ) -> None: """ Pattern-seek: Search text files for specific patterns. @@ -56,8 +91,33 @@ def main( PATHS: One or more files or directories to search. Wildcards are supported, e.g., *.txt """ + + # Handle --transform option + if transform == 'csv': + if not query: + click.echo("Error: --query must be provided when using --transform csv", err=True) + sys.exit(1) + + for path in paths: + try: + result = transform_csv( + path, + query=query, + column=column, + case_sensitive=case_sensitive, + matchword=matchword, + save=save + ) + + if not save: + print_csv_matches(result) + + except Exception as e: + click.echo(f"Error transforming {path}: {str(e)}", err=True) + + return # Skip the rest of the pattern-based logic - # Determine which patterns to search for + # Determine which patterns to search for if 'all' in pattern: pattern_types = ['email', 'guid', 'date', 'url', 'ip'] else: diff --git a/src/pattern_seek/transform/__init__.py b/src/pattern_seek/transform/__init__.py new file mode 100644 index 0000000..a1a6109 --- /dev/null +++ b/src/pattern_seek/transform/__init__.py @@ -0,0 +1,4 @@ +from .csv_transform import transform_csv +from .output import print_csv_matches, save_csv_matches + +__all__ = ["transform_csv", "print_csv_matches", "save_csv_matches"] \ No newline at end of file diff --git a/src/pattern_seek/transform/common.py b/src/pattern_seek/transform/common.py new file mode 100644 index 0000000..b9fcdb4 --- /dev/null +++ b/src/pattern_seek/transform/common.py @@ -0,0 +1,32 @@ +""" +common.py + +This module provides reusable logic for matching text values, used across various transformation formats (e.g., CSV, XLSX). It supports case sensitivity and whole-word matching options and is used to determine if a cell value matches a given query string. +""" + +import re + +def is_match(cell_value: str, query: str, case_sensitive: bool, matchword: bool) -> bool: + """ + Checks if a cell value matches a given query based on specified criteria. + + Args: + cell_value (str): The value in the cell to check. + query (str): The query string to match against. + case_sensitive (bool): If True, the match is case-sensitive. Default is False. + matchword (bool): If True, matches only whole words. Default is False. + + Returns: + bool: True if the cell value matches the query, False otherwise. + """ + if not case_sensitive: + cell_value = cell_value.lower() + query = query.lower() + + if matchword: + # Match only if the query appears as a full word using word boundaries + pattern = r'\b' + re.escape(query) + r'\b' + return bool(re.search(pattern, cell_value)) + + # Basic substring search + return query in cell_value \ No newline at end of file diff --git a/src/pattern_seek/transform/csv_transform.py b/src/pattern_seek/transform/csv_transform.py new file mode 100644 index 0000000..a62580c --- /dev/null +++ b/src/pattern_seek/transform/csv_transform.py @@ -0,0 +1,77 @@ +""" +Module: csv_transform.py + +This module provides functionality for transforming CSV files by searching rows based on a query. It supports column-specific searches, case sensitivity, and whole-word matching. Optionally, matched results can be saved to a new CSV file. +""" + +import csv +from typing import List, Dict, Optional +from .common import is_match + +def transform_csv( + file_path: str, + query: str, + column: Optional[str] = None, + case_sensitive: bool = False, + matchword: bool = False, + save: bool = False +) -> Dict: + """ + Searches a CSV file for rows where the query string appears in one or more fields. + + Args: + file_path (str): Path to the CSV file. + query (str): The text to search for in the file. + column (Optional[str]): If provided, only search this specific column. + case_sensitive (bool): Whether the search should respect letter casing. + matchword (bool): Whether to match whole words only. + save (bool): If True, save the matching rows to a new CSV file. + + Returns: + Dict: A dictionary containing the file path, CSV header, and matched rows. + If save=True, returns an empty dict and saves results to file. + """ + matches = [] + + # Open and read the CSV file as dictionaries (fieldname => value) + with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + header = reader.fieldnames + + if not header: + raise ValueError("CSV file has no header row.") + + for row in reader: + # If a specific column is given, limit search to it; otherwise search all fields + search_fields = [column] if column else header + + for field in search_fields: + value = row.get(field, "") + # Check if the current field value matches the query + if is_match(value, query, case_sensitive, matchword): + matches.append(row) + break # stop checking this row once a match is found + + if save: + if not matches: + print(f"No matches found in {file_path}. Nothing was saved.") + return {} + + from .output import save_csv_matches + save_path = file_path.replace('.csv', '-transformed.csv') + + # Save the matched rows to a new CSV file + save_csv_matches({ + "file": file_path, + "header": header, + "matches": matches + }, save_path) + + print(f"\nSaved transformed results to {save_path}") + return {} + + return { + "file": file_path, + "header": header, + "matches": matches + } \ No newline at end of file diff --git a/src/pattern_seek/transform/output.py b/src/pattern_seek/transform/output.py new file mode 100644 index 0000000..b870d15 --- /dev/null +++ b/src/pattern_seek/transform/output.py @@ -0,0 +1,61 @@ +""" +output.py + +This module is responsible for displaying matched CSV rows in a tabular format +and optionally saving them to a new CSV file. It's part of the transform feature +in pattern-seek and helps visualize or persist filtered CSV data. + +Functions: +- print_csv_matches(result): Pretty-prints the matched CSV rows using tabulate. +- save_csv_matches(result, output_file): Writes matched CSV rows to a new CSV file. +""" + +import csv +from typing import Dict +from colorama import Fore, Style, init +from tabulate import tabulate + +init() + +def print_csv_matches(result: Dict) -> None: + """ + Prints the matched rows from a CSV file in a tabular format. + + Args: + result (Dict): The result dictionary containing file path, header, and matches. + """ + file = result["file"] + rows = result["matches"] + header = result["header"] + + if not rows: + print(f"{Fore.YELLOW}No matches found in {file}.{Style.RESET_ALL}") + return + + # Display the filename + print(f"\n{Fore.CYAN}{Style.BRIGHT}File: {file}{Style.RESET_ALL}") + + # Display the matching rows in a table format + print(tabulate(rows, headers="keys", tablefmt="grid")) + + +def save_csv_matches(result: Dict, output_file: str) -> None: + """ + Saves the matched rows from a CSV file to a new CSV file. + + Args: + result (Dict): The result dictionary containing file path, header, and matches. + output_file (str): The path to the output CSV file. + """ + rows = result["matches"] + header = result["header"] + + if not rows: + print(f"{Fore.YELLOW}No matches found in {result['file']}.{Style.RESET_ALL}") + return + + # Write matched rows to a new CSV file with headers + with open(output_file, mode='w', newline='', encoding='utf-8') as outfile: + writer = csv.DictWriter(outfile, fieldnames=result["header"]) + writer.writeheader() # Write the header to the CSV file + writer.writerows(result["matches"]) # Write the matched rows to the CSV file diff --git a/tests/transform/__init__.py b/tests/transform/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/transform/test_common.py b/tests/transform/test_common.py new file mode 100644 index 0000000..fe820d5 --- /dev/null +++ b/tests/transform/test_common.py @@ -0,0 +1,42 @@ +""" +test_common.py + +Unit tests for the is_match function in pattern_seek.transform.common. +Covers all combinations of case sensitivity and whole-word matching logic. +""" + +import pytest +from pattern_seek.transform.common import is_match + +def test_is_match(): + """Tests default is_match behavior: case-insensitive, partial match.""" + assert is_match("Apple", "app", False, False) == True + assert is_match("Banana", "nan", False, False) == True + assert is_match("APPLE", "apple", False, False) == True + assert is_match("apple", "APPLE", False, False) == True + assert is_match("Apple", "orange", False, False) == False + +def test_is_match_case_sensitive(): + """Tests is_match with case-sensitive, partial match.""" + assert is_match("Apple", "app", True, False) == False # 'App' != 'app' + assert is_match("Apple", "App", True, False) == True + assert is_match("BANANA", "BAN", True, False) == True + assert is_match("BANANA", "ban", True, False) == False + +def test_is_match_whole_word(): + """Tests is_match with whole-word matching (case-insensitive).""" + assert is_match("Apple Pie", "Apple", False, True) == True + assert is_match("Pineaple", "Apple", False, True) == False + assert is_match("An apple a day", "Apple", False, True) == True + assert is_match("Apples are good", "apple", False, True) == False # 'apple != 'Apples' + + # Punctuation and spacing + assert is_match("Hello, word!", "Hello", False, True) == True + assert is_match("Hello, world!", "world", False, True) == True + +def test_is_match_case_sensitive_whole_word(): + """Tests is_match with both case sensitivity and whole-word matching.""" + assert is_match("Apple Pie", "apple", True, True) == False # case mismatch + assert is_match("Apple Pie", "Apple", True, True) == True + assert is_match("APPLE PIE", "APPLE", True, True) == True + assert is_match("APPLE_PIE", "apple", True, True) == False \ No newline at end of file diff --git a/tests/transform/test_csv_transform.py b/tests/transform/test_csv_transform.py new file mode 100644 index 0000000..72450f2 --- /dev/null +++ b/tests/transform/test_csv_transform.py @@ -0,0 +1,158 @@ +""" +test_csv_transform.py + +This file contains unit tests for the CSV transformation feature of the pattern-seek CLI tool. +It tests core behaviors such as text matching, column filtering, case sensitivity, and saving +matched rows to a new file. These tests ensure the transform_csv function behaves as expected +under various scenarios and edge cases. +""" + +import pytest +import os +import csv +from unittest.mock import patch, mock_open +from io import StringIO + +from pattern_seek.transform.csv_transform import transform_csv + +@pytest.fixture +def sample_csv_content(): + """Create sample CSV content for testing.""" + return ( + "name,age,city\n" + "Alice,30,New York\n" + "Bob,25,Los Angeles\n" + "Charlie,35,Chicago\n" + "David,40,New York\n" + "John Doe,30,New York\n" + "Jane Smith,25,Los Angeles\n" + "Bob Johnson,40,Chicago\n" + "Alice Brown,35,San Francisco\n" + ) + +@pytest.fixture +def temp_csv_file(tmp_path, sample_csv_content): + """Create a temporary CSV file for testing.""" + file_path = tmp_path / "test.csv" + with open(file_path, mode='w', newline='', encoding='utf-8') as f: + f.write(sample_csv_content) + return str(file_path) + +def test_transform_csv_basic_search(temp_csv_file): + """Test basic search functionality without any additional parameters.""" + result = transform_csv(temp_csv_file, "John") + + assert result["file"] == temp_csv_file + assert result["header"] == ["name", "age", "city"] + assert len(result["matches"]) == 2 # Both "John Doe" and "Bob Johnson" should match. + + # Verify both expected matches are present + john_doe = None + bob_johnson = None + + for match in result["matches"]: + if match["name"] == "John Doe": + john_doe = match + elif match["name"] == "Bob Johnson": + bob_johnson = match + + assert john_doe is not None + assert john_doe["age"] == "30" + assert john_doe["city"] == "New York" + + assert bob_johnson is not None + assert bob_johnson["age"] == "40" + assert bob_johnson["city"] == "Chicago" + +def test_transform_csv_case_sensitivity(temp_csv_file): + """Test case sensitivity in searching.""" + + # Case insensitive (default) + result = transform_csv(temp_csv_file, "john") + assert len(result["matches"]) == 2 # Should find "John Doe" and "Bob Johnson". + + # Case sensitive + result = transform_csv(temp_csv_file, "john", case_sensitive=True) + assert len(result["matches"]) == 0 # Should find no matches for lowercase "john". + + # Case sensitive with exact match + result = transform_csv(temp_csv_file, "John", case_sensitive=True) + assert len(result["matches"]) == 2 # Should find "John Doe" and "Bob Johnson". + +def test_transform_csv_matchword(temp_csv_file): + """Test matchword functionality.""" + + # Without matchword, "John" should match "John Doe" and "Bob Johnson". + result = transform_csv(temp_csv_file, "John") + assert len(result["matches"]) == 2 # Should find both "John Doe" and "Bob Johnson". + + # With matchword, "John" should only match "John Doe". + result = transform_csv(temp_csv_file, "John", matchword=True) + assert len(result["matches"]) == 1 # Should find only "John Doe". + assert result["matches"][0]["name"] == "John Doe" + + # With matchword and case sensitivity, "john" should not match anything. + result = transform_csv(temp_csv_file, "john", matchword=True, case_sensitive=True) + assert len(result["matches"]) == 0 # Should find no matches. + + result = transform_csv(temp_csv_file, "John", matchword=True, case_sensitive=True) + assert len(result["matches"]) == 1 # Should find only "John Doe". + +def test_transform_csv_column_specific_search(temp_csv_file): + """Test searching in a specific column.""" + + # Search for "New" but only in the city column + result = transform_csv(temp_csv_file, "New", column="city") + + assert len(result["matches"]) == 3 # "Alice", "David", and "John Doe" should match. + assert result["matches"][0]["name"] == "Alice" + assert result["matches"][1]["name"] == "David" + assert result["matches"][2]["name"] == "John Doe" + + # This should find nothing as "John" is not in the age column + result = transform_csv(temp_csv_file, "John", column="age") + assert len(result["matches"]) == 0 + +def test_transform_csv_no_matches(temp_csv_file): + """Test behavior when no matches are found.""" + result = transform_csv(temp_csv_file, "NonExistentName") + + assert result["file"] == temp_csv_file + assert result["header"] == ["name", "age", "city"] + assert len(result["matches"]) == 0 + +def test_transform_csv_empty_file(): + """Test handling of an empty CSV file.""" + with patch("builtins.open", mock_open(read_data="name,age,city\n")): + result = transform_csv("dummy.csv", "test") + + assert result["header"] == ["name", "age", "city"] + assert len(result["matches"]) == 0 + +def test_transform_csv_no_header(): + """Test handling of a CSV file with no header.""" + with patch("builtins.open", mock_open(read_data="")): + with pytest.raises(ValueError, match="CSV file has no header row."): + transform_csv("dummy.csv", "test") + +def test_transform_csv_with_basic_save(temp_csv_file): + """Test that save=True returns empty dict and prints expected message.""" + with patch("builtins.print") as mock_print: + # Verify that we get an empty dict when save=True + result = transform_csv(temp_csv_file, "John", save=True) + assert result == {} + + #Verify that the expected message is printed + expected_save_path = temp_csv_file.replace('.csv', '-transformed.csv') + mock_print.assert_called_once_with(f"\nSaved transformed results to {expected_save_path}") + +def test_transform_csv_with_no_matches_save(temp_csv_file): + """Test save=True when no matches are found; should skip file writing and print a message.""" + with patch("builtins.print") as mock_print: + result = transform_csv(temp_csv_file, "NonExistentName", save=True) + + # Check that the result is an empty dict + assert result == {} + + # Check that the appropriate message is printed + mock_print.assert_called_once_with(f"No matches found in {temp_csv_file}. Nothing was saved.") \ No newline at end of file diff --git a/tests/transform/test_integration.py b/tests/transform/test_integration.py new file mode 100644 index 0000000..14a7419 --- /dev/null +++ b/tests/transform/test_integration.py @@ -0,0 +1,137 @@ +""" +test_integration.py + +Integration tests for the CSV transform feature. These tests verify that +transform_csv, save_csv_matches, and print_csv_matches work together as expected. +It covers full transform flows including saving and printing results. +""" + +import pytest +import os +import csv + +from pattern_seek.transform.csv_transform import transform_csv +from pattern_seek.transform.output import print_csv_matches, save_csv_matches + +@pytest.fixture +def sample_csv_content(): + """Create sample CSV content for testing.""" + return ( + "name,age,city\n" + "Alice,30,New York\n" + "Bob,25,Los Angeles\n" + "Charlie,35,Chicago\n" + "David,40,New York\n" + "John Doe,30,New York\n" + "Jane Smith,25,Los Angeles\n" + "Bob Johnson,40,Chicago\n" + "Alice Brown,35,San Francisco\n" + ) + +@pytest.fixture +def temp_csv_file(tmp_path, sample_csv_content): + """Create a temporary CSV file for testing.""" + file_path = tmp_path / "test.csv" + with open(file_path, mode='w', newline='', encoding='utf-8') as f: + f.write(sample_csv_content) + return str(file_path) + +def test_end_to_end_transform_and_save(temp_csv_file): + """Test the full transform and save pipeline, then verify saved file content.""" + # Expected output file path + output_path = temp_csv_file.replace(".csv", "-transformed.csv") + + # Run transform with save + transform_csv(temp_csv_file, "John", save=True) + + # Verify output file exists + assert os.path.exists(output_path) + + # Verify output file's contents + with open(output_path, 'r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + rows = list(reader) + + # Check that the output file has the expected number of rows + assert len(rows) == 2 + + # Verify contents + john_row = None + bob_row = None + + for row in rows: + if row["name"] == "John Doe": + john_row = row + elif row["name"] == "Bob Johnson": + bob_row = row + + assert john_row is not None + assert john_row["age"] == "30" + assert john_row["city"] == "New York" + + assert bob_row is not None + assert bob_row["age"] == "40" + assert bob_row["city"] == "Chicago" + +def test_end_to_end_with_different_search_params(temp_csv_file): + """Test transform and save with matchword and column-specific filtering.""" + # Test with matchword=True + exact_output = temp_csv_file.replace(".csv", "-transformed.csv") + transform_csv(temp_csv_file, "John", matchword=True, save=True) + + # Verify output + with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + rows = list(reader) + + # Should have only one row (John Doe, not Bob Johnson) + assert len(rows) == 1 + assert rows[0]["name"] == "John Doe" + + # Test with column-specific search + os.remove(exact_output) # Clean up previous output + transform_csv(temp_csv_file, "New York", column="city", save=True) + + # Verify output + with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + rows = list(reader) + + # Should have only three rows + assert len(rows) == 3 + assert rows[0]["name"] == "Alice" + assert rows[1]["name"] == "David" + assert rows[2]["name"] == "John Doe" + + # Test with column-specific search + os.remove(exact_output) # Clean up previous output + transform_csv(temp_csv_file, "New", column="city", save=True) + + # Verify output + with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + rows = list(reader) + + # Should have only three rows + assert len(rows) == 3 + assert rows[0]["name"] == "Alice" + assert rows[1]["name"] == "David" + assert rows[2]["name"] == "John Doe" + +def test_transform_and_print_integration(temp_csv_file, capsys): + """Test that print_csv_matches prints output from transform_csv correctly.""" + # Get transform results + result = transform_csv(temp_csv_file, "John") + + # Print the results + print_csv_matches(result) + + # Check the output + captured = capsys.readouterr() + + # Verify output contains expected content + assert f"File: {temp_csv_file}" in captured.out + assert "John Doe" in captured.out + assert "New York" in captured.out + assert "Bob Johnson" in captured.out + assert "Chicago" in captured.out \ No newline at end of file diff --git a/tests/transform/test_output.py b/tests/transform/test_output.py new file mode 100644 index 0000000..01aa72c --- /dev/null +++ b/tests/transform/test_output.py @@ -0,0 +1,115 @@ +""" +test_output.py + +Unit tests for the output module, which handles printing and saving of matched +CSV data. These tests verify that print_csv_matches displays correct output and +that save_csv_matches writes the expected files and handles edge cases properly. +""" + +import pytest +import os +import csv + +from unittest.mock import patch +from io import StringIO +from colorama import Fore, Style + +from pattern_seek.transform.output import print_csv_matches, save_csv_matches + +@pytest.fixture +def sample_matches_data(): + """Provides a sample dictionary with matched CSV rows for testing.""" + return { + "file": "test.csv", + "header": ["name", "age", "city"], + "matches": [ + {"name": "John Doe", "age": "30", "city": "New York"}, + {"name": "Bob Johnson", "age": "40", "city": "Chicago"} + ] + } + +@pytest.fixture +def empty_matches_data(): + """Provides a sample dictionary with no matched rows (empty results).""" + return { + "file": "test.csv", + "header": ["name", "age", "city"], + "matches": [] + } + +def test_print_csv_matches(sample_matches_data, capsys): + """Test that print_csv_matches displays the expected output""" + # Run the function and capture stdout + print_csv_matches(sample_matches_data) + + # Get captured stdout + captured = capsys.readouterr() + + # Verify output includes expected file reference and row content + assert f"File: {sample_matches_data['file']}" in captured.out + assert "John Doe" in captured.out + assert "New York" in captured.out + assert "Bob Johnson" in captured.out + assert "Chicago" in captured.out + + # Should contain tabulate's grid format + assert "+" in captured.out # Grid lines + assert "|" in captured.out # Grid columns + +def test_print_csv_matches_no_matches(empty_matches_data, capsys): + """Test that print_csv_matches handles empty results correctly""" + # Run the function to print matches + print_csv_matches(empty_matches_data) + + # Get captured stdout + captured = capsys.readouterr() + + # Check output contains expected message for no matches + assert f"No matches found in {empty_matches_data['file']}" in captured.out + +def test_save_csv_matches_creates_file(tmp_path, sample_matches_data): + """Test that save_csv_matches creates a file with the expected content.""" + # Prepare output file path using pytest's tmp_path fixture + output_path = os.path.join(tmp_path, "output.csv") + + # Save matched row to a CSV file + save_csv_matches(sample_matches_data, output_path) + + # Verify file exists + assert os.path.exists(output_path) + + # Read the saved file and verify it matches input data + with open(output_path, 'r', newline='', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + rows = list(reader) + + # Check that we have correct number of rows + assert len(rows) == 2 + + # Check that headers are correct + assert reader.fieldnames == sample_matches_data["header"] + + # Check content of the first row + assert rows[0]["name"] == "John Doe" + assert rows[0]["age"] == "30" + assert rows[0]["city"] == "New York" + + # Check content of the second row + assert rows[1]["name"] == "Bob Johnson" + assert rows[1]["age"] == "40" + assert rows[1]["city"] == "Chicago" + +def test_save_csv_matches_empty_data(tmp_path, empty_matches_data, capsys): + """Test that save_csv_matches handles empty data correctly.""" + # Prepare test file path + output_path = os.path.join(tmp_path, "empty.csv") + + # Save matches + save_csv_matches(empty_matches_data, output_path) + + # Check that appropriate message was printed + captured = capsys.readouterr() + assert f"No matches found in {empty_matches_data['file']}" in captured.out + + # File should not be created + assert not os.path.exists(output_path) \ No newline at end of file From b441a6e458e157edbdfc1436dc943d467106e09b Mon Sep 17 00:00:00 2001 From: Osaro Ochuko Adade Date: Sat, 5 Apr 2025 17:44:04 +0100 Subject: [PATCH 4/4] Revert "Add PR test workflow and update plan.md #9 #10" This reverts commit 00a02cc8feaa3fdf9d580da4e7aa00152add3b4a. --- pyproject.toml | 1 - src/pattern_seek/cli.py | 64 +------- src/pattern_seek/transform/__init__.py | 4 - src/pattern_seek/transform/common.py | 32 ---- src/pattern_seek/transform/csv_transform.py | 77 ---------- src/pattern_seek/transform/output.py | 61 -------- tests/transform/__init__.py | 0 tests/transform/test_common.py | 42 ------ tests/transform/test_csv_transform.py | 158 -------------------- tests/transform/test_integration.py | 137 ----------------- tests/transform/test_output.py | 115 -------------- 11 files changed, 2 insertions(+), 689 deletions(-) delete mode 100644 src/pattern_seek/transform/__init__.py delete mode 100644 src/pattern_seek/transform/common.py delete mode 100644 src/pattern_seek/transform/csv_transform.py delete mode 100644 src/pattern_seek/transform/output.py delete mode 100644 tests/transform/__init__.py delete mode 100644 tests/transform/test_common.py delete mode 100644 tests/transform/test_csv_transform.py delete mode 100644 tests/transform/test_integration.py delete mode 100644 tests/transform/test_output.py diff --git a/pyproject.toml b/pyproject.toml index c72d8cc..e917288 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "click>=8.0.0", # CLI lib: https://click.palletsprojects.com/en/stable/ "colorama>=0.4.4", # produce colored terminal text: https://pypi.org/project/colorama/ "regex>=2021.4.4", # handle regular expressions: https://pypi.org/project/regex/ - "tabulate>=0.9.0" # pretty-print tabular data http://pypi.org/project/tabulate/ ] [project.optional-dependencies] diff --git a/src/pattern_seek/cli.py b/src/pattern_seek/cli.py index 538b388..19d6aa8 100644 --- a/src/pattern_seek/cli.py +++ b/src/pattern_seek/cli.py @@ -5,11 +5,6 @@ from pattern_seek.core import search_files from pattern_seek.output import print_matches -from pattern_seek.transform import ( - transform_csv, - print_csv_matches, - save_csv_matches -) @click.command() @click.argument('paths', nargs=-1, required=True) @@ -46,31 +41,6 @@ is_flag=True, help='Disable colored output' ) -@click.option( - '--transform', '-TT', - type=click.Choice(['csv',]), - help='Transform structured file formats (e.g., CSV) based on query' -) -@click.option( - '--query', '-q', - type=str, - help='Search query for transform mode' -) -@click.option( - '--column', '-col', - type=str, - help='Column name to search in for transform mode' -) -@click.option( - '--matchword', '-m', - is_flag=True, - help='Match whole words only for transform mode' -) -@click.option( - '--save', '-s', - is_flag=True, - help='Save transformed results to a new file' -) def main( paths: List[str], pattern: List[str], @@ -78,12 +48,7 @@ def main( case_sensitive: bool, whole_word: bool, context: int, - no_color: bool, - transform: Optional[str], - query: Optional[str], - column: Optional[str], - matchword: bool, - save: bool + no_color: bool ) -> None: """ Pattern-seek: Search text files for specific patterns. @@ -91,33 +56,8 @@ def main( PATHS: One or more files or directories to search. Wildcards are supported, e.g., *.txt """ - - # Handle --transform option - if transform == 'csv': - if not query: - click.echo("Error: --query must be provided when using --transform csv", err=True) - sys.exit(1) - - for path in paths: - try: - result = transform_csv( - path, - query=query, - column=column, - case_sensitive=case_sensitive, - matchword=matchword, - save=save - ) - - if not save: - print_csv_matches(result) - - except Exception as e: - click.echo(f"Error transforming {path}: {str(e)}", err=True) - - return # Skip the rest of the pattern-based logic - # Determine which patterns to search for + # Determine which patterns to search for if 'all' in pattern: pattern_types = ['email', 'guid', 'date', 'url', 'ip'] else: diff --git a/src/pattern_seek/transform/__init__.py b/src/pattern_seek/transform/__init__.py deleted file mode 100644 index a1a6109..0000000 --- a/src/pattern_seek/transform/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .csv_transform import transform_csv -from .output import print_csv_matches, save_csv_matches - -__all__ = ["transform_csv", "print_csv_matches", "save_csv_matches"] \ No newline at end of file diff --git a/src/pattern_seek/transform/common.py b/src/pattern_seek/transform/common.py deleted file mode 100644 index b9fcdb4..0000000 --- a/src/pattern_seek/transform/common.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -common.py - -This module provides reusable logic for matching text values, used across various transformation formats (e.g., CSV, XLSX). It supports case sensitivity and whole-word matching options and is used to determine if a cell value matches a given query string. -""" - -import re - -def is_match(cell_value: str, query: str, case_sensitive: bool, matchword: bool) -> bool: - """ - Checks if a cell value matches a given query based on specified criteria. - - Args: - cell_value (str): The value in the cell to check. - query (str): The query string to match against. - case_sensitive (bool): If True, the match is case-sensitive. Default is False. - matchword (bool): If True, matches only whole words. Default is False. - - Returns: - bool: True if the cell value matches the query, False otherwise. - """ - if not case_sensitive: - cell_value = cell_value.lower() - query = query.lower() - - if matchword: - # Match only if the query appears as a full word using word boundaries - pattern = r'\b' + re.escape(query) + r'\b' - return bool(re.search(pattern, cell_value)) - - # Basic substring search - return query in cell_value \ No newline at end of file diff --git a/src/pattern_seek/transform/csv_transform.py b/src/pattern_seek/transform/csv_transform.py deleted file mode 100644 index a62580c..0000000 --- a/src/pattern_seek/transform/csv_transform.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -Module: csv_transform.py - -This module provides functionality for transforming CSV files by searching rows based on a query. It supports column-specific searches, case sensitivity, and whole-word matching. Optionally, matched results can be saved to a new CSV file. -""" - -import csv -from typing import List, Dict, Optional -from .common import is_match - -def transform_csv( - file_path: str, - query: str, - column: Optional[str] = None, - case_sensitive: bool = False, - matchword: bool = False, - save: bool = False -) -> Dict: - """ - Searches a CSV file for rows where the query string appears in one or more fields. - - Args: - file_path (str): Path to the CSV file. - query (str): The text to search for in the file. - column (Optional[str]): If provided, only search this specific column. - case_sensitive (bool): Whether the search should respect letter casing. - matchword (bool): Whether to match whole words only. - save (bool): If True, save the matching rows to a new CSV file. - - Returns: - Dict: A dictionary containing the file path, CSV header, and matched rows. - If save=True, returns an empty dict and saves results to file. - """ - matches = [] - - # Open and read the CSV file as dictionaries (fieldname => value) - with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - header = reader.fieldnames - - if not header: - raise ValueError("CSV file has no header row.") - - for row in reader: - # If a specific column is given, limit search to it; otherwise search all fields - search_fields = [column] if column else header - - for field in search_fields: - value = row.get(field, "") - # Check if the current field value matches the query - if is_match(value, query, case_sensitive, matchword): - matches.append(row) - break # stop checking this row once a match is found - - if save: - if not matches: - print(f"No matches found in {file_path}. Nothing was saved.") - return {} - - from .output import save_csv_matches - save_path = file_path.replace('.csv', '-transformed.csv') - - # Save the matched rows to a new CSV file - save_csv_matches({ - "file": file_path, - "header": header, - "matches": matches - }, save_path) - - print(f"\nSaved transformed results to {save_path}") - return {} - - return { - "file": file_path, - "header": header, - "matches": matches - } \ No newline at end of file diff --git a/src/pattern_seek/transform/output.py b/src/pattern_seek/transform/output.py deleted file mode 100644 index b870d15..0000000 --- a/src/pattern_seek/transform/output.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -output.py - -This module is responsible for displaying matched CSV rows in a tabular format -and optionally saving them to a new CSV file. It's part of the transform feature -in pattern-seek and helps visualize or persist filtered CSV data. - -Functions: -- print_csv_matches(result): Pretty-prints the matched CSV rows using tabulate. -- save_csv_matches(result, output_file): Writes matched CSV rows to a new CSV file. -""" - -import csv -from typing import Dict -from colorama import Fore, Style, init -from tabulate import tabulate - -init() - -def print_csv_matches(result: Dict) -> None: - """ - Prints the matched rows from a CSV file in a tabular format. - - Args: - result (Dict): The result dictionary containing file path, header, and matches. - """ - file = result["file"] - rows = result["matches"] - header = result["header"] - - if not rows: - print(f"{Fore.YELLOW}No matches found in {file}.{Style.RESET_ALL}") - return - - # Display the filename - print(f"\n{Fore.CYAN}{Style.BRIGHT}File: {file}{Style.RESET_ALL}") - - # Display the matching rows in a table format - print(tabulate(rows, headers="keys", tablefmt="grid")) - - -def save_csv_matches(result: Dict, output_file: str) -> None: - """ - Saves the matched rows from a CSV file to a new CSV file. - - Args: - result (Dict): The result dictionary containing file path, header, and matches. - output_file (str): The path to the output CSV file. - """ - rows = result["matches"] - header = result["header"] - - if not rows: - print(f"{Fore.YELLOW}No matches found in {result['file']}.{Style.RESET_ALL}") - return - - # Write matched rows to a new CSV file with headers - with open(output_file, mode='w', newline='', encoding='utf-8') as outfile: - writer = csv.DictWriter(outfile, fieldnames=result["header"]) - writer.writeheader() # Write the header to the CSV file - writer.writerows(result["matches"]) # Write the matched rows to the CSV file diff --git a/tests/transform/__init__.py b/tests/transform/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/transform/test_common.py b/tests/transform/test_common.py deleted file mode 100644 index fe820d5..0000000 --- a/tests/transform/test_common.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -test_common.py - -Unit tests for the is_match function in pattern_seek.transform.common. -Covers all combinations of case sensitivity and whole-word matching logic. -""" - -import pytest -from pattern_seek.transform.common import is_match - -def test_is_match(): - """Tests default is_match behavior: case-insensitive, partial match.""" - assert is_match("Apple", "app", False, False) == True - assert is_match("Banana", "nan", False, False) == True - assert is_match("APPLE", "apple", False, False) == True - assert is_match("apple", "APPLE", False, False) == True - assert is_match("Apple", "orange", False, False) == False - -def test_is_match_case_sensitive(): - """Tests is_match with case-sensitive, partial match.""" - assert is_match("Apple", "app", True, False) == False # 'App' != 'app' - assert is_match("Apple", "App", True, False) == True - assert is_match("BANANA", "BAN", True, False) == True - assert is_match("BANANA", "ban", True, False) == False - -def test_is_match_whole_word(): - """Tests is_match with whole-word matching (case-insensitive).""" - assert is_match("Apple Pie", "Apple", False, True) == True - assert is_match("Pineaple", "Apple", False, True) == False - assert is_match("An apple a day", "Apple", False, True) == True - assert is_match("Apples are good", "apple", False, True) == False # 'apple != 'Apples' - - # Punctuation and spacing - assert is_match("Hello, word!", "Hello", False, True) == True - assert is_match("Hello, world!", "world", False, True) == True - -def test_is_match_case_sensitive_whole_word(): - """Tests is_match with both case sensitivity and whole-word matching.""" - assert is_match("Apple Pie", "apple", True, True) == False # case mismatch - assert is_match("Apple Pie", "Apple", True, True) == True - assert is_match("APPLE PIE", "APPLE", True, True) == True - assert is_match("APPLE_PIE", "apple", True, True) == False \ No newline at end of file diff --git a/tests/transform/test_csv_transform.py b/tests/transform/test_csv_transform.py deleted file mode 100644 index 72450f2..0000000 --- a/tests/transform/test_csv_transform.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -test_csv_transform.py - -This file contains unit tests for the CSV transformation feature of the pattern-seek CLI tool. -It tests core behaviors such as text matching, column filtering, case sensitivity, and saving -matched rows to a new file. These tests ensure the transform_csv function behaves as expected -under various scenarios and edge cases. -""" - -import pytest -import os -import csv -from unittest.mock import patch, mock_open -from io import StringIO - -from pattern_seek.transform.csv_transform import transform_csv - -@pytest.fixture -def sample_csv_content(): - """Create sample CSV content for testing.""" - return ( - "name,age,city\n" - "Alice,30,New York\n" - "Bob,25,Los Angeles\n" - "Charlie,35,Chicago\n" - "David,40,New York\n" - "John Doe,30,New York\n" - "Jane Smith,25,Los Angeles\n" - "Bob Johnson,40,Chicago\n" - "Alice Brown,35,San Francisco\n" - ) - -@pytest.fixture -def temp_csv_file(tmp_path, sample_csv_content): - """Create a temporary CSV file for testing.""" - file_path = tmp_path / "test.csv" - with open(file_path, mode='w', newline='', encoding='utf-8') as f: - f.write(sample_csv_content) - return str(file_path) - -def test_transform_csv_basic_search(temp_csv_file): - """Test basic search functionality without any additional parameters.""" - result = transform_csv(temp_csv_file, "John") - - assert result["file"] == temp_csv_file - assert result["header"] == ["name", "age", "city"] - assert len(result["matches"]) == 2 # Both "John Doe" and "Bob Johnson" should match. - - # Verify both expected matches are present - john_doe = None - bob_johnson = None - - for match in result["matches"]: - if match["name"] == "John Doe": - john_doe = match - elif match["name"] == "Bob Johnson": - bob_johnson = match - - assert john_doe is not None - assert john_doe["age"] == "30" - assert john_doe["city"] == "New York" - - assert bob_johnson is not None - assert bob_johnson["age"] == "40" - assert bob_johnson["city"] == "Chicago" - -def test_transform_csv_case_sensitivity(temp_csv_file): - """Test case sensitivity in searching.""" - - # Case insensitive (default) - result = transform_csv(temp_csv_file, "john") - assert len(result["matches"]) == 2 # Should find "John Doe" and "Bob Johnson". - - # Case sensitive - result = transform_csv(temp_csv_file, "john", case_sensitive=True) - assert len(result["matches"]) == 0 # Should find no matches for lowercase "john". - - # Case sensitive with exact match - result = transform_csv(temp_csv_file, "John", case_sensitive=True) - assert len(result["matches"]) == 2 # Should find "John Doe" and "Bob Johnson". - -def test_transform_csv_matchword(temp_csv_file): - """Test matchword functionality.""" - - # Without matchword, "John" should match "John Doe" and "Bob Johnson". - result = transform_csv(temp_csv_file, "John") - assert len(result["matches"]) == 2 # Should find both "John Doe" and "Bob Johnson". - - # With matchword, "John" should only match "John Doe". - result = transform_csv(temp_csv_file, "John", matchword=True) - assert len(result["matches"]) == 1 # Should find only "John Doe". - assert result["matches"][0]["name"] == "John Doe" - - # With matchword and case sensitivity, "john" should not match anything. - result = transform_csv(temp_csv_file, "john", matchword=True, case_sensitive=True) - assert len(result["matches"]) == 0 # Should find no matches. - - result = transform_csv(temp_csv_file, "John", matchword=True, case_sensitive=True) - assert len(result["matches"]) == 1 # Should find only "John Doe". - -def test_transform_csv_column_specific_search(temp_csv_file): - """Test searching in a specific column.""" - - # Search for "New" but only in the city column - result = transform_csv(temp_csv_file, "New", column="city") - - assert len(result["matches"]) == 3 # "Alice", "David", and "John Doe" should match. - assert result["matches"][0]["name"] == "Alice" - assert result["matches"][1]["name"] == "David" - assert result["matches"][2]["name"] == "John Doe" - - # This should find nothing as "John" is not in the age column - result = transform_csv(temp_csv_file, "John", column="age") - assert len(result["matches"]) == 0 - -def test_transform_csv_no_matches(temp_csv_file): - """Test behavior when no matches are found.""" - result = transform_csv(temp_csv_file, "NonExistentName") - - assert result["file"] == temp_csv_file - assert result["header"] == ["name", "age", "city"] - assert len(result["matches"]) == 0 - -def test_transform_csv_empty_file(): - """Test handling of an empty CSV file.""" - with patch("builtins.open", mock_open(read_data="name,age,city\n")): - result = transform_csv("dummy.csv", "test") - - assert result["header"] == ["name", "age", "city"] - assert len(result["matches"]) == 0 - -def test_transform_csv_no_header(): - """Test handling of a CSV file with no header.""" - with patch("builtins.open", mock_open(read_data="")): - with pytest.raises(ValueError, match="CSV file has no header row."): - transform_csv("dummy.csv", "test") - -def test_transform_csv_with_basic_save(temp_csv_file): - """Test that save=True returns empty dict and prints expected message.""" - with patch("builtins.print") as mock_print: - # Verify that we get an empty dict when save=True - result = transform_csv(temp_csv_file, "John", save=True) - assert result == {} - - #Verify that the expected message is printed - expected_save_path = temp_csv_file.replace('.csv', '-transformed.csv') - mock_print.assert_called_once_with(f"\nSaved transformed results to {expected_save_path}") - -def test_transform_csv_with_no_matches_save(temp_csv_file): - """Test save=True when no matches are found; should skip file writing and print a message.""" - with patch("builtins.print") as mock_print: - result = transform_csv(temp_csv_file, "NonExistentName", save=True) - - # Check that the result is an empty dict - assert result == {} - - # Check that the appropriate message is printed - mock_print.assert_called_once_with(f"No matches found in {temp_csv_file}. Nothing was saved.") \ No newline at end of file diff --git a/tests/transform/test_integration.py b/tests/transform/test_integration.py deleted file mode 100644 index 14a7419..0000000 --- a/tests/transform/test_integration.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -test_integration.py - -Integration tests for the CSV transform feature. These tests verify that -transform_csv, save_csv_matches, and print_csv_matches work together as expected. -It covers full transform flows including saving and printing results. -""" - -import pytest -import os -import csv - -from pattern_seek.transform.csv_transform import transform_csv -from pattern_seek.transform.output import print_csv_matches, save_csv_matches - -@pytest.fixture -def sample_csv_content(): - """Create sample CSV content for testing.""" - return ( - "name,age,city\n" - "Alice,30,New York\n" - "Bob,25,Los Angeles\n" - "Charlie,35,Chicago\n" - "David,40,New York\n" - "John Doe,30,New York\n" - "Jane Smith,25,Los Angeles\n" - "Bob Johnson,40,Chicago\n" - "Alice Brown,35,San Francisco\n" - ) - -@pytest.fixture -def temp_csv_file(tmp_path, sample_csv_content): - """Create a temporary CSV file for testing.""" - file_path = tmp_path / "test.csv" - with open(file_path, mode='w', newline='', encoding='utf-8') as f: - f.write(sample_csv_content) - return str(file_path) - -def test_end_to_end_transform_and_save(temp_csv_file): - """Test the full transform and save pipeline, then verify saved file content.""" - # Expected output file path - output_path = temp_csv_file.replace(".csv", "-transformed.csv") - - # Run transform with save - transform_csv(temp_csv_file, "John", save=True) - - # Verify output file exists - assert os.path.exists(output_path) - - # Verify output file's contents - with open(output_path, 'r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - rows = list(reader) - - # Check that the output file has the expected number of rows - assert len(rows) == 2 - - # Verify contents - john_row = None - bob_row = None - - for row in rows: - if row["name"] == "John Doe": - john_row = row - elif row["name"] == "Bob Johnson": - bob_row = row - - assert john_row is not None - assert john_row["age"] == "30" - assert john_row["city"] == "New York" - - assert bob_row is not None - assert bob_row["age"] == "40" - assert bob_row["city"] == "Chicago" - -def test_end_to_end_with_different_search_params(temp_csv_file): - """Test transform and save with matchword and column-specific filtering.""" - # Test with matchword=True - exact_output = temp_csv_file.replace(".csv", "-transformed.csv") - transform_csv(temp_csv_file, "John", matchword=True, save=True) - - # Verify output - with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - rows = list(reader) - - # Should have only one row (John Doe, not Bob Johnson) - assert len(rows) == 1 - assert rows[0]["name"] == "John Doe" - - # Test with column-specific search - os.remove(exact_output) # Clean up previous output - transform_csv(temp_csv_file, "New York", column="city", save=True) - - # Verify output - with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - rows = list(reader) - - # Should have only three rows - assert len(rows) == 3 - assert rows[0]["name"] == "Alice" - assert rows[1]["name"] == "David" - assert rows[2]["name"] == "John Doe" - - # Test with column-specific search - os.remove(exact_output) # Clean up previous output - transform_csv(temp_csv_file, "New", column="city", save=True) - - # Verify output - with open(exact_output, 'r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - rows = list(reader) - - # Should have only three rows - assert len(rows) == 3 - assert rows[0]["name"] == "Alice" - assert rows[1]["name"] == "David" - assert rows[2]["name"] == "John Doe" - -def test_transform_and_print_integration(temp_csv_file, capsys): - """Test that print_csv_matches prints output from transform_csv correctly.""" - # Get transform results - result = transform_csv(temp_csv_file, "John") - - # Print the results - print_csv_matches(result) - - # Check the output - captured = capsys.readouterr() - - # Verify output contains expected content - assert f"File: {temp_csv_file}" in captured.out - assert "John Doe" in captured.out - assert "New York" in captured.out - assert "Bob Johnson" in captured.out - assert "Chicago" in captured.out \ No newline at end of file diff --git a/tests/transform/test_output.py b/tests/transform/test_output.py deleted file mode 100644 index 01aa72c..0000000 --- a/tests/transform/test_output.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -test_output.py - -Unit tests for the output module, which handles printing and saving of matched -CSV data. These tests verify that print_csv_matches displays correct output and -that save_csv_matches writes the expected files and handles edge cases properly. -""" - -import pytest -import os -import csv - -from unittest.mock import patch -from io import StringIO -from colorama import Fore, Style - -from pattern_seek.transform.output import print_csv_matches, save_csv_matches - -@pytest.fixture -def sample_matches_data(): - """Provides a sample dictionary with matched CSV rows for testing.""" - return { - "file": "test.csv", - "header": ["name", "age", "city"], - "matches": [ - {"name": "John Doe", "age": "30", "city": "New York"}, - {"name": "Bob Johnson", "age": "40", "city": "Chicago"} - ] - } - -@pytest.fixture -def empty_matches_data(): - """Provides a sample dictionary with no matched rows (empty results).""" - return { - "file": "test.csv", - "header": ["name", "age", "city"], - "matches": [] - } - -def test_print_csv_matches(sample_matches_data, capsys): - """Test that print_csv_matches displays the expected output""" - # Run the function and capture stdout - print_csv_matches(sample_matches_data) - - # Get captured stdout - captured = capsys.readouterr() - - # Verify output includes expected file reference and row content - assert f"File: {sample_matches_data['file']}" in captured.out - assert "John Doe" in captured.out - assert "New York" in captured.out - assert "Bob Johnson" in captured.out - assert "Chicago" in captured.out - - # Should contain tabulate's grid format - assert "+" in captured.out # Grid lines - assert "|" in captured.out # Grid columns - -def test_print_csv_matches_no_matches(empty_matches_data, capsys): - """Test that print_csv_matches handles empty results correctly""" - # Run the function to print matches - print_csv_matches(empty_matches_data) - - # Get captured stdout - captured = capsys.readouterr() - - # Check output contains expected message for no matches - assert f"No matches found in {empty_matches_data['file']}" in captured.out - -def test_save_csv_matches_creates_file(tmp_path, sample_matches_data): - """Test that save_csv_matches creates a file with the expected content.""" - # Prepare output file path using pytest's tmp_path fixture - output_path = os.path.join(tmp_path, "output.csv") - - # Save matched row to a CSV file - save_csv_matches(sample_matches_data, output_path) - - # Verify file exists - assert os.path.exists(output_path) - - # Read the saved file and verify it matches input data - with open(output_path, 'r', newline='', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - rows = list(reader) - - # Check that we have correct number of rows - assert len(rows) == 2 - - # Check that headers are correct - assert reader.fieldnames == sample_matches_data["header"] - - # Check content of the first row - assert rows[0]["name"] == "John Doe" - assert rows[0]["age"] == "30" - assert rows[0]["city"] == "New York" - - # Check content of the second row - assert rows[1]["name"] == "Bob Johnson" - assert rows[1]["age"] == "40" - assert rows[1]["city"] == "Chicago" - -def test_save_csv_matches_empty_data(tmp_path, empty_matches_data, capsys): - """Test that save_csv_matches handles empty data correctly.""" - # Prepare test file path - output_path = os.path.join(tmp_path, "empty.csv") - - # Save matches - save_csv_matches(empty_matches_data, output_path) - - # Check that appropriate message was printed - captured = capsys.readouterr() - assert f"No matches found in {empty_matches_data['file']}" in captured.out - - # File should not be created - assert not os.path.exists(output_path) \ No newline at end of file