diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0aa261a --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Jupyter Notebook +.ipynb_checkpoints + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# IDE specific files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store +*.sublime-workspace +*.sublime-project + +# Editor directories and files +.idea +.vscode +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
+ +# Log files +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Local development files +local_settings.py +.env.local +.env.development +.env.test + +# Documentation +docs/_build/ + +# Example output files +examples/output/ + +# MacOS specific +.DS_Store +._* + +# Windows specific +Thumbs.db +ehthumbs.db +Desktop.ini +$RECYCLE.BIN/ + +# Linux specific +*~ + +# Visual Studio Code specific +*.code-workspace + +# PyCharm specific +.idea/ + +# Virtual environment specific +*.venv/ + +# Build and release specific +*.pyc +*.pyo +*.pyd +__pycache__/ +*.so + +project_structure.txt \ No newline at end of file diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..57dd78d --- /dev/null +++ b/docs/API.md @@ -0,0 +1,153 @@ +# ImageCompare API Documentation + +## Table of Contents +- [REST API Endpoints](#rest-api-endpoints) +- [Python API](#python-api) +- [CLI Interface](#cli-interface) +- [Error Handling](#error-handling) +- [Examples](#examples) + +## REST API Endpoints + +### Base URL +`http://localhost:8000` (when running locally) + +### 1. Compare Two Images +`POST /compare` + +**Parameters:** +- `file1`: First image file (required) +- `file2`: Second image file (required) +- `method`: Comparison method (`pixel`, `histogram`, `phash`, `ssim`) + +**Example Request:** +```bash +curl -X POST "http://localhost:8000/compare?method=phash" \ + -H "accept: application/json" \ + -F "file1=@image1.jpg" \ + -F "file2=@image2.jpg" +``` + +**Response:** +```json +{ + "match": true, + "confidence": 0.92, + "method": "phash", + "threshold": 0.85 +} +``` + +### 2. 
Cascading Comparison +`POST /compare/cascade` + +**Parameters:** +- `file1`: Base image file +- `file2`: Comparison image file +- `confidence_threshold`: Minimum confidence (0.0-1.0) +- `timeout`: Max processing time in seconds + +**Example Response:** +```json +{ + "best_match": { + "method": "phash", + "confidence": 0.92, + "match": true + }, + "all_results": [ + { + "method": "pixel", + "confidence": 0.45, + "match": false + }, + { + "method": "phash", + "confidence": 0.92, + "match": true + } + ] +} +``` + +## Python API + +### Basic Usage +```python +from imagecompare import compare_images + +match, confidence = compare_images("img1.jpg", "img2.jpg", method="phash") +``` + +### Available Methods +```python +from imagecompare import get_comparison_methods + +print(get_comparison_methods()) +# Output: {'pixel': 'Fast comparison', 'phash': 'Perceptual hash', ...} +``` + +### Advanced Usage +```python +from imagecompare.core.comparators import SSIMComparator + +comparator = SSIMComparator(threshold=0.8) +match, confidence = comparator.compare("img1.jpg", "img2.jpg") +``` + +## CLI Interface + +### Basic Comparison +```bash +imagecompare image1.jpg image2.jpg --method phash +``` + +### Batch Processing +```bash +imagecompare base.jpg compare*.jpg --output json > results.json +``` + +### Available Options +``` +Options: + -m, --method TEXT Comparison method (pixel, histogram, phash, ssim, all) + -t, --threshold FLOAT Similarity threshold (0.0-1.0) + -o, --output TEXT Output format (simple, json, verbose) +``` + +## Error Handling + +### Common Errors +| Code | Error | Resolution | +|------|----------------------|-------------------------------------| +| 400 | Invalid method | Use one of: pixel, histogram, phash, ssim | +| 422 | Invalid file upload | Check image file format | +| 500 | Processing error | Check server logs | + +## Examples + +### 1. 
"""
Basic usage example of ImageCompare Python API
"""
from imagecompare import compare_images, get_comparison_methods

# Show every available comparison method next to its description.
print("Available methods:")
for name, desc in get_comparison_methods().items():
    print(f"- {name}: {desc}")

# Sample images shipped with the test data.
image1 = "tests/test_data/images/base/empuran_prithvi_1.jpg"
image2 = "tests/test_data/images/variants/empuran_prithvi_variant_2.jpg"
differentImage = "tests/test_data/images/base/thudarum_1.png"

# Each entry is (heading to print, image compared against image1).
# The first pair is a base image and one of its variants; the second
# pair is two unrelated images.
for heading, other in (
    ("\nComparing images:", image2),
    ("\nComparing different images:", differentImage),
):
    print(heading)
    match, confidence = compare_images(image1, other, method="phash")
    verdict = "MATCH" if match else "NO MATCH"
    print(f"Result: {verdict}")
    print(f"Confidence: {confidence:.2f}")
def compare_images(
    image1: Union[str, Path],
    image2: Union[str, Path],
    method: str = "phash",
    **kwargs
) -> Tuple[bool, float]:
    """
    Compare two images using the specified method.

    Args:
        image1: Path to first image
        image2: Path to second image
        method: Comparison method (pixel, histogram, phash, ssim)
        **kwargs: Constructor options for the selected comparator
            (for example ``threshold``). When any are given, a one-off
            comparator of the selected type is built for this call,
            starting from the default comparator's threshold. Without
            kwargs the shared default comparator is used.

    Returns:
        Tuple of (match: bool, confidence: float)

    Raises:
        ValueError: If ``method`` is not a registered comparison method.
        TypeError: If ``kwargs`` contains an option the selected
            comparator's constructor does not accept.

    Example:
        >>> from imagecompare import compare_images
        >>> match, confidence = compare_images("img1.jpg", "img2.jpg", method="phash")
        >>> print(f"Match: {match}, Confidence: {confidence:.2f}")
    """
    if method not in _DEFAULT_COMPARATORS:
        raise ValueError(
            f"Invalid method '{method}'. Available: {list(_DEFAULT_COMPARATORS.keys())}"
        )

    comparator = _DEFAULT_COMPARATORS[method]

    if kwargs:
        # Previously these options were accepted and then ignored. Build a
        # fresh instance rather than mutating the shared default, so one
        # caller's overrides never leak into later calls.
        options = {"threshold": comparator.threshold, **kwargs}
        comparator = type(comparator)(**options)

    return comparator.compare(str(image1), str(image2))
SSIMComparator(threshold=args.threshold) + } + + results = [] + + if args.method == "all": + methods = comparators.keys() + else: + methods = [args.method] + + for method in methods: + comparator = comparators[method] + is_match, confidence = comparator.compare(args.image1, args.image2) + + results.append({ + "method": method, + "match": is_match, + "confidence": confidence, + "threshold": args.threshold + }) + + # Output results + if args.output == "json": + import json + print(json.dumps(results, indent=2)) + elif args.output == "verbose": + for result in results: + print(f"Method: {result['method'].upper()}") + print(f" Similarity: {result['confidence']:.2f}") + print(f" Threshold: {result['threshold']}") + print(f" Match: {'YES' if result['match'] else 'NO'}\n") + else: + for result in results: + print(f"{result['method']}: {'MATCH' if result['match'] else 'NO MATCH'} ({result['confidence']:.2f})") + + sys.exit(0 if results[0]['match'] else 1) + + except Exception as e: + print(f"Error: {str(e)}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/imagecompare/api/web.py b/imagecompare/api/web.py new file mode 100644 index 0000000..1589d66 --- /dev/null +++ b/imagecompare/api/web.py @@ -0,0 +1,258 @@ +from fastapi import FastAPI, UploadFile, File, HTTPException +from fastapi.responses import JSONResponse +from typing import List, Optional +import tempfile +import time +import os +from imagecompare.core.comparators import ( # Changed from relative to absolute import + PixelComparator, + HistogramComparator, + PHashComparator, + SSIMComparator +) + +app = FastAPI( + title="Image Comparison API", + description="API for comparing images using various algorithms", + version="1.0.0" +) + +COMPARISON_CASCADE = [ + ("pixel", "Fast pixel-level comparison"), + ("histogram", "Color histogram comparison"), + ("phash", "Perceptual hash comparison"), + ("ssim", "Structural similarity index") +] + +# Initialize 
@app.post("/compare/cascade")
async def cascading_compare(
    file1: UploadFile = File(...),
    file2: UploadFile = File(...),
    confidence_threshold: float = 0.85,
    timeout: float = 5.0
) -> JSONResponse:
    """
    Compare images using cascading fallback approach (fastest to most accurate)

    The methods in COMPARISON_CASCADE are tried in order. The cascade stops
    as soon as one method reports a confidence of at least
    ``confidence_threshold`` or the time budget is used up. A method that
    raises is reported on stdout and skipped.

    Parameters:
    - file1: First image file
    - file2: Second image file
    - confidence_threshold: Minimum confidence to consider a match (0.0-1.0)
    - timeout: Maximum time to spend on comparison (seconds)

    Returns:
    - JSON with ``best_match`` (the highest-confidence result),
      ``all_results`` (every method that ran) and ``timeout_reached``

    Raises:
    - HTTPException(500) if the uploads cannot be saved, every method
      fails, or an unexpected error occurs
    """
    # NOTE(review): the comparators are called synchronously inside this
    # async endpoint; confirm whether that is acceptable for the expected load.
    path1 = path2 = None
    try:
        # Save uploaded files
        path1 = save_upload_file(file1)
        path2 = save_upload_file(file2)

        # A monotonic clock is immune to wall-clock adjustments.
        start_time = time.monotonic()
        results = []

        for method, description in COMPARISON_CASCADE:
            if time.monotonic() - start_time > timeout:
                break

            try:
                comparator = comparators[method]
                is_match, confidence = comparator.compare(path1, path2)

                results.append({
                    "method": method,
                    "description": description,
                    # Cast to builtins: numpy scalar types returned by a
                    # comparator are not JSON serializable.
                    "match": bool(is_match),
                    "confidence": float(confidence),
                    "threshold": comparator.threshold,
                    "time_taken": time.monotonic() - start_time
                })

                # Early exit if we found a confident match
                if confidence >= confidence_threshold:
                    break

            except Exception as e:
                # Log but continue to next method
                print(f"Method {method} failed: {str(e)}")
                continue

        if not results:
            raise HTTPException(
                status_code=500,
                detail="All comparison methods failed"
            )

        # Highest confidence wins. If any result meets the threshold this is
        # that result; otherwise it is the closest miss, rather than
        # whichever method happened to run first.
        best_result = max(results, key=lambda item: item["confidence"])

        return JSONResponse({
            "best_match": best_result,
            "all_results": results,
            "timeout_reached": time.monotonic() - start_time >= timeout
        })

    except HTTPException:
        # Already a well-formed API error; do not re-wrap it.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Comparison failed: {str(e)}"
        ) from e
    finally:
        # Remove the temp files on every exit path.
        for path in (path1, path2):
            if path is not None and os.path.exists(path):
                os.unlink(path)
@app.post("/compare/batch")
async def batch_compare(
    base_image: UploadFile = File(...),
    compare_images: List[UploadFile] = File(...),
    method: Optional[str] = "phash"
) -> JSONResponse:
    """
    Compare a base image against multiple images

    Parameters:
    - base_image: Reference image
    - compare_images: List of images to compare against
    - method: Comparison method

    Returns:
    - JSON with the method, its threshold, and one result
      (filename, match, confidence) per compared image

    Raises:
    - HTTPException(400) if ``method`` is not a registered comparator
    - HTTPException(500) if saving an upload or a comparison fails
    """
    # NOTE: the ``compare_images`` parameter shadows the module-level
    # ``compare_images`` endpoint inside this function. It is part of the
    # request interface, so it is kept as is.
    if method not in comparators:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid method. Choose from: {', '.join(comparators.keys())}"
        )

    comparator = comparators[method]
    base_path = None
    try:
        # Save base image
        base_path = save_upload_file(base_image)
        results = []

        for img in compare_images:
            # Save each comparison image
            compare_path = save_upload_file(img)
            try:
                is_match, confidence = comparator.compare(base_path, compare_path)
            finally:
                # Remove this temp file whether or not the comparison worked.
                if os.path.exists(compare_path):
                    os.unlink(compare_path)

            results.append({
                "filename": img.filename,
                # Cast to builtins: numpy scalar types returned by a
                # comparator are not JSON serializable.
                "match": bool(is_match),
                "confidence": float(confidence)
            })

        return JSONResponse({
            "method": method,
            "threshold": comparator.threshold,
            "results": results
        })

    except HTTPException:
        # Already a well-formed API error; do not re-wrap it.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the base image on every exit path.
        if base_path is not None and os.path.exists(base_path):
            os.unlink(base_path)
class PHashComparator:
    """
    Image comparator based on a perceptual hash (pHash).

    Each image is reduced to a bit vector taken from the low-frequency
    corner of its DCT. Two images are scored by the fraction of bits on
    which their hashes agree.
    """

    def __init__(self, threshold: float = 0.85, hash_size: int = 8, highfreq_factor: int = 4):
        # threshold: minimum similarity for a match.
        # hash_size: side length of the DCT corner kept for the hash.
        # highfreq_factor: the image is resized to hash_size * highfreq_factor
        # pixels per side before the DCT.
        self.highfreq_factor = highfreq_factor
        self.hash_size = hash_size
        self.threshold = threshold

    def compare(self, img1_path: str, img2_path: str) -> Tuple[bool, float]:
        """
        Score two image files by the agreement of their perceptual hashes.

        Returns:
            (match, similarity) where similarity is 1.0 minus the
            normalized Hamming distance, and match is True when the
            similarity reaches the threshold.
        """
        first_bits, second_bits = (
            self._calculate_phash(path) for path in (img1_path, img2_path)
        )

        # Number of positions where the two hashes disagree
        differing = np.count_nonzero(first_bits != second_bits)
        similarity = 1.0 - (differing / len(first_bits))

        is_match = similarity >= self.threshold
        return is_match, similarity

    def _calculate_phash(self, img_path: str):
        """
        Build the hash bit vector for one image file.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise ValueError(f"Could not load image: {img_path}")

        # Shrink to a square, then move to the frequency domain
        side = self.hash_size * self.highfreq_factor
        shrunk = cv2.resize(gray, (side, side), interpolation=cv2.INTER_AREA)
        coefficients = cv2.dct(np.float32(shrunk))

        # Keep only the top-left hash_size x hash_size coefficients
        low_freq = coefficients[:self.hash_size, :self.hash_size]

        # One bit per coefficient: 1 when it exceeds the block's median
        bits = low_freq > np.median(low_freq)
        return bits.flatten().astype(int)
class SSIMComparator:
    """
    Structural Similarity Index (SSIM) comparison
    Most accurate but computationally expensive
    """

    def __init__(self, threshold: float = 0.8, win_size: int = 7,
                 dynamic_range: int = 255, multichannel: bool = False):
        # threshold: minimum SSIM score for a match.
        # win_size: window size passed to the SSIM computation.
        # dynamic_range: value range of the image data (passed as data_range).
        # multichannel: compare colour channels instead of grayscale.
        self.threshold = threshold
        self.win_size = win_size
        self.dynamic_range = dynamic_range
        self.multichannel = multichannel

    def compare(self, img1_path: str, img2_path: str) -> Tuple[bool, float]:
        """
        Compare two images using SSIM

        Images whose shapes differ are reported as a non-match with
        score 0.0 rather than being resized.

        Returns:
            (match, score) as builtin ``bool`` and ``float`` so the result
            can be JSON-serialized directly.

        Raises:
            ValueError: If either image cannot be loaded.
        """
        img1 = cv2.imread(img1_path)
        img2 = cv2.imread(img2_path)

        if img1 is None or img2 is None:
            raise ValueError("Could not load one or both images")

        # Ensure images have same dimensions
        if img1.shape != img2.shape:
            return False, 0.0

        # Convert to grayscale if not multichannel
        if not self.multichannel:
            img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
            img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

        # ``multichannel=`` is deprecated in scikit-image in favour of
        # ``channel_axis``. Colour images from cv2.imread keep their
        # channels on the last axis.
        score = ssim(
            img1, img2,
            win_size=self.win_size,
            data_range=self.dynamic_range,
            channel_axis=-1 if self.multichannel else None
        )

        # ssim returns a numpy scalar; comparing it yields numpy.bool_, which
        # json.dumps / JSONResponse reject. Convert to builtins first.
        score = float(score)
        return score >= self.threshold, score
def normalize_for_comparison(
    img_path: Union[str, Path, np.ndarray],
    target_size: Optional[Tuple[int, int]] = (256, 256),
    normalize_method: str = "robust",
    convert_to: str = "LAB"
) -> np.ndarray:
    """
    Prepare an image for comparison.

    The image is loaded (or copied, when an array is passed), promoted to
    three channels if it is grayscale, optionally resized, converted to
    the requested color space and finally normalized per channel.

    Args:
        img_path: Path to image or numpy array
        target_size: Resize dimensions (width, height), or None to keep
            the original size
        normalize_method: "minmax", "zscore", or "robust" (default)
        convert_to: Color space ("BGR", "LAB", "HSV", "GRAY")

    Returns:
        Normalized image as a float32 array

    Raises:
        ValueError: If the image cannot be loaded or has an unsupported
            number of dimensions. Any failure is logged and re-raised.
    """
    try:
        if not isinstance(img_path, (str, Path)):
            # Work on a copy so the caller's array is left untouched
            img = img_path.copy()
        else:
            img = cv2.imread(str(img_path))
            if img is None:
                raise ValueError(f"Could not load image from {img_path}")

        rank = len(img.shape)
        if rank not in (2, 3):
            raise ValueError(f"Invalid image dimensions: {img.shape}")

        # Single-channel input is expanded to BGR before further processing
        if rank == 2 or img.shape[2] == 1:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if target_size is not None:
            img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

        converted = _convert_color_space(img, convert_to)
        normalized = _normalize_channels(converted, normalize_method)

        return normalized.astype(np.float32)

    except Exception as e:
        logger.error(f"Normalization failed: {str(e)}")
        raise
def _normalize_channels(
    img: np.ndarray,
    method: str,
    clip: bool = True
) -> np.ndarray:
    """
    Normalize every channel of an image independently.

    Supported methods:
        "minmax": rescale by the channel's minimum and maximum
        "zscore": subtract the channel mean and divide by its std
        "robust": clamp to the 1st-99th percentile range, then rescale

    A channel with no spread (max == min, std == 0, or equal percentiles)
    is only shifted, or left at zero for "minmax".

    Args:
        img: Input image (H,W,C)
        method: Normalization method
        clip: Whether to clip the final values to [0,1]

    Returns:
        Normalized image in float32

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    source = img.astype(np.float32)
    result = np.zeros_like(source)

    for index in range(source.shape[2]):
        plane = source[:, :, index]
        # Writable view: assigning through it fills the matching channel of
        # ``result`` (and casts to float32 exactly as a slice assignment does).
        target = result[:, :, index]

        if method == "robust":
            lower, upper = np.percentile(plane, [1, 99])
            if upper - lower > 0:
                target[...] = np.clip(plane, lower, upper)
                target[...] = (target - lower) / (upper - lower)
            else:
                target[...] = plane - lower

        elif method == "zscore":
            mean, std = np.mean(plane), np.std(plane)
            if std > 0:
                target[...] = (plane - mean) / std
            else:
                target[...] = plane - mean

        elif method == "minmax":
            min_val, max_val = np.min(plane), np.max(plane)
            if max_val - min_val > 0:
                target[...] = (plane - min_val) / (max_val - min_val)

        else:
            raise ValueError(f"Unknown normalization method: {method}")

    if not clip:
        return result
    return np.clip(result, 0, 1)
for CLI +web = ["fastapi>=0.85", "uvicorn>=0.19"] # Only needed for web API \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..06829e5 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,19 @@ +import os +import unittest + +class ImageComparisonTestCase(unittest.TestCase): + """Base class for image comparison tests""" + + @classmethod + def setUpClass(cls): + """Set up test data paths""" + cls.test_dir = os.path.dirname(os.path.abspath(__file__)) + cls.test_data_dir = os.path.join(cls.test_dir, 'test_data', 'images', 'base') + + # Ensure test directory exists + if not os.path.exists(cls.test_data_dir): + os.makedirs(cls.test_data_dir, exist_ok=True) + + def get_image_path(self, filename): + """Get full path to test image""" + return os.path.join(self.test_data_dir, filename) \ No newline at end of file diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100644 index 0000000..3f45449 --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,11 @@ +import unittest + +def run_all_tests(): + """Discover and run all tests""" + loader = unittest.TestLoader() + suite = loader.discover('tests', pattern='test_*.py') + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) + +if __name__ == '__main__': + run_all_tests() \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..3d04b27 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,58 @@ +import unittest +import os +from fastapi.testclient import TestClient +from imagecompare.api.web import app + +class TestAPIEndpoints(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.client = TestClient(app) + cls.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data', 'images') + + # Paths to test images + cls.identical1 = os.path.join(cls.test_data_dir, 'base', 'empuran_prithvi_1.jpg') + cls.identical2 = os.path.join(cls.test_data_dir, 'variants', 
'empuran_prithvi_variant_1.jpg') + cls.different = os.path.join(cls.test_data_dir, 'base', 'thudarum_1.png') + + def test_compare_endpoint(self): + """Test basic comparison endpoint""" + with open(self.identical1, 'rb') as f1, open(self.identical2, 'rb') as f2: + response = self.client.post( + "/compare?method=phash", + files={"file1": f1, "file2": f2} + ) + self.assertEqual(response.status_code, 200) + self.assertTrue(response.json()["match"]) + self.assertGreaterEqual(response.json()["confidence"], 0.9) + + def test_cascading_compare(self): + """Test the cascading comparison endpoint""" + with open(self.identical1, 'rb') as f1, open(self.different, 'rb') as f2: + response = self.client.post( + "/compare/cascade", + files={"file1": f1, "file2": f2} + ) + self.assertEqual(response.status_code, 200) + results = response.json() + self.assertIn("best_match", results) + self.assertIn("all_results", results) + self.assertFalse(results["best_match"]["match"]) + + def test_invalid_method(self): + """Test with invalid comparison method""" + with open(self.identical1, 'rb') as f1, open(self.identical2, 'rb') as f2: + response = self.client.post( + "/compare?method=invalid", + files={"file1": f1, "file2": f2} + ) + self.assertEqual(response.status_code, 400) + self.assertIn("Invalid method", response.json()["detail"]) + + def test_missing_file(self): + """Test with missing file upload""" + with open(self.identical1, 'rb') as f1: + response = self.client.post( + "/compare?method=phash", + files={"file1": f1} # Missing file2 + ) + self.assertEqual(response.status_code, 422) # Unprocessable Entity \ No newline at end of file diff --git a/tests/test_data/images/base/empuran_prithvi_1.jpg b/tests/test_data/images/base/empuran_prithvi_1.jpg new file mode 100644 index 0000000..e808a2f Binary files /dev/null and b/tests/test_data/images/base/empuran_prithvi_1.jpg differ diff --git a/tests/test_data/images/base/thudarum_1.png b/tests/test_data/images/base/thudarum_1.png new file mode 
100644 index 0000000..234e27d Binary files /dev/null and b/tests/test_data/images/base/thudarum_1.png differ diff --git a/tests/test_data/images/variants/empuran_prithvi_variant_1.jpg b/tests/test_data/images/variants/empuran_prithvi_variant_1.jpg new file mode 100644 index 0000000..27a0d65 Binary files /dev/null and b/tests/test_data/images/variants/empuran_prithvi_variant_1.jpg differ diff --git a/tests/test_data/images/variants/empuran_prithvi_variant_2.jpg b/tests/test_data/images/variants/empuran_prithvi_variant_2.jpg new file mode 100644 index 0000000..ab1822c Binary files /dev/null and b/tests/test_data/images/variants/empuran_prithvi_variant_2.jpg differ diff --git a/tests/test_data/images/variants/empuran_prithvi_variant_3.jpg b/tests/test_data/images/variants/empuran_prithvi_variant_3.jpg new file mode 100644 index 0000000..6646367 Binary files /dev/null and b/tests/test_data/images/variants/empuran_prithvi_variant_3.jpg differ