redis-performance · fcostaoliveira · Feb 12, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -84,9 +84,10 @@ Thumbs.db
 .github/
 
 # Documentation
-README.md
-LICENSE
+# README.md and LICENSE are needed for Poetry package metadata
 *.md
+!README.md
+!LICENSE
 docs/
 
 # Temporary files

diff --git a/.github/workflows/docker-build-pr.yml b/.github/workflows/docker-build-pr.yml
@@ -93,22 +93,22 @@ jobs:
 
           # Test help command
           echo "Testing --help command..."
-          docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} run.py --help
+          docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} vector-db-benchmark --help
 
           # Test Python environment
           echo "Testing Python environment..."
-          docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"
+          docker run --rm --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"
 
           # Test Redis connectivity
           echo "Testing Redis connectivity..."
-          docker run --rm --network host ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
+          docker run --rm --network host --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
             -c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')"
 
           # Test benchmark execution with specific configuration
           echo "Testing benchmark execution with redis-m-16-ef-64 configuration..."
           mkdir -p ./test-results
-          docker run --rm --network host -v "$(pwd)/test-results:/app/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
-            run.py --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"
+          docker run --rm --network host -v "$(pwd)/test-results:/code/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
+            vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"
 
           echo "✅ Docker image tests passed!"
 

diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
@@ -0,0 +1,47 @@
+# Builds the package with Poetry and uploads it to PyPI whenever a GitHub
+# release is published (or when the workflow is triggered manually).
+name: Publish to PyPI
+on:
+  release:
+    types: [published]
+  workflow_dispatch:  # Allow manual triggering
+
+jobs:
+  pypi:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/vector-benchmark
+    # The upload authenticates with the PYPI_TOKEN secret, so no OIDC
+    # (id-token) permission is requested; read access to the repo is enough.
+    permissions:
+      contents: read
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: Install Poetry
+      uses: snok/install-poetry@v1
+      with:
+        version: latest
+        virtualenvs-create: true
+        virtualenvs-in-project: true
+
+    - name: Build package
+      run: poetry build --no-interaction
+
+    # Sanity-check the built distributions with twine before uploading.
+    - name: Check package
+      run: |
+        pip install twine
+        twine check dist/*
+
+    - name: Publish to PyPI
+      env:
+        POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
+      run: poetry publish --no-interaction
diff --git a/Dockerfile b/Dockerfile
@@ -30,6 +30,15 @@ WORKDIR /code
 
 # Copy dependency files first for better caching
 COPY poetry.lock pyproject.toml /code/
+COPY README.md /code/
+
+# Copy package directories needed by Poetry
+COPY benchmark /code/benchmark
+COPY dataset_reader /code/dataset_reader
+COPY engine /code/engine
+COPY datasets /code/datasets
+COPY experiments /code/experiments
+COPY run.py /code/run.py
 
 # Configure Poetry and install dependencies
 RUN poetry config virtualenvs.create false \
@@ -38,7 +47,7 @@ RUN poetry config virtualenvs.create false \
 # Install additional dependencies
 RUN pip install "boto3"
 
-# Copy source code
+# Copy remaining source code
 COPY . /code
 
 # Store Git information
@@ -65,31 +74,28 @@ RUN apt-get update && apt-get install -y \
 
 
 # Set working directory
-WORKDIR /app
+WORKDIR /code
 
 # Copy Python environment from builder
 COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
 COPY --from=builder /usr/local/bin /usr/local/bin
 
 # Copy application code
-COPY --from=builder /code /app
+COPY --from=builder /code /code
 
 # Create directories with proper permissions
-RUN mkdir -p /app/results /app/datasets && \
-
-    chmod -R 777 /app/results /app/datasets && \
-    chmod -R 755 /app
+RUN mkdir -p /code/results /code/datasets && \
+    chmod -R 777 /code/results /code/datasets && \
+    chmod -R 755 /code
 
 # Create entrypoint script to handle user permissions
 RUN echo '#!/bin/bash\n\
 # Handle user permissions for volume mounts\n\
-if [ "$1" = "run.py" ]; then\n\
-    # Ensure results directory is writable\n\
-    mkdir -p /app/results\n\
-    chmod 777 /app/results\n\
-fi\n\
-exec python "$@"' > /app/entrypoint.sh && \
-    chmod +x /app/entrypoint.sh
+# Ensure results directory is writable\n\
+mkdir -p /code/results\n\
+chmod 777 /code/results\n\
+exec "$@"' > /code/entrypoint.sh && \
+    chmod +x /code/entrypoint.sh
 
 
 # Health check
@@ -100,10 +106,8 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
 EXPOSE 6379 6380
 
 # Set entrypoint
-
-ENTRYPOINT ["/app/entrypoint.sh"]
-
+ENTRYPOINT ["/code/entrypoint.sh"]
 
 # Default command (show help)
-CMD ["run.py", "--help"]
+CMD ["vector-db-benchmark", "--help"]
 
diff --git a/benchmark/dataset.py b/benchmark/dataset.py
@@ -179,7 +179,7 @@ def download(self):
                     except botocore.exceptions.NoCredentialsError:
                         print("Credentials not found, downloading without boto3")
                 if not downloaded_withboto:
-                    print(f"Downloading from URL {self.config.link}...")
+                    print(f"Downloading from URL {self.config.link} to {target_path}...")
                     tmp_path, _ = download_with_headers(
                         self.config.link, None, show_progress
                     )
@@ -191,7 +191,7 @@ def _download_file(self, relative_path: str, url: str):
             print(f"{target_path} already exists")
             return
 
-        print(f"Downloading from {url} to {target_path}")
+        print(f"Downloading from {url} to {target_path}...")
         tmp_path, _ = download_with_headers(url, None, show_progress)
         self._extract_or_move_file(tmp_path, target_path)
 
@@ -210,7 +210,7 @@ def _extract_or_move_file(self, tmp_path, target_path):
                 final_target_path = str(target_path)[:-4]  # Remove .bz2
             else:
                 final_target_path = target_path
-            
+
             with bz2.BZ2File(tmp_path, 'rb') as f_in:
                 with open(final_target_path, 'wb') as f_out:
                     shutil.copyfileobj(f_in, f_out)
@@ -227,7 +227,7 @@ def _download_from_s3(self, link, target_path):
         tmp_path = f"/tmp/{os.path.basename(s3_key)}"
 
         print(
-            f"Downloading from S3: {link}... bucket_name={bucket_name}, s3_key={s3_key}"
+            f"Downloading from S3: {link} to {target_path}... (bucket={bucket_name}, key={s3_key})"
         )
         object_info = s3.head_object(Bucket=bucket_name, Key=s3_key)
         total_size = object_info["ContentLength"]

diff --git a/datasets/__init__.py b/datasets/__init__.py
@@ -0,0 +1,2 @@
+# This file makes the datasets directory a Python package
+
diff --git a/docker-test.sh b/docker-test.sh
@@ -61,7 +61,7 @@ print_step "Testing basic functionality..."
 
 # Test help command
 print_info "Testing --help command..."
-if docker run --rm "$FULL_IMAGE_NAME" run.py --help > /dev/null; then
+if docker run --rm "$FULL_IMAGE_NAME" vector-db-benchmark --help > /dev/null; then
     print_info "✅ Help command works"
 else
     print_error "❌ Help command failed"
@@ -70,7 +70,7 @@ fi
 
 # Test Python environment
 print_info "Testing Python environment..."
-if docker run --rm "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
+if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
     print_info "✅ Python environment works"
 else
     print_error "❌ Python environment test failed"
@@ -91,14 +91,14 @@ if docker run -d --name "$REDIS_CONTAINER_NAME" -p 6379:6379 redis:8.2-rc1-bookw
     sleep 5
 
     # Test basic connection
-    if timeout 10 docker run --rm --network=host "$FULL_IMAGE_NAME" \
+    if timeout 10 docker run --rm --network=host --entrypoint python "$FULL_IMAGE_NAME" \
         -c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')" > /dev/null 2>&1; then
         print_info "✅ Redis connectivity test passed"
 
         # Test benchmark execution with specific configuration
         print_info "Testing benchmark execution with redis-default-simple configuration..."
-        if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/app/results" "$FULL_IMAGE_NAME" \
-            run.py --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
+        if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/code/results" "$FULL_IMAGE_NAME" \
+            vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
             print_info "✅ Benchmark execution test passed"
         else
             print_warning "⚠️ Benchmark execution test failed (this may be expected without proper dataset setup)"
@@ -118,8 +118,8 @@ fi
 # Step 4: Test file output permissions
 print_step "Testing file output permissions..."
 TEMP_DIR=$(mktemp -d)
-if docker run --rm -v "$TEMP_DIR:/app/results" "$FULL_IMAGE_NAME" \
-    -c "import os; os.makedirs('/app/results', exist_ok=True); open('/app/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
+if docker run --rm -v "$TEMP_DIR:/code/results" --entrypoint python "$FULL_IMAGE_NAME" \
+    -c "import os; os.makedirs('/code/results', exist_ok=True); open('/code/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
     if [ -f "$TEMP_DIR/test.txt" ]; then
         print_info "✅ File output test passed"
     else
@@ -137,8 +137,8 @@ print_info "Image size: $IMAGE_SIZE"
 
 # Step 6: Test benchmark configuration loading
 print_step "Testing benchmark configuration loading..."
-if docker run --rm "$FULL_IMAGE_NAME" \
-    -c "import json; import os; print('Configuration loading test'); print(os.listdir('/app'))" > /dev/null 2>&1; then
+if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" \
+    -c "import json; import os; print('Configuration loading test'); print(os.listdir('/code'))" > /dev/null 2>&1; then
     print_info "✅ Configuration loading test passed"
 else
     print_warning "⚠️ Configuration loading test completed with warnings"

diff --git a/experiments/__init__.py b/experiments/__init__.py
@@ -0,0 +1,2 @@
+# This file makes the experiments directory a Python package
+
diff --git a/experiments/configurations/__init__.py b/experiments/configurations/__init__.py
@@ -0,0 +1,2 @@
+# This file makes the configurations directory a Python package
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,8 +1,38 @@
 [tool.poetry]
-name = "vector-db-benchmark"
+name = "vector-benchmark"
 version = "0.1.0"
-description = ""
-authors = ["Kacper Łukawski <kacper.lukawski@qdrant.com>"]
+description = "Benchmark suite for vector databases with Redis support. Forked from the original vector-db-benchmark project."
+authors = ["Redis Performance Team <performance@redis.com>"]
+readme = "README.md"
+license = "LICENSE"  # NOTE(review): Poetry expects a license identifier (e.g. an SPDX id), not a file name - confirm and replace
+homepage = "https://github.com/redislabs/vector-db-benchmark"
+repository = "https://github.com/redislabs/vector-db-benchmark"
+keywords = ["vector", "database", "benchmark", "redis", "similarity-search"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Testing",
+    "Topic :: Database",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+packages = [
+    { include = "benchmark" },
+    { include = "dataset_reader" },
+    { include = "engine" },
+]
+include = [  # explicit formats: Poetry >= 2.0 ships bare include entries in the sdist only
+    { path = "run.py", format = ["sdist", "wheel"] },  # module behind the console script
+    { path = "datasets/__init__.py", format = ["sdist", "wheel"] },
+    { path = "datasets/datasets.json", format = ["sdist", "wheel"] },
+    { path = "datasets/random-100/**/*", format = ["sdist", "wheel"] },
+    { path = "experiments/__init__.py", format = ["sdist", "wheel"] },
+    { path = "experiments/configurations/**/*", format = ["sdist", "wheel"] },
+]
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.14"
@@ -29,6 +59,9 @@ boto3 = "^1.39.4"
 pre-commit = "^2.20.0"
 pytest = "^7.1"
 
+[tool.poetry.scripts]
+vector-db-benchmark = "run:app"
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
diff --git a/run.py b/run.py
@@ -83,16 +83,15 @@ def run(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
 ):
     """
+    Run vector database benchmarks.
+
     Examples:
-        # Use pattern matching to select engines (original behavior)
-        python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*
-
-        # Use engines from a specific JSON file
-        python3 run.py --engines-file my_engines.json --datasets glove-*
-
-        # Describe available options
-        python3 run.py --describe datasets
-        python3 run.py --describe engines --verbose
+
+      vector-db-benchmark --engines redis-* --datasets glove-*
+
+      vector-db-benchmark --describe datasets
+
+      vector-db-benchmark --describe engines --verbose
     """
     # Handle describe option first
     if describe:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# This file makes the datasets directory a Python package
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# This file makes the experiments directory a Python package
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# This file makes the configurations directory a Python package