Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ Thumbs.db
.github/

# Documentation
README.md
LICENSE
# README.md and LICENSE are needed for Poetry package metadata
*.md
!README.md
!LICENSE
docs/

# Temporary files
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/docker-build-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,22 +93,22 @@ jobs:

# Test help command
echo "Testing --help command..."
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} run.py --help
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} vector-db-benchmark --help

# Test Python environment
echo "Testing Python environment..."
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"
docker run --rm --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"

# Test Redis connectivity
echo "Testing Redis connectivity..."
docker run --rm --network host ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
docker run --rm --network host --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
-c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')"

# Test benchmark execution with specific configuration
echo "Testing benchmark execution with redis-m-16-ef-64 configuration..."
mkdir -p ./test-results
docker run --rm --network host -v "$(pwd)/test-results:/app/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
run.py --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"
docker run --rm --network host -v "$(pwd)/test-results:/code/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"

echo "✅ Docker image tests passed!"

Expand Down
43 changes: 43 additions & 0 deletions .github/workflows/publish-pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
name: Publish to PyPI
on:
release:
types: [published]
workflow_dispatch: # Allow manual triggering

jobs:
pypi:
name: Publish to PyPI
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/vector-benchmark
permissions:
contents: read
id-token: write
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'

- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: latest
virtualenvs-create: true
virtualenvs-in-project: true

- name: Build package
run: poetry build

- name: Check package
run: |
pip install twine
twine check dist/*

- name: Publish to PyPI
env:
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
run: poetry publish
40 changes: 22 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ WORKDIR /code

# Copy dependency files first for better caching
COPY poetry.lock pyproject.toml /code/
COPY README.md /code/

# Copy package directories and the run.py entry point needed by Poetry
COPY benchmark /code/benchmark
COPY dataset_reader /code/dataset_reader
COPY engine /code/engine
COPY datasets /code/datasets
COPY experiments /code/experiments
COPY run.py /code/run.py

# Configure Poetry and install dependencies
RUN poetry config virtualenvs.create false \
Expand All @@ -38,7 +47,7 @@ RUN poetry config virtualenvs.create false \
# Install additional dependencies
RUN pip install "boto3"

# Copy source code
# Copy remaining source code
COPY . /code

# Store Git information
Expand All @@ -65,31 +74,28 @@ RUN apt-get update && apt-get install -y \


# Set working directory
WORKDIR /app
WORKDIR /code

# Copy Python environment from builder
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY --from=builder /code /app
COPY --from=builder /code /code

# Create directories with proper permissions
RUN mkdir -p /app/results /app/datasets && \

chmod -R 777 /app/results /app/datasets && \
chmod -R 755 /app
RUN mkdir -p /code/results /code/datasets && \
chmod -R 777 /code/results /code/datasets && \
chmod -R 755 /code

# Create entrypoint script to handle user permissions
RUN echo '#!/bin/bash\n\
# Handle user permissions for volume mounts\n\
if [ "$1" = "run.py" ]; then\n\
# Ensure results directory is writable\n\
mkdir -p /app/results\n\
chmod 777 /app/results\n\
fi\n\
exec python "$@"' > /app/entrypoint.sh && \
chmod +x /app/entrypoint.sh
# Ensure results directory is writable\n\
mkdir -p /code/results\n\
chmod 777 /code/results\n\
exec "$@"' > /code/entrypoint.sh && \
chmod +x /code/entrypoint.sh


# Health check
Expand All @@ -100,10 +106,8 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
EXPOSE 6379 6380

# Set entrypoint

ENTRYPOINT ["/app/entrypoint.sh"]

ENTRYPOINT ["/code/entrypoint.sh"]

# Default command (show help)
CMD ["run.py", "--help"]
CMD ["vector-db-benchmark", "--help"]

8 changes: 4 additions & 4 deletions benchmark/dataset.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def download(self):
except botocore.exceptions.NoCredentialsError:
print("Credentials not found, downloading without boto3")
if not downloaded_withboto:
print(f"Downloading from URL {self.config.link}...")
print(f"Downloading from URL {self.config.link} to {target_path}...")
tmp_path, _ = download_with_headers(
self.config.link, None, show_progress
)
Expand All @@ -191,7 +191,7 @@ def _download_file(self, relative_path: str, url: str):
print(f"{target_path} already exists")
return

print(f"Downloading from {url} to {target_path}")
print(f"Downloading from {url} to {target_path}...")
tmp_path, _ = download_with_headers(url, None, show_progress)
self._extract_or_move_file(tmp_path, target_path)

Expand All @@ -210,7 +210,7 @@ def _extract_or_move_file(self, tmp_path, target_path):
final_target_path = str(target_path)[:-4] # Remove .bz2
else:
final_target_path = target_path

with bz2.BZ2File(tmp_path, 'rb') as f_in:
with open(final_target_path, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
Expand All @@ -227,7 +227,7 @@ def _download_from_s3(self, link, target_path):
tmp_path = f"/tmp/{os.path.basename(s3_key)}"

print(
f"Downloading from S3: {link}... bucket_name={bucket_name}, s3_key={s3_key}"
f"Downloading from S3: {link} to {target_path}... (bucket={bucket_name}, key={s3_key})"
)
object_info = s3.head_object(Bucket=bucket_name, Key=s3_key)
total_size = object_info["ContentLength"]
Expand Down
2 changes: 2 additions & 0 deletions datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This file makes the datasets directory a Python package

18 changes: 9 additions & 9 deletions docker-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ print_step "Testing basic functionality..."

# Test help command
print_info "Testing --help command..."
if docker run --rm "$FULL_IMAGE_NAME" run.py --help > /dev/null; then
if docker run --rm "$FULL_IMAGE_NAME" vector-db-benchmark --help > /dev/null; then
print_info "✅ Help command works"
else
print_error "❌ Help command failed"
Expand All @@ -70,7 +70,7 @@ fi

# Test Python environment
print_info "Testing Python environment..."
if docker run --rm "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
print_info "✅ Python environment works"
else
print_error "❌ Python environment test failed"
Expand All @@ -91,14 +91,14 @@ if docker run -d --name "$REDIS_CONTAINER_NAME" -p 6379:6379 redis:8.2-rc1-bookw
sleep 5

# Test basic connection
if timeout 10 docker run --rm --network=host "$FULL_IMAGE_NAME" \
if timeout 10 docker run --rm --network=host --entrypoint python "$FULL_IMAGE_NAME" \
-c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')" > /dev/null 2>&1; then
print_info "✅ Redis connectivity test passed"

# Test benchmark execution with specific configuration
print_info "Testing benchmark execution with redis-default-simple configuration..."
if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/app/results" "$FULL_IMAGE_NAME" \
run.py --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/code/results" "$FULL_IMAGE_NAME" \
vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
print_info "✅ Benchmark execution test passed"
else
print_warning "⚠️ Benchmark execution test failed (this may be expected without proper dataset setup)"
Expand All @@ -118,8 +118,8 @@ fi
# Step 4: Test file output permissions
print_step "Testing file output permissions..."
TEMP_DIR=$(mktemp -d)
if docker run --rm -v "$TEMP_DIR:/app/results" "$FULL_IMAGE_NAME" \
-c "import os; os.makedirs('/app/results', exist_ok=True); open('/app/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
if docker run --rm -v "$TEMP_DIR:/code/results" --entrypoint python "$FULL_IMAGE_NAME" \
-c "import os; os.makedirs('/code/results', exist_ok=True); open('/code/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
if [ -f "$TEMP_DIR/test.txt" ]; then
print_info "✅ File output test passed"
else
Expand All @@ -137,8 +137,8 @@ print_info "Image size: $IMAGE_SIZE"

# Step 6: Test benchmark configuration loading
print_step "Testing benchmark configuration loading..."
if docker run --rm "$FULL_IMAGE_NAME" \
-c "import json; import os; print('Configuration loading test'); print(os.listdir('/app'))" > /dev/null 2>&1; then
if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" \
-c "import json; import os; print('Configuration loading test'); print(os.listdir('/code'))" > /dev/null 2>&1; then
print_info "✅ Configuration loading test passed"
else
print_warning "⚠️ Configuration loading test completed with warnings"
Expand Down
2 changes: 2 additions & 0 deletions experiments/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This file makes the experiments directory a Python package

2 changes: 2 additions & 0 deletions experiments/configurations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This file makes the configurations directory a Python package

39 changes: 36 additions & 3 deletions pyproject.toml
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,38 @@
[tool.poetry]
name = "vector-db-benchmark"
name = "vector-benchmark"
version = "0.1.0"
description = ""
authors = ["Kacper Łukawski <kacper.lukawski@qdrant.com>"]
description = "Benchmark suite for vector databases with Redis support. Forked from the original vector-db-benchmark project."
authors = ["Redis Performance Team <performance@redis.com>"]
readme = "README.md"
license = "LICENSE"
homepage = "https://github.com/redislabs/vector-db-benchmark"
repository = "https://github.com/redislabs/vector-db-benchmark"
keywords = ["vector", "database", "benchmark", "redis", "similarity-search"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Topic :: Software Development :: Testing",
"Topic :: Database",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
packages = [
{ include = "benchmark" },
{ include = "dataset_reader" },
{ include = "engine" },
]
include = [
"run.py",
"datasets/__init__.py",
"datasets/datasets.json",
"datasets/random-100/**/*",
"experiments/__init__.py",
"experiments/configurations/**/*",
]

[tool.poetry.dependencies]
python = ">=3.9,<3.14"
Expand All @@ -29,6 +59,9 @@ boto3 = "^1.39.4"
pre-commit = "^2.20.0"
pytest = "^7.1"

[tool.poetry.scripts]
vector-db-benchmark = "run:app"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
17 changes: 8 additions & 9 deletions run.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,15 @@ def run(
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
):
"""
Run vector database benchmarks.

Examples:
# Use pattern matching to select engines (original behavior)
python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*

# Use engines from a specific JSON file
python3 run.py --engines-file my_engines.json --datasets glove-*

# Describe available options
python3 run.py --describe datasets
python3 run.py --describe engines --verbose

vector-db-benchmark --engines redis-* --datasets glove-*

vector-db-benchmark --describe datasets

vector-db-benchmark --describe engines --verbose
"""
# Handle describe option first
if describe:
Expand Down