From 263ae9495781c9f6c68892750249a7ee964975b8 Mon Sep 17 00:00:00 2001 From: Jianning Wang Date: Thu, 12 Feb 2026 16:10:46 +0800 Subject: [PATCH] ci: continuous benching --- .github/workflows/continuous_bench.yml | 26 ++++++++ .github/workflows/scripts/run_vdb.sh | 88 ++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 .github/workflows/continuous_bench.yml create mode 100644 .github/workflows/scripts/run_vdb.sh diff --git a/.github/workflows/continuous_bench.yml b/.github/workflows/continuous_bench.yml new file mode 100644 index 00000000..ecf35aa9 --- /dev/null +++ b/.github/workflows/continuous_bench.yml @@ -0,0 +1,26 @@ +name: Continuous Benchmark +on: + push: + branches: [ "main", "ci/continuous_bench_squash" ] + paths-ignore: + - '**.md' + workflow_dispatch: + +concurrency: + group: cb-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + benchmark: + runs-on: vdbbench + steps: + - uses: actions/checkout@v4 + + - name: Run VectorDBBench + env: + DATABASE_URL: ${{ secrets.DATABASE_URL }} + run: | + bash .github/workflows/scripts/run_vdb.sh \ No newline at end of file diff --git a/.github/workflows/scripts/run_vdb.sh b/.github/workflows/scripts/run_vdb.sh new file mode 100644 index 00000000..f153a598 --- /dev/null +++ b/.github/workflows/scripts/run_vdb.sh @@ -0,0 +1,88 @@ +set -e + +QUANTIZE_TYPE_LIST="int8 int4 fp16 fp32" +CASE_TYPE_LIST="Performance768D1M Performance768D10M Performance1536D500K" # respectively test cosine, ip # Performance960D1M l2 metrics +LOG_FILE="bench.log" +DATE=$(date +%Y-%m-%d_%H-%M-%S) +NPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2) + +# COMMIT_ID = branch-date-sha +COMMIT_ID=${GITHUB_REF_NAME}-"$DATE"-$(echo ${GITHUB_WORKFLOW_SHA} | cut -c1-8) +COMMIT_ID=$(echo "$COMMIT_ID" | sed 's/\//_/g') +echo "COMMIT_ID: $COMMIT_ID" +echo "GITHUB_WORKFLOW_SHA: $GITHUB_WORKFLOW_SHA" +echo "workspace: $GITHUB_WORKSPACE" +DB_LABEL_PREFIX="Zvec16c64g-$COMMIT_ID" + +# install zvec +git submodule update --init + +# for debug +#cd .. +#export SKBUILD_BUILD_DIR="$GITHUB_WORKSPACE/../build" +pwd + +python3 -m venv .venv +source .venv/bin/activate +pip install cmake ninja psycopg2-binary loguru fire +pip install -e /opt/VectorDBBench + +CMAKE_GENERATOR="Unix Makefiles" \ +CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \ +pip install -v "$GITHUB_WORKSPACE" + +for CASE_TYPE in $CASE_TYPE_LIST; do + echo "Running VectorDBBench for $CASE_TYPE" + DATASET_DESC="" + if [ "$CASE_TYPE" == "Performance768D1M" ]; then + DATASET_DESC="Performance768D1M - Cohere Cosine" + elif [ "$CASE_TYPE" == "Performance768D10M" ]; then + DATASET_DESC="Performance768D10M - Cohere Cosine" + else + DATASET_DESC="Performance1536D500K - OpenAI IP" + fi + + for QUANTIZE_TYPE in $QUANTIZE_TYPE_LIST; do + DB_LABEL="$DB_LABEL_PREFIX-$CASE_TYPE-$QUANTIZE_TYPE" + echo "Running VectorDBBench for $DB_LABEL" + + VDB_PARAMS="--path ${DB_LABEL} --db-label ${DB_LABEL} --case-type ${CASE_TYPE} --num-concurrency 12,14,16,18,20" + if [ "$CASE_TYPE" == "Performance768D1M" ]; then + VDB_PARAMS="${VDB_PARAMS} --m 15 --ef-search 180" + elif [ "$CASE_TYPE" == "Performance768D10M" ]; then + VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 118 --is-using-refiner" + else #Performance1536D500K using default params + refiner to monitor performance degradation + VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 100 --is-using-refiner" + fi + + if [ "$QUANTIZE_TYPE" == "fp32" ]; then + vectordbbench zvec ${VDB_PARAMS} 2>&1 | tee $LOG_FILE + else + vectordbbench zvec ${VDB_PARAMS} --quantize-type "${QUANTIZE_TYPE}" 2>&1 | tee $LOG_FILE + fi + + RESULT_JSON_PATH=$(grep -o "/opt/VectorDBBench/.*\.json" $LOG_FILE) + QPS=$(jq -r '.results[0].metrics.qps' "$RESULT_JSON_PATH") + RECALL=$(jq -r '.results[0].metrics.recall' "$RESULT_JSON_PATH") + LATENCY_P99=$(jq -r '.results[0].metrics.serial_latency_p99' "$RESULT_JSON_PATH") + LOAD_DURATION=$(jq -r '.results[0].metrics.load_duration' "$RESULT_JSON_PATH") + + #quote the var to avoid space in the label + label_list="case_type=\"${CASE_TYPE}\",dataset_desc=\"${DATASET_DESC}\",db_label=\"${DB_LABEL}\",commit=\"${COMMIT_ID}\",date=\"${DATE}\",quantize_type=\"${QUANTIZE_TYPE}\"" + # replace `/` with `_` in label_list + label_list=$(echo "$label_list" | sed 's/\//_/g') + cat < prom_metrics.txt + # TYPE vdb_bench_qps gauge + vdb_bench_qps{$label_list} $QPS + # TYPE vdb_bench_recall gauge + vdb_bench_recall{$label_list} $RECALL + # TYPE vdb_bench_latency_p99 gauge + vdb_bench_latency_p99{$label_list} $LATENCY_P99 + # TYPE vdb_bench_load_duration gauge + vdb_bench_load_duration{$label_list} $LOAD_DURATION +EOF + echo "prom_metrics:" + cat prom_metrics.txt + curl --data-binary @prom_metrics.txt "http://47.93.34.27:9091/metrics/job/benchmarks-${CASE_TYPE}/case_type/${CASE_TYPE}/quantize_type/${QUANTIZE_TYPE}" -v + done +done \ No newline at end of file