-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathlivecodebenchv5_eval.sh
More file actions
69 lines (59 loc) · 2.23 KB
/
livecodebenchv5_eval.sh
File metadata and controls
69 lines (59 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env bash
#
# livecodebenchv5_eval.sh
#
# Starts an sglang server for MODEL_PATH in the background, runs
# `evalscope eval` against it over HTTP, then stops the server.
#
# Required environment:
#   MODEL_PATH   weights directory or checkpoint handed to sglang.
# Optional environment (defaults are assigned further down):
#   SERVED_NAME, API_PORT, TP_SIZE, WARMUPS, MAX_RUNNING_REQUESTS,
#   CHUNKED_PREFILL_SIZE, SERVER_READY_WAIT, EVAL_BATCH, EVAL_TIMEOUT,
#   EVAL_DATASET, EVAL_WORK_DIR, API_KEY, DATASET_ARGS, DATASET_LOCAL,
#   GENERATION_CONFIG, TEMPERATURE, TOP_P, MAX_NEW_TOKENS, GENERATED_NUM.
set -euo pipefail

# Preflight: abort before doing any work if a required executable is missing.
for required_tool in python3 evalscope; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "[ERROR] $required_tool is required but was not found in PATH." >&2
    exit 1
  fi
done
# MODEL_PATH is mandatory: `:?` aborts the script with this message when the
# variable is unset or empty.
: "${MODEL_PATH:?Set MODEL_PATH to the weights directory or checkpoint.}"

# Name the model is served under; defaults to the last path component of
# MODEL_PATH. `--` stops option parsing so a path that begins with `-` is
# treated as an operand rather than as a flag to basename.
SERVED_NAME=${SERVED_NAME:-$(basename -- "$MODEL_PATH")}
# Tunables. Each keeps a caller-supplied value and otherwise (unset or empty)
# is assigned the default shown, via the `:=` expansion.

# --- sglang server ---
: "${API_PORT:=8801}"
: "${TP_SIZE:=1}"
: "${WARMUPS:=1}"
: "${MAX_RUNNING_REQUESTS:=8}"
: "${CHUNKED_PREFILL_SIZE:=4096}"
# Seconds to sleep after launching the server before the evaluation starts.
: "${SERVER_READY_WAIT:=5}"

# --- evalscope client ---
: "${EVAL_BATCH:=8}"
: "${EVAL_TIMEOUT:=18000}"
: "${EVAL_DATASET:=live_code_bench}"
: "${EVAL_WORK_DIR:=$(pwd)}"
: "${API_KEY:=EMPTY}"
#######################################
# Escape a string for embedding inside a JSON double-quoted string.
# Only backslash and double quote are handled; control characters are
# passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
_json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\}   # backslashes first, so the ones added below survive
  raw=${raw//\"/\\\"}
  printf '%s' "$raw"
}

# Build the --dataset-args JSON unless the caller supplied DATASET_ARGS.
# When DATASET_LOCAL is set it is passed as "local_path"; it is escaped first
# so that a quote or backslash in the path cannot produce invalid JSON.
if [[ -z "${DATASET_ARGS:-}" ]]; then
  if [[ -n "${DATASET_LOCAL:-}" ]]; then
    DATASET_ARGS=$(printf '{"live_code_bench": {"local_path": "%s", "filters": {"remove_until": "</think>"}, "extra_params": {"start_date": "2024-08-01", "end_date": "2025-02-01"}}}' "$(_json_escape "$DATASET_LOCAL")")
  else
    DATASET_ARGS='{"live_code_bench": {"filters": {"remove_until": "</think>"}, "extra_params": {"start_date": "2024-08-01", "end_date": "2025-02-01"}}}'
  fi
fi
# Build the --generation-config JSON unless the caller supplied
# GENERATION_CONFIG. The four sampling values come from TEMPERATURE, TOP_P,
# MAX_NEW_TOKENS and GENERATED_NUM, each with the fallback shown below.
if [[ -z "${GENERATION_CONFIG:-}" ]]; then
  printf -v GENERATION_CONFIG \
    '{"do_sample": true, "temperature": %s, "top_p": %s, "max_new_tokens": %s, "n": %s}' \
    "${TEMPERATURE:-0.65}" "${TOP_P:-0.95}" "${MAX_NEW_TOKENS:-27000}" "${GENERATED_NUM:-8}"
fi
# Launch the sglang server in the background; the evaluation below talks to
# it on 127.0.0.1:${API_PORT}.
python3 -m sglang.launch_server \
  --model "${MODEL_PATH}" \
  --served-model-name "${SERVED_NAME}" \
  --port "${API_PORT}" \
  --trust-remote-code \
  --tensor-parallel-size "${TP_SIZE}" \
  --warmups "${WARMUPS}" \
  --max-running-requests "${MAX_RUNNING_REQUESTS}" \
  --chunked-prefill-size "${CHUNKED_PREFILL_SIZE}" &
SERVER_PID=$!

# Stop the background server on every exit path. Under `set -e` a failing
# command later in the script exits immediately, which would otherwise leave
# the server running. Both steps are best-effort: the process may already be
# gone, so their errors are deliberately ignored.
_stop_server() {
  kill "$SERVER_PID" >/dev/null 2>&1 || true
  wait "$SERVER_PID" >/dev/null 2>&1 || true
}
trap _stop_server EXIT

# Give the server time to come up.
# NOTE(review): this is a fixed delay, not a readiness probe; raise
# SERVER_READY_WAIT if the model takes longer to load than the default.
sleep "${SERVER_READY_WAIT}"

# Fail fast with a clear message if the server already died (bad model path,
# port in use, ...) instead of letting the evaluation run against nothing.
if ! kill -0 "$SERVER_PID" >/dev/null 2>&1; then
  echo "[ERROR] sglang server (PID $SERVER_PID) exited during startup." >&2
  exit 1
fi
# Run the evaluation against the local server's chat-completions endpoint.
# Arguments are collected in an array so each flag/value pair stays one word.
evalscope_args=(
  --model "${SERVED_NAME}"
  --generation-config "${GENERATION_CONFIG}"
  --api-url "http://127.0.0.1:${API_PORT}/v1/chat/completions"
  --api-key "${API_KEY}"
  --eval-type service
  --work-dir "${EVAL_WORK_DIR}"
  --datasets "${EVAL_DATASET}"
  --dataset-args "${DATASET_ARGS}"
  --eval-batch-size "${EVAL_BATCH}"
  --timeout "${EVAL_TIMEOUT}"
)
evalscope eval "${evalscope_args[@]}"
# Shut the background server down and reap it. Both steps are best-effort:
# output is discarded and failures (e.g. the process already exited) are
# ignored so they cannot change the script's exit status.
{
  kill "${SERVER_PID}" || true
  wait "${SERVER_PID}" || true
} >/dev/null 2>&1