- 将论文中的 Embedding model 替换为 BAAI/bge-small-en-v1.5,无需调用 OpenAI API
- 将 LLM 替换为 Qwen3.5-9B,更加轻量化,单张 5090 即可运行
服务器上默认使用:
/code/melon/agentdojo
/root/autodl-tmp/models/Qwen3.5-9B
/root/autodl-tmp/models/bge-small-en-v1.5
/root/autodl-tmp/hf-cache

conda create -n agentdojo python=3.11 -y
conda activate agentdojo
cd /code/melon/agentdojo
python -m pip install -U pip
pip install -e ".[transformers]"

conda create -n vllm-qwen35 python=3.12 -y
conda activate vllm-qwen35
python -m pip install -U pip
pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly

如果是 5090 / Blackwell,不稳定时改用:
pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly/cu130

启动前确认:
ls -lah /root/autodl-tmp/models/Qwen3.5-9B/config.json
ls -lah /root/autodl-tmp/models/bge-small-en-v1.5/config.json

conda activate vllm-qwen35
export HF_HOME=/root/autodl-tmp/hf-cache
unset OMP_NUM_THREADS
export OMP_NUM_THREADS=1
vllm serve /root/autodl-tmp/models/Qwen3.5-9B \
--host 0.0.0.0 \
--port 8000 \
--tensor-parallel-size 1 \
--gpu-memory-utilization 0.88 \
--max-model-len 65536 \
--max-num-seqs 1 \
--max-cudagraph-capture-size 128 \
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--reasoning-parser qwen3 \
--default-chat-template-kwargs '{"enable_thinking": false}' \
--language-model-only 2>&1 | tee vllm.log

检查:
curl http://127.0.0.1:8000/health
curl http://127.0.0.1:8000/v1/models

conda activate /root/autodl-tmp/conda/envs/agentdojo
cd /code/melon/agentdojo
./util_scripts/run_melon_minimal.sh

默认:
- VLLM_PARSED
- tool_knowledge
- melon
- slack

conda activate /root/autodl-tmp/conda/envs/agentdojo
cd /code/melon/agentdojo
./util_scripts/run_melon_full.sh

默认:
- workspace slack travel banking
- tool_knowledge
- melon
- VLLM_PARSED

SUITE=workspace ./util_scripts/run_melon_minimal.sh
LOGDIR=./runs/test_min ./util_scripts/run_melon_minimal.sh
MELON_EMBED_DEVICE=cuda ./util_scripts/run_melon_minimal.sh

SUITES="slack workspace" ./util_scripts/run_melon_full.sh
FORCE_RERUN=1 ./util_scripts/run_melon_full.sh
LOGDIR=./runs/test_full ./util_scripts/run_melon_full.sh

tail -f vllm.log

默认:
runs/qwen35-9b-melon-local-embed/benchmark.log

默认:
runs/qwen35-9b-melon-full/benchmark_workspace.log
runs/qwen35-9b-melon-full/benchmark_slack.log
runs/qwen35-9b-melon-full/benchmark_travel.log
runs/qwen35-9b-melon-full/benchmark_banking.log

This project is built on MELON (Zhu et al., ICML 2025).
@inproceedings{zhu2025melon,
title={MELON: Provable Defense Against Indirect Prompt Injection Attacks in AI Agents},
author={Zhu, Kaijie and Yang, Xianjun and Wang, Jindong and Guo, Wenbo and Wang, William Yang},
booktitle={International Conference on Machine Learning},
year={2025}
}