-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
45 lines (36 loc) · 1012 Bytes
/
docker-compose.yml
File metadata and controls
45 lines (36 loc) · 1012 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Docker Compose definition for the single `llm-proxy` service.
# NOTE(review): recent Docker Compose releases treat the top-level `version`
# key as obsolete (it is ignored with a warning); kept for older tooling.
version: '3.8'

services:
  llm-proxy:
    image: llm-proxy:latest
    container_name: llm-proxy
    restart: unless-stopped
    # Host networking: no `ports:` mapping is declared, the service is
    # reached directly on the host at the PORT configured below.
    network_mode: host
    environment:
      # Required settings. Both values are placeholders and must be replaced
      # before starting the service; avoid committing a real key to VCS.
      - API_KEY=sk-you-model-key
      - BASE_URL=you-model-endpoint
      # Model configuration
      - BIG_MODEL=qwen3-coder
      - SMALL_MODEL=qwen3-coder
      # Server configuration
      - HOST=0.0.0.0
      - PORT=4000
      - LOG_LEVEL=INFO
      - MAX_TOKENS_LIMIT=16384
      # Connection configuration
      # NOTE(review): REQUEST_TIMEOUT is presumably in seconds - confirm.
      - REQUEST_TIMEOUT=90
      - MAX_RETRIES=1
      # Streaming configuration
      # NOTE(review): streaming is force-disabled here, so
      # MAX_STREAMING_RETRIES presumably has no effect - confirm.
      - MAX_STREAMING_RETRIES=12
      - FORCE_DISABLE_STREAMING=true
      - EMERGENCY_DISABLE_STREAMING=false
      # Tokenizer configuration
      - TOKENIZER_FILE=tokenizers/qwen3coder30b_tokenizer.json
      # Worker configuration
      - WORKERS=4
    healthcheck:
      # Probes the /health endpoint; the port must stay in sync with PORT
      # above. Assumes `curl` is available inside the image - TODO confirm.
      test: ["CMD", "curl", "-f", "http://localhost:4000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s