diff --git a/docs/online_serving/router.md b/docs/online_serving/router.md index 82940e5680c..7abc9c06af3 100644 --- a/docs/online_serving/router.md +++ b/docs/online_serving/router.md @@ -194,7 +194,7 @@ scheduler: policy: "power_of_two" # Scheduling policy (optional): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score prefill-policy: "cache_aware" # Prefill scheduling policy in PD mode decode-policy: "request_num" # Decode scheduling policy in PD mode - eviction-interval-secs: 60 # Cache eviction interval for CacheAware scheduling + eviction-interval-secs: 60 # Counter eviction interval for CacheAware scheduling eviction-duration-mins: 30 # Eviction duration for cache-aware radix tree nodes (minutes); default: 30 balance-abs-threshold: 1 # Absolute threshold for CacheAware balancing balance-rel-threshold: 0.2 # Relative threshold for CacheAware balancing diff --git a/docs/online_serving/router_faq.md b/docs/online_serving/router_faq.md index 49083539d4c..c0fb8cba4bf 100644 --- a/docs/online_serving/router_faq.md +++ b/docs/online_serving/router_faq.md @@ -29,6 +29,24 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `empty baseURL provided` | Health check received an empty base URL | Health check cannot be performed | Registration parameters | | `failed to create request: {error}` | Failed to create health check request | The instance may be marked as unhealthy | Network environment | | `failed to read response body: {error}` | Failed to read health check response body | The instance may be marked as unhealthy | Backend instance status | +| `Failed to select mixed worker: {error}` | Failed to select Mixed worker in centralized mode | Current request returns 502 | Health status, scheduling strategy | +| `Failed to select prefill worker: {error}` | Failed to select Prefill worker in PD disaggregated mode | Current request returns 502 | Health 
status, scheduling strategy | +| `Failed to read register request body: {error}` | Failed to read registration request body | Registration request returns 400 | Request format | +| `Failed to unmarshal register request JSON: {error}` | Failed to parse registration request JSON | Registration request returns 400 | Request format | +| `Failed to create decode request for {url}: {error}` | Failed to create HTTP request to Decode instance | Current request fails | Network environment | +| `Failed to create prefill request for {url}: {error}` | Failed to create HTTP request to Prefill instance | Current request fails | Network environment | +| `Decode request failed for {url}: {error}` | Request to Decode instance failed | Current request fails | Backend instance status, network connectivity | +| `Prefill request failed for {url}: {error}` | Request to Prefill instance failed | Current request fails | Backend instance status, network connectivity | +| `Failed to read request body: {error}` | Failed to read inference request body | Current request returns 400 | Request format | +| `Failed to unmarshal request JSON: {error}` | Failed to parse inference request JSON | Current request returns 400 | Request format | +| `Failed to select worker pair: {error}` | Failed to select worker pair in PD disaggregated mode | Current request returns 502 | Health status, scheduling strategy | +| `Failed to build disaggregate_info: {error}` | Failed to build PD disaggregation communication info | Current request returns 500 | Registration parameters (connector_port, device_ids, etc.) 
| +| `Failed to encode modified request: {error}` | Failed to encode modified request body | Current request returns 500 | Request content | +| `Failed to select worker: {error}` | Failed to select worker in centralized mode | Current request returns 502 | Health status, scheduling strategy | +| `Failed to connect to backend service: {error}` | Failed to connect to backend inference instance (after 3 retries) | Current request returns 502 | Backend instance status, network connectivity | +| `Request failed (attempt {n}/{max}): {error}` | Request attempt {n} failed | If retries exhausted, request returns 502 | Backend instance status, network connectivity | +| `Failed to create backend request for {url}: {error}` | Failed to create HTTP request to backend | Current request fails | Network environment | +| `Backend request failed for {url}: {error}` | Request to backend instance failed | Current request fails | Backend instance status, network connectivity | ### Warn-Level Logs @@ -37,8 +55,9 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `Server {url} is not healthy` | The instance at this URL failed health check | Router cannot register the instance, or will remove it from the registered list | Health status | | `Instance {url} role is unknown` | Instance role cannot be recognized | The instance will not be added to the scheduling list | Registration parameters | | `cache-aware prefill: tokenizer failed, fallback to char tokens: {error}` | Tokenizer service call failed, automatically falling back to character-based tokenization | cache_aware strategy remains active, using character-based tokenization for cache matching instead of the Tokenizer; normal request processing is not affected | Tokenizer service status | -| `cache-aware prefill: tokenize failed, fallback to process_tokens: {error}` | Tokenization completely failed (e.g., empty input), falling back to process_tokens strategy | Prefill scheduling temporarily does not 
use cache_aware strategy; normal request processing is not affected | Request content, Tokenizer service status | -| `cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: {error}. ts_ms={ts}` | Tokenization failed (new format), falling back to process_tokens strategy | Prefill scheduling temporarily does not use cache_aware strategy; normal request processing is not affected | Request content, Tokenizer service status | +| `GetRemoteMetrics failed for {url}, falling back to local counter: {error}` | Failed to fetch remote metrics, falling back to local counter | Scheduling accuracy may decrease; normal request processing is not affected | Backend instance metrics port, network connectivity | +| `release worker: {url} skipped, counter already cleaned up` | Worker counter was already cleaned up when trying to release | May occur when a worker is removed by health check while requests are still in-flight | Health status, request timing | +| `release worker: {url} skipped, counter already zero (possible double-release)` | Worker counter is already zero when trying to release | Possible duplicate counter release | Request processing logic | ### Info-Level Logs @@ -49,7 +68,6 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `Successfully registered instance from index {index}` | Instance from config file registered successfully | Normal startup log | | `No instances found in config file {path}` | No instances found in the registration config file | Check whether register.yaml is empty | | `Request completed successfully.` | Request processing completed | Normal operation log | -| `Request failed, retrying...` | Request failed, retrying | Router will retry up to 3 times | | `select worker (prefill): {url}, tokens: {tokens}` | Prefill scheduler selected a worker, showing current token processing count | Normal operation log | | `select worker ({type}): {url}, count: {count}` | Decode/Mixed scheduler selected a 
worker, showing current request concurrency | Normal operation log | | `release worker: {url}, count: {count}` | Request ended, worker counter released | Normal operation log | @@ -58,7 +76,6 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `removed counters for {count} unhealthy workers: {urls}` | Batch cleanup of counters for unhealthy workers | Normal operation log | | `[stats] total_running={n}, workers: [{loads}], cache_hit_rate={rate}% (hits={hits}/total={total})` | Periodic stats: total requests, worker loads, cache hit rate | Normal operation log, useful for monitoring and tuning | | `Parsing completed; starting worker selection.` | Request parsing completed, starting worker selection | Normal operation log | -| `Request completed with an error.` | Request processing completed with an error | Check backend instance status | | `[SelectWorkerPair] decode selection failed, releasing prefill counter url={url}` | Decode selection failed in PD disaggregated mode, releasing Prefill counter | Error handling log | | `[prefill] first chunk received, release counter url={url}` | Prefill streaming response received first chunk, counter released | Normal operation log | | `[prefill] non-stream prefill response done, release counter url={url}` | Prefill non-streaming response completed, counter released | Normal operation log | @@ -66,12 +83,17 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `[prefill] release in defer (fallback) url={url}, isStream={bool}` | Fallback resource release when Prefill request exits abnormally | Error handling log | | `[prefill] release in CommonCompletions defer (error path) url={url}` | Prefill resource release on error path | Error handling log | | `cache-aware prefill: final strategy: process_tokens, reason: strategy not initialized` | cache_aware strategy not initialized, falling back to process_tokens | Check cache_aware configuration | +| `cache-aware prefill: 
final strategy: process_tokens, reason: tokenize failed: {error}. ts_ms={ts}` | Tokenization failed, falling back to process_tokens strategy | Prefill scheduling temporarily does not use cache_aware strategy; normal request processing is not affected | | `cache-aware prefill: final strategy: process_tokens, reason: load imbalanced, loads={loads}. ts_ms={ts}` | Load imbalanced across instances, falling back to process_tokens strategy | Normal operation log, automatic load balancing switch | | `cache-aware prefill: final strategy: cache_aware_scoring, selected={url}, loads={loads}, hitRatios={ratios}. ts_ms={ts}` | cache_aware scoring strategy selected a worker | Normal operation log, showing loads and hit ratios | | `[{method}] {path} {proto} {status} {latency} {clientIP}` | HTTP request access log | Normal operation log, records basic info for each request | | `before SelectWorker prefill. ts_ms={ts}` | Starting Prefill worker selection in PD disaggregated mode | Normal operation log, for performance tracing | | `before SelectWorker decode, after prefill. ts_ms={ts}` | Starting Decode worker selection after Prefill selection | Normal operation log, for performance tracing | | `after SelectWorker decode, before return. 
ts_ms={ts}` | Decode worker selection completed | Normal operation log, for performance tracing | +| `unhealthy worker counter preserved (inflight requests): {url}, count: {count}` | Unhealthy worker still has in-flight requests, counter temporarily preserved | Normal operation log, will be auto-cleaned after in-flight requests complete | +| `unhealthy worker token counter preserved (inflight requests): {url}, tokens: {tokens}` | Unhealthy worker still has in-flight token load, token counter temporarily preserved | Normal operation log, will be auto-cleaned after in-flight requests complete | +| `cleanup unhealthy worker token counter: {url}` | Cleaned up token counter for unhealthy worker | Normal operation log | +| `preserved counters for {count} workers with inflight requests: {urls}` | Batch preserved counters for workers with in-flight requests | Normal operation log | ### Debug-Level Logs @@ -100,6 +122,10 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `{"error": "Failed to build disaggregate_info"}` | 500 | Failed to build PD disaggregation communication info | Registration parameters (connector_port, device_ids, etc.) | | `{"error": "Invalid request body"}` | 400 | Failed to read request body | Request format | | `{"error": "Invalid JSON format"}` | 400 | Failed to parse request body JSON | Request format | +| `{"error": "Failed to encode modified request: {error}"}` | 500 | Failed to encode modified request body | Request content | +| `{"code": 500, "msg": "Internal server error"}` | 500 | A panic occurred during request processing and was recovered | Backend instance status, request content | + +> **Note**: In PD disaggregated (splitwise) mode, the above error responses include an additional `request_id` field, e.g., `{"error": "...", "request_id": "xxx"}`. Additionally, `Invalid request body` and `Invalid JSON format` responses include specific error details, e.g., `{"error": "Invalid request body: EOF"}`. 
### Registration Request Errors (/register) @@ -111,6 +137,7 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `{"code": 400, "msg": "splitwise mode only supports PREFILL/DECODE instances"}` | 400 | MIXED instances are not allowed in PD disaggregated mode | Deployment mode, instance role | | `{"code": 400, "msg": "only MIXED instances are allowed"}` | 400 | Only MIXED instances are allowed in centralized mode | Deployment mode, instance role | | `{"code": 400, "msg": "invalid InstanceInfo format: {error}"}` | 400 | Instance registration info validation failed | Registration parameters | +| `{"code": 400, "msg": "DefaultManager is nil"}` | 400 | Router internal manager not initialized | Router startup status | | `{"code": 200, "msg": "Register success"}` | 200 | Registration successful | — | ### Common Registration Parameter Validation Errors @@ -124,6 +151,10 @@ For basic Router usage, please refer to [Load-Balancing Scheduling Router](route | `port is required` | Missing port field | Add the port field | | `invalid port: {port}` | port is not a valid port number | Provide a port number in the range 1-65535 | | `invalid protocol: {protocol}` | Invalid transfer protocol | Use a valid protocol value: ipc / rdma | +| `invalid connector_port: {port}` | connector_port is not a valid port number | Provide a port number in the range 1-65535 | +| `invalid engine_worker_queue_port: {port}` | engine_worker_queue_port is not a valid port number | Provide a port number in the range 1-65535 | +| `invalid metrics_port: {port}` | metrics_port is not a valid port number | Provide a port number in the range 1-65535 | +| `rdma_ports[{index}] invalid port: {port}` | Port at index {index} in RDMA ports list is not valid | Provide a port number in the range 1-65535 | ## Troubleshooting Guide @@ -236,7 +267,7 @@ If `Failed to start server` appears in startup logs, check: When using the `cache_aware` scheduling strategy, the Router calls a Tokenizer 
service to tokenize requests for cache hit ratio computation. When the Tokenizer service is unavailable, the Router has a two-level degradation mechanism: 1. **Fallback to character-based tokenization** (common case): The log will show `tokenizer failed, fallback to char tokens`. The cache_aware strategy remains active, using character-based tokenization for cache matching instead of the Tokenizer. Cache hit accuracy may decrease, but normal request processing is not affected. -2. **Fallback to process_tokens strategy** (extreme case): When tokenization completely fails (e.g., empty request content), the log will show `tokenize failed, fallback to process_tokens`. The cache_aware strategy temporarily becomes inactive, and scheduling falls back to token processing volume. Normal request processing is not affected. +2. **Fallback to process_tokens strategy** (extreme case): When tokenization completely fails (e.g., empty request content), the log will show `cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: {error}. ts_ms={ts}` (Info level). The cache_aware strategy temporarily becomes inactive, and scheduling falls back to token processing volume. Normal request processing is not affected. 
To restore full cache_aware functionality: diff --git a/docs/zh/online_serving/router.md b/docs/zh/online_serving/router.md index 0ace28c2da1..375f036ad2c 100644 --- a/docs/zh/online_serving/router.md +++ b/docs/zh/online_serving/router.md @@ -194,7 +194,7 @@ scheduler: policy: "power_of_two" # 调度策略(可选): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score; 默认: request_num prefill-policy: "cache_aware" # pd分离模式下prefill节点调度策略; 默认: process_tokens decode-policy: "request_num" # pd分离模式下decode节点调度策略; 默认: request_num - eviction-interval-secs: 60 # cache-aware策略清理过期cache的间隔时间 + eviction-interval-secs: 60 # cache-aware策略清理过期计数器的间隔时间 eviction-duration-mins: 30 # cache-aware策略radix tree节点驱逐时间(分钟); 默认: 30 balance-abs-threshold: 1 # cache-aware策略绝对阈值 balance-rel-threshold: 0.2 # cache-aware策略相对阈值 diff --git a/docs/zh/online_serving/router_faq.md b/docs/zh/online_serving/router_faq.md index a42ed015283..9c32726f4dc 100644 --- a/docs/zh/online_serving/router_faq.md +++ b/docs/zh/online_serving/router_faq.md @@ -29,6 +29,24 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `empty baseURL provided` | 健康检查时传入了空的基础 URL | 健康检查无法执行 | 注册参数 | | `failed to create request: {error}` | 创建健康检查请求失败 | 该实例可能被判定为不健康 | 网络环境 | | `failed to read response body: {error}` | 读取健康检查响应体失败 | 该实例可能被判定为不健康 | 后端实例状态 | +| `Failed to select mixed worker: {error}` | 集中式模式下选择 Mixed Worker 失败 | 当前请求返回 502 | 健康状况、调度策略 | +| `Failed to select prefill worker: {error}` | PD 分离模式下选择 Prefill Worker 失败 | 当前请求返回 502 | 健康状况、调度策略 | +| `Failed to read register request body: {error}` | 读取注册请求体失败 | 该注册请求返回 400 | 请求格式 | +| `Failed to unmarshal register request JSON: {error}` | 解析注册请求 JSON 失败 | 该注册请求返回 400 | 请求格式 | +| `Failed to create decode request for {url}: {error}` | 创建发往 Decode 实例的 HTTP 请求失败 | 当前请求失败 | 网络环境 | +| `Failed to create prefill request for {url}: {error}` | 创建发往 Prefill 实例的 HTTP 请求失败 | 当前请求失败 | 网络环境 | +| `Decode request failed 
for {url}: {error}` | 发往 Decode 实例的请求失败 | 当前请求失败 | 后端实例状态、网络连通性 | +| `Prefill request failed for {url}: {error}` | 发往 Prefill 实例的请求失败 | 当前请求失败 | 后端实例状态、网络连通性 | +| `Failed to read request body: {error}` | 读取推理请求体失败 | 当前请求返回 400 | 请求格式 | +| `Failed to unmarshal request JSON: {error}` | 解析推理请求 JSON 失败 | 当前请求返回 400 | 请求格式 | +| `Failed to select worker pair: {error}` | PD 分离模式下选择 Worker 对失败 | 当前请求返回 502 | 健康状况、调度策略 | +| `Failed to build disaggregate_info: {error}` | 构建 PD 分离通信信息失败 | 当前请求返回 500 | 注册参数(connector_port、device_ids 等) | +| `Failed to encode modified request: {error}` | 编码修改后的请求体失败 | 当前请求返回 500 | 请求内容 | +| `Failed to select worker: {error}` | 集中式模式下选择 Worker 失败 | 当前请求返回 502 | 健康状况、调度策略 | +| `Failed to connect to backend service: {error}` | 连接后端推理实例失败(已重试 3 次仍失败) | 当前请求返回 502 | 后端实例状态、网络连通性 | +| `Request failed (attempt {n}/{max}): {error}` | 请求发送第 {n} 次尝试失败 | 若重试耗尽则请求返回 502 | 后端实例状态、网络连通性 | +| `Failed to create backend request for {url}: {error}` | 创建发往后端的 HTTP 请求失败 | 当前请求失败 | 网络环境 | +| `Backend request failed for {url}: {error}` | 发往后端实例的请求失败 | 当前请求失败 | 后端实例状态、网络连通性 | ### Warn 级别日志 @@ -37,8 +55,9 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `Server {url} is not healthy` | 该 URL 对应的实例未通过健康检查 | Router 无法注册该实例,或将该实例从已注册列表中移除 | 健康状况 | | `Instance {url} role is unknown` | 实例角色无法识别 | 该实例不会被加入调度列表 | 注册参数 | | `cache-aware prefill: tokenizer failed, fallback to char tokens: {error}` | Tokenizer 服务调用失败,已自动回退至字符级分词 | cache_aware 策略仍然生效,使用字符级分词代替 Tokenizer 进行缓存匹配,不影响正常请求处理 | Tokenizer 服务状态 | -| `cache-aware prefill: tokenize failed, fallback to process_tokens: {error}` | 分词彻底失败(如输入为空),回退至 process_tokens 策略 | Prefill 调度暂时不使用 cache_aware 策略,不影响正常请求处理 | 请求内容、Tokenizer 服务状态 | -| `cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: {error}. 
ts_ms={ts}` | 分词失败(新格式),回退至 process_tokens 策略 | Prefill 调度暂时不使用 cache_aware 策略,不影响正常请求处理 | 请求内容、Tokenizer 服务状态 | +| `GetRemoteMetrics failed for {url}, falling back to local counter: {error}` | 获取远程 metrics 失败,已回退至本地计数器 | 调度精度可能下降,不影响正常请求处理 | 后端实例 metrics 端口、网络连通性 | +| `release worker: {url} skipped, counter already cleaned up` | 释放 Worker 计数器时发现已被清理 | 可能是 Worker 被健康检查移除后仍有在途请求完成 | 健康状况、请求时序 | +| `release worker: {url} skipped, counter already zero (possible double-release)` | 释放 Worker 计数器时发现已归零 | 可能存在计数器重复释放 | 请求处理逻辑 | ### Info 级别日志 @@ -49,7 +68,6 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `Successfully registered instance from index {index}` | 配置文件中的实例注册成功 | 正常启动日志 | | `No instances found in config file {path}` | 注册配置文件中未找到实例信息 | 请检查 register.yaml 内容是否为空 | | `Request completed successfully.` | 请求处理完成 | 正常运行日志 | -| `Request failed, retrying...` | 请求失败,正在进行重试 | Router 最多重试 3 次 | | `select worker (prefill): {url}, tokens: {tokens}` | Prefill 调度选中 Worker,显示当前 token 处理量 | 正常运行日志 | | `select worker ({type}): {url}, count: {count}` | Decode/Mixed 调度选中 Worker,显示当前请求并发数 | 正常运行日志 | | `release worker: {url}, count: {count}` | 请求结束,释放 Worker 计数器 | 正常运行日志 | @@ -58,7 +76,6 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `removed counters for {count} unhealthy workers: {urls}` | 批量清理不健康 Worker 的计数器 | 正常运行日志 | | `[stats] total_running={n}, workers: [{loads}], cache_hit_rate={rate}% (hits={hits}/total={total})` | 周期性统计:总请求数、各 Worker 负载、缓存命中率 | 正常运行日志,用于监控调优 | | `Parsing completed; starting worker selection.` | 请求解析完成,开始选择 Worker | 正常运行日志 | -| `Request completed with an error.` | 请求处理完成但发生错误 | 请排查后端实例状态 | | `[SelectWorkerPair] decode selection failed, releasing prefill counter url={url}` | PD 分离模式下 Decode 选择失败,释放 Prefill 计数器 | 异常处理日志 | | `[prefill] first chunk received, release counter url={url}` | Prefill 流式响应收到首个数据块,释放计数器 | 正常运行日志 | | `[prefill] non-stream prefill response done, release counter url={url}` | Prefill 非流式响应完成,释放计数器 | 正常运行日志 | @@ -72,6 +89,11 @@ Router 
的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `before SelectWorker prefill. ts_ms={ts}` | PD 分离模式下开始选择 Prefill Worker | 正常运行日志,用于性能追踪 | | `before SelectWorker decode, after prefill. ts_ms={ts}` | Prefill 选择完成后开始选择 Decode Worker | 正常运行日志,用于性能追踪 | | `after SelectWorker decode, before return. ts_ms={ts}` | Decode Worker 选择完成 | 正常运行日志,用于性能追踪 | +| `cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: {error}. ts_ms={ts}` | 分词失败,回退至 process_tokens 策略 | Prefill 调度暂时不使用 cache_aware 策略,不影响正常请求处理 | +| `unhealthy worker counter preserved (inflight requests): {url}, count: {count}` | 不健康 Worker 仍有在途请求,计数器暂时保留 | 正常运行日志,待在途请求完成后自动清理 | +| `unhealthy worker token counter preserved (inflight requests): {url}, tokens: {tokens}` | 不健康 Worker 仍有在途 token 负载,token 计数器暂时保留 | 正常运行日志,待在途请求完成后自动清理 | +| `cleanup unhealthy worker token counter: {url}` | 清理不健康 Worker 的 token 计数器 | 正常运行日志 | +| `preserved counters for {count} workers with inflight requests: {urls}` | 批量保留仍有在途请求的 Worker 计数器 | 正常运行日志 | ### Debug 级别日志 @@ -100,6 +122,10 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `{"error": "Failed to build disaggregate_info"}` | 500 | 构建 PD 分离通信信息失败 | 注册参数(connector_port、device_ids 等) | | `{"error": "Invalid request body"}` | 400 | 请求体读取失败 | 请求格式 | | `{"error": "Invalid JSON format"}` | 400 | 请求体 JSON 解析失败 | 请求格式 | +| `{"error": "Failed to encode modified request: {error}"}` | 500 | 编码修改后的请求体失败 | 请求内容 | +| `{"code": 500, "msg": "Internal server error"}` | 500 | 请求处理过程中发生 panic 并被恢复 | 后端实例状态、请求内容 | + +> **说明**:在 PD 分离(splitwise)模式下,以上错误响应会额外包含 `request_id` 字段,如 `{"error": "...", "request_id": "xxx"}`。此外,`Invalid request body` 和 `Invalid JSON format` 的实际输出会包含具体的错误详情,如 `{"error": "Invalid request body: EOF"}`。 ### 注册请求错误(/register) @@ -112,6 +138,7 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `{"code": 400, "msg": "only MIXED instances are allowed"}` | 400 | 集中式模式下只允许注册 MIXED 实例 | 部署模式、实例角色 | | `{"code": 400, "msg": "invalid InstanceInfo format: {error}"}` | 400 | 
实例注册信息校验失败 | 注册参数 | | `{"code": 200, "msg": "Register success"}` | 200 | 注册成功 | — | +| `{"code": 400, "msg": "DefaultManager is nil"}` | 400 | Router 内部管理器未初始化 | Router 启动状态 | ### 常见注册参数校验错误 @@ -124,6 +151,10 @@ Router 的基本使用方式请参考 [负载均衡调度 Router](router.md)。 | `port is required` | 缺少 port 字段 | 添加 port 字段 | | `invalid port: {port}` | port 不是合法的端口号 | 填写 1-65535 范围内的端口号 | | `invalid protocol: {protocol}` | 传输协议不合法 | 使用合法的协议值:ipc / rdma | +| `invalid connector_port: {port}` | connector_port 不是合法的端口号 | 填写 1-65535 范围内的端口号 | +| `invalid engine_worker_queue_port: {port}` | engine_worker_queue_port 不是合法的端口号 | 填写 1-65535 范围内的端口号 | +| `invalid metrics_port: {port}` | metrics_port 不是合法的端口号 | 填写 1-65535 范围内的端口号 | +| `rdma_ports[{index}] invalid port: {port}` | RDMA 端口列表中第 {index} 个端口号不合法 | 填写 1-65535 范围内的端口号 | ## 常见问题排查方式 @@ -236,7 +267,7 @@ PD 分离模式下建议完整配置以下参数,以确保 KV Cache 传输正 使用 `cache_aware` 调度策略时,Router 会调用 Tokenizer 服务对请求进行分词以计算缓存命中率。当 Tokenizer 服务不可用时,Router 内置了两级退化机制: 1. **回退至字符级分词**(常见情况):日志出现 `tokenizer failed, fallback to char tokens`。此时 cache_aware 策略仍然生效,只是使用字符级分词代替 Tokenizer 进行缓存匹配,缓存命中精度会有所下降,但不影响正常请求处理。 -2. **回退至 process_tokens 策略**(极端情况):当分词彻底失败(如请求内容为空)时,日志出现 `tokenize failed, fallback to process_tokens`。此时 cache_aware 策略暂时不生效,改为按 token 处理量进行调度,同样不影响正常请求处理。 +2. **回退至 process_tokens 策略**(极端情况):当分词彻底失败(如请求内容为空)时,日志出现 `cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: {error}. 
ts_ms={ts}`(Info 级别)。此时 cache_aware 策略暂时不生效,改为按 token 处理量进行调度,同样不影响正常请求处理。 如需恢复 cache_aware 策略的完整功能: diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/SKILL.md b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/SKILL.md new file mode 100644 index 00000000000..097a10f8163 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/SKILL.md @@ -0,0 +1,124 @@ +--- +name: stat-cache-hitrate +description: > + 统计 FastDeploy Go Router 日志中的三层 cache 命中率指标,生成可视化报告。 + 三层指标:Prefix Hit Ratio(KV Cache 内容复用度)、Session Hit Rate(请求级路由粘性)、 + Per-Worker Cache Stats(各 prefill worker 的缓存利用排名)。支持全量统计、tail 快速查看、 + 指定时间段统计(--start/--end)。 + + 当用户提到以下内容时触发此 skill:统计/查看 cache 命中率、查看 cache-aware 调度效果、 + 查看缓存预热情况、统计 hitRatio、查看 prefix 命中率、session hit rate。 + 关键词:cache 命中率、hitRatio、cache-aware、prefix hit、session hit rate、 + 缓存预热、/stat-cache-hitrate。 + +IMPORTANT: 执行前阅读 references/log_formats.md 了解日志格式和解析规则。 +--- + +# Cache Hit Rate Statistics + +统计 FastDeploy Go Router 的三层 cache 命中率,生成可视化报告。 + +## 执行前交互 + +运行脚本前,Claude 必须先向用户确认以下参数: + +### 1. 日志文件路径 +使用 AskUserQuestion 工具向用户询问日志文件路径。提供两个常用快捷选项(客户端会自动提供 Other 自定义输入): +- 选项 1: `logs/router.log`(默认) +- 选项 2: `fd-router.log`(golang_router 根目录常用文件名) + +**重要规则**: +- 如果用户已经在消息中明确指定了日志路径,直接使用该路径,跳过询问步骤 +- 用户指定路径后不要质疑、推荐替代文件、或以任何理由尝试切换到其他文件 +- 支持绝对路径(如 `/home/user/logs/xxx.log`)和相对路径(如 `logs/fd-router (2).log`) + +如果用户直接确认或未指定路径,使用默认值 `logs/router.log`。 + +### 2. 
分析模式 +必须使用 **AskUserQuestion 的离散选项**(不要只发纯文本编号,避免客户端偶发不显示第 4 项): +- 选项 1: `全量统计(默认)` — 扫描完整日志 +- 选项 2: `快速查看尾部` — 只看最近的数据(支持 `2000`、`1k`、`1w` 等行数写法) +- 选项 3: `指定时间段` — 分析特定时间范围(如 `--start "16:00" --end "17:00"`) + +**若用户选择"快速查看尾部",必须再询问行数**,提供选项: +- 选项 1: `2000 行(默认)` +- 选项 2: `5000 行` +- 选项 3: `1万行` + +若用户选择"指定时间段",直接让用户填写: +- 从 `xxx` 开始,到 `xxx` 结束(`start/end` 可只填一个); +- 支持相对时间写法:`30m`、`2h`、`1d`、`最后30分钟` 等(换算为绝对时间) +- 然后映射为 `--start/--end` 参数执行。 + +如果用户未选择,默认使用全量统计。 + +`--start/--end` 与 `--tail` 互斥。`--start` 和 `--end` 可单独或同时指定。 +`--tail` 仅支持"行数"语义(如 `2000`,也兼容 `1k/1w` 自动换算),不再支持 `30m/2h/1d` 这类时间窗口;按时间请使用 `--start/--end`。 +时间格式灵活:支持 `YYYY/MM/DD HH:MM:SS`、`HH:MM:SS`、`HH:MM`、`MM/DD`、`MM/DD HH:MM`、相对时间(`30m`、`2h`、`1d`、`最后30分钟`)。 +缺失部分自动从日志首末行推断。 + +### 3. 输出目录 +分析结果默认保存到 `skill_output/stat-cache-hitrate/<运行时间戳>/`(自动按运行时间创建子目录)。 +用户可通过 `--output` 指定**基目录**,脚本会继续在其下创建 `<运行时间戳>/summary` 与 `<运行时间戳>/detail`,避免覆盖历史明细。 + +## 使用方式 + +运行统计脚本(相对于 `fastdeploy/golang_router/` 目录): + +```bash +# 全量统计 +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --output skill_output/stat-cache-hitrate/ + +# 快速查看尾部数据 +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --tail # 默认最后 2000 行 +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --tail 5000 # 指定行数 +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --tail 1k # 行数缩写(自动换算) +# 指定时间段(需要按时间筛选时使用;--start 和 --end 可单独或同时使用) +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --start "16:00:00" --end "17:00:00" +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --start "2026/03/31 16:00:00" +python3 .claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py <日志文件> --start "03/31" --end "03/31 18:00" +``` + +默认日志路径:`logs/router.log`(相对于 `fastdeploy/golang_router/`)。常用备选:`fd-router.log`(根目录)。不传 `--output` 时自动输出到 `skill_output/stat-cache-hitrate/<运行时间戳>/`。 + +脚本会自动根据文件大小选择解析策略:小文件(<5000 
行)在内存中处理,大文件用 grep + 管道流式处理。 + +## 输出说明 + +### 三层指标 + +| 层级 | 指标 | 含义 | +|------|------|------| +| 第一层 | Prefix Hit Ratio | 被选中 worker 的 KV cache 命中率,反映内容级复用度 | +| 第二层 | Session Hit Rate | 带 session_id 的请求被路由到同一 worker 的比例 | +| 第三层 | Per-Worker Stats | 每个 prefill worker 被选中的次数和平均命中率排名 | + +### 输出文件位置 + +详细报告和图表输出到 `skill_output/stat-cache-hitrate//` 目录,每次运行自动创建带时间戳的子目录。 + +- `summary/cache_hitrate_report.md` — Per-Worker 统计 + Fallback 明细 + 详情链接 +- `detail/per_window_data.md` — 每5s窗口明细(连续空窗口自动合并为 3 行:起始/合并说明/结束) +- `detail/session_hit_details.md` — 每个 session(无 session_id 时回退 trace_id)的命中明细(Markdown 表格),包含 `id序号 / req_count / first_hit / avg-hit(=去首请求平均命中率) / max_hit / min_hit / all_hits / purl_cnt / prefill_urls`,并附「序号与会话ID映射」「切换 reqid 明细(含 session 时间段,可跳转)」。 + +### 交叉诊断矩阵 + +| Session HR | Prefix HR | 诊断 | +|------------|-----------|------| +| 高 | 高 | cache-aware 策略运行良好 | +| 高 | 低 | session 粘性好但 prompt 内容变化大,KV cache 实际复用低 | +| 低 | 高 | 换 worker 了但新 worker 也有类似前缀缓存 | +| 低 | 低 | 负载均衡强制分散或缓存未预热 | + +## 重要规则 + +1. **`[stats]` 计数器 per-interval**:每 5s `atomic.Swap(0)` 重置,必须 sum 所有行计算累计值 +2. **Session HR 只统计带 session_id 的请求** +3. **Prefix HR 取 selected worker 的值**:不在 hitRatios map 中则为 0 +4. **此 skill 只关注 cache 命中率**:延迟/错误/健康等排查由 troubleshoot skill 负责 +5. 
**与 troubleshoot-cache 互补**:本 skill 做数值统计,troubleshoot-cache 做调度策略诊断 + +## 参考文件 + +- `references/log_formats.md` — 日志格式和解析规则 +- `references/report_templates.md` — 终端报告和详细导出的模板 diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/evals/trigger_eval.json b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/evals/trigger_eval.json new file mode 100644 index 00000000000..23c7f6d86aa --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/evals/trigger_eval.json @@ -0,0 +1,18 @@ +[ + {"query": "帮我统计一下 router 的 cache 命中率,日志在 logs/fd-router.log", "should_trigger": true}, + {"query": "我想看看 cache-aware 调度的效果怎么样,hitRatio 数据是多少", "should_trigger": true}, + {"query": "prefix hit ratio 和 session hit rate 分别是多少?分析一下 logs/router.log", "should_trigger": true}, + {"query": "看一下最近30分钟的缓存预热情况,用 tail 模式快速扫一下", "should_trigger": true}, + {"query": "我刚部署了新的 cache-aware 策略,帮我跑一下 /stat-cache-hitrate 看看效果", "should_trigger": true}, + {"query": "每个 prefill worker 的缓存利用率排名是怎样的?哪个 worker 命中率最高", "should_trigger": true}, + {"query": "stat cache hitrate on our go router log, need to check the KV cache reuse rate", "should_trigger": true}, + {"query": "持续监控 cache 命中率变化趋势,我想看实时数据", "should_trigger": true}, + {"query": "router 最近老是返回 502,帮我排查一下什么问题", "should_trigger": false}, + {"query": "分析一下 router 的请求延迟,p99 是不是太高了", "should_trigger": false}, + {"query": "帮我 trace 一下这个请求 ID: abc-123-def,看看整个链路", "should_trigger": false}, + {"query": "Worker 健康状态怎么样?有没有频繁下线的", "should_trigger": false}, + {"query": "帮我写一个 Go 语言的 HTTP 路由框架", "should_trigger": false}, + {"query": "分析一下 nginx 的 access log,统计各个 URL 的访问量", "should_trigger": false}, + {"query": "router 负载不均衡,某些 worker 的 running 计数异常高", "should_trigger": false}, + {"query": "帮我看看 FastDeploy 的部署文档,我想部署一个新模型", "should_trigger": false} +] diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/log_formats.md 
b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/log_formats.md new file mode 100644 index 00000000000..bc29a4cbb25 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/log_formats.md @@ -0,0 +1,139 @@ +# 日志格式参考 + +本文件描述 FastDeploy Go Router 的日志格式和解析规则。统计 cache 命中率前必须阅读。 + +--- + +## 通用日志行格式 + +``` +[LEVEL] YYYY/MM/DD HH:MM:SS logger.go:<行号>: <消息内容> +``` + +- **Level**:`[INFO]`、`[ERROR]`、`[WARN]`、`[DEBUG]` +- **Timestamp**:`YYYY/MM/DD HH:MM:SS` +- **可选 context 前缀**:`[trace_id:...]`、`[req_id:...]`、`[session_id:...]`、`[request_id:...]` 可能出现在 `logger.go:XX:` 和实际消息之间,顺序固定(trace_id → req_id → session_id → request_id),但不一定全部出现 + +--- + +## 类别 A:Cache-Aware 策略行 + +### A1. cache_aware_scoring(正常走 cache-aware 路径) + +``` +[INFO] 2026/03/30 20:16:57 logger.go:79: [session_id:slimshetty/swebench-verified:sweb.eval.x86_64.psf__requests-1766] [request_id:565a594c-...] cache-aware prefill: final strategy: cache_aware_scoring, selected=http://10.52.95.17:9263, loads=map[http://10.52.95.146:9263:20 http://10.52.95.17:9263:20 ...], hitRatios=map[http://10.52.95.17:9263:100]. ts_ms=2026-03-30 20:16:57.021 +``` + +**提取字段**: +- `selected=<url>` — 被选中的 worker URL,格式 `http://IP:PORT` +- `hitRatios=map[...]` — Go map 格式,详见下方解析规则 +- `loads=map[...]` — 各 worker 的负载 + +### A2. process_tokens fallback(未走 cache-aware 路径) + +``` +cache-aware prefill: final strategy: process_tokens, reason: load imbalanced, loads=map[...] 
+cache-aware prefill: final strategy: process_tokens, reason: tokenize failed: +cache-aware prefill: final strategy: process_tokens, reason: strategy not initialized +``` + +--- + +## 类别 B:Stats 行 + +``` +[INFO] 2026/03/30 20:14:38 logger.go:79: [stats] total_running=14, workers: [http://10.52.96.143:9867: running=0, http://10.52.95.26:9867: running=1, ...], cache_hit_rate=0.00% (hits=0/total=7) +``` + +**提取字段**: +- `total_running=` — 所有 worker 的运行请求总数 +- `workers: [...]` — 各 worker 的 `running=N` +- `cache_hit_rate=%` — 该窗口的命中率百分比 +- `(hits=/total=)` — 该 5s 窗口的命中次数和总次数 + +**关键**:`hits` 和 `total` 是 **per-interval** 的,代码使用 `atomic.Swap(0)` 每 5s 重置为 0。 + +--- + +## 类别 C:推理请求行 + +``` +[INFO] 2026/03/30 18:25:49 logger.go:79: [POST] /v1/chat/completions HTTP/1.1 200 2.798235ms 10.52.95.139 +``` + +格式:`[METHOD] /path HTTP/1.1 ` + +延迟单位可能是 `s`、`ms`、`µs`/`us`。 + +**注意**:仅 `POST /v1/chat/completions` 和 `POST /v1/completions` 为推理请求。其余路径(`/register`、`/registered_number`、`/registered`、`/health_generate`、`/metrics`)为管理/监控请求,统计推理吞吐量时应排除。 + +--- + +## Go Map 解析规则 + +Go 的 `fmt.Sprintf("%v", map)` 输出格式:`map[key1:val1 key2:val2 ...]` + +### hitRatios 的特殊挑战 + +Worker URL 包含 `:`(如 `http://10.52.95.17:9263`),而 Go map 的 key-value 分隔符也是 `:`。 +因此 `hitRatios=map[http://10.52.95.17:9263:100]` 中: +- URL = `http://10.52.95.17:9263` +- Ratio = `100` + +### 推荐解析方法 + +**方法 1:正则匹配**(推荐) + +提取 `hitRatios=map[` 和 `]` 之间的内容,然后用正则匹配每个 entry: + +``` +正则:(http://[^\s:]+:\d+):(\d+) +``` + +示例: +``` +输入:http://10.52.95.17:9263:100 http://10.52.96.143:9867:50 +匹配1:group1=http://10.52.95.17:9263, group2=100 +匹配2:group1=http://10.52.96.143:9867, group2=50 +``` + +**方法 2:从右分割** + +对 map 内容按空格分割每个 token,然后对每个 token 找最后一个 `:` 分割: +``` +token = "http://10.52.95.17:9263:100" +lastColon = 最后一个 ":" 的位置 +url = token[:lastColon] → "http://10.52.95.17:9263" +ratio = token[lastColon+1:] → "100" +``` + +### 空 map + +`hitRatios=map[]` 表示冷启动,没有任何 worker 有匹配的前缀缓存。 + +### loads map 解析 + +同样的规则适用于 `loads=map[...]`,value 
是负载数: +``` +loads=map[http://10.52.95.146:9263:20 http://10.52.95.17:9263:20] +``` + +### workers 列表解析(stats 行) + +`workers: [http://10.52.96.143:9867: running=0, ...]` 格式不同: +- 用 `,` 分割每个 entry +- 每个 entry 格式:`http://IP:PORT: running=N` +- 注意 URL 后面跟的是 `: running=`(带空格),不是 Go map 的 `:val` + +--- + +## 时间戳解析 + +日志时间戳格式:`YYYY/MM/DD HH:MM:SS` + +提取正则:`(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2})` + +用于: +- 确定日志时间跨度 +- 按时间分窗口(5s、1min 等) +- 按 quartile 分段统计趋势 diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/report_templates.md b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/report_templates.md new file mode 100644 index 00000000000..ebca39be2c4 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/references/report_templates.md @@ -0,0 +1,201 @@ +# 报告输出模板 + +本文件包含 cache 命中率分析报告的终端输出模板和详细数据导出模板。 + +--- + +## 终端概览报告模板 + +``` +## Cache Hit Rate Analysis Report +**File**: | **Lines**: N | **Span**: ~ () + +### 1. Prefix Hit Ratio (KV Cache 内容复用度) + 累计平均: XX.X% (被选中 worker) + 分布: + 0-20% ██░░░░░░░░░░░░░░░░░░ X% (N=xxx) + 20-40% ███░░░░░░░░░░░░░░░░░ X% (N=xxx) + 40-60% █████░░░░░░░░░░░░░░░ X% (N=xxx) + 60-80% ████████████░░░░░░░░ X% (N=xxx) + 80-100% ████████████████████ X% (N=xxx) + 冷启动率: X.X% + 趋势: Q1=X% → Q2=X% → Q3=X% → Q4=X% ↑/↓/→ + + Prefix Hit Ratio (5s 窗口): + 100%| ····················· + 80%| ····· ··· + 60%| ····· + 40%| ····· + 20%| ······ + 0%|···· + +---+---+---+---+---+---+---+---+---+---→ time + 18:25 18:26 18:27 18:28 18:29 18:30 + +### 2. Session Hit Rate (请求级路由粘性) + 累计: XX.X% (hits=N / total=N) + 覆盖率: X.X% 的推理请求带 session_id + 趋势: Q1=X% → Q2=X% → Q3=X% → Q4=X% + + Session Hit Rate (5s 窗口): + 100%| ···················· + 80%| ·········· + 60%| ··········· + 40%| + 20%| + 0%|······· + +---+---+---+---+---+---+---+---+---+---→ time + +### 3. 
Per-Worker Cache Stats + ┌───────────────────────────┬──────────┬──────────┬─────────────────┐ + │ Prefill Worker │ Selected │ Select % │ Avg Hit(Select) │ + ├───────────────────────────┼──────────┼──────────┼─────────────────┤ + │ http://10.52.95.17:9263 │ 1,234 │ 15.2% │ 82% │ + │ http://10.52.96.143:9867 │ 890 │ 11.0% │ 74% │ + │ ... │ ... │ ... │ ... │ + └───────────────────────────┴──────────┴──────────┴─────────────────┘ + +### 4. Scheduling Strategy + cache_aware_scoring: N (X%) | fallback: N (X%) + fallback reasons: load_imbalanced=N, tokenize_failed=N, not_initialized=N + 非最优命中选择: X% (负载均衡优先于命中率的比例) + +### 5. Diagnosis + ✅/⚠/❌ <综合诊断> + +### 图表说明(Legend) + - Unicode 柱状图:每个区间的请求占比,条越长占比越高 + - ASCII 折线图:横轴是时间窗口,纵轴是命中率(0-100%) + - Q1→Q4 趋势:按时间四等分后的均值变化(↑/↓/→) + +📄 详细数据见: + - 报告文件: /abs/path/to/skill_output/stat-cache-hitrate//cache_hitrate_report_.md + URI: file:///abs/path/to/skill_output/stat-cache-hitrate//cache_hitrate_report_.md + - 窗口明细: /abs/path/to/skill_output/stat-cache-hitrate//details/per_window_data.md + URI: file:///abs/path/to/skill_output/stat-cache-hitrate//details/per_window_data.md + - Session 命中详情: /abs/path/to/skill_output/stat-cache-hitrate//details/session_hit_details.md + URI: file:///abs/path/to/skill_output/stat-cache-hitrate//details/session_hit_details.md + (含 prefill_urls、worker 切换前后 request_id,以及命中率突降 request_id) +``` + +--- + +## 格式规则 + +### Unicode 柱状图 + +- 总宽度 20 个字符 +- `█` 表示已填充部分,`░` 表示空白部分 +- 后跟百分比和绝对数量 + +``` +计算方法: +filled = round(percentage / 100 * 20) +bar = "█" * filled + "░" * (20 - filled) +output = f"{bar} {percentage}% (N={count})" +``` + +示例: +``` +████████████░░░░░░░░ 60% (N=1200) +██████████████████░░ 90% (N=1800) +██░░░░░░░░░░░░░░░░░░ 10% (N=200) +``` + +### ASCII 折线图 + +- Y 轴:0-100% 范围,6 行(0%, 20%, 40%, 60%, 80%, 100%) +- X 轴:时间,标注关键时间点 +- 数据点用 `·` 绘制 +- 坐标轴用 `|` `+` `─` `→` + +``` +时间粒度自动调整: +- 日志跨度 <30min → 5s 原始粒度 +- 日志跨度 <3h → 1min 粒度 +- 日志跨度 >3h → 5min 粒度 +``` + +图表宽度约 60 列。数据点太多时自动聚合到更粗的粒度。 + +### 表格 + 
+使用 Unicode box-drawing 字符: + +``` +┌ ─ ┬ ─ ┐ 顶部 +│ │ │ 数据行 +├ ─ ┼ ─ ┤ 分隔行 +│ │ │ 数据行 +└ ─ ┴ ─ ┘ 底部 +``` + +### 趋势箭头 + +- `↑` — 上升趋势(Q4 > Q1 + 10%) +- `↓` — 下降趋势(Q4 < Q1 - 10%) +- `→` — 稳定(变化 < 10%) + +--- + +## 详细数据导出模板 + +主报告:`skill_output/stat-cache-hitrate//cache_hitrate_report_.md` +每窗口明细:`skill_output/stat-cache-hitrate//details/per_window_data.md` + +### 主报告 + +```markdown +# Cache Hit Rate Detailed Report + +**Generated**: +**Source**: + +## 1. Per-Worker 完整统计 + +| Worker | Selected | Select % | Avg Hit (Selected) | Avg Hit (All) | Max Hit | +|--------|----------|----------|--------------------|----- ---------|---------| +| http://10.52.95.17:9263 | 1,234 | 15.2% | 82% | 68% | 100% | +| ... | ... | ... | ... | ... | ... | + +## 2. Fallback 明细 + +### 3.1 load imbalanced (N 次) +| Time | Loads | +|------|-------| +| 20:15:03 | map[...] | + +### 3.2 tokenize failed (N 次) +| Time | Error | +|------|-------| +| ... | ... | + +## 4. 非最优命中选择明细 + +| Time | Selected | Selected HR | Best Worker | Best HR | Load Diff | +|------|----------|-------------|-------------|---------|-----------| +| 20:15:10 | w1:9263 | 60% | w2:9867 | 85% | w1=5, w2=18 | +| ... | ... | ... | ... | ... | ... 
| +``` + +--- + +## --tail 快速查看模板 + +`--tail` 模式下只输出核心指标: + +``` +## Cache Hit Rate (Recent) +**File**: | **tail lines** | **Span**: ~ + + Prefix Hit Ratio: XX.X% (avg) | Cold start: X.X% + Session Hit Rate: XX.X% (hits=N/total=N) | Coverage: X.X% + Strategy: scoring N (X%) | fallback N (X%) + + Recent trend (1min buckets): + 100%| ····· + 80%| ····· + 60%|····· + +---+---+---+---+---→ + -5m -4m -3m -2m -1m + +``` diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/chart.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/chart.py new file mode 100644 index 00000000000..cc5534a757d --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/chart.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +""" +Chart — 终端可视化渲染工具 + +提供 sparkline 折线图、Unicode 柱状图、Markdown 表格的渲染函数。 +所有函数返回字符串(不直接打印),方便组装到报告中。 + +Python 3 stdlib only,零依赖。 +""" + + +# ════════════════════════════════════════════════════════════════ +# Sparkline 折线图 +# ════════════════════════════════════════════════════════════════ + +BLOCK_CHARS = " ▁▂▃▄▅▆▇█" + + +def render_sparkline( + records, value_field="value", bucket_field="bucket", title=None, y_label=None, y_range=None, width=60 +): + """渲染 8 级 Unicode sparkline 折线图。 + + Args: + records: dict 列表,每个 dict 包含 bucket_field 和 value_field + value_field: 数值字段名 + bucket_field: 时间桶字段名 + title: 图表标题 + y_label: Y 轴标签(如 '%') + y_range: Y 轴范围 (min, max) 元组,None 则自动 + width: 图表宽度(字符数) + + Returns: + str: 渲染后的图表文本 + """ + if not records: + return " (no data)" + + all_values = [] + for r in records: + v = r.get(value_field) + if v is not None: + all_values.append(float(v)) + + if not all_values: + return " (no numeric data)" + + # Y 轴范围 + if y_range: + y_min, y_max = y_range + else: + y_min = min(all_values) + y_max = max(all_values) + if y_max == y_min: + y_min = 0 if y_max > 0 else y_max - 1 + y_max = max(y_max, 1) + + y_span = y_max - y_min if y_max != y_min else 1 + + # 降采样 + n = len(records) + if 
n > width: + step = n / width + sampled = [] + for i in range(width): + start_idx = int(i * step) + end_idx = int((i + 1) * step) + chunk = records[start_idx:end_idx] + vals = [float(r.get(value_field, 0)) for r in chunk if r.get(value_field) is not None] + avg_record = { + bucket_field: chunk[0].get(bucket_field, ""), + value_field: sum(vals) / len(vals) if vals else 0, + } + sampled.append(avg_record) + records = sampled + + lines = [] + + # 标题行 + def fmt_val(v): + if abs(v) >= 1000: + return f"{v:.0f}" + elif abs(v) >= 10: + return f"{v:.1f}" + return f"{v:.2f}" + + header_parts = [] + if title: + header_parts.append(title) + header_parts.append(f"min={fmt_val(min(all_values))}") + header_parts.append(f"max={fmt_val(max(all_values))}") + if y_label: + header_parts.append(f"({y_label})") + lines.append(" " + " ".join(header_parts)) + + # Sparkline 字符 + spark_chars = [] + for r in records: + v = r.get(value_field) + if v is None: + spark_chars.append(" ") + continue + v = float(v) + normalized = (v - y_min) / y_span + level = max(0, min(8, round(normalized * 8))) + spark_chars.append(BLOCK_CHARS[level]) + lines.append(" " + "".join(spark_chars)) + + # X 轴标签 + data_width = len(records) + if data_width > 0: + + def short_bucket(r): + b = str(r.get(bucket_field, "")) + if " " in b: + b = b.split(" ")[-1] + return b[:5] if len(b) >= 5 else b + + lbl_width = 6 + max_labels = max(1, data_width // lbl_width) + n_records = len(records) + + if n_records <= 2: + indices = list(range(n_records)) + elif n_records <= max_labels: + indices = [0, n_records - 1] + else: + n_labels = min(5, max(2, max_labels)) + indices = [int(i * (n_records - 1) / (n_labels - 1)) for i in range(n_labels)] + + label_line = [" "] * (data_width + lbl_width + 2) + last_end = -1 + for idx in indices: + lbl = short_bucket(records[idx]) + pos = idx + if pos < last_end: + continue + for ci, c in enumerate(lbl): + p = pos + ci + if p < len(label_line): + label_line[p] = c + last_end = pos + len(lbl) + 1 + 
lines.append(" " + "".join(label_line).rstrip()) + + return "\n".join(lines) + + +# ════════════════════════════════════════════════════════════════ +# Unicode 柱状图 +# ════════════════════════════════════════════════════════════════ + + +def render_bar(data, bar_width=20, show_count=False): + """渲染 Unicode 柱状图。 + + Args: + data: dict 列表,每个 dict 包含 label, value(百分比 0-100), 可选 count + bar_width: 柱状图宽度(字符数) + show_count: 是否显示绝对数量 + + Returns: + str: 渲染后的图表文本 + """ + if not data: + return " (no data)" + + max_label_len = max(len(str(d.get("label", ""))) for d in data) + max_label_len = max(max_label_len, 4) + + lines = [] + for d in data: + label = str(d.get("label", "")) + value = float(d.get("value", 0)) + count = d.get("count") + + filled = round(value / 100 * bar_width) if value > 0 else 0 + filled = max(1, filled) if value > 0 else 0 + filled = min(bar_width, filled) + empty = bar_width - filled + bar = "█" * filled + "░" * empty + + line = f" {label:<{max_label_len}} {bar} {value:>5.1f}%" + if show_count and count is not None: + line += f" (N={count})" + lines.append(line) + + return "\n".join(lines) + + +# ════════════════════════════════════════════════════════════════ +# Markdown 表格 +# ════════════════════════════════════════════════════════════════ + + +def render_table(data, columns=None, right_align=None): + """渲染 Markdown 表格。 + + Args: + data: dict 列表 + columns: 列名列表,None 则用第一条记录的所有 key + right_align: 右对齐的列名集合 + + Returns: + str: 渲染后的表格文本 + """ + if not data: + return " (no data)" + + if columns is None: + columns = list(data[0].keys()) + if right_align is None: + right_align = set() + + # 计算列宽 + col_widths = {} + for col in columns: + col_widths[col] = len(col) + for row in data: + val = str(row.get(col, "")) + col_widths[col] = max(col_widths[col], len(val)) + + # 表头 + header_parts = [] + sep_parts = [] + for col in columns: + w = col_widths[col] + if col in right_align: + header_parts.append(f" {col:>{w}} ") + else: + header_parts.append(f" {col:<{w}} ") 
+ sep_parts.append("-" * (w + 2)) + + lines = [] + lines.append("|" + "|".join(header_parts) + "|") + lines.append("|" + "|".join(sep_parts) + "|") + + # 数据行 + for row in data: + row_parts = [] + for col in columns: + val = str(row.get(col, "")) + w = col_widths[col] + if col in right_align: + row_parts.append(f" {val:>{w}} ") + else: + row_parts.append(f" {val:<{w}} ") + lines.append("|" + "|".join(row_parts) + "|") + + return "\n".join(lines) diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/log_parser.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/log_parser.py new file mode 100644 index 00000000000..bb31235f3fa --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/log_parser.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python3 +""" +Router Log Parser — FastDeploy Go Router 日志解析原语 + +支持两种调用方式: +1. 作为模块导入:from log_parser import parse_cache_strategy_line, parse_stats_line +2. 作为 CLI 工具:grep 'pattern' logfile | python3 log_parser.py parse-cache-strategy + +Python 3 stdlib only,零依赖。 +""" + +import argparse +import json +import re +import sys +from datetime import datetime + +# ════════════════════════════════════════════════════════════════ +# 通用解析原语 +# ════════════════════════════════════════════════════════════════ + + +def parse_go_map(s): + """解析 Go fmt.Sprintf('%v', map) 输出:map[key1:val1 key2:val2 ...] + + 处理 URL 中冒号与 Go map key-value 分隔符的冲突(从最后一个冒号分割)。 + 空 map 'map[]' 返回空 dict。 + """ + inner_match = re.search(r"map\[(.*?)\]", s) + if not inner_match: + return {} + inner = inner_match.group(1).strip() + if not inner: + return {} + result = {} + for token in inner.split(): + idx = token.rfind(":") + if idx > 0: + key = token[:idx] + val_str = token[idx + 1 :] + try: + result[key] = int(val_str) if "." 
not in val_str else float(val_str) + except ValueError: + result[key] = val_str + return result + + +# 时间戳:YYYY/MM/DD HH:MM:SS +TS_RE = re.compile(r"(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2})") + +# ts_ms:2025-01-15 18:25:33.123 +TS_MS_RE = re.compile(r"ts_ms=(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)") + + +def extract_ts(line): + """从日志行提取 YYYY/MM/DD HH:MM:SS 时间戳。""" + m = TS_RE.search(line) + return m.group(1) if m else None + + +def parse_ts(ts_str): + """将 YYYY/MM/DD HH:MM:SS 时间戳解析为 datetime。""" + return datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S") + + +# ════════════════════════════════════════════════════════════════ +# 时间范围过滤 +# ════════════════════════════════════════════════════════════════ + +import os +import subprocess +import tempfile + +_FULL_DT_RE = re.compile(r"^(\d{4})[/-](\d{1,2})[/-](\d{1,2})\s+(\d{1,2}):(\d{2})(?::(\d{2}))?$") +_DATE_ONLY_RE = re.compile(r"^(\d{4})[/-](\d{1,2})[/-](\d{1,2})$") +_SHORT_DATE_RE = re.compile(r"^(\d{1,2})[/-](\d{1,2})(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?$") +_TIME_ONLY_RE = re.compile(r"^(\d{1,2}):(\d{2})(?::(\d{2}))?$") + +# 相对时间正则:支持 30m、30分钟、2h、2小时、1d、1天、last 30m、最后30分钟 +_RELATIVE_TIME_RE = re.compile(r"^(?:last|最后)?\s*(\d+)\s*(m|分钟|mins?|h|小时|hours?|d|天|days?)$", re.IGNORECASE) + + +def _parse_relative_time(time_str): + """解析相对时间字符串,返回 timedelta。 + + 支持格式:30m、30分钟、2h、2小时、1d、1天、last 30m、最后30分钟 + """ + m = _RELATIVE_TIME_RE.match(time_str.strip()) + if not m: + return None + + value = int(m.group(1)) + unit = m.group(2).lower() + + if unit.startswith("m") and "in" not in unit: # m, min, mins + from datetime import timedelta + + return timedelta(minutes=value) + elif unit.startswith("h"): # h, hour, hours + from datetime import timedelta + + return timedelta(hours=value) + else: # d, day, days + from datetime import timedelta + + return timedelta(days=value) + + +def _relative_to_absolute(time_str, log_file, is_end=False): + """将相对时间转换为绝对时间,基于日志文件的时间边界。 + + - start: 从日志末行时间往前推 + - end: 直接使用日志末行时间(或当前时间) + """ + 
relative_delta = _parse_relative_time(time_str) + if not relative_delta: + return None + + # 获取日志文件末行时间作为基准 + boundary_ts = _get_log_boundary_ts(log_file, "last") + if not boundary_ts: + return None + + # 解析为 datetime + dt = datetime.strptime(boundary_ts, "%Y/%m/%d %H:%M:%S") + + if is_end: + # end 时间:直接使用日志末行时间 + return boundary_ts + else: + # start 时间:末行时间减去 duration + + abs_time = dt - relative_delta + return abs_time.strftime("%Y/%m/%d %H:%M:%S") + + +def _get_log_boundary_ts(log_file, which="first"): + """从日志文件首行或末行提取时间戳。""" + cmd = "head" if which == "first" else "tail" + try: + r = subprocess.run([cmd, "-1", log_file], capture_output=True, text=True, timeout=5) + return extract_ts(r.stdout) if r.returncode == 0 else None + except (subprocess.TimeoutExpired, FileNotFoundError): + return None + + +def complete_time_arg(time_str, log_file, is_end=False): + """解析灵活时间输入,补全缺失部分。 + + 支持格式: + 'YYYY/MM/DD HH:MM:SS', 'YYYY-MM-DD HH:MM:SS', 'YYYY/MM/DD', + 'MM/DD', 'MM/DD HH:MM', 'HH:MM:SS', 'HH:MM' + 相对时间:30m、2h、1d、最后30分钟 等(从日志末行时间算起) + + 补全规则: + - 缺年份:从日志首行取 + - 缺日期:从日志末行取 + - 缺时间:start→00:00:00, end→23:59:59 + - 相对时间:start 从日志末行往前推,end 直接用日志末行时间 + + Returns: 'YYYY/MM/DD HH:MM:SS' 格式字符串 + """ + if time_str is None: + return None + time_str = time_str.strip() + + # Case 0: 相对时间处理(如 "30m"、"最后30分钟"、"2h") + # 从日志文件末行时间开始算起 + relative_result = _relative_to_absolute(time_str, log_file, is_end) + if relative_result: + return relative_result + + # Case 1: 完整日期时间 + m = _FULL_DT_RE.match(time_str) + if m: + y, mo, d = m.group(1), m.group(2).zfill(2), m.group(3).zfill(2) + h, mi = m.group(4).zfill(2), m.group(5) + s = (m.group(6) or "00").zfill(2) + return f"{y}/{mo}/{d} {h}:{mi}:{s}" + + # Case 2: 仅日期 YYYY/MM/DD + m = _DATE_ONLY_RE.match(time_str) + if m: + y, mo, d = m.group(1), m.group(2).zfill(2), m.group(3).zfill(2) + t = "23:59:59" if is_end else "00:00:00" + return f"{y}/{mo}/{d} {t}" + + # Case 3: 短日期 MM/DD 或 MM/DD HH:MM[:SS] + m = _SHORT_DATE_RE.match(time_str) + if m: 
+ mo, d = m.group(1).zfill(2), m.group(2).zfill(2) + ts = _get_log_boundary_ts(log_file, "first") + year = ts[:4] if ts else str(datetime.now().year) + if m.group(3): # 有时间部分 + h, mi = m.group(3).zfill(2), m.group(4) + s = (m.group(5) or "00").zfill(2) + return f"{year}/{mo}/{d} {h}:{mi}:{s}" + t = "23:59:59" if is_end else "00:00:00" + return f"{year}/{mo}/{d} {t}" + + # Case 4: 仅时间 HH:MM[:SS] + m = _TIME_ONLY_RE.match(time_str) + if m: + h, mi = m.group(1).zfill(2), m.group(2) + s = (m.group(3) or "00").zfill(2) + ts = _get_log_boundary_ts(log_file, "last") + date_part = ts[:10] if ts else f"{datetime.now().year}/01/01" + return f"{date_part} {h}:{mi}:{s}" + + # Fallback: 原样返回 + return time_str + + +def filter_file_by_time_range(log_file, start_str=None, end_str=None): + """用 awk 按时间范围预过滤日志文件。 + + 时间戳 YYYY/MM/DD HH:MM:SS 天然字典序可比,直接用 awk 字符串比较。 + 无时间戳的行(如 panic 堆栈续行)保留。 + + Args: + log_file: 原日志文件路径 + start_str: 起始时间 'YYYY/MM/DD HH:MM:SS'(含),或 None + end_str: 结束时间 'YYYY/MM/DD HH:MM:SS'(含),或 None + + Returns: + tuple: (file_path, is_temp) — is_temp=True 时调用方负责删除 + """ + if not start_str and not end_str: + return (log_file, False) + + tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, prefix="router_filtered_") + tmp.close() + + awk_script = r"""{ + ts = "" + if (match($0, /[0-9]{4}\/[0-9]{2}\/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/)) { + ts = substr($0, RSTART, RLENGTH) + } + if (ts == "") { print; next } + if ((start == "" || ts >= start) && (end == "" || ts <= end)) print + }""" + + cmd = ["awk", "-v", f'start={start_str or ""}', "-v", f'end={end_str or ""}', awk_script, log_file] + + try: + with open(tmp.name, "w") as outf: + result = subprocess.run(cmd, stdout=outf, stderr=subprocess.PIPE, text=True, timeout=120) + if result.returncode != 0: + os.unlink(tmp.name) + return (log_file, False) + except (subprocess.TimeoutExpired, OSError): + if os.path.exists(tmp.name): + os.unlink(tmp.name) + return (log_file, False) + + return (tmp.name, True) 
+ + +# Context tag:[session_id:...], [request_id:...], [trace_id:...], [req_id:...] +TAG_RE = re.compile(r"\[(session_id|request_id|trace_id|req_id):([^\]]+)\]") + + +def extract_tags(line): + """从日志行提取 context tag。""" + return {m.group(1): m.group(2) for m in TAG_RE.finditer(line)} + + +# ════════════════════════════════════════════════════════════════ +# Cache-Aware 策略行解析(类别 A) +# ════════════════════════════════════════════════════════════════ + +URL_RE = r"(?:https?://)?[A-Za-z0-9.-]+(?::\d+)?" +STRATEGY_RE = re.compile(r"final strategy:\s*(\w+)") +SELECTED_RE = re.compile(rf"selected=({URL_RE})(?:,|\s|$)") +REASON_RE = re.compile(r"reason:\s*(.+?)(?:,\s*loads=|\.?\s*ts_ms=|$)") + + +def parse_cache_strategy_line(line): + """解析 cache-aware prefill 策略行。 + + 输入示例: + [INFO] 2026/03/30 20:16:57 logger.go:79: ... cache-aware prefill: final strategy: + cache_aware_scoring, selected=http://10.52.95.17:9263, loads=map[...], hitRatios=map[...] + + 返回 dict 或 None(如果不是策略行)。 + """ + sm = STRATEGY_RE.search(line) + if not sm: + return None + + ts = extract_ts(line) + strategy = sm.group(1) + record = {"ts": ts or "", "strategy": strategy} + + # selected worker URL + sel_m = SELECTED_RE.search(line) + if sel_m: + record["selected"] = sel_m.group(1) + + # reason(仅 process_tokens fallback) + reason_m = REASON_RE.search(line) + if reason_m and strategy == "process_tokens": + record["reason"] = reason_m.group(1).strip() + + # hitRatios map + hr_match = re.search(r"hitRatios=(map\[.*?\])", line) + if hr_match: + hit_ratios = parse_go_map(hr_match.group(1)) + record["hitRatios"] = hit_ratios + if "selected" in record: + record["selected_hitRatio"] = hit_ratios.get(record["selected"], 0) + else: + record["hitRatios"] = {} + if "selected" in record: + record["selected_hitRatio"] = 0 + + # loads map + loads_match = re.search(r"loads=(map\[.*?\])", line) + if loads_match: + record["loads"] = parse_go_map(loads_match.group(1)) + + # ts_ms(精确到毫秒的调度时间戳) + ts_ms_m = TS_MS_RE.search(line) 
+ if ts_ms_m: + record["ts_ms"] = ts_ms_m.group(1) + + # context tags + tags = extract_tags(line) + if tags: + record["tags"] = tags + + return record + + +# ════════════════════════════════════════════════════════════════ +# Stats 行解析(类别 B) +# ════════════════════════════════════════════════════════════════ + +TOTAL_RUNNING_RE = re.compile(r"total_running=(\d+)") +WORKER_RUNNING_RE = re.compile(rf"({URL_RE}): running=(\d+)") +CACHE_HR_RE = re.compile(r"cache_hit_rate=([\d.]+)%\s*\(hits=(\d+)/total=(\d+)\)") + + +def parse_stats_line(line): + """解析 [stats] 统计行。 + + 输入示例: + [INFO] 2026/03/30 20:14:38 logger.go:79: [stats] total_running=14, + workers: [...], cache_hit_rate=0.00% (hits=0/total=7) + + 注意:hits 和 total 是 per-interval 的(每 5s 重置),累计值必须 sum 所有行。 + + 返回 dict 或 None(如果不是 stats 行)。 + """ + if "[stats]" not in line: + return None + + ts = extract_ts(line) + record = {"ts": ts or ""} + + # total_running + tr_m = TOTAL_RUNNING_RE.search(line) + if tr_m: + record["total_running"] = int(tr_m.group(1)) + + # per-worker running + workers = {} + for wm in WORKER_RUNNING_RE.finditer(line): + workers[wm.group(1)] = int(wm.group(2)) + record["workers"] = workers + + # cache_hit_rate + hits/total + chr_m = CACHE_HR_RE.search(line) + if chr_m: + record["cache_hit_rate"] = float(chr_m.group(1)) + record["hits"] = int(chr_m.group(2)) + record["total"] = int(chr_m.group(3)) + + return record + + +# ════════════════════════════════════════════════════════════════ +# CLI 入口 +# ════════════════════════════════════════════════════════════════ + + +def _cli_parse_stream(parse_fn): + """通用 CLI 流式解析:从 stdin 读入日志行,输出 JSON Lines 到 stdout。""" + parsed = 0 + skipped = 0 + for line in sys.stdin: + line = line.rstrip("\n") + record = parse_fn(line) + if record: + print(json.dumps(record, ensure_ascii=False)) + parsed += 1 + else: + skipped += 1 + print(f"Parsed {parsed} lines, skipped {skipped}", file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser( + 
description="FastDeploy Go Router Log Parser", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + sub = parser.add_subparsers(dest="command") + + sub.add_parser("parse-cache-strategy", help="解析 cache-aware 策略行 → JSON Lines") + sub.add_parser("parse-stats", help="解析 [stats] 统计行 → JSON Lines") + + args = parser.parse_args() + + if args.command == "parse-cache-strategy": + _cli_parse_stream(parse_cache_strategy_line) + elif args.command == "parse-stats": + _cli_parse_stream(parse_stats_line) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/session_analysis.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/session_analysis.py new file mode 100644 index 00000000000..7de5b7f6042 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/session_analysis.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +Session 维度分析:聚合每个 session 的命中率、worker 切换与突降请求。 +""" + +from collections import defaultdict + + +def compute_session_details(strategies, strip_scheme): + """按 session_id(优先)或 trace_id(兜底)统计命中详情。""" + + def _req_id_from_tags(tags, fallback): + return tags.get("request_id") or tags.get("req_id") or tags.get("trace_id") or fallback + + session_records = defaultdict(list) + for idx, rec in enumerate(strategies): + if rec.get("strategy") != "cache_aware_scoring": + continue + tags = rec.get("tags", {}) or {} + session_id = tags.get("session_id") + trace_id = tags.get("trace_id") + identity = session_id or trace_id + if not identity: + continue + session_records[identity].append((idx, rec)) + + rows = [] + for identity, items in session_records.items(): + items.sort(key=lambda x: (x[1].get("ts_ms", ""), x[1].get("ts", ""), x[0])) + recs = [r for _, r in items] + hits = [int(r.get("selected_hitRatio", 0)) for r in recs] + if not hits: + continue + + non_first = hits[1:] + avg_excl_first = 
round(sum(non_first) / len(non_first), 1) if non_first else "-" + workers = {r.get("selected", "") for r in recs if r.get("selected")} + + prefill_urls = [] + for r in recs: + u = r.get("selected", "") + if u and u not in prefill_urls: + prefill_urls.append(u) + + switch_events = [] + sharp_drop_req_ids = [] + for i in range(1, len(recs)): + prev_r = recs[i - 1] + curr_r = recs[i] + prev_url = prev_r.get("selected", "") + curr_url = curr_r.get("selected", "") + prev_tags = prev_r.get("tags", {}) or {} + curr_tags = curr_r.get("tags", {}) or {} + prev_req = _req_id_from_tags(prev_tags, f"idx#{i}") + curr_req = _req_id_from_tags(curr_tags, f"idx#{i+1}") + + if prev_url and curr_url and prev_url != curr_url: + switch_events.append(f"{prev_req}->{curr_req} ({strip_scheme(prev_url)}→{strip_scheme(curr_url)})") + + prev_hit = int(prev_r.get("selected_hitRatio", 0)) + curr_hit = int(curr_r.get("selected_hitRatio", 0)) + if curr_hit - prev_hit <= -30: + sharp_drop_req_ids.append(f"{curr_req} ({prev_hit}%→{curr_hit}%)") + + rows.append( + { + "session": identity, + "id_type": "session_id" if recs[0].get("tags", {}).get("session_id") else "trace_id", + "first_ts": recs[0].get("ts", "-"), + "last_ts": recs[-1].get("ts", "-"), + "req_count": len(hits), + "first_hit": f"{hits[0]}%", + "avg_hit(excl_first)": f"{avg_excl_first}%" if avg_excl_first != "-" else "-", + "max_hit": f"{max(hits)}%", + "min_hit": f"{min(hits)}%", + "all_hits": ", ".join(f"{h}%" for h in hits), + "sticky": "yes" if len(workers) <= 1 else "no", + "unique_workers": len(workers), + "prefill_url_count": len(prefill_urls), + "prefill_urls": " | ".join(strip_scheme(u) for u in prefill_urls), + "switch_req_pairs": " ; ".join(switch_events) if switch_events else "-", + "sharp_drop_request_ids": " ; ".join(sharp_drop_req_ids) if sharp_drop_req_ids else "-", + } + ) + + rows.sort(key=lambda r: (r["req_count"], r["session"]), reverse=True) + return rows + + +def summarize_session_details(rows): + """生成 session 
级摘要指标。""" + if not rows: + return { + "total_sessions": 0, + "multi_req": 0, + "single_req": 0, + "sticky_multi": 0, + "non_sticky_multi": 0, + "non_first_avg": 0, + "non_first_total": 0, + } + + multi_req_rows = [r for r in rows if r["req_count"] > 1] + sticky_multi = [r for r in multi_req_rows if r["sticky"] == "yes"] + non_sticky_multi = [r for r in multi_req_rows if r["sticky"] == "no"] + + non_first_vals = [] + for r in rows: + hit_tokens = [h.strip().rstrip("%") for h in r["all_hits"].split(",") if h.strip()] + nums = [int(x) for x in hit_tokens if x.isdigit()] + if len(nums) > 1: + non_first_vals.extend(nums[1:]) + + return { + "total_sessions": len(rows), + "multi_req": len(multi_req_rows), + "single_req": len(rows) - len(multi_req_rows), + "sticky_multi": len(sticky_multi), + "non_sticky_multi": len(non_sticky_multi), + "non_first_avg": round(sum(non_first_vals) / len(non_first_vals), 2) if non_first_vals else 0, + "non_first_total": len(non_first_vals), + } diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py new file mode 100644 index 00000000000..7c6e0d40ecf --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stat_cache_hitrate.py @@ -0,0 +1,1111 @@ +#!/usr/bin/env python3 +""" +stat_cache_hitrate — FastDeploy Go Router Cache 命中率统计工具 + +统计三层 cache 命中率指标: + 1. Prefix Hit Ratio — KV Cache 内容复用度 + 2. Session Hit Rate — 请求级路由粘性 + 3. 
Per-Worker Stats — 各 worker 缓存利用排名 + +用法: + python3 stat_cache_hitrate.py [--tail N|Nk|Nw] [--output DIR] +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from collections import defaultdict +from datetime import datetime +from pathlib import Path +from urllib.parse import quote + +# 同目录模块导入 +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from chart import render_bar, render_sparkline, render_table +from log_parser import ( + complete_time_arg, + filter_file_by_time_range, + parse_cache_strategy_line, + parse_stats_line, + parse_ts, +) +from session_analysis import compute_session_details, summarize_session_details +from stats import compute_statistics, count_by, time_bucket +from window_utils import merge_blank_window_rows + + +def _strip_scheme(url): + return re.sub(r"^https?://", "", url) + + +def _build_path_links(path): + """返回绝对路径与 file URI,兼容空格/中文路径。""" + abs_path = str(Path(path).resolve()) + file_uri = "file://" + quote(abs_path, safe="/:-._~") + return abs_path, file_uri + + +def _format_half_running(total_running): + """将 stats.total_running 归一化为 prefill 口径(decode+prefill 合计 / 2)。""" + normalized = total_running / 2 + if float(normalized).is_integer(): + return str(int(normalized)) + return f"{normalized:.1f}" + + +def _render_scrollable_tsv(data, columns): + """渲染单行 TSV 文本,适合在 Markdown 查看器里横向滚动。""" + if not data: + return "```tsv\n(no data)\n```" + + def _escape(v): + return str(v).replace("\t", " ").replace("\n", "\\n") + + lines = ["\t".join(columns)] + for row in data: + lines.append("\t".join(_escape(row.get(col, "")) for col in columns)) + return "```tsv\n" + "\n".join(lines) + "\n```" + + +def _render_markdown_table(data, columns, align_right=None): + """渲染 Markdown 表格,便于在终端/文档中直接阅读。""" + if not data: + return "_(no data)_" + + align_right = align_right or set() + + def _escape_md(v): + return str(v).replace("\n", "
").replace("|", "\\|") + + matrix = [] + for row in data: + matrix.append([_escape_md(row.get(c, "")) for c in columns]) + + widths = [] + for i, col in enumerate(columns): + max_cell = max((len(r[i]) for r in matrix), default=0) + widths.append(max(len(col), max_cell)) + + def _format_cell(text, width, right=False): + return text.rjust(width) if right else text.ljust(width) + + header_cells = [_format_cell(c, widths[i]) for i, c in enumerate(columns)] + header = "| " + " | ".join(header_cells) + " |" + + align_cells = [] + for i, c in enumerate(columns): + w = max(widths[i], 3) + if c in align_right: + align_cells.append("-" * (w - 1) + ":") + else: + align_cells.append(":" + "-" * (w - 1)) + align = "| " + " | ".join(align_cells) + " |" + + rows = [] + for row_cells in matrix: + padded = [_format_cell(cell, widths[i], right=(columns[i] in align_right)) for i, cell in enumerate(row_cells)] + rows.append("| " + " | ".join(padded) + " |") + return "\n".join([header, align] + rows) + + +def _truncate_text(v, limit=72): + s = str(v) + return s if len(s) <= limit else s[: limit - 1] + "…" + + +def _seq_label(n): + return f"S{n:03d}" + + +def _extract_seq_num(seq_id): + return int(str(seq_id).lstrip("S") or 0) + + +def _summarize_id_type_ranges(rows_with_seq): + """基于序号连续区间汇总 id_type,便于在报告开头快速识别口径。""" + if not rows_with_seq: + return [] + + ranges = [] + current_type = rows_with_seq[0].get("id_type", "session_id") + start_id = rows_with_seq[0]["id"] + end_id = start_id + start_ts = rows_with_seq[0].get("first_ts", "-") + end_ts = rows_with_seq[0].get("last_ts", "-") + + for row in rows_with_seq[1:]: + row_type = row.get("id_type", "session_id") + row_id = row["id"] + if row_type == current_type and _extract_seq_num(row_id) == _extract_seq_num(end_id) + 1: + end_id = row_id + end_ts = row.get("last_ts", end_ts) + continue + + ranges.append((start_id, end_id, current_type, start_ts, end_ts)) + current_type = row_type + start_id = row_id + end_id = row_id + start_ts = 
row.get("first_ts", "-") + end_ts = row.get("last_ts", "-") + + ranges.append((start_id, end_id, current_type, start_ts, end_ts)) + return ranges + + +# ════════════════════════════════════════════════════════════════ +# Phase 1: 日志读取 +# ════════════════════════════════════════════════════════════════ + + +def count_lines(filepath): + """快速统计文件行数。""" + result = subprocess.run(["wc", "-l", filepath], capture_output=True, text=True) + if result.returncode == 0: + return int(result.stdout.strip().split()[0]) + return 0 + + +def read_lines(filepath, tail=None): + """读取日志文件,支持 tail 模式。""" + if tail is not None: + # 按行数 tail + n = int(tail) + result = subprocess.run(["tail", "-n", str(n), filepath], capture_output=True, text=True) + return result.stdout.splitlines() if result.returncode == 0 else [] + return _read_file_lines(filepath) + + +def _read_file_lines(filepath): + with open(filepath, "r", errors="replace") as f: + return f.readlines() + + +# ════════════════════════════════════════════════════════════════ +# Phase 2: 日志提取与解析 +# ════════════════════════════════════════════════════════════════ + +STRATEGY_PATTERN = "cache-aware prefill: final strategy:" +STATS_PATTERN = "[stats]" +INFERENCE_PATTERNS = ["] [POST] /v1/chat/completions ", "] [POST] /v1/completions "] + + +def _shell_quote(s): + """Shell 引号转义,安全处理含空格、括号、单引号的路径。""" + return "'" + s.replace("'", "'\\''") + "'" + + +def grep_and_parse(filepath, grep_pattern, parse_cmd, tail=None): + """大文件模式:grep 过滤 + log_parser.py CLI 管道解析。""" + parser_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "log_parser.py") + + if tail: + grep_cmd = f"tail -n {tail} {_shell_quote(filepath)} | grep -F {_shell_quote(grep_pattern)} | python3 {_shell_quote(parser_path)} {parse_cmd}" + else: + grep_cmd = f"grep -F {_shell_quote(grep_pattern)} {_shell_quote(filepath)} | python3 {_shell_quote(parser_path)} {parse_cmd}" + + result = subprocess.run(grep_cmd, shell=True, capture_output=True, text=True) + records = [] + 
for line in result.stdout.strip().splitlines(): + if line: + try: + records.append(json.loads(line)) + except json.JSONDecodeError: + pass + return records + + +def grep_count(filepath, grep_pattern, tail=None): + """大文件模式:grep 计数。""" + if tail: + cmd = f"tail -n {tail} {_shell_quote(filepath)} | grep -cE {_shell_quote(grep_pattern)}" + else: + cmd = f"grep -cE {_shell_quote(grep_pattern)} {_shell_quote(filepath)}" + + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + try: + return int(result.stdout.strip()) + except ValueError: + return 0 + + +def extract_data(filepath, tail=None): + """提取并解析日志数据,根据文件大小自动选择策略。""" + total = count_lines(filepath) + + if total < 5000: + # 小文件:内存中处理 + lines = read_lines(filepath, tail) + strategy_recs = [r for l in lines if (r := parse_cache_strategy_line(l)) is not None] + stats_recs = [r for l in lines if (r := parse_stats_line(l)) is not None] + inference_count = sum(1 for l in lines if any(p in l for p in INFERENCE_PATTERNS)) + return strategy_recs, stats_recs, inference_count, len(lines) + else: + # 大文件:grep + subprocess + strategy_recs = grep_and_parse(filepath, STRATEGY_PATTERN, "parse-cache-strategy", tail) + stats_recs = grep_and_parse(filepath, STATS_PATTERN, "parse-stats", tail) + inference_count = grep_count(filepath, r"\] \[POST\] /v1/chat/completions |\] \[POST\] /v1/completions ", tail) + line_count = int(tail) if tail is not None else total + return strategy_recs, stats_recs, inference_count, line_count + + +# ════════════════════════════════════════════════════════════════ +# Phase 3: 三层指标计算 +# ════════════════════════════════════════════════════════════════ + + +def compute_prefix_hitrate(strategies): + """计算第一层:Prefix Hit Ratio。""" + scoring_recs = [r for r in strategies if r.get("strategy") == "cache_aware_scoring"] + if not scoring_recs: + return {"mean": 0, "stats": None, "distribution": [], "cold_start_rate": 0, "trend": [], "count": 0} + + hit_ratios = [r.get("selected_hitRatio", 0) for 
r in scoring_recs] + cold_starts = sum(1 for r in scoring_recs if not r.get("hitRatios")) + + stats = compute_statistics(hit_ratios, distribution_spec="0-20,20-40,40-60,60-80,80-100") + trend = time_bucket(scoring_recs, "auto", [("selected_hitRatio", "mean")]) + + return { + "mean": stats["mean"], + "stats": stats, + "distribution": stats.get("distribution", []), + "cold_start_rate": round(cold_starts / len(scoring_recs) * 100, 1) if scoring_recs else 0, + "trend": trend, + "count": len(scoring_recs), + } + + +def compute_session_hitrate(stats_recs, inference_count): + """计算第二层:Session Hit Rate。""" + total_hits = sum(r.get("hits", 0) for r in stats_recs) + total_total = sum(r.get("total", 0) for r in stats_recs) + + session_hr = round(total_hits / total_total * 100, 1) if total_total else 0 + + # 趋势:每个窗口的 hits/total + trend = time_bucket(stats_recs, "auto", [("hits", "sum"), ("total", "sum")]) + for t in trend: + h = t.get("hits_sum", 0) + tot = t.get("total_sum", 0) + t["value"] = round(h / tot * 100, 1) if tot else 0 + + return { + "rate": session_hr, + "hits": total_hits, + "total": total_total, + "inference_count": inference_count, + "trend": trend, + } + + +def compute_per_worker_stats(strategies): + """计算第三层:Per-Worker Cache Stats。""" + scoring_recs = [r for r in strategies if r.get("strategy") == "cache_aware_scoring"] + if not scoring_recs: + return [] + + worker_data = defaultdict(lambda: {"selected_count": 0, "hit_ratios": []}) + total_scoring = len(scoring_recs) + + for r in scoring_recs: + selected = r.get("selected", "") + if selected: + worker_data[selected]["selected_count"] += 1 + worker_data[selected]["hit_ratios"].append(r.get("selected_hitRatio", 0)) + + result = [] + for worker, data in worker_data.items(): + avg_hr = round(sum(data["hit_ratios"]) / len(data["hit_ratios"]), 1) if data["hit_ratios"] else 0 + result.append( + { + "Worker": _strip_scheme(worker), + "Selected": data["selected_count"], + "Select%": f"{round(data['selected_count'] / 
total_scoring * 100, 1)}%", + "AvgHitRatio": f"{avg_hr}%", + } + ) + + result.sort(key=lambda x: x["Selected"], reverse=True) + return result + + +def compute_scheduling_stats(strategies): + """计算调度策略概况。""" + if not strategies: + return {"scoring_count": 0, "fallback_count": 0, "scoring_pct": 0, "fallback_reasons": [], "suboptimal_pct": 0} + + scoring = [r for r in strategies if r.get("strategy") == "cache_aware_scoring"] + fallback = [r for r in strategies if r.get("strategy") == "process_tokens"] + + # Fallback 原因分类 + fallback_reasons = count_by(fallback, "reason") if fallback else [] + + # 非最优命中选择比例 + suboptimal = 0 + for r in scoring: + hit_ratios = r.get("hitRatios", {}) + if not hit_ratios: + continue + selected_hr = r.get("selected_hitRatio", 0) + max_hr = max(hit_ratios.values()) if hit_ratios else 0 + if selected_hr < max_hr: + suboptimal += 1 + + total = len(strategies) + return { + "scoring_count": len(scoring), + "fallback_count": len(fallback), + "scoring_pct": round(len(scoring) / total * 100, 1) if total else 0, + "fallback_reasons": fallback_reasons, + "suboptimal_count": suboptimal, + "suboptimal_pct": round(suboptimal / len(scoring) * 100, 1) if scoring else 0, + } + + +def cross_diagnose(prefix_hr, session_hr): + """交叉诊断矩阵。""" + p_high = prefix_hr["mean"] >= 60 + s_high = session_hr["rate"] >= 60 + + if s_high and p_high: + return { + "icon": "\u2705", + "summary": "cache-aware 策略运行良好", + "detail": "Session 粘性好,KV cache 实际复用度高", + } + elif s_high and not p_high: + return { + "icon": "\u26a0\ufe0f", + "summary": "Session 粘性好但 Prefix HR 低", + "detail": "prompt 内容变化大,同 worker 的 KV cache 实际复用低", + } + elif not s_high and p_high: + return { + "icon": "\u26a0\ufe0f", + "summary": "换 worker 频繁但 Prefix HR 尚可", + "detail": "负载均衡分散了请求,但新 worker 也有类似前缀缓存", + } + else: + return { + "icon": "\u274c", + "summary": "命中率全面偏低", + "detail": "负载均衡强制分散或缓存未预热,建议检查 worker 数量和 session 分配策略", + } + + +# ════════════════════════════════════════════════════════════════ +# 
Phase 4: 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def _quartile_trend(trend, value_field): + """将趋势数据分为 4 个 quartile,计算每段均值。""" + if not trend: + return "" + n = len(trend) + if n < 4: + values = [t.get(value_field, 0) for t in trend] + avg = round(sum(values) / len(values), 1) if values else 0 + return f"{avg}%" + + q_size = n // 4 + quartiles = [] + for i in range(4): + start = i * q_size + end = start + q_size if i < 3 else n + vals = [t.get(value_field, 0) for t in trend[start:end]] + quartiles.append(round(sum(vals) / len(vals), 1) if vals else 0) + + arrow = ( + "\u2191" if quartiles[3] > quartiles[0] + 10 else "\u2193" if quartiles[3] < quartiles[0] - 10 else "\u2192" + ) + return f"Q1={quartiles[0]}% \u2192 Q2={quartiles[1]}% \u2192 Q3={quartiles[2]}% \u2192 Q4={quartiles[3]}% {arrow}" + + +def format_full_report( + filepath, line_count, prefix_hr, session_hr, per_worker, scheduling, diagnosis, time_span=None, window_rows=None +): + """格式化完整终端报告。""" + parts = [] + + # 标题 + span_str = time_span or "" + parts.append("## Cache Hit Rate Report") + parts.append(f"**File**: {filepath} | **Lines**: {line_count:,}") + if span_str: + parts.append(f"**Span**: {span_str}") + parts.append("") + + # 图表说明 + parts.append("### 图表说明(如何解读)") + parts.append(" - Unicode 柱状图:每行代表一个 Prefix HR 区间(如 60-80%),条越长表示该区间请求占比越高。") + parts.append(" - ASCII 折线图:横轴是时间窗口,纵轴是命中率(0-100%);越靠上表示命中率越高。") + parts.append(" - 趋势 Q1→Q4:把时间均分为四段,比较首尾;↑ 上升,↓ 下降,→ 基本稳定。") + parts.append("") + + # 1. Prefix Hit Ratio + parts.append("### 1. 
Prefix Hit Ratio (KV Cache 内容复用度)") + if prefix_hr["stats"]: + _ = prefix_hr["stats"] + parts.append(f' 累计平均: {prefix_hr["mean"]}% (被选中 worker, N={prefix_hr["count"]})') + parts.append(" 分布:") + + dist_data = [ + {"label": d["range"] + "%", "value": d["pct"], "count": d["count"]} for d in prefix_hr["distribution"] + ] + parts.append(" Unicode 柱状图(Prefix HR 分布):") + parts.append(render_bar(dist_data, show_count=True)) + + parts.append(f' 冷启动率: {prefix_hr["cold_start_rate"]}%') + + trend_str = _quartile_trend(prefix_hr["trend"], "selected_hitRatio_mean") + if trend_str: + parts.append(f" 趋势: {trend_str}") + + # Sparkline + if prefix_hr["trend"]: + sparkline_data = [ + {"bucket": t["bucket"], "value": t.get("selected_hitRatio_mean", 0)} for t in prefix_hr["trend"] + ] + parts.append("") + parts.append(" ASCII 折线图(Prefix HR 趋势):") + parts.append(render_sparkline(sparkline_data, title="Prefix HR Trend", y_label="%", y_range=(0, 100))) + else: + parts.append(" (无 cache_aware_scoring 数据)") + parts.append("") + + # 2. Session Hit Rate + parts.append("### 2. Session Hit Rate (请求级路由粘性)") + parts.append(f' 累计: {session_hr["rate"]}% (hits={session_hr["hits"]} / total={session_hr["total"]})') + trend_str = _quartile_trend(session_hr["trend"], "value") + if trend_str: + parts.append(f" 趋势: {trend_str}") + + if session_hr["trend"]: + parts.append("") + parts.append(" ASCII 折线图(Session HR 趋势):") + parts.append(render_sparkline(session_hr["trend"], title="Session HR Trend", y_label="%", y_range=(0, 100))) + parts.append("") + + # 3. Per-Worker + parts.append("### 3. Per-Worker Cache Stats") + if per_worker: + parts.append( + render_table( + per_worker, + columns=["Worker", "Selected", "Select%", "AvgHitRatio"], + right_align={"Selected", "Select%", "AvgHitRatio"}, + ) + ) + else: + parts.append(" (无数据)") + parts.append("") + + # 4. Scheduling Strategy + parts.append("### 4. 
Scheduling Strategy") + parts.append( + f' cache_aware_scoring: {scheduling["scoring_count"]} ({scheduling["scoring_pct"]}%)' + f' | fallback: {scheduling["fallback_count"]}' + ) + if scheduling["fallback_reasons"]: + reasons = ", ".join(f'{r["value"]}={r["count"]}' for r in scheduling["fallback_reasons"]) + parts.append(f" fallback reasons: {reasons}") + parts.append( + f' 非最优命中选择: {scheduling["suboptimal_pct"]}%' + f' ({scheduling.get("suboptimal_count", 0)} 次, 负载均衡优先于命中率)' + ) + parts.append("") + + # 5. Diagnosis + parts.append("### 5. Diagnosis") + parts.append(f' {diagnosis["icon"]} {diagnosis["summary"]}') + parts.append(f' {diagnosis["detail"]}') + + # 6. 每窗口明细预览 + if window_rows: + parts.append("") + parts.append("### 6. 每5s窗口明细预览(前10行)") + parts.append( + render_table( + window_rows[:10], + columns=["Time", "Prefix HR", "Session HR", "Scoring", "Fallback", "Total Running (prefill≈stats/2)"], + right_align={"Scoring", "Fallback", "Total Running (prefill≈stats/2)"}, + ) + ) + + return "\n".join(parts) + + +def format_tail_report(filepath, line_count, prefix_hr, session_hr, scheduling): + """格式化 --tail 精简报告。""" + parts = [] + parts.append("## Cache Hit Rate (Recent)") + parts.append(f"**File**: {filepath} | **tail {line_count} lines**") + parts.append("") + parts.append(f' Prefix Hit Ratio: {prefix_hr["mean"]}% (avg) | Cold start: {prefix_hr["cold_start_rate"]}%') + parts.append(f' Session Hit Rate: {session_hr["rate"]}% (hits={session_hr["hits"]}/total={session_hr["total"]})') + parts.append( + f' Strategy: scoring {scheduling["scoring_count"]} ({scheduling["scoring_pct"]}%)' + f' | fallback {scheduling["fallback_count"]}' + ) + + # Sparkline + if prefix_hr["trend"]: + parts.append("") + sparkline_data = [ + {"bucket": t["bucket"], "value": t.get("selected_hitRatio_mean", 0)} for t in prefix_hr["trend"] + ] + parts.append(render_sparkline(sparkline_data, title="Recent Prefix HR", y_label="%", y_range=(0, 100))) + parts.append(" 说明: 折线越靠上表示对应时间窗口 Prefix HR 
越高。") + + return "\n".join(parts) + + +def build_per_window_rows(strategies, stats_recs): + """构建每窗口明细行,用于终端预览和 details 导出。""" + time_data = defaultdict( + lambda: { + "prefix_vals": [], + "hits": 0, + "total": 0, + "scoring": 0, + "fallback": 0, + "running": 0, + "has_running": False, + } + ) + for r in strategies: + ts = r.get("ts", "") + if r.get("strategy") == "cache_aware_scoring": + time_data[ts]["scoring"] += 1 + time_data[ts]["prefix_vals"].append(r.get("selected_hitRatio", 0)) + else: + time_data[ts]["fallback"] += 1 + + for r in stats_recs: + ts = r.get("ts", "") + time_data[ts]["hits"] += r.get("hits", 0) + time_data[ts]["total"] += r.get("total", 0) + if "total_running" in r: + time_data[ts]["running"] += r.get("total_running", 0) + time_data[ts]["has_running"] = True + + rows = [] + for ts in sorted(time_data.keys()): + d = time_data[ts] + short_ts = ts.split(" ")[-1] if " " in ts else ts + if d["prefix_vals"]: + prefix_mean = round(sum(d["prefix_vals"]) / len(d["prefix_vals"]), 1) + prefix_hr = f"{prefix_mean}%" + else: + prefix_hr = "-" + + if d["total"] > 0: + session_val = round(d["hits"] / d["total"] * 100, 1) + session_hr = f'{session_val}% ({d["hits"]}/{d["total"]})' + else: + session_hr = "-" + + running = _format_half_running(d["running"]) if d["has_running"] else "-" + rows.append( + { + "Time": short_ts, + "Prefix HR": prefix_hr, + "Session HR": session_hr, + "Scoring": str(d["scoring"]), + "Fallback": str(d["fallback"]), + "Total Running (prefill≈stats/2)": running, + } + ) + return rows + + +def save_detailed_report( + filepath, + strategies, + stats_recs, + prefix_hr, + session_hr, + per_worker, + scheduling, + diagnosis, + output_dir, + time_span=None, +): + """导出详细数据 Markdown 文件。 + + 主报告包含 Per-Worker 统计和 Fallback 明细。 + 每窗口明细数据拆分到 details/per_window_data.md。 + """ + summary_dir = os.path.join(output_dir, "summary") + details_dir = os.path.join(output_dir, "detail") + os.makedirs(summary_dir, exist_ok=True) + os.makedirs(details_dir, 
exist_ok=True) + output_path = os.path.join(summary_dir, "cache_hitrate_report.md") + + parts = [] + parts.append("# Cache Hit Rate Detailed Report") + parts.append(f'**Generated**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}') + parts.append(f"**Source**: {filepath}") + if time_span: + parts.append(f"**Span**: {time_span}") + parts.append("") + + parts.append("## 图表说明(Legend)") + parts.append("- **Unicode 柱状图**: 展示 Prefix HR 分布,`█` 越多说明该命中率区间占比越高。") + parts.append("- **ASCII 折线图**: 展示命中率随时间变化,横轴为时间窗口,纵轴为命中率(0-100%)。") + parts.append("- **Q1~Q4 趋势**: 将观察区间均分四段,反映整体走向(↑/↓/→)。") + parts.append("") + + # 1) 主指标摘要(与终端一致,避免“只在终端可见”) + parts.append("## 1. Key Metrics Summary") + parts.append("") + parts.append("### Prefix Hit Ratio") + if prefix_hr["stats"]: + parts.append(f'- 累计平均: **{prefix_hr["mean"]}%** (N={prefix_hr["count"]})') + parts.append(f'- 冷启动率: **{prefix_hr["cold_start_rate"]}%**') + trend_str = _quartile_trend(prefix_hr["trend"], "selected_hitRatio_mean") + if trend_str: + parts.append(f"- 趋势: {trend_str}") + dist_data = [ + {"label": d["range"] + "%", "value": d["pct"], "count": d["count"]} for d in prefix_hr["distribution"] + ] + parts.append("") + parts.append("```text") + parts.append("Unicode 柱状图(Prefix HR 分布)") + parts.append(render_bar(dist_data, show_count=True)) + if prefix_hr["trend"]: + sparkline_data = [ + {"bucket": t["bucket"], "value": t.get("selected_hitRatio_mean", 0)} for t in prefix_hr["trend"] + ] + parts.append("") + parts.append("ASCII 折线图(Prefix HR 趋势)") + parts.append(render_sparkline(sparkline_data, title="Prefix HR Trend", y_label="%", y_range=(0, 100))) + parts.append("```") + else: + parts.append("- (无 cache_aware_scoring 数据)") + parts.append("") + + parts.append("### Session Hit Rate") + parts.append(f'- 累计: **{session_hr["rate"]}%** (hits={session_hr["hits"]}/total={session_hr["total"]})') + trend_str = _quartile_trend(session_hr["trend"], "value") + if trend_str: + parts.append(f"- 趋势: {trend_str}") + if 
session_hr["trend"]: + parts.append("") + parts.append("```text") + parts.append("ASCII 折线图(Session HR 趋势)") + parts.append(render_sparkline(session_hr["trend"], title="Session HR Trend", y_label="%", y_range=(0, 100))) + parts.append("```") + parts.append("") + + parts.append("### Scheduling Strategy") + parts.append( + f'- cache_aware_scoring: **{scheduling["scoring_count"]} ({scheduling["scoring_pct"]}%)**' + f' | fallback: **{scheduling["fallback_count"]}**' + ) + parts.append( + f'- 非最优命中选择: **{scheduling["suboptimal_pct"]}%**' + f' ({scheduling.get("suboptimal_count", 0)} 次, 负载均衡优先于命中率)' + ) + parts.append(f'- Diagnosis: {diagnosis["icon"]} {diagnosis["summary"]};{diagnosis["detail"]}') + parts.append("") + + # 2) Per-Worker 完整统计 + parts.append("## 2. Per-Worker 完整统计") + parts.append("") + if per_worker: + parts.append( + render_table( + per_worker, + columns=["Worker", "Selected", "Select%", "AvgHitRatio"], + right_align={"Selected", "Select%", "AvgHitRatio"}, + ) + ) + parts.append("") + + # 3) Fallback 明细 + if scheduling["fallback_reasons"]: + parts.append("## 3. 
Fallback 明细") + for reason in scheduling["fallback_reasons"]: + parts.append(f'- **{reason["value"]}**: {reason["count"]} 次 ({reason["pct"]}%)') + parts.append("") + + # 每窗口明细 → 拆分到 details/ + window_rows = build_per_window_rows(strategies, stats_recs) + window_rows_merged = merge_blank_window_rows(window_rows) + session_rows = compute_session_details(strategies, _strip_scheme) + session_summary = summarize_session_details(session_rows) + + if window_rows: + # 主报告中添加引用 + parts.append( + f"> 每5s窗口明细数据(原始 {len(window_rows)} 条,合并后 {len(window_rows_merged)} 条):" + " [../detail/per_window_data.md](../detail/per_window_data.md)" + ) + parts.append("") + + # 写入 details 子目录 + detail_parts = ["# 每5s窗口明细数据", ""] + detail_parts.append( + "> 注:连续空窗口(Prefix/Session 都为空、且 Scoring/Fallback=0)已按 3 行格式合并展示(起始/合并说明/结束)。" + ) + detail_parts.append("") + detail_parts.append( + render_table( + window_rows_merged, + columns=["Time", "Prefix HR", "Session HR", "Scoring", "Fallback", "Total Running (prefill≈stats/2)"], + right_align={"Scoring", "Fallback", "Total Running (prefill≈stats/2)"}, + ) + ) + detail_parts.append("") + + detail_path = os.path.join(details_dir, "per_window_data.md") + with open(detail_path, "w") as f: + f.write("\n".join(detail_parts)) + + if session_rows: + parts.append( + f"> Session 命中详情 ({len(session_rows)} sessions): [../detail/session_hit_details.md](../detail/session_hit_details.md)" + ) + parts.append("") + + all_rows_with_seq = [] + for i, r in enumerate(session_rows, start=1): + all_rows_with_seq.append({**r, "id": _seq_label(i)}) + id_type_ranges = _summarize_id_type_ranges(all_rows_with_seq) + seq_map = {r["session"]: r["id"] for r in all_rows_with_seq} + ts_starts = [r.get("first_ts", "-") for r in all_rows_with_seq if r.get("first_ts", "-") != "-"] + ts_ends = [r.get("last_ts", "-") for r in all_rows_with_seq if r.get("last_ts", "-") != "-"] + + session_parts = ["# Session 命中详情", ""] + overall_start_ts = min(ts_starts) if ts_starts else "-" + 
overall_end_ts = max(ts_ends) if ts_ends else "-" + session_parts.append("## 时间范围") + session_parts.append(f"- 分析覆盖时间段: `{overall_start_ts} ~ {overall_end_ts}`") + session_parts.append("") + session_parts.append("## id_type 摘要") + if len(id_type_ranges) == 1: + start_id, end_id, id_type, range_start_ts, range_end_ts = id_type_ranges[0] + if start_id == end_id: + session_parts.append(f"- `{start_id}`: `{id_type}` (`{range_start_ts} ~ {range_end_ts}`)") + else: + session_parts.append(f"- `{start_id}~{end_id}`: `{id_type}` (`{range_start_ts} ~ {range_end_ts}`)") + else: + for start_id, end_id, id_type, range_start_ts, range_end_ts in id_type_ranges: + if start_id == end_id: + session_parts.append(f"- `{start_id}`: `{id_type}` (`{range_start_ts} ~ {range_end_ts}`)") + else: + session_parts.append( + f"- `{start_id}~{end_id}`: `{id_type}` (`{range_start_ts} ~ {range_end_ts}`)" + ) + session_parts.append("") + session_parts.append("## 概览") + session_parts.append("- 字段说明:`avg-hit` = `avg_hit(excl_first)`(去除首请求后的平均命中率)") + session_parts.append(f'- Total sessions: **{session_summary["total_sessions"]}**') + session_parts.append( + f'- Sessions with >1 request: **{session_summary["multi_req"]}**' + f' | single request: **{session_summary["single_req"]}**' + ) + if session_summary["multi_req"] > 0: + sticky_pct = round(session_summary["sticky_multi"] / session_summary["multi_req"] * 100, 1) + session_parts.append( + f'- Sticky (multi-request): **{session_summary["sticky_multi"]} ({sticky_pct}%)**' + f' | non-sticky: **{session_summary["non_sticky_multi"]}**' + ) + session_parts.append( + f'- Non-first request avg hit: **{session_summary["non_first_avg"]}%**' + f' (N={session_summary["non_first_total"]})' + ) + session_parts.append("") + focus_columns = [ + "id", + "req_count", + "sticky", + "purl_cnt", + "avg-hit", + "max_hit", + "min_hit", + "switch_reqids", + ] + session_parts.append("## 优先排查 Session(Top 20)") + prioritized_rows = sorted( + session_rows, + key=lambda r: ( + 
0 if r.get("sticky") == "no" else 1, + int(str(r.get("min_hit", "0")).rstrip("%") or 0), + -int(r.get("req_count", 0)), + ), + )[:20] + compact_rows = [] + + for r in prioritized_rows: + sid = seq_map.get(r["session"], "-") + compact_rows.append( + { + "id": sid, + "req_count": r["req_count"], + "sticky": r["sticky"], + "purl_cnt": r.get("prefill_url_count", 0), + "avg-hit": r["avg_hit(excl_first)"], + "max_hit": r["max_hit"], + "min_hit": r["min_hit"], + "switch_reqids": f"[查看](#switch-{sid.lower()})" if r["switch_req_pairs"] != "-" else "-", + } + ) + session_parts.append( + _render_markdown_table(compact_rows, focus_columns, align_right={"req_count", "purl_cnt"}) + ) + session_parts.append("") + + session_columns = focus_columns + all_rows_for_table = [] + for r in all_rows_with_seq: + sid = r["id"] + all_rows_for_table.append( + { + "id": sid, + "req_count": r["req_count"], + "sticky": r["sticky"], + "purl_cnt": r.get("prefill_url_count", 0), + "avg-hit": r["avg_hit(excl_first)"], + "max_hit": r["max_hit"], + "min_hit": r["min_hit"], + "switch_reqids": f"[查看](#switch-{sid.lower()})" if r["switch_req_pairs"] != "-" else "-", + } + ) + session_parts.append("## 全量明细(Markdown 表格)") + session_parts.append( + _render_markdown_table( + all_rows_for_table, + session_columns, + align_right={"req_count", "purl_cnt"}, + ) + ) + session_parts.append("") + + session_parts.append("## 序号与会话ID映射") + map_rows = [ + { + "id": r["id"], + "session_or_trace_id": r["session"], + } + for r in all_rows_with_seq + ] + session_parts.append(_render_markdown_table(map_rows, ["id", "session_or_trace_id"])) + session_parts.append("") + + session_parts.append("## 切换 reqid 明细(可跳转)") + for r in all_rows_with_seq: + session_parts.append(f'### switch-{r["id"].lower()}') + session_parts.append(f'- ID: **{r["id"]}**') + session_parts.append(f'- 会话标识: `{r["session"]}` ({r.get("id_type", "session_id")})') + session_parts.append(f'- 时间段: `{r.get("first_ts", "-")} ~ {r.get("last_ts", "-")}`') + 
session_parts.append(f'- switch_req_pairs: {r["switch_req_pairs"]}') + session_parts.append(f'- sharp_drop_request_ids: {r["sharp_drop_request_ids"]}') + session_parts.append("") + + session_path = os.path.join(details_dir, "session_hit_details.md") + with open(session_path, "w") as f: + f.write("\n".join(session_parts)) + + with open(output_path, "w") as f: + f.write("\n".join(parts)) + + return output_path + + +# ════════════════════════════════════════════════════════════════ +# 时间跨度计算 +# ════════════════════════════════════════════════════════════════ + + +def compute_time_span(strategies, stats_recs): + """从数据中计算时间跨度字符串。""" + all_ts = [] + for r in strategies + stats_recs: + ts = r.get("ts", "") + if ts: + try: + all_ts.append(parse_ts(ts)) + except ValueError: + pass + if len(all_ts) < 2: + return None + t_min = min(all_ts) + t_max = max(all_ts) + duration = t_max - t_min + hours = int(duration.total_seconds() // 3600) + minutes = int((duration.total_seconds() % 3600) // 60) + start = t_min.strftime("%Y-%m-%d %H:%M:%S") + end = t_max.strftime("%Y-%m-%d %H:%M:%S") + if hours > 0: + return f"{start} ~ {end} ({hours}h{minutes}m)" + return f"{start} ~ {end} ({minutes}m)" + + +# ════════════════════════════════════════════════════════════════ +# CLI 入口 +# ════════════════════════════════════════════════════════════════ + + +def parse_args(): + parser = argparse.ArgumentParser( + description="FastDeploy Go Router Cache 命中率统计", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("log_file", help="日志文件路径") + parser.add_argument( + "--tail", + nargs="?", + const="2000", + help="只分析尾部数据(支持 2000、1k、1w 等行数写法)。按时间请使用 --start/--end", + ) + parser.add_argument( + "--output", default=None, help="详细报告输出目录(默认:skill_output/stat-cache-hitrate//)" + ) + parser.add_argument( + "--start", default=None, help='起始时间(如 "16:00:00"、"03/31 16:00"、"2026/03/31 16:00:00")' + ) + parser.add_argument("--end", default=None, help='结束时间(如 
"17:00:00"、"03/31 17:00"、"2026/03/31 17:00:00")') + return parser.parse_args() + + +def parse_tail_arg(tail_str): + """解析 --tail 参数,返回行数 int。支持数字及 k/w 缩写。""" + if tail_str is None: + return None + + s = str(tail_str).strip().lower() + if not s: + raise ValueError("--tail 不能为空") + + m = re.fullmatch(r"(\d+)([kw])?", s) + if not m: + raise ValueError("不支持的 --tail 格式:请使用 2000、1k、1w 等行数写法。按时间请改用 --start/--end") + + value = int(m.group(1)) + unit = m.group(2) + if unit == "k": + value *= 1000 + elif unit == "w": + value *= 10000 + + if value <= 0: + raise ValueError("--tail 行数必须 > 0") + return value + + +def main(): + args = parse_args() + + # 验证文件存在 + if not os.path.isfile(args.log_file): + print(f"Error: 文件不存在: {args.log_file}", file=sys.stderr) + sys.exit(1) + + # --tail 与 --start/--end 不能混用(两者是不同的范围选择方式) + if args.tail and (args.start or args.end): + print("Error: --tail 与 --start/--end 不能同时使用,请选择其一", file=sys.stderr) + sys.exit(1) + + try: + tail = parse_tail_arg(args.tail) + except ValueError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + # 时间范围预过滤(--start 和 --end 可单独或同时指定) + import atexit + + log_file = args.log_file + if args.start or args.end: + start_ts = complete_time_arg(args.start, log_file, is_end=False) if args.start else None + end_ts = complete_time_arg(args.end, log_file, is_end=True) if args.end else None + filtered_path, is_temp = filter_file_by_time_range(log_file, start_ts, end_ts) + if is_temp: + atexit.register(lambda p=filtered_path: os.unlink(p) if os.path.exists(p) else None) + log_file = filtered_path + print(f'时间范围过滤: {start_ts or "..."} ~ {end_ts or "..."}', file=sys.stderr) + + # Phase 2: 提取 + 解析 + strategy_recs, stats_recs, inference_count, line_count = extract_data(log_file, tail) + + if not strategy_recs and not stats_recs: + print( + "Warning: 未找到 cache-aware 策略行或 [stats] 行。" "请确认日志文件包含 Go Router 日志。", file=sys.stderr + ) + sys.exit(0) + + # Phase 3: 计算三层指标 + prefix_hr = compute_prefix_hitrate(strategy_recs) + 
session_hr = compute_session_hitrate(stats_recs, inference_count) + per_worker = compute_per_worker_stats(strategy_recs) + scheduling = compute_scheduling_stats(strategy_recs) + diagnosis = cross_diagnose(prefix_hr, session_hr) + + # Phase 4: 输出 + # 无论 tail 还是全量模式,都生成详细报告 + run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if args.output: + output_base = args.output + else: + script_dir = os.path.dirname(os.path.abspath(__file__)) + golang_router_root = os.path.normpath(os.path.join(script_dir, "..", "..", "..", "..")) + output_base = os.path.join(golang_router_root, "skill_output", "stat-cache-hitrate") + output_dir = os.path.join(output_base, run_timestamp) + + time_span = compute_time_span(strategy_recs, stats_recs) + window_rows = build_per_window_rows(strategy_recs, stats_recs) + + if tail is not None: + # tail 精简模式:打印摘要 + 生成详细报告 + print(format_tail_report(args.log_file, line_count, prefix_hr, session_hr, scheduling)) + else: + # 全量模式:打印完整报告 + print( + format_full_report( + args.log_file, + line_count, + prefix_hr, + session_hr, + per_worker, + scheduling, + diagnosis, + time_span, + window_rows=window_rows, + ) + ) + + # 导出详细报告(tail 和全量都生成) + report_path = save_detailed_report( + args.log_file, + strategy_recs, + stats_recs, + prefix_hr, + session_hr, + per_worker, + scheduling, + diagnosis, + output_dir, + time_span=time_span, + ) + print("\n\U0001f4c4 详细数据见:") + report_abs, report_uri = _build_path_links(report_path) + print(f" - 报告文件: [{report_abs}]({report_uri})") + details_path = os.path.join(output_dir, "detail", "per_window_data.md") + if os.path.exists(details_path): + details_abs, details_uri = _build_path_links(details_path) + print(f" - 窗口明细: [{details_abs}]({details_uri})") + session_detail_path = os.path.join(output_dir, "detail", "session_hit_details.md") + if os.path.exists(session_detail_path): + session_abs, session_uri = _build_path_links(session_detail_path) + print(f" - Session 明细: [{session_abs}]({session_uri})") + + +if __name__ 
== "__main__": + main() diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stats.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stats.py new file mode 100644 index 00000000000..a197ee7aff0 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/stats.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +Stats — 通用统计计算工具 + +提供百分位数、分布、时间窗口聚合、分组计数等通用统计函数。 +不含任何业务逻辑或日志格式依赖。 + +Python 3 stdlib only,零依赖。 +""" + +import math +from collections import defaultdict +from datetime import datetime, timedelta + +# ════════════════════════════════════════════════════════════════ +# 百分位数与基础统计 +# ════════════════════════════════════════════════════════════════ + + +def percentile(sorted_vals, p): + """从已排序列表计算第 p 百分位数(线性插值)。""" + if not sorted_vals: + return 0.0 + n = len(sorted_vals) + k = (p / 100.0) * (n - 1) + f = math.floor(k) + c = math.ceil(k) + if f == c: + return sorted_vals[int(k)] + return sorted_vals[f] * (c - k) + sorted_vals[c] * (k - f) + + +def compute_statistics(values, percentiles_list=None, distribution_spec=None): + """计算一组数值的统计量。 + + Args: + values: 数值列表 + percentiles_list: 要计算的百分位数列表,默认 [50, 90, 95, 99] + distribution_spec: 分布区间规格字符串,如 '0-20,20-40,40-60,60-80,80-100' + + Returns: + dict: {count, min, max, mean, sum, stddev, p50, p90, ..., distribution} + """ + if percentiles_list is None: + percentiles_list = [50, 90, 95, 99] + + if not values: + result = {"count": 0, "min": 0, "max": 0, "mean": 0, "sum": 0, "stddev": 0} + for p in percentiles_list: + result[f"p{p}"] = 0 + if distribution_spec is not None: + result["distribution"] = [] + return result + + sorted_vals = sorted(values) + n = len(sorted_vals) + total = sum(sorted_vals) + mean = total / n + variance = sum((x - mean) ** 2 for x in sorted_vals) / n + stddev = math.sqrt(variance) + + result = { + "count": n, + "min": round(sorted_vals[0], 3), + "max": round(sorted_vals[-1], 3), + "mean": round(mean, 3), + "sum": 
round(total, 3), + "stddev": round(stddev, 3), + } + + for p in percentiles_list: + result[f"p{p}"] = round(percentile(sorted_vals, p), 3) + + if distribution_spec is not None: + result["distribution"] = compute_distribution(sorted_vals, distribution_spec) + + return result + + +def compute_distribution(sorted_vals, spec_str): + """根据区间规格计算分布直方图。 + + spec_str 示例:'0-20,20-40,40-60,60-80,80-100' + 每个区间是左闭右开 [lo, hi)。 + """ + buckets = _parse_distribution_spec(spec_str) + n = len(sorted_vals) + result = [] + for b in buckets: + if b[0] == "lt": + count = sum(1 for v in sorted_vals if v < b[1]) + label = b[2] + elif b[0] == "gt": + count = sum(1 for v in sorted_vals if v > b[1]) + label = b[2] + elif b[0] == "range": + count = sum(1 for v in sorted_vals if b[1] <= v < b[2]) + label = b[3] + else: + continue + result.append({"range": label, "count": count, "pct": round(count / n * 100, 1) if n else 0}) + return result + + +def _parse_distribution_spec(spec_str): + """解析分布区间规格:'<100,100-500,>1000' → bucket 定义列表。""" + buckets = [] + for part in spec_str.split(","): + part = part.strip() + if part.startswith("<"): + buckets.append(("lt", float(part[1:]), part)) + elif part.startswith(">"): + buckets.append(("gt", float(part[1:]), part)) + elif "-" in part: + lo, hi = part.split("-", 1) + buckets.append(("range", float(lo), float(hi), part)) + return buckets + + +# ════════════════════════════════════════════════════════════════ +# 时间窗口聚合 +# ════════════════════════════════════════════════════════════════ + + +def time_bucket(records, window="auto", agg_specs=None, ts_field="ts"): + """按时间窗口聚合记录。 + + Args: + records: dict 列表,每个 dict 必须有 ts_field 字段 + window: 窗口大小 '5s'/'1m'/'5m'/'auto' + agg_specs: 聚合规格列表 [(field, func), ...],如 [('selected_hitRatio', 'mean')] + func 支持:count, sum, mean, min, max, pNN + ts_field: 时间戳字段名 + + Returns: + list[dict]: 每个窗口一条记录 {bucket, count, field_func, ...} + """ + if agg_specs is None: + agg_specs = [("_", "count")] + + if not records: + return 
[] + + window_td = _parse_window(window, records, ts_field) + + # 按窗口分组 + buckets = defaultdict(list) + for r in records: + ts_str = r.get(ts_field, "") + if not ts_str: + continue + try: + dt = datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S") + except ValueError: + continue + bucket_dt = _align_to_bucket(dt, window_td) + bucket_key = bucket_dt.strftime("%Y/%m/%d %H:%M:%S") + buckets[bucket_key].append(r) + + # 按时间排序并聚合 + result = [] + for bucket_key in sorted(buckets.keys()): + bucket_records = buckets[bucket_key] + entry = {"bucket": bucket_key, "count": len(bucket_records)} + + for field, func in agg_specs: + if field == "_": + if func == "count": + entry["count"] = len(bucket_records) + continue + + values = [] + for r in bucket_records: + v = r.get(field) + if v is not None: + try: + values.append(float(v)) + except (ValueError, TypeError): + pass + + out_key = f"{field}_{func}" + entry[out_key] = _aggregate_values(values, func) + + result.append(entry) + + return result + + +def _parse_window(window_str, records, ts_field): + """解析窗口字符串为 timedelta。'auto' 根据数据跨度自动选择。""" + if window_str == "auto": + timestamps = [] + for r in records: + ts_str = r.get(ts_field, "") + if ts_str: + try: + timestamps.append(datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S")) + except ValueError: + pass + if len(timestamps) < 2: + return timedelta(minutes=1) + span = max(timestamps) - min(timestamps) + if span < timedelta(minutes=30): + return timedelta(seconds=5) + elif span < timedelta(hours=3): + return timedelta(minutes=1) + else: + return timedelta(minutes=5) + elif window_str.endswith("s"): + return timedelta(seconds=int(window_str[:-1])) + elif window_str.endswith("m"): + return timedelta(minutes=int(window_str[:-1])) + elif window_str.endswith("h"): + return timedelta(hours=int(window_str[:-1])) + return timedelta(minutes=1) + + +def _align_to_bucket(dt, window_td): + """将 datetime 对齐到窗口边界。""" + secs = max(1, int(window_td.total_seconds())) + epoch = datetime(dt.year, dt.month, 
dt.day) + offset = int((dt - epoch).total_seconds()) + aligned = (offset // secs) * secs + return epoch + timedelta(seconds=aligned) + + +def _aggregate_values(values, func): + """用指定函数聚合一组数值。""" + if not values: + return 0 + if func == "count": + return len(values) + elif func == "sum": + return round(sum(values), 3) + elif func == "mean": + return round(sum(values) / len(values), 3) + elif func == "min": + return round(min(values), 3) + elif func == "max": + return round(max(values), 3) + elif func.startswith("p"): + p = int(func[1:]) + return round(percentile(sorted(values), p), 3) + return 0 + + +# ════════════════════════════════════════════════════════════════ +# 分组计数 +# ════════════════════════════════════════════════════════════════ + + +def count_by(records, field, top_n=None): + """按指定字段分组计数。 + + Args: + records: dict 列表 + field: 分组字段名 + top_n: 只返回前 N 个(按计数降序) + + Returns: + list[dict]: [{value, count, pct}],按计数降序排列 + """ + counts = defaultdict(int) + total = 0 + for r in records: + val = r.get(field) + if val is not None: + counts[str(val)] += 1 + total += 1 + + result = [] + for val, count in sorted(counts.items(), key=lambda x: -x[1]): + result.append({"value": val, "count": count, "pct": round(count / total * 100, 1) if total else 0}) + + if top_n: + result = result[:top_n] + + return result diff --git a/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/window_utils.py b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/window_utils.py new file mode 100644 index 00000000000..4e09710f6f9 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/stat-cache-hitrate/scripts/window_utils.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +""" +窗口明细压缩工具:合并连续空窗口,降低 per_window_data.md 噪声。 +""" + +RUNNING_COL = "Total Running (prefill≈stats/2)" + + +def _is_blank_window_row(row): + """判断是否为空窗口(无 Prefix/Session 明细值)。""" + return ( + row.get("Prefix HR") == "-" + and row.get("Session HR") == "-" + and row.get("Scoring") in {"0", 0} + 
and row.get("Fallback") in {"0", 0} + ) + + +def merge_blank_window_rows(rows, min_merge_len=5): + """合并连续空窗口,避免明细表被大量 '-' 行淹没。 + + 对于连续空窗口段(长度 >= min_merge_len),压缩成 3 行: + 1) 起始时间行 + 2) 合并说明行(含窗口数量) + 3) 结束时间行 + """ + if not rows: + return rows + + merged = [] + i = 0 + while i < len(rows): + if not _is_blank_window_row(rows[i]): + merged.append(rows[i]) + i += 1 + continue + + j = i + while j < len(rows) and _is_blank_window_row(rows[j]): + j += 1 + + seg_len = j - i + if seg_len < min_merge_len: + merged.extend(rows[i:j]) + i = j + continue + + start_t = rows[i]["Time"] + end_t = rows[j - 1]["Time"] + merged.append( + { + "Time": start_t, + "Prefix HR": "-", + "Session HR": "-", + "Scoring": "0", + "Fallback": "0", + RUNNING_COL: rows[i].get(RUNNING_COL, "-"), + } + ) + merged.append( + { + "Time": "|", + "Prefix HR": "-", + "Session HR": f"merged {seg_len} windows", + "Scoring": "0", + "Fallback": "0", + RUNNING_COL: "-", + } + ) + merged.append( + { + "Time": end_t, + "Prefix HR": "-", + "Session HR": "-", + "Scoring": "0", + "Fallback": "0", + RUNNING_COL: rows[j - 1].get(RUNNING_COL, "-"), + } + ) + i = j + + return merged diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/SKILL.md b/fastdeploy/golang_router/.claude/skills/troubleshoot/SKILL.md new file mode 100644 index 00000000000..ecb27c1436a --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/SKILL.md @@ -0,0 +1,164 @@ +--- +name: troubleshoot +description: > + FastDeploy Go Router 综合问题排查 skill。覆盖错误分类、延迟分析、请求追踪、Worker 健康时间线、 + Cache 调度诊断、负载与计数器分析六个维度。输出按三层问题来源分类:Router 自身、FastDeploy 后端、客户端。 + + 当用户要求以下操作时触发此 skill:排查 router 问题、分析 router 日志、router 排查、 + 查看 router 状态、综合排查、全量扫描、troubleshoot router、/troubleshoot、 + 分析错误日志、502/503 排查、延迟分析、Worker 健康、负载分析、cache 调度诊断、 + 请求追踪、trace 请求。 + 关键词:troubleshoot、排查、router 问题、全量扫描、综合分析、error、502、latency、 + health、load、cache、trace、/troubleshoot。 + +--- + +# Router Troubleshooting + +综合排查 FastDeploy Go Router 问题,输出完整诊断报告。 + +> IMPORTANT: 
执行前务必先读取 `references/log_patterns.md` 了解日志格式和提取规则。错误分类时参考 `references/error_catalog.md`。涉及后端问题时参考 `references/fastdeploy_cross_reference.md`。 + +## 执行前交互 + +运行脚本前,Claude 必须按以下顺序向用户确认参数: + +### 1. 日志文件路径 +使用 AskUserQuestion 工具向用户询问日志文件路径。提供两个常用快捷选项(客户端会自动提供 Other 自定义输入): +- 选项 1: `logs/router.log`(默认) +- 选项 2: `fd-router.log`(golang_router 根目录) + +**重要规则**: +- 如果用户已经在消息中明确指定了日志路径,直接使用该路径,跳过询问步骤 +- 用户指定路径后不要质疑、推荐替代文件、或以任何理由尝试切换到其他文件 +- 支持绝对路径(如 `/home/user/logs/xxx.log`)和相对路径(如 `logs/fd-router (2).log`) + +如果用户直接确认或未指定路径,使用脚本的自动发现逻辑。 + +### 2. 分析范围 +必须使用 **AskUserQuestion 的离散选项**(不要只发纯文本编号): +- 选项 1: `全量分析(默认)` — 分析整个日志文件 +- 选项 2: `尾部分析` — 只分析最近数据(仅支持行数,如 `--tail 5000`) +- 选项 3: `指定时间段` — 分析特定时间范围内的日志 + +如果用户未选择,默认使用全量分析。 + +#### 指定时间段的处理 + +脚本原生支持 `--start` 和 `--end` 参数,无需手动预过滤。两者可单独或同时指定。 + +时间格式灵活:支持 `YYYY/MM/DD HH:MM:SS`、`HH:MM:SS`、`HH:MM`、`MM/DD`、`MM/DD HH:MM`。 +缺失部分自动从日志首末行推断(缺年份取首行,缺日期取末行)。 +`--start/--end` 与 `--tail` 互斥。 +`--tail` 仅支持“行数”语义(如 `5000`,也兼容 `1k/1w` 自动换算),不再支持 `30m` 这类时间写法;凡是按时间筛选都使用 `--start/--end`。 + +当用户选择“指定时间段”时,必须再发起一次 **AskUserQuestion**(离散选项)引导时间输入: +- 选项 1: `当天(00:00:00 到当前)`(推荐) +- 选项 2: `自定义时间段`(由用户直接输入起止时间) + +用户若通过客户端默认 `Other` 输入时间,则将该输入直接作为时间范围参数解析。 +可补充一条简短示例引导: +- 示例 1:`16:00-16:30` +- 示例 2:`03/31 16:00 ~ 03/31 18:00` +- 示例 3:`2026/03/31 16:00:00`(仅起始) + +### 3. 分析模式 +必须使用 **AskUserQuestion 的离散选项**(不要只发纯文本编号): +- 选项 1: `完整分析(默认)` — 运行所有维度(errors + latency + health + cache + load) +- 选项 2: `单维度/多维度分析` — 选择特定维度(errors / latency / health / cache / load),可选多个 +- 选项 3: `请求追踪` — 追踪特定请求 ID + +如果用户未选择,默认使用完整分析。 + +当用户选择“请求追踪”后,**不要再发 AskUserQuestion** 收集 trace ID。 +直接发一条提示并等待用户输入完成后再继续执行即可。 + +提示文案建议: +- `请输入要追踪的 ID(支持 trace_id / request_id / session_id,多个用逗号分隔;输入 all 可全量追踪)` +- 示例:`a1b2c3d4` / `trace-001,trace-002` / `session-abc-123` / `all` + +### 4. 
输出目录 +诊断报告默认保存到 `skill_output/troubleshoot//`(自动按运行时间创建子目录)。 +用户可通过 `--output` 指定**基目录**,脚本会继续在其下创建 `/summary` 与 `/detail`,避免覆盖历史明细。 + +## 用法 + +脚本路径(相对于 `fastdeploy/golang_router/`):`.claude/skills/troubleshoot/scripts/` + +```bash +SCRIPTS=.claude/skills/troubleshoot/scripts + +# 全量扫描(errors + latency + health + cache + load) +python3 $SCRIPTS/troubleshoot.py + +# 单维度分析 +python3 $SCRIPTS/troubleshoot.py --errors +python3 $SCRIPTS/troubleshoot.py --latency +python3 $SCRIPTS/troubleshoot.py --health +python3 $SCRIPTS/troubleshoot.py --cache +python3 $SCRIPTS/troubleshoot.py --load + +# 请求追踪(需指定 ID,支持逗号分隔多 ID) +python3 $SCRIPTS/troubleshoot.py --trace +python3 $SCRIPTS/troubleshoot.py --trace "id1,id2" +python3 $SCRIPTS/troubleshoot.py --trace all + +# 尾部分析 +python3 $SCRIPTS/troubleshoot.py --tail 5000 +# 指定时间段(需要按时间筛选时使用;--start 和 --end 可单独或同时使用) +python3 $SCRIPTS/troubleshoot.py --start "16:00:00" --end "17:00:00" +python3 $SCRIPTS/troubleshoot.py --start "2026/03/31 16:00:00" +python3 $SCRIPTS/troubleshoot.py --start "03/31" --end "03/31 18:00" + +# 组合模式 +python3 $SCRIPTS/troubleshoot.py --errors --latency +python3 $SCRIPTS/troubleshoot.py --errors --tail 5000 +python3 $SCRIPTS/troubleshoot.py --start "16:00" --end "17:00" --errors --latency +``` + +默认日志路径:`logs/router.log` → `fd-router.log` + +## 输出 + +- **终端**:简洁三层汇总(Router / FD 后端 / 客户端),含状态码分布、错误 Top N、趋势图 +- **文件**:详细报告导出到 `skill_output/troubleshoot//summary/troubleshoot_report.md` + - 逐分钟事件详情拆分到 `detail/health_events.md` + - 请求追踪事件链拆分到 `detail/trace/trace_.md` +- **Cache 明细要求**:`cache_session_stickiness.md` / `cache_suboptimal.md` / `cache_eviction.md` / `cache_fallback.md` / `cache_cross.md` + 必须始终生成(即使无异常也写“未发现/样本不足”总结,避免链接缺失) +- **状态行**:`STATUS: HEALTHY / DEGRADED / CRITICAL` + +## 三层诊断框架 + +| 层 | 典型问题 | 日志特征 | +|----|---------|---------| +| Router | Panic、500、Counter 异常、调度瓶颈、Cache 策略不优 | `Panic recovered`、`Failed to encode`、`double-release` | +| FD 后端 | 502、Worker 下线、高推理延迟、请求卡住 | `Failed to 
connect`、`Removed unhealthy`、p99 高 | +| 客户端 | 断连、请求格式错误 | `context canceled`、400 | + +## 脚本架构 + +``` +scripts/ + log_parser.py — 日志解析原语(HTTP/Cache/Stats/错误归一化/事件匹配) + stats.py — 通用统计计算(百分位数/时间窗口/分组) + chart.py — 终端可视化(sparkline/柱状图/表格/时间线) + troubleshoot.py — 主编排器 + analyzers/ + errors.py — 错误分类分析 + latency.py — 延迟分析 + health.py — Worker 健康时间线 + cache.py — Cache 调度诊断 + load.py — 负载与计数器分析 + trace.py — 请求追踪 +``` + +## 重要规则 + +1. 大文件 (>5000 行) 用 grep 分类提取,不一次性读取 +2. 每个问题标注来源层(Router / FD 后端 / 客户端) +3. Cache 命中率数值分析用 `/stat-cache-hitrate`,本 skill 做策略诊断 +4. 分析前读取 `references/log_patterns.md` +5. 错误查询参考 `references/error_catalog.md` +6. 后端问题排查参考 `references/fastdeploy_cross_reference.md` +7. 输出格式参考 `references/report_templates.md` diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/evals/trigger_eval.json b/fastdeploy/golang_router/.claude/skills/troubleshoot/evals/trigger_eval.json new file mode 100644 index 00000000000..4b961e85b36 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/evals/trigger_eval.json @@ -0,0 +1,18 @@ +[ + {"query": "router 最近频繁 502 和 503,帮我全面排查一下问题", "should_trigger": true}, + {"query": "帮我 troubleshoot 一下 Go Router,感觉有些请求延迟特别高", "should_trigger": true}, + {"query": "分析 logs/fd-router.log 里面的错误日志,看看哪些错误最多", "should_trigger": true}, + {"query": "有几个 Worker 好像不太健康,帮我看看 Worker 健康时间线", "should_trigger": true}, + {"query": "cache 调度策略最近好像有问题,fallback 比例太高了,诊断一下", "should_trigger": true}, + {"query": "帮我追踪请求 trace-id-12345,看看这个请求在 router 里经历了什么", "should_trigger": true}, + {"query": "/troubleshoot 全量扫描 router 日志,给我一份完整的诊断报告", "should_trigger": true}, + {"query": "router 负载分析一下,有没有 counter 异常或者 double-release 的情况", "should_trigger": true}, + {"query": "统计一下 cache 命中率是多少,prefix hit ratio 和 session hit rate 各是多少", "should_trigger": false}, + {"query": "帮我看看 hitRatio 数据,想了解 KV cache 的复用度", "should_trigger": false}, + {"query": "帮我写一个 Go 的 reverse proxy,要支持负载均衡", "should_trigger": false}, + {"query": "分析 Kubernetes pod 
的日志,看看为什么 OOMKilled", "should_trigger": false}, + {"query": "FastDeploy 模型部署失败了,帮我看看怎么回事", "should_trigger": false}, + {"query": "帮我优化一下 Python 代码的性能,跑得太慢了", "should_trigger": false}, + {"query": "nginx 返回 504 Gateway Timeout,帮我排查原因", "should_trigger": false}, + {"query": "帮我监控 cache 命中率的实时变化趋势", "should_trigger": false} +] diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/references/error_catalog.md b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/error_catalog.md new file mode 100644 index 00000000000..60b4931b546 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/error_catalog.md @@ -0,0 +1,123 @@ +# Router 错误目录 + +按 HTTP 状态码和日志级别分类的 Router 错误快速索引。每条含严重程度、根因、影响、排查命令、问题来源层。 + +--- + +## 按 HTTP 状态码索引 + +注意:HTTP 响应体中的错误消息与 logger 输出的 ERROR 消息**可能不同**。 +例如:HTTP 502 响应 `Failed to select worker: {err}` 对应的日志 ERROR 是 `Failed to select mixed worker: {err}`。 +分析时需将两者关联而非简单去重。 + +### 400 Bad Request + +| 错误消息 | 根因 | 来源层 | 排查 | +|---------|------|-------|------| +| `Invalid request body: {err}` | 请求体读取失败 | 客户端 | 检查客户端请求格式 | +| `Invalid JSON format: {err}` | JSON 解析失败 | 客户端 | 检查 JSON 格式 | +| `DefaultManager is nil` | Manager 未初始化 | Router | 检查 Router 启动日志 | + +### 500 Internal Server Error + +| 错误消息 | 根因 | 来源层 | 排查 | +|---------|------|-------|------| +| `Failed to build disaggregate_info: {err}` | PD 模式配置错误 | Router | 检查 register.yaml 参数 | +| `Failed to encode modified request: {err}` | 请求编码失败 | Router | 检查请求参数特殊字符 | +| `Internal server error` (Panic) | Router 代码 bug | Router | 检查 Panic recovered 日志 | + +### 502 Bad Gateway + +| 错误消息 | 根因 | 来源层 | 排查 | +|---------|------|-------|------| +| `Failed to select worker: {err}` | 无可用 Mixed Worker | FD 后端 | `curl /health` 检查后端 | +| `Failed to select worker pair: {err}` | 无可用 PD Worker | FD 后端 | 检查 prefill/decode 注册状态 | +| `Failed to connect to backend service: {err}` | 后端不可达 | FD 后端 | `curl {worker_url}/health` | + +### 503 Service Unavailable + +| 错误消息 | 根因 | 来源层 | 
排查 | +|---------|------|-------|------| +| `No available prefill/decode workers` | 全部 Worker 不健康 | FD 后端 | 检查部署状态 | + +--- + +## 按日志级别索引 + +### ERROR 级别 + +| 消息模板 | 严重程度 | 来源层 | 影响 | +|---------|---------|-------|------| +| `Failed to select mixed worker: {err}` | HIGH | FD 后端 | 请求返回 502 | +| `Failed to select prefill worker: {err}` | HIGH | FD 后端 | 请求返回 502 | +| `Failed to read register request body: {err}` | MEDIUM | Router | 注册失败 | +| `Failed to unmarshal register request JSON: {err}` | MEDIUM | Router | 注册失败 | +| `Failed to create decode request for {url}: {err}` | HIGH | FD 后端 | PD 请求失败 | +| `Failed to create prefill request for {url}: {err}` | HIGH | FD 后端 | PD 请求失败 | +| `Decode request failed for {url}: {err}` | HIGH | FD 后端 | PD 请求失败 | +| `Prefill request failed for {url}: {err}` | HIGH | FD 后端 | PD 请求失败 | +| `Failed to read request body: {err}` | LOW | 客户端 | 单请求失败 | +| `Failed to unmarshal request JSON: {err}` | LOW | 客户端 | 单请求失败 | +| `Failed to select worker pair: {err}` | HIGH | FD 后端 | 请求返回 502 | +| `Failed to build disaggregate_info: {err}` | HIGH | Router | 请求返回 500 | +| `Failed to encode modified request: {err}` | HIGH | Router | 请求返回 500 | +| `Failed to read YAML file config/register.yaml: {err}` | LOW | Router | 启动时未找到可选配置文件(若未使用 register.yaml 可忽略) | +| `Failed to select worker: {err}` | HIGH | FD 后端 | 请求返回 502 | +| `Failed to connect to backend service: {err}` | HIGH | FD 后端 | 请求返回 502 | +| `Request failed (attempt {n}/{max}): {err}` | MEDIUM | FD 后端 | 重试中 | +| `Failed to create backend request for {url}: {err}` | HIGH | FD 后端 | 请求失败 | +| `Backend request failed for {url}: {err}` | HIGH | FD 后端 | 请求失败 | +| `scanner error: {err}` | MEDIUM | FD 后端/客户端 | 流式响应中断(gateway redirect 函数) | +| `[prefill] scanner error: {err}, message={msg}` | MEDIUM | FD 后端/客户端 | PD 模式 prefill 流式错误 | +| `copy error: {err}` | MEDIUM | FD 后端/客户端 | 非流式响应中断 | +| `[prefill] copy error: {err}, message={msg}` | MEDIUM | FD 后端/客户端 | PD 模式 prefill 非流式错误 | +| `Removed unhealthy 
prefill/decode/mixed instance: {url}` | HIGH | FD 后端 | Worker 被移除(注意:这是 ERROR 级别) | + +### WARN 级别 + +| 消息模板 | 严重程度 | 来源层 | 影响 | +|---------|---------|-------|------| +| `GetRemoteMetrics failed for {url}, falling back to local counter` | LOW | FD 后端 | 调度精度降低 | +| `release worker: {url} skipped, counter already cleaned up` | LOW | Router | 计数器异常 | +| `release worker: {url} skipped, counter already zero (possible double-release)` | MEDIUM | Router | 计数器逻辑 bug | +| `cache-aware prefill: tokenizer failed, fallback to char tokens: {err}` | LOW | Router | cache-aware 精度降低 | +| `Instance {url} role is unknown` | LOW | Router | 注册角色不识别 | + +### INFO 级别(异常相关) + +| 消息模板 | 含义 | 关注场景 | +|---------|------|---------| +| `unhealthy worker counter preserved (inflight requests): {url}, count: {N}` | 不健康 Worker 仍有 inflight 请求 | 频繁出现说明 Worker 不稳定 | +| `unhealthy worker token counter preserved (inflight requests): {url}, tokens: {N}` | 不健康 Worker 仍有 token 计数 | 同上 | +| `cleanup unhealthy worker counter: {url}` | 清理不健康 Worker 的请求计数 | 正常清理 | +| `cleanup unhealthy worker token counter: {url}` | 清理不健康 Worker 的 token 计数 | 正常清理 | +| `preserved counters for {N} workers with inflight requests: [...]` | 保留了 N 个 Worker 的计数器 | N 大说明多 Worker 不稳定 | +| `removed counters for {N} unhealthy workers: [...]` | 移除了 N 个 Worker 的计数器 | 正常清理 | +| `Server {url} is healthy` | 健康检查恢复 | Worker 恢复(来自 HealthGenerate 端点) | + +注意:以下事件是 **ERROR 级别**,不是 INFO: +- `Removed unhealthy prefill/decode/mixed instance: {url}` — Worker 被移除 + +注意:以下内容是 **HTTP 响应体**,不是 logger 输出(不会出现在日志行中): +- `Register success` — 注册成功的 HTTP 200 响应体 +- Worker 注册检测应通过 H1 行的 `POST /register 200` 判断 + +--- + +## 注册参数校验错误 + +| 错误消息 | 根因 | 排查 | +|---------|------|------| +| `invalid connector_port: {value}` | connector_port 非数字或范围错误 | 检查 register.yaml | +| `invalid engine_worker_queue_port: {value}` | engine_worker_queue_port 非数字或范围错误 | 检查 register.yaml | +| `invalid metrics_port: {value}` | metrics_port 非数字或范围错误 | 检查 register.yaml | +| 
`rdma_ports[{i}] invalid port: {value}` | RDMA 端口配置错误 | 检查 register.yaml | + +--- + +## scanner error / copy error 区分 + +| error 内容 | 来源层 | 含义 | +|-----------|-------|------| +| `context canceled` | 客户端 | 客户端主动断连(超时或取消) | +| 其他 | FD 后端 | 后端流式响应异常 | diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/references/fastdeploy_cross_reference.md b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/fastdeploy_cross_reference.md new file mode 100644 index 00000000000..f35cbcb303a --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/fastdeploy_cross_reference.md @@ -0,0 +1,102 @@ +# FastDeploy 后端交叉引用 + +从 Router 日志推断 FastDeploy 后端问题时的排查指引。 + +--- + +## 症状 → 后端排查 + +### 1. 后端不可达 (502) + +**Router 日志特征**: +``` +[ERROR] Failed to connect to backend service: dial tcp {ip}:{port}: connect: connection refused +``` + +**排查步骤**: +1. `curl http://{worker_url}/health` — 确认后端是否存活 +2. `curl http://{worker_url}/v1/models` — 确认模型是否加载完成 +3. 检查后端日志 `logs/workerlog.0` +4. `netstat -tlnp | grep {port}` — 确认端口监听 +5. 检查网络连通性(防火墙、安全组) + +### 2. 后端 OOM / 频繁重启 + +**Router 日志特征**: +- Worker 频繁 REMOVED → RE-REGISTERED(短周期内多次) +- 健康检查间歇性失败 + +**排查步骤**: +1. `dmesg | grep -i oom` — 检查 OOM killer +2. `nvidia-smi` — 检查 GPU 内存 +3. 后端日志搜索 `CUDA out of memory` +4. 检查 `max_num_seqs`、`max_model_len` 配置 + +### 3. 高推理延迟 + +**Router 日志特征**: +- 请求 p99 高(>10s)但调度耗时仅 ms 级 +- 确认延迟不在 Router 层(调度耗时 << 总延迟) + +**排查步骤**: +1. 检查后端 Prometheus metrics:`http://{worker_url}:{metrics_port}/metrics` + - `fastdeploy_llm_running_queue_size` — 推理队列 + - `fastdeploy_llm_waiting_queue_size` — 等待队列 + - `fastdeploy_llm_generation_tokens_per_second` — 吞吐量 +2. 确认 GPU 利用率:`nvidia-smi --query-gpu=utilization.gpu --format=csv` +3. 检查是否有长 prompt 请求拖慢整体 + +### 4. 流式响应异常 + +**Router 日志特征**: +``` +[ERROR] scanner error: {err} (非 context canceled) +[ERROR] copy error: {err} (非 context canceled) +``` + +**排查步骤**: +1. 后端日志搜索对应 request_id +2. 检查后端是否产生格式错误的 SSE +3. 检查网络是否有中间代理超时切断 + +### 5. 
请求超时/卡住 + +**Router 日志特征**: +- 有 select worker 但长时间无 release/completed +- [stats] 中 running 持续不降 + +**根因**:Router 的 `http.Client{}` 没有设置超时,后端不响应则阻塞到客户端断连或 TCP 超时。 + +**排查步骤**: +1. 检查后端是否还在处理请求 +2. 检查后端是否出现死锁 +3. `ss -tnp | grep {port}` — 检查 TCP 连接状态 + +--- + +## 通用 FastDeploy 排查工具 + +### collect-env + +收集环境信息: +```bash +python -m fastdeploy.utils.collect_env +``` + +### 后端日志位置 + +- 默认:`logs/workerlog.0` +- 多 Worker:`logs/workerlog.{N}` + +### Prometheus Metrics + +后端 metrics 端口(从注册信息获取 `metrics_port`): +``` +http://{worker_ip}:{metrics_port}/metrics +``` + +关键指标: +- `fastdeploy_llm_running_queue_size` — 当前推理中的请求数 +- `fastdeploy_llm_waiting_queue_size` — 等待队列长度 +- `fastdeploy_llm_generation_tokens_per_second` — 生成吞吐 +- `fastdeploy_llm_request_total` — 总请求数 diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/references/log_patterns.md b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/log_patterns.md new file mode 100644 index 00000000000..4322909c01d --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/log_patterns.md @@ -0,0 +1,293 @@ +# 日志格式与提取规则 + +本文档定义 Router 日志的所有类别、Grep 匹配模式、精确正则,供各子 skill 参考。 + +--- + +## 日志基本格式 + +``` +[LEVEL] YYYY/MM/DD HH:MM:SS logger.go:: [context_tags] message +``` + +### Context Tags(可选,顺序固定) + +- `[trace_id:]` +- `[req_id:]` +- `[session_id:]` +- `[request_id:]` + +所有 tag 可能同时出现,也可能只有部分或没有。顺序固定为:`trace_id → req_id → session_id → request_id`。 + +### ID 匹配正则 + +搜索某个 ID 时,同时匹配四种 tag: +``` +session_id:|trace_id:|request_id:|req_id: +``` + +--- + +## 日志分类提取 + +| 类别 | Grep 模式 | 用途 | 典型内容 | +|------|----------|------|---------| +| E1 — ERROR | `\[ERROR\]` | 错误分类 | 各类 Failed to ... 
错误 | +| E2 — WARN | `\[WARN\]` | 警告分类 | counter 异常、tokenizer 退化 | +| H1 — HTTP 请求 | `\] \[(POST\|GET)\] /` | 延迟/状态码/吞吐量 | HTTP middleware 日志行 | +| H2 — 健康事件 | `Removed unhealthy\|is not healthy\|is healthy` | Worker 健康时间线 | 上下线事件 | +| H2b — 注册事件 | `\] \[POST\] /register.*200` | Worker 注册 | 从 H1 HTTP 行中匹配 POST /register 返回 200 | +| H3 — 调度事件 | `select worker\|release worker\|Failed to select\|SelectWorkerPair` | 调度/计数器分析 | Worker 选择和释放 | +| H4 — 后端问题 | `Failed to connect\|request failed\|scanner error\|copy error\|Panic recovered` | 后端问题 | 连接/流式/Panic(注意:`scanner error`/`copy error` 与 H9 有重叠,带 `[prefill]` 前缀的行同时属于 H9) | +| H5 — Counter | `counter preserved\|cleanup unhealthy\|removed counters\|counter already\|double-release\|preserved counters` | 计数器异常 | 计数器生命周期 | +| H6 — Cache-aware | `cache-aware prefill: final strategy:` | Cache 调度诊断 | 策略选择 + hitRatios | +| H7 — Stats | `\[stats\]` | 负载/命中率 | 周期性统计行 | +| H8 — ts_ms | `ts_ms=` | 调度耗时 | 调度开始结束时间戳 | +| H9 — Prefill 生命周期 | `\[prefill\]` | PD 模式 prefill 追踪 | 首包/释放/错误 | +| H10 — 请求标记 | `Parsing completed\|Request completed successfully` | 请求生命周期 | 调度开始/请求结束标记 | +| H11 — Token 释放 | `release prefill tokens` | Token 计数器生命周期 | Token 释放事件 | + +--- + +## H1 — HTTP 请求行格式 + +``` +[INFO] 2025/01/15 18:25:33 logger.go:45: [POST] /v1/chat/completions HTTP/1.1 200 1.234567s 10.0.0.1 +``` + +字段:`[METHOD] /path HTTP/1.1 STATUS LATENCY CLIENT_IP` + +### 延迟单位归一化 + +Go `time.Duration.String()` 输出格式不固定,需归一化为毫秒: + +| 原始格式 | 含义 | 转换为 ms | +|---------|------|----------| +| `1.5s` | 秒 | × 1000 | +| `150ms` | 毫秒 | 直接使用 | +| `150.5ms` | 毫秒 | 直接使用 | +| `500µs` | 微秒 | ÷ 1000 | +| `500us` | 微秒(ASCII) | ÷ 1000 | +| `500ns` | 纳秒 | ÷ 1000000 | +| `1m30s` | 分+秒 | 分×60000 + 秒×1000 | +| `1h2m3s` | 时+分+秒 | 时×3600000 + 分×60000 + 秒×1000 | + +正则提取延迟值:`(\d+(?:\.\d+)?(?:h|m(?!s)|s|ms|µs|us|ns))+` + +### 仅推理请求 + +延迟分析只统计推理请求路径: +- `/v1/chat/completions` +- `/v1/completions` + +排除健康检查 `/health`、注册 `/register` 等管理路径。 + +--- + +## H6 — Cache-aware 策略行格式 + +``` 
+[INFO] 2025/01/15 18:25:33 logger.go:87: [trace_id:xxx] [session_id:xxx] cache-aware prefill: final strategy: cache_aware_scoring, selected=http://10.0.0.1:9965, loads=map[http://10.0.0.1:9965:2 http://10.0.0.2:9965:5], hitRatios=map[http://10.0.0.1:9965:0.85 http://10.0.0.2:9965:0.42]. ts_ms=2025-01-15 18:25:33.123 +``` + +``` +[INFO] ... cache-aware prefill: final strategy: process_tokens, reason: load imbalanced, loads=map[...]. ts_ms=2025-01-15 18:25:33.123 +``` + +注意:日志中**没有** `scores=map[...]` 字段。scores 仅在 DEBUG 级别的 `chooseByScore` 中逐条打印。 +如需分析非最优选择,需从 hitRatios + loads 使用公式重新计算: +`score = (100-hitRatio)/100 * hitRatioWeight + loadRatio * loadBalanceWeight` + +### Go map 解析 + +`hitRatios=map[key1:val1 key2:val2]` + +- 空 map:`hitRatios=map[]` — 表示冷启动 +- 正则提取 map 内容:`map\[(.*?)\]` +- 每对 key:value 用空格分隔:`(\S+):(\S+)` +- key 是 worker URL,value 是 float64 + +### selected worker 的 hitRatio + +从 hitRatios map 中查找 selected URL 的值: +- 在 map 中找到 → 使用该值 +- 不在 map 中 → hitRatio = 0 +- map 为空 → 冷启动,hitRatio = 0 + +### ts_ms 格式 + +`ts_ms=2025-01-15 18:25:33.123` + +格式:`2006-01-02 15:04:05.000`(Go reference time) + +用于计算调度耗时(两个 ts_ms 之间的差值)。 + +--- + +## H7 — Stats 行格式 + +``` +[INFO] 2025/01/15 18:25:33 logger.go:87: [stats] total_running=5, workers: [http://10.0.0.1:9965: running=2, http://10.0.0.2:9965: running=3], cache_hit_rate=85.71% (hits=6/total=7) +``` + +注意:由于 Go `log.Lshortfile` 打印的是 `Printf` 调用处,stats 行的源文件始终为 `logger.go:NN:`(行号随编译变化),而非 `handler.go`。 + +注意:stats 行**不包含**任何 context tag(trace_id 等),因为由后台 goroutine 周期输出。 + +### 关键:per-interval 计数器 + +`hits` 和 `total` 是 **per-interval** 的值(每 5s 通过 `atomic.Swap(0)` 重置为 0)。 + +计算累计值必须 **sum 所有行**: +- 累计 Session Hit Rate = `sum(hits) / sum(total) * 100` + +### Worker 负载提取 + +`workers: [url1: running=N, url2: running=N]` + +- 注意格式:`workers:` 带冒号+空格,每个 worker 格式为 `url: running=N`,逗号+空格分隔 +- **不包含 token 数据**(reportStats 只读取 running 计数) + +正则:`(http://[^:]+:\d+): running=(\d+)` + +### cache_hit_rate 提取 + 
+`cache_hit_rate=85.71% (hits=6/total=7)` + +正则:`cache_hit_rate=([\d.]+)% \(hits=(\d+)/total=(\d+)\)` + +--- + +## 模板归一化 + +ERROR/WARN 消息分组时,需将变量替换为占位符: + +| 变量类型 | 正则 | 替换为 | +|---------|------|-------| +| URL | `https?://[\w.:]+` | `{url}` | +| UUID | `[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}` | `{uuid}` | +| 数字 | `\d+` (仅在特定位置) | `{N}` | +| IP:Port | `\d+\.\d+\.\d+\.\d+:\d+` | `{ip:port}` | + +--- + +## Fallback 策略行识别 + +| final strategy | reason 关键词 | 含义 | +|---------------|--------------|------| +| `cache_aware_scoring` | (无 reason) | 正常 cache-aware 调度 | +| `process_tokens` | `tokenize failed` | 退化 B:字符级 tokenize 也失败 | +| `process_tokens` | `load imbalanced` | 退化 C:负载不均衡 | +| `process_tokens` | (其他) | 退化 D:策略未初始化等 | + +退化 A(Tokenizer 服务→字符级)在 WARN 行识别: +``` +[WARN] ... cache-aware prefill: tokenizer failed, fallback to char tokens: {err} +``` +注意完整前缀 `cache-aware prefill: tokenizer failed`。 +退化 A 后仍可走 cache_aware_scoring(精度降低),与 B/C/D 不互斥。 + +--- + +## H4 — 后端问题匹配说明 + +H4 的 `request failed` 模式会匹配多个消息模板: +- `Request failed (attempt {n}/{max}): {err}` — 重试日志 +- `Decode request failed for {url}: {err}` — PD 模式 decode 失败 +- `Prefill request failed for {url}: {err}` — PD 模式 prefill 失败 +- `Backend request failed for {url}: {err}` — 后端请求失败 + +分析时需通过模板归一化去重。 + +--- + +## H9 — Prefill 生命周期事件 + +PD(Prefill/Decode 分离)模式下,`completions.go` 产生的 `[prefill]` 前缀日志: + +| 消息模板 | 含义 | +|---------|------| +| `[prefill] first chunk received, release counter url=%s` | Prefill 首包到达,释放计数器 | +| `[prefill] non-stream prefill response done, release counter url=%s` | 非流式 prefill 完成 | +| `[prefill] release in defer (fallback) url=%s, isStream=%v` | defer 兜底释放 | +| `[prefill] release in CommonCompletions defer (error path) url=%s` | 错误路径释放 | +| `[prefill] backendResp is nil or backendResp.Body is nil, url=%s` | 后端响应异常 | +| `[prefill] scanner error: %v, message=%s` | 流式读取错误(ERROR 级别) | +| `[prefill] copy error: %v, message=%s` | 非流式复制错误(ERROR 级别) | + +--- + +## H10 — 
请求生命周期标记 + +| 消息 | 含义 | 级别 | +|------|------|------| +| `Parsing completed; starting worker selection.` | 请求解析完成,开始调度 | INFO | +| `Request completed successfully.` | 请求成功完成 | INFO | + +--- + +## H11 — Token 释放 + +`release prefill tokens: %s, tokens: %d` — 释放 prefill token 计数。 +数据源:`handler.go:333`。用于 troubleshoot-load 的 token 计数器分析。 + +--- + +## Select/Release 日志细节(与代码一致) + +- `select worker (prefill): , tokens: ` +- `select worker (decode|mixed): , count: ` +- `release worker: , count: `(request counter 释放) +- `release prefill tokens: , tokens: `(token counter 释放;可能来自 prefill 或 mixed 请求路径) + +重点:release 只有上面这两种。`release worker` 不带 worker type,`release prefill tokens` 的文本也不能直接断定是 prefill(mixed 也可能调用)。因此按 `prefill/decode/mixed` 统计时,需要从 select 侧做归类;确实无法归类时才记为 `unknown`。 + +--- + +## 使用脚本工具 + +各 skill 的脚本位于各自的 `scripts/` 目录下,自动处理上述所有日志解析和计算。 + +### 快速参考 + +| 任务 | 脚本 | +|------|------| +| 解析 H1 HTTP 行 | `log_parser.py parse-http [--inference-only]` | +| 解析 H6 cache 策略行 | `log_parser.py parse-cache-strategy` | +| 解析 H7 stats 行 | `log_parser.py parse-stats` | +| 检测非支持请求 | `log_parser.py unsupported-requests [--summary-only]` | +| ASCII 折线图 | `chart.py` | +| Unicode 柱状图 | `chart.py` | +| Markdown 表格 | `chart.py` | +| Worker 时间线 | `chart.py` | + +所有工具从 stdin 读取,输出到 stdout。中间数据使用 JSON Lines 格式。 + +--- + +## 已知路由列表 + +Router 支持的全部路由(来自 `internal/router/router.go`): + +| Method | Path | 类型 | +|--------|------|------| +| POST | `/v1/chat/completions` | 推理 | +| POST | `/v1/completions` | 推理 | +| POST | `/register` | 实例注册 | +| GET | `/registered_number` | 注册数量查询 | +| GET | `/registered` | 注册列表查询 | +| GET | `/health_generate` | 健康检查 | +| GET | `/metrics` | Prometheus 指标 | + +### 非支持请求排查 + +客户端可能发送不属于已知路由的请求(如 `/v1/models`),会收到 404 但仍记录在 H1 HTTP 日志中。 + +使用 `log_parser.py unsupported-requests` 子命令检测: +```bash +# 完整输出(详细列表 + 汇总) +grep -E '\] \[(POST|GET|PUT|DELETE|PATCH|HEAD|OPTIONS)\] /' logfile | python3 log_parser.py unsupported-requests + +# 仅汇总 +grep -E '\] 
\[(POST|GET|PUT|DELETE|PATCH|HEAD|OPTIONS)\] /' logfile | python3 log_parser.py unsupported-requests --summary-only +``` diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/references/report_templates.md b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/report_templates.md new file mode 100644 index 00000000000..61db59ec7e6 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/references/report_templates.md @@ -0,0 +1,131 @@ +# 报告输出规范 + +所有 troubleshoot 分析维度共享的可视化和格式规范。 + +--- + +## 通用可视化组件 + +### Unicode 柱状图 +- 填充块:`█`(U+2588),空块:`░`(U+2591) +- 总宽度:20 字符,右侧标注百分比和计数 +- 块数 = round(percentage / 100 * 20),最小 1 块(>0% 时) + +### Sparkline 折线图 +- 字符集:`▁▂▃▄▅▆▇█`(8 级高度) +- 图表宽度:60 字符,自动降采样 +- X 轴标注时间(首/尾 + 中间 2-3 个刻度) +- Y 轴自适应:百分比类 0-100%,计数类 0-max + +### Markdown 表格 +- 标准 Markdown 表格格式 +- 数值列右对齐 + +### Worker 可用性时间线 +- `█` = 在线,`░` = 下线 +- 右侧标注在线率百分比 + +--- + +## 严重程度标记 + +| 标记 | 含义 | 使用场景 | +|------|------|---------| +| CRITICAL | 服务不可用 | Panic、全部 Worker 不健康、错误率 >20% | +| HIGH | 部分请求失败 | 502/503、Worker 频繁下线 | +| MEDIUM | 性能下降 | 高延迟、cache 命中率低 | +| LOW | 需关注 | 计数器异常、tokenizer 退化 | +| INFO | 正常 | 统计信息 | + +--- + +## 报告格式 + +### 简洁版(终端输出) + +- 第一行:`STATUS: HEALTHY / DEGRADED / CRITICAL — 简要说明` +- 状态定义:`HEALTHY`=无明显异常;`DEGRADED`=服务可用但性能/稳定性下降(需关注);`CRITICAL`=服务不可用或高风险故障 +- 按三层分类(Router / FD 后端 / 客户端) +- 每个问题一行摘要 + 关键指标 +- 末尾提示详细版文件路径 + +### 详细版(文件导出) + +- 路径:`skill_output/troubleshoot//troubleshoot_report_.md` +- 主报告包含各维度总结 + 可视化图表(sparkline/柱状图/时间线等) +- 详情拆分到 `detail/` 子目录: + - `detail/health_events.md` — Worker 逐分钟健康事件 + 健康诊断 + - `detail/errors_topn.md` — ERROR/WARN 模板明细(数量/级别/来源层/影响 + URLs) + - `detail/load_select_release.md` — 负载诊断 + select/release 明细 + - `detail/load_diagnoses.md` — load 诊断列表 + - `detail/load_counter_state.md` — request/token counter 末状态 + - `detail/latency_diagnoses.md` — 延迟诊断详情 + - `detail/cache_diagnosis.md` — cache 六维诊断详情(session 粘性/非最优/驱逐/Fallback/冷启动/交叉诊断) + - `detail/cache_session_stickiness.md` / 
`detail/cache_suboptimal.md` / `detail/cache_eviction.md` / `detail/cache_fallback.md` / `detail/cache_cross.md` — cache 分职责拆分明细 + - `detail/trace/trace_.md` — 请求追踪事件链 + +--- + +## 状态判定规则 + +- **CRITICAL**:存在 Panic、全部 Worker 不健康、或错误率 >20% +- **DEGRADED**:存在 502/503、Worker 不稳定、或错误率 >5% +- **HEALTHY**:无严重问题 + +--- + +## 各维度报告结构 + +### Errors(错误分析) + +``` +HTTP 状态码分布(柱状图) +错误率趋势(折线图) +ERROR/WARN Top N(柱状图 + 表格,标注来源层) +Panic 列表 +``` + +### Latency(延迟分析)— 待实现 + +``` +延迟百分位数 (p50/p90/p95/p99) +延迟分布(柱状图) +吞吐量趋势(折线图) +慢请求 Top 10 +``` + +### Health(Worker 健康)— 待实现 + +``` +Worker 可用性时间线 +健康事件汇总表 +可用性统计 +``` + +### Cache(调度诊断) + +``` +调度策略分布 +Session 粘性分析 +非最优选择分析 +Fallback 原因分类 +驱逐影响与交叉诊断 +``` + +要求:即使某项计数为 0(例如“非最优选择”),也要输出该小节并给出“未发现/样本不足”总结,保证 detail 链接稳定存在。 + +### Load(负载分析)— 待实现 + +``` +Worker 负载分布 +计数器异常检测 +Token 计数器统计 +``` + +### Trace(请求追踪)— 待实现 + +``` +单请求事件链 +生命周期完整性检查 +Session 多请求汇总 +``` diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/__init__.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/__init__.py new file mode 100644 index 00000000000..e7bb50660a8 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/__init__.py @@ -0,0 +1 @@ +# Analyzers package diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/cache.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/cache.py new file mode 100644 index 00000000000..a12341967a0 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/cache.py @@ -0,0 +1,607 @@ +#!/usr/bin/env python3 +""" +Cache Analyzer — Cache 调度诊断 + +分析 cache-aware 调度策略:session 粘性、非最优选择评分、驱逐影响、 +fallback 原因、冷启动识别、交叉诊断。 +注意:cache 命中率数值分析由 stat-cache-hitrate skill 负责,本模块做策略诊断。 +""" + +import os +import re +import subprocess +import sys +from collections import defaultdict + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from chart 
import render_bar, render_table +from log_parser import parse_cache_strategy_line, parse_ts +from stats import compute_statistics, count_by + +# ════════════════════════════════════════════════════════════════ +# Fallback 分类 +# ════════════════════════════════════════════════════════════════ + +TOKENIZER_WARN_RE = re.compile(r"tokenizer failed, fallback to char tokens") + + +def _strip_scheme(url): + return re.sub(r"^https?://", "", url) + + +def classify_fallback(record, tokenizer_degraded_ts=None): + """对 process_tokens 策略行分类 fallback 原因。 + + Returns: 'A-Tokenizer退化' | 'B-char tokenize失败' | 'C-负载不均衡' | 'D-其他' + """ + reason = record.get("reason", "") + if "load imbalanced" in reason: + return "C-负载不均衡" + if "tokenize failed" in reason: + return "B-char tokenize失败" + return "D-其他" + + +# ════════════════════════════════════════════════════════════════ +# 主分析函数 +# ════════════════════════════════════════════════════════════════ + + +def analyze_cache(log_file, tail=None, eviction_duration_mins=30, hit_ratio_weight=1.0, load_balance_weight=1.0): + """分析 cache-aware 调度策略。 + + Args: + log_file: 日志文件路径 + tail: 尾部行数限制 + eviction_duration_mins: 驱逐时间(分钟,默认 30) + hit_ratio_weight: hitRatio 权重(默认 1.0) + load_balance_weight: loadBalance 权重(默认 1.0) + + Returns: + dict: {strategy_dist, fallback_reasons, session_stickiness, suboptimal_selections, + eviction_impact, cold_starts, hitratio_stats, diagnoses, summary} + """ + h6_lines = _grep_lines(log_file, r"cache-aware prefill: final strategy:", tail) + tokenizer_warn_lines = _grep_lines(log_file, r"tokenizer failed, fallback to char tokens", tail) + + # 解析策略行 + strategy_records = [r for line in h6_lines for r in [parse_cache_strategy_line(line)] if r] + + if not strategy_records: + return { + "strategy_dist": [], + "fallback_reasons": [], + "session_stickiness": {}, + "suboptimal_selections": [], + "eviction_impact": [], + "cold_starts": 0, + "hitratio_stats": {}, + "diagnoses": [], + "summary": "未检测到 cache-aware 策略日志", + } + + 
# Tokenizer 退化次数 + tokenizer_degraded_count = len(tokenizer_warn_lines) + + # 策略分布 + strategy_dist = count_by(strategy_records, "strategy") + + # Fallback 原因 + fallback_records = [r for r in strategy_records if r.get("strategy") == "process_tokens"] + fallback_reasons = [] + if fallback_records: + for r in fallback_records: + r["fallback_type"] = classify_fallback(r) + fallback_reasons = count_by(fallback_records, "fallback_type") + + # hitRatio 统计 + hr_vals = [r.get("selected_hitRatio", 0) for r in strategy_records if "selected_hitRatio" in r] + hitratio_stats = compute_statistics(hr_vals) if hr_vals else {} + + # Session 粘性分析 + session_stickiness = _analyze_session_stickiness(strategy_records) + + # 非最优选择分析 + suboptimal = _analyze_suboptimal(strategy_records, hit_ratio_weight, load_balance_weight) + + # 驱逐影响 + eviction_impact = _analyze_eviction(strategy_records, eviction_duration_mins) + + # 冷启动 + cold_starts = sum(1 for r in strategy_records if r.get("hitRatios") == {}) + + total = len(strategy_records) + cache_aware_count = sum(1 for r in strategy_records if r["strategy"] == "cache_aware_scoring") + fallback_count = len(fallback_records) + + diagnoses = _diagnose( + strategy_dist, + fallback_reasons, + session_stickiness, + suboptimal, + eviction_impact, + cold_starts, + total, + tokenizer_degraded_count, + hitratio_stats, + ) + + return { + "strategy_dist": strategy_dist, + "fallback_reasons": fallback_reasons, + "session_stickiness": session_stickiness, + "suboptimal_selections": suboptimal, + "eviction_impact": eviction_impact, + "cold_starts": cold_starts, + "hitratio_stats": hitratio_stats, + "tokenizer_degraded_count": tokenizer_degraded_count, + "cross_diagnosis": _analyze_cross_diagnosis( + session_stickiness=session_stickiness, + hitratio_stats=hitratio_stats, + strategy_dist=strategy_dist, + eviction_impact=eviction_impact, + ), + "diagnoses": diagnoses, + "summary": f"{total} 策略决策, cache_aware {cache_aware_count}, fallback {fallback_count}, " + 
f"冷启动 {cold_starts}", + } + + +def _analyze_session_stickiness(records): + """Session 粘性分析。""" + sessions = defaultdict(list) + for r in records: + sid = (r.get("tags") or {}).get("session_id") + if sid and "selected" in r: + sessions[sid].append(r["selected"]) + + result = {} + for sid, workers in sessions.items(): + if len(workers) < 2: + continue + same_count = sum(1 for i in range(1, len(workers)) if workers[i] == workers[i - 1]) + stickiness = round(same_count / (len(workers) - 1) * 100, 1) + switches = [(i, workers[i - 1], workers[i]) for i in range(1, len(workers)) if workers[i] != workers[i - 1]] + result[sid] = { + "total_requests": len(workers), + "stickiness_pct": stickiness, + "switches": len(switches), + } + + return result + + +def _analyze_suboptimal(records, hr_weight, lb_weight): + """非最优选择分析:selected 的 hitRatio 不是最高时,重新计算 score 对比。""" + suboptimal = [] + for r in records: + if r.get("strategy") != "cache_aware_scoring": + continue + hit_ratios = r.get("hitRatios", {}) + loads = r.get("loads", {}) + selected = r.get("selected") + if not hit_ratios or not selected or selected not in hit_ratios: + continue + + max_hr = max(hit_ratios.values()) if hit_ratios else 0 + sel_hr = hit_ratios.get(selected, 0) + + if sel_hr >= max_hr: + continue + + # 计算 scores: score = (100-hitRatio)/100 * hrWeight + loadRatio * lbWeight + # Go 源码使用 maxLoad 做归一化: loadRatio = load / maxLoad + max_load = max(loads.values()) if loads else 1 + max_load = max(max_load, 1) + scores = {} + for w_url in hit_ratios: + hr = hit_ratios.get(w_url, 0) + load = loads.get(w_url, 0) + load_ratio = load / max_load + score = (100 - hr) / 100 * hr_weight + load_ratio * lb_weight + scores[w_url] = round(score, 4) + + best_by_hr = min(hit_ratios, key=lambda w: -hit_ratios[w]) + sel_score = scores.get(selected, 0) + best_hr_score = scores.get(best_by_hr, 0) + + # 分类原因 + load_diff = abs(loads.get(selected, 0) - loads.get(best_by_hr, 0)) + if load_diff > 5: + reason = "负载主导" + elif max_hr < 10: + 
reason = "区分度不够" + elif abs(sel_score - best_hr_score) < 0.05: + reason = "正常竞争" + else: + reason = "综合权衡" + + suboptimal.append( + { + "ts": r.get("ts", ""), + "selected": _strip_scheme(selected), + "selected_hr": sel_hr, + "best_hr_worker": _strip_scheme(best_by_hr), + "best_hr": max_hr, + "reason": reason, + } + ) + + return suboptimal + + +def _analyze_eviction(records, eviction_mins): + """驱逐影响分析:同 session 连续请求间隔 > eviction_duration。""" + sessions = defaultdict(list) + for r in records: + sid = (r.get("tags") or {}).get("session_id") + ts = r.get("ts") + if sid and ts: + sessions[sid].append(r) + + impacts = [] + for sid, reqs in sessions.items(): + reqs.sort(key=lambda x: x.get("ts", "")) + for i in range(1, len(reqs)): + try: + prev_dt = parse_ts(reqs[i - 1]["ts"]) + curr_dt = parse_ts(reqs[i]["ts"]) + interval_mins = (curr_dt - prev_dt).total_seconds() / 60 + if interval_mins > eviction_mins: + curr_hr = reqs[i].get("selected_hitRatio", -1) + impacts.append( + { + "session_id": sid, + "interval_mins": round(interval_mins, 1), + "hitRatio_after": curr_hr, + "evicted": curr_hr == 0, + } + ) + except (ValueError, KeyError): + pass + + return impacts + + +def _diagnose( + strategy_dist, + fallback_reasons, + session_stickiness, + suboptimal, + eviction_impact, + cold_starts, + total, + tokenizer_degraded_count, + hitratio_stats, +): + """生成 cache 调度诊断。""" + diagnoses = [] + + # Tokenizer 退化 + if tokenizer_degraded_count > 0: + pct = round(tokenizer_degraded_count / max(total, 1) * 100, 1) + sev = "HIGH" if pct > 10 else "MEDIUM" + diagnoses.append( + { + "severity": sev, + "message": f"Tokenizer 退化 {tokenizer_degraded_count} 次 ({pct}%),精度降低", + "source_layer": "Router", + } + ) + + # Fallback 比例 + for s in strategy_dist: + if s["value"] == "process_tokens" and s["pct"] > 20: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'Fallback 到 process_tokens {s["pct"]}%,cache-aware 策略未生效', + "source_layer": "Router", + } + ) + + # 非最优选择 + if suboptimal and 
total > 0: + pct = round(len(suboptimal) / total * 100, 1) + if pct > 20: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f"非最优选择 {pct}%({len(suboptimal)}/{total})", + "source_layer": "Router", + } + ) + + # 冷启动 + if cold_starts > 0 and total > 0: + pct = round(cold_starts / total * 100, 1) + if pct > 10: + diagnoses.append( + {"severity": "LOW", "message": f"冷启动 {pct}%(hitRatios=map[])", "source_layer": "Router"} + ) + + # 驱逐影响 + evicted = [e for e in eviction_impact if e["evicted"]] + if evicted: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f"{len(evicted)} 次驱逐后 hitRatio=0,考虑增大 eviction-duration-mins", + "source_layer": "Router", + } + ) + + # hitRatio 整体偏低 + if hitratio_stats.get("mean", 100) < 20: + diagnoses.append( + { + "severity": "LOW", + "message": f'平均 hitRatio {hitratio_stats["mean"]}%,缓存效果较差', + "source_layer": "Router", + } + ) + + return diagnoses + + +def _analyze_cross_diagnosis(session_stickiness, hitratio_stats, strategy_dist, eviction_impact): + """交叉诊断:基于粘性/命中率/fallback/驱逐给出简表。""" + if not session_stickiness: + return [] + avg_stickiness = sum(v["stickiness_pct"] for v in session_stickiness.values()) / max(len(session_stickiness), 1) + mean_hr = hitratio_stats.get("mean", 0) + fallback_pct = 0 + for s in strategy_dist: + if s.get("value") == "process_tokens": + fallback_pct = s.get("pct", 0) + break + evicted_cnt = sum(1 for e in eviction_impact if e.get("evicted")) + + diagnosis = "运行良好" + action = "-" + if avg_stickiness >= 70 and mean_hr >= 40 and fallback_pct < 10: + diagnosis = "运行良好" + elif avg_stickiness >= 70 and mean_hr < 20 and evicted_cnt > 0: + diagnosis = "疑似驱逐导致命中率低" + action = "考虑增大 eviction-duration-mins" + elif avg_stickiness < 40 and fallback_pct >= 20: + diagnosis = "低粘性 + 高 fallback" + action = "检查负载阈值与 cache-aware 参数" + elif avg_stickiness < 40 and mean_hr < 20: + diagnosis = "低粘性 + 低命中" + action = "检查缓存预热与 prompt 稳定性" + + return [ + { + "avg_stickiness_pct": round(avg_stickiness, 1), + 
"mean_hitRatio_pct": round(mean_hr, 1), + "fallback_pct": round(fallback_pct, 1), + "evicted_after_timeout": evicted_cnt, + "diagnosis": diagnosis, + "action": action, + } + ] + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def format_cache_report(result): + """将分析结果格式化为终端报告。""" + sections = ["## Cache 调度诊断", ""] + sections.append(f' {result["summary"]}') + sections.append("") + detail_sections = ["# Cache 调度详情", "", f'总结: {result["summary"]}', ""] + + if result["diagnoses"]: + sections.append("### 诊断") + sections.append("") + sections.append(" 诊断见详情: [detail/cache_diagnosis.md](../detail/cache_diagnosis.md)") + sections.append("") + detail_sections.append("## 诊断") + detail_sections.append("") + for d in result["diagnoses"]: + detail_sections.append(f'[{d["severity"]}] [{d["source_layer"]}] {d["message"]}') + detail_sections.append("") + + # 策略分布 + if result["strategy_dist"]: + sections.append("### 策略分布") + sections.append("") + bar_data = [{"label": s["value"], "value": s["pct"], "count": s["count"]} for s in result["strategy_dist"]] + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + detail_sections.append("## 策略分布") + detail_sections.append("") + detail_sections.append(render_bar(bar_data, show_count=True)) + detail_sections.append("") + + # hitRatio 统计 + hs = result.get("hitratio_stats", {}) + if hs: + sections.append("### hitRatio 统计") + sections.append("") + sections.append( + f' mean={hs.get("mean",0)}% p50={hs.get("p50",0)}% p90={hs.get("p90",0)}% ' + f'p99={hs.get("p99",0)}% max={hs.get("max",0)}%' + ) + sections.append("") + + # Fallback 原因 + if result["fallback_reasons"]: + sections.append("### Fallback 原因分布") + sections.append("") + bar_data = [{"label": f["value"], "value": f["pct"], "count": f["count"]} for f in result["fallback_reasons"]] + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + 
detail_sections.append("## Fallback 原因分布") + detail_sections.append("") + detail_sections.append(render_bar(bar_data, show_count=True)) + detail_sections.append("") + + # Tokenizer 退化 + if result.get("tokenizer_degraded_count", 0) > 0: + sections.append(f' Tokenizer 退化: {result["tokenizer_degraded_count"]} 次') + sections.append("") + + # Session 粘性 + stickiness = result.get("session_stickiness", {}) + sections.append("### Session 粘性") + sections.append("") + sections.append(" Session 粘性详情见: [detail/cache_session_stickiness.md](../detail/cache_session_stickiness.md)") + sections.append("") + if stickiness: + table_data = [ + { + "Session": sid, + "请求数": str(s["total_requests"]), + "粘性率": f'{s["stickiness_pct"]}%', + "切换次数": str(s["switches"]), + } + for sid, s in sorted(stickiness.items(), key=lambda x: x[1]["stickiness_pct"]) + ] + detail_sections.append("## Session 粘性") + detail_sections.append("") + detail_sections.append( + render_table( + table_data, + columns=["Session", "请求数", "粘性率", "切换次数"], + right_align={"请求数", "粘性率", "切换次数"}, + ) + ) + detail_sections.append("") + else: + sections.append(" 未检测到可计算粘性的多请求 Session。") + sections.append("") + detail_sections.append("## Session 粘性") + detail_sections.append("") + detail_sections.append("- 无可用样本(需要同一 session 至少 2 次请求)。") + detail_sections.append("") + + # 非最优选择 + subs = result.get("suboptimal_selections") or [] + sections.append(f"### 非最优选择 ({len(subs)} 次)") + sections.append("") + sections.append(" 详情见: [detail/cache_suboptimal.md](../detail/cache_suboptimal.md)") + sections.append("") + if subs: + reason_counts = defaultdict(int) + for s in subs: + reason_counts[s["reason"]] += 1 + for reason, count in sorted(reason_counts.items(), key=lambda x: -x[1]): + sections.append(f" {reason}: {count} 次") + sections.append("") + detail_sections.append("## 非最优选择(Top 20)") + detail_sections.append("") + for s in subs[:20]: + detail_sections.append( + f'- [{s.get("ts","")}] 
selected={s.get("selected","")}({s.get("selected_hr",0)}), best={s.get("best_hr_worker","")}({s.get("best_hr",0)}), reason={s.get("reason","")}' + ) + detail_sections.append("") + else: + sections.append(" 未发现非最优选择(selected_hitRatio 始终为当次最高)。") + sections.append("") + detail_sections.append("## 非最优选择") + detail_sections.append("") + detail_sections.append("- 未发现非最优选择。") + detail_sections.append("") + + # 驱逐影响 + evictions = result.get("eviction_impact") or [] + evicted = [e for e in evictions if e["evicted"]] + sections.append(f"### 驱逐影响 ({len(evictions)} 次超时, {len(evicted)} 次缓存失效)") + sections.append("") + sections.append(" 详情见: [detail/cache_eviction.md](../detail/cache_eviction.md)") + sections.append("") + if evictions: + detail_sections.append("## 驱逐影响") + detail_sections.append("") + for e in evictions[:50]: + detail_sections.append( + f'- session={e.get("session_id","")[:24]} interval={e.get("interval_mins",0)}m hitRatio_after={e.get("hitRatio_after",0)} evicted={e.get("evicted",False)}' + ) + detail_sections.append("") + else: + sections.append(" 未检测到超时导致的潜在驱逐影响。") + sections.append("") + detail_sections.append("## 驱逐影响") + detail_sections.append("") + detail_sections.append("- 未检测到超时驱逐样本。") + detail_sections.append("") + + # 冷启动 + if result.get("cold_starts", 0) > 0: + sections.append(f' 冷启动: {result["cold_starts"]} 次(hitRatios=map[])') + sections.append("") + detail_sections.append("## 冷启动识别") + detail_sections.append("") + detail_sections.append(f'- 冷启动次数: {result["cold_starts"]}') + detail_sections.append("") + + sections.append("### 交叉诊断") + sections.append("") + sections.append(" 详情见: [detail/cache_cross.md](../detail/cache_cross.md)") + sections.append("") + if result.get("cross_diagnosis"): + detail_sections.append("## 交叉诊断") + detail_sections.append("") + detail_sections.append( + render_table( + result["cross_diagnosis"], + columns=[ + "avg_stickiness_pct", + "mean_hitRatio_pct", + "fallback_pct", + "evicted_after_timeout", + "diagnosis", + 
"action", + ], + right_align={"avg_stickiness_pct", "mean_hitRatio_pct", "fallback_pct", "evicted_after_timeout"}, + ) + ) + detail_sections.append("") + else: + sections.append(" 样本不足,未生成交叉诊断。") + sections.append("") + detail_sections.append("## 交叉诊断") + detail_sections.append("") + detail_sections.append("- 样本不足,未生成交叉诊断。") + detail_sections.append("") + + sections.append( + "> 详细诊断: [detail/cache_diagnosis.md](../detail/cache_diagnosis.md) | " + "[detail/cache_session_stickiness.md](../detail/cache_session_stickiness.md) | " + "[detail/cache_suboptimal.md](../detail/cache_suboptimal.md) | " + "[detail/cache_eviction.md](../detail/cache_eviction.md) | " + "[detail/cache_fallback.md](../detail/cache_fallback.md) | " + "[detail/cache_cross.md](../detail/cache_cross.md)" + ) + sections.append("") + + return "\n".join(sections), "\n".join(detail_sections) + + +# ════════════════════════════════════════════════════════════════ +# Grep 工具 +# ════════════════════════════════════════════════════════════════ + + +def _grep_lines(log_file, pattern, tail=None): + try: + if tail: + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + return "'" + s.replace("'", "'\\''") + "'" diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/errors.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/errors.py new file mode 100644 index 00000000000..f0e4c352b6c --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/errors.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python3 +""" +Errors Analyzer — 错误分类分析 + +分析 Router 
日志中的 ERROR/WARN 日志、HTTP 状态码分布、Panic 事件。 +按问题来源层(Router / FastDeploy 后端 / 客户端)标注每类错误。 +""" + +import os +import subprocess +import sys + +# 让 analyzers 能 import 同级 scripts 下的模块 +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from chart import render_bar, render_sparkline, render_table +from log_parser import extract_ts, parse_error_line, parse_http_line +from stats import count_by, time_bucket + +# ════════════════════════════════════════════════════════════════ +# 错误来源层映射(从 error_catalog.md 提取的核心规则) +# ════════════════════════════════════════════════════════════════ + +# 模板 → 来源层 映射(归一化后的模板匹配) +SOURCE_LAYER_RULES = [ + # Router 自身 + ("Failed to build disaggregate_info", "Router"), + ("Failed to encode modified request", "Router"), + ("Panic recovered", "Router"), + ("DefaultManager is nil", "Router"), + ("double-release", "Router"), + ("counter already cleaned up", "Router"), + ("counter already zero", "Router"), + ("tokenizer failed", "Router"), + ("Instance {url} role is unknown", "Router"), + ("Failed to read YAML file config/register.yaml", "Router"), + # 客户端 + ("Invalid request body", "客户端"), + ("Invalid JSON format", "客户端"), + ("Failed to read request body", "客户端"), + ("Failed to unmarshal request JSON", "客户端"), + # FD 后端(默认多数 ERROR 来自后端) + ("Failed to select", "FD 后端"), + ("Failed to connect to backend", "FD 后端"), + ("No available", "FD 后端"), + ("request failed", "FD 后端"), + ("Removed unhealthy", "FD 后端"), + ("is not healthy", "FD 后端"), + ("is healthy", "FD 后端"), + ("Backend request failed", "FD 后端"), + ("Decode request failed", "FD 后端"), + ("Prefill request failed", "FD 后端"), + ("Failed to create decode request", "FD 后端"), + ("Failed to create prefill request", "FD 后端"), + ("Failed to create backend request", "FD 后端"), + ("GetRemoteMetrics failed", "FD 后端"), +] + +IMPACT_RULES = [ + ("Failed to select", "请求可能返回 502/503"), + ("Failed to connect to backend", "后端不可达,请求失败"), + ("Panic recovered", "Router 代码异常,可能影响稳定性"), + 
("scanner error", "流式响应中断"), + ("copy error", "非流式响应中断"), + ("Failed to read YAML file config/register.yaml", "可选配置未加载(若未启用可忽略)"), +] + +# scanner error / copy error 特殊处理:context canceled → 客户端,其他 → FD 后端 +SCANNER_COPY_PATTERNS = ("scanner error", "copy error") + + +def classify_source_layer(template, original=""): + """根据错误模板判断来源层。""" + # scanner error / copy error 特殊判断 + for pat in SCANNER_COPY_PATTERNS: + if pat in template or pat in original: + if "context canceled" in original: + return "客户端" + return "FD 后端" + + for pattern, layer in SOURCE_LAYER_RULES: + if pattern in template: + return layer + + return "未知" + + +def classify_impact(template): + for pattern, impact in IMPACT_RULES: + if pattern in template: + return impact + return "-" + + +# ════════════════════════════════════════════════════════════════ +# 主分析函数 +# ════════════════════════════════════════════════════════════════ + + +def analyze_errors(log_file, tail=None, top_n=20): + """分析日志中的错误。 + + Args: + log_file: 日志文件路径 + tail: 尾部行数限制(None 则全量) + top_n: 错误 Top N + + Returns: + dict: { + error_top_n: [{template, count, pct, source_layer, level, urls}], + status_code_dist: [{value, count, pct}], + panic_list: [{ts, context}], + error_rate: float, + error_trend: [{bucket, count}], + total_errors: int, + total_warns: int, + total_requests: int, + summary: str, + } + """ + # Phase 1: Grep 提取各类日志 + error_lines = _grep_lines(log_file, r"\[ERROR\]", tail) + warn_lines = _grep_lines(log_file, r"\[WARN\]", tail) + http_lines = _grep_lines(log_file, r"\[(POST|GET)\] /", tail) + panic_lines = _grep_lines(log_file, "Panic recovered", tail) + + # Phase 2: 解析 + # 2.1 ERROR + WARN 归一化 + error_records = [parse_error_line(line) for line in error_lines] + warn_records = [parse_error_line(line) for line in warn_lines] + all_error_records = error_records + warn_records + + # 2.2 HTTP 请求解析 + http_records = [] + for line in http_lines: + r = parse_http_line(line) + if r: + http_records.append(r) + + # 2.3 Panic 提取 + 
panic_list = [] + for line in panic_lines: + ts = extract_ts(line) + panic_list.append({"ts": ts or "", "context": line.strip()}) + + # Phase 3: 分析 + # 3.1 按模板分组 Top N + error_top = _compute_error_top_n(all_error_records, top_n) + + # 3.2 HTTP 状态码分布 + status_dist = count_by(http_records, "status") + + # 3.3 错误率 + total_requests = len(http_records) + non_200 = sum(1 for r in http_records if r["status"] != 200) + error_rate = round(non_200 / total_requests * 100, 2) if total_requests else 0 + + # 3.4 错误趋势(按时间窗口统计非 200 请求数) + non_200_records = [r for r in http_records if r["status"] != 200] + error_trend = time_bucket(non_200_records, window="auto") + + return { + "error_top_n": error_top, + "status_code_dist": status_dist, + "panic_list": panic_list, + "error_rate": error_rate, + "error_trend": error_trend, + "total_errors": len(error_records), + "total_warns": len(warn_records), + "total_requests": total_requests, + } + + +def _compute_error_top_n(records, top_n): + """按模板分组并标注来源层。""" + # 分组 + groups = {} + for r in records: + tpl = r["template"] + if tpl not in groups: + groups[tpl] = { + "template": tpl, + "count": 0, + "level": r["level"], + "originals": [], + } + groups[tpl]["count"] += 1 + # 保留最多 5 个原始消息用于详细报告中提取 URL + if len(groups[tpl]["originals"]) < 5: + groups[tpl]["originals"].append(r["original"]) + + total = len(records) + result = [] + for g in sorted(groups.values(), key=lambda x: -x["count"]): + source_layer = classify_source_layer(g["template"], g["originals"][0] if g["originals"] else "") + result.append( + { + "template": g["template"], + "count": g["count"], + "pct": round(g["count"] / total * 100, 1) if total else 0, + "source_layer": source_layer, + "impact": classify_impact(g["template"]), + "level": g["level"], + "urls": _extract_urls(g["originals"]), + "sample_originals": g["originals"], + } + ) + if len(result) >= top_n: + break + + return result + + +def _extract_urls(originals): + import re + + urls = set() + for line in originals: + for 
m in re.findall(r"https?://[A-Za-z0-9_.:-]+", line): + urls.add(m) + return sorted(urls) + + +def _grep_lines(log_file, pattern, tail=None): + """用 grep 从日志文件提取匹配行。""" + try: + if tail: + # 先 tail 再 grep + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + """简单 shell 引号转义。""" + return "'" + s.replace("'", "'\\''") + "'" + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def format_errors_report(result): + """将分析结果格式化为终端报告。 + + Args: + result: analyze_errors 返回的 dict + + Returns: + str: 格式化后的报告文本 + """ + sections = [] + + # 标题 + sections.append("## 错误分析") + sections.append("") + + # 概览 + sections.append( + f' ERROR: {result["total_errors"]} | ' + f'WARN: {result["total_warns"]} | ' + f'请求总数: {result["total_requests"]} | ' + f'错误率: {result["error_rate"]}%' + ) + sections.append(" 指标口径: ERROR/WARN=日志级别计数;请求总数=HTTP 请求行数;错误率=非200请求数/请求总数×100%。") + if result["error_rate"] == 0 and (result["total_errors"] > 0 or result["total_warns"] > 0): + sections.append(" ℹ 错误率为 0.0% 仅表示 HTTP 状态码均为 200;并不代表没有 ERROR/WARN 日志。") + sections.append("") + + # Panic + if result["panic_list"]: + sections.append(f' ⚠ Panic 事件: {len(result["panic_list"])} 次') + for p in result["panic_list"][:5]: + sections.append(f' [{p["ts"]}] {p["context"][:100]}') + sections.append("") + + # 错误 Top N + if result["error_top_n"]: + sections.append("### ERROR/WARN Top 分类") + sections.append("") + bar_data = [] + for e in result["error_top_n"][:10]: + label = e["template"][:50] + bar_data.append( + { + "label": 
label, + "value": e["pct"], + "count": e["count"], + } + ) + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + + sections.append(" 具体模板表见: [../detail/errors_topn.md](../detail/errors_topn.md)") + sections.append("") + yaml_missing_count = sum( + e["count"] for e in result["error_top_n"] if "Failed to read YAML file config/register.yaml" in e["template"] + ) + if yaml_missing_count > 0: + sections.append( + f" ℹ `Failed to read YAML file config/register.yaml` 出现 {yaml_missing_count} 次:若未启用该配置文件,可忽略。" + ) + sections.append("") + + # 状态码分布 + if result["status_code_dist"]: + sections.append("### HTTP 状态码分布") + sections.append("") + bar_data = [] + for s in result["status_code_dist"]: + bar_data.append( + { + "label": str(s["value"]), + "value": s["pct"], + "count": s["count"], + } + ) + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + + # 错误趋势 + if result["error_trend"] and len(result["error_trend"]) > 1: + sections.append("### 非 200 请求趋势") + sections.append("") + sections.append( + render_sparkline( + result["error_trend"], + value_field="count", + title="Error Count", + y_label="req", + ) + ) + sections.append("") + + return "\n".join(sections) diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/health.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/health.py new file mode 100644 index 00000000000..5d1994d9405 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/health.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python3 +""" +Health Analyzer — Worker 健康时间线分析 + +追踪 Worker 上下线事件、恢复检测、可用性统计。 +按 Worker URL 聚合事件,构建状态时间线。 +""" + +import os +import re +import subprocess +import sys +from collections import defaultdict + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from chart import render_table, render_timeline +from log_parser import extract_ts, parse_http_line, parse_ts + +# 
════════════════════════════════════════════════════════════════ +# 健康事件解析 +# ════════════════════════════════════════════════════════════════ + +WORKER_URL_RE = r"((?:https?://)?[A-Za-z0-9.-]+(?::\d+)?)" +NOT_HEALTHY_RE = re.compile(rf"{WORKER_URL_RE}\s+is not healthy") +REMOVED_RE = re.compile(rf"Removed unhealthy \w+ instance:\s*{WORKER_URL_RE}") +IS_HEALTHY_RE = re.compile(rf"{WORKER_URL_RE}\s+is healthy") +COUNTER_PRESERVED_RE = re.compile(rf"counter preserved.*?{WORKER_URL_RE}") +CLEANUP_UNHEALTHY_RE = re.compile(rf"cleanup unhealthy.*?{WORKER_URL_RE}") + + +def _strip_scheme(url): + return re.sub(r"^https?://", "", url) + + +def parse_health_event(line): + """解析 H2 健康事件行。返回 {ts, worker, event_type} 或 None。""" + ts = extract_ts(line) + m = REMOVED_RE.search(line) + if m: + return {"ts": ts, "worker": m.group(1), "event_type": "REMOVED"} + m = NOT_HEALTHY_RE.search(line) + if m: + return {"ts": ts, "worker": m.group(1), "event_type": "NOT_HEALTHY"} + m = IS_HEALTHY_RE.search(line) + if m: + return {"ts": ts, "worker": m.group(1), "event_type": "HEALTHY"} + return None + + +def parse_counter_preserved(line): + """解析 H5 counter preserved / cleanup 事件。""" + ts = extract_ts(line) + m = COUNTER_PRESERVED_RE.search(line) + if m: + return {"ts": ts, "worker": m.group(1), "event_type": "COUNTER_PRESERVED"} + m = CLEANUP_UNHEALTHY_RE.search(line) + if m: + return {"ts": ts, "worker": m.group(1), "event_type": "CLEANUP_UNHEALTHY"} + return None + + +# ════════════════════════════════════════════════════════════════ +# 主分析函数 +# ════════════════════════════════════════════════════════════════ + + +def analyze_health(log_file, tail=None): + """分析 Worker 健康状态。 + + Returns: + dict: {workers, diagnoses, time_range, summary} + """ + h2_lines = _grep_lines(log_file, r"Removed unhealthy|is not healthy|is healthy", tail) + h5_lines = _grep_lines(log_file, r"counter preserved|cleanup unhealthy", tail) + register_lines = _grep_lines(log_file, r"\[POST\] /register", tail) + + 
health_events = [e for line in h2_lines for e in [parse_health_event(line)] if e] + counter_events = [e for line in h5_lines for e in [parse_counter_preserved(line)] if e] + + register_events = [] + for line in register_lines: + r = parse_http_line(line) + if r and r["method"] == "POST" and r["path"] == "/register" and r["status"] == 200: + register_events.append({"ts": r["ts"], "client_ip": r["client_ip"]}) + + if not health_events and not register_events: + return { + "workers": {}, + "diagnoses": [], + "time_range": {"start": "", "end": ""}, + "summary": "未检测到 Worker 健康事件", + } + + workers = _build_worker_timelines(health_events, counter_events, register_events) + + all_ts = sorted([e["ts"] for e in health_events + register_events if e.get("ts")]) + time_range = {"start": all_ts[0] if all_ts else "", "end": all_ts[-1] if all_ts else ""} + + diagnoses = _diagnose(workers) + down_workers = sum(1 for w in workers.values() if w["down_count"] > 0) + + return { + "workers": workers, + "diagnoses": diagnoses, + "time_range": time_range, + "summary": f"{len(workers)} Worker(s), {down_workers} 有下线事件", + } + + +def _build_worker_timelines(health_events, counter_events, register_events): + """构建每个 Worker 的状态时间线。""" + worker_urls = {evt["worker"] for evt in health_events} + + # IP → worker URL 映射 + ip_to_urls = defaultdict(set) + for url in worker_urls: + ip_m = re.search(r"(?:https?://)?(\d+\.\d+\.\d+\.\d+)", url) + if ip_m: + ip_to_urls[ip_m.group(1)].add(url) + + worker_events = defaultdict(list) + for evt in health_events: + worker_events[evt["worker"]].append(evt) + + counter_counts = defaultdict(int) + for evt in counter_events: + if evt["event_type"] == "COUNTER_PRESERVED": + counter_counts[evt["worker"]] += 1 + + register_by_ip = defaultdict(list) + for evt in register_events: + register_by_ip[evt["client_ip"]].append(evt) + + workers = {} + for url in sorted(worker_urls): + events = sorted(worker_events[url], key=lambda e: e["ts"] or "") + ip_m = 
re.search(r"(?:https?://)?(\d+\.\d+\.\d+\.\d+)", url) + worker_ip = ip_m.group(1) if ip_m else "" + + # 恢复检测:REMOVED 后有 register + recovered = False + recovery_events = [] + for evt in events: + if evt["event_type"] == "REMOVED" and worker_ip: + for reg in register_by_ip.get(worker_ip, []): + if reg["ts"] and evt["ts"] and reg["ts"] > evt["ts"]: + recovered = True + recovery_events.append({"ts": reg["ts"], "type": "RE-REGISTERED"}) + break + + all_events = [{"ts": e["ts"], "type": e["event_type"]} for e in events] + for reg in register_by_ip.get(worker_ip, []): + all_events.append({"ts": reg["ts"], "type": "REGISTERED"}) + all_events.extend(recovery_events) + all_events.sort(key=lambda e: e["ts"] or "") + + down_periods = _compute_down_periods(all_events) + down_count = len(down_periods) + avg_down_s = (sum(p["duration_s"] for p in down_periods) / len(down_periods)) if down_periods else 0.0 + detect_latency = _compute_detect_latency(all_events) + + workers[url] = { + "events": all_events, + "uptime_pct": _compute_uptime_pct(all_events), + "down_count": down_count, + "avg_down_duration_s": round(avg_down_s, 1), + "recovered": recovered, + "inflight_preserved": counter_counts.get(url, 0), + "down_periods": down_periods, + "avg_detect_latency_s": detect_latency, + } + + return workers + + +def _compute_down_periods(events): + """从事件列表计算下线时段。""" + down_periods = [] + down_start = None + for evt in events: + if evt["type"] in ("NOT_HEALTHY", "REMOVED"): + if down_start is None and evt["ts"]: + down_start = evt["ts"] + elif evt["type"] in ("HEALTHY", "RE-REGISTERED"): + if down_start is not None and evt["ts"]: + try: + duration_s = (parse_ts(evt["ts"]) - parse_ts(down_start)).total_seconds() + down_periods.append({"start": down_start, "end": evt["ts"], "duration_s": max(0, duration_s)}) + except ValueError: + pass + down_start = None + if down_start is not None: + down_periods.append({"start": down_start, "end": None, "duration_s": 0}) + return down_periods + + +def 
_compute_detect_latency(events): + """计算 NOT_HEALTHY -> REMOVED 平均检测延迟(秒)。""" + last_unhealthy = None + latencies = [] + for evt in events: + if evt["type"] == "NOT_HEALTHY" and evt.get("ts"): + last_unhealthy = evt["ts"] + elif evt["type"] == "REMOVED" and last_unhealthy and evt.get("ts"): + try: + latencies.append((parse_ts(evt["ts"]) - parse_ts(last_unhealthy)).total_seconds()) + except ValueError: + pass + last_unhealthy = None + if not latencies: + return "-" + return round(sum(latencies) / len(latencies), 1) + + +def _compute_uptime_pct(events): + """计算 Worker 可用性百分比。""" + if not events: + return 100.0 + ts_list = [e["ts"] for e in events if e["ts"]] + if len(ts_list) < 2: + return 0.0 if events[0]["type"] in ("NOT_HEALTHY", "REMOVED") else 100.0 + try: + first_dt, last_dt = parse_ts(ts_list[0]), parse_ts(ts_list[-1]) + total_s = (last_dt - first_dt).total_seconds() + if total_s <= 0: + return 100.0 + except ValueError: + return 100.0 + + down_s, down_start = 0.0, None + for evt in events: + if evt["type"] in ("NOT_HEALTHY", "REMOVED") and down_start is None and evt["ts"]: + try: + down_start = parse_ts(evt["ts"]) + except ValueError: + pass + elif evt["type"] in ("HEALTHY", "RE-REGISTERED") and down_start is not None and evt["ts"]: + try: + down_s += (parse_ts(evt["ts"]) - down_start).total_seconds() + except ValueError: + pass + down_start = None + if down_start is not None: + down_s += (last_dt - down_start).total_seconds() + + return round(max(0, total_s - down_s) / total_s * 100, 1) + + +def _diagnose(workers): + """根据 Worker 健康数据生成诊断。""" + diagnoses = [] + if not workers: + return diagnoses + + all_down = all(w["events"] and w["events"][-1]["type"] in ("NOT_HEALTHY", "REMOVED") for w in workers.values()) + if all_down: + diagnoses.append( + { + "severity": "CRITICAL", + "message": f"所有 Worker ({len(workers)}) 当前均不可用", + "source_layer": "FD 后端", + } + ) + + for url, w in workers.items(): + s = _strip_scheme(url) + if w["down_count"] > 3: + 
diagnoses.append( + { + "severity": "HIGH", + "message": f'{s} 下线 {w["down_count"]} 次,Worker 不稳定', + "source_layer": "FD 后端", + } + ) + for p in w.get("down_periods", []): + if p["duration_s"] > 300: + diagnoses.append( + { + "severity": "HIGH", + "message": f'{s} 下线 {p["duration_s"]/60:.1f}min({p["start"]} ~ {p["end"] or "未恢复"})', + "source_layer": "FD 后端", + } + ) + if len(w["events"]) >= 3: + ts_list = [e["ts"] for e in w["events"] if e["ts"]] + if len(ts_list) >= 2: + try: + hours = (parse_ts(ts_list[-1]) - parse_ts(ts_list[0])).total_seconds() / 3600 + if hours > 0 and len(w["events"]) / hours > 3: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'{s} 状态变更频繁 ({len(w["events"])/hours:.1f} 次/小时)', + "source_layer": "FD 后端", + } + ) + except ValueError: + pass + if w["inflight_preserved"] > 3: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'{s} counter preserved {w["inflight_preserved"]} 次(下线时仍有 inflight 请求)', + "source_layer": "FD 后端", + } + ) + + return diagnoses + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def format_health_report(result): + """将分析结果格式化为终端报告。 + + Returns: + tuple: (summary_text, detail_text) + summary_text: 总结部分(诊断 + 可用性表格 + 时间线) + detail_text: 事件详情(逐条事件记录,可能很长) + """ + sections = ["## Worker 健康分析", ""] + if not result["workers"]: + sections.append(" 未检测到 Worker 健康事件(所有 Worker 状态正常或无健康日志)") + return "\n".join(sections), "" + + sections.append(f' {result["summary"]}') + if result["time_range"]["start"]: + sections.append(f' 时间范围: {result["time_range"]["start"]} ~ {result["time_range"]["end"]}') + sections.append("") + + if result["diagnoses"]: + sections.append("### 诊断") + sections.append("") + sections.append(" 诊断见详情: [detail/health_events.md](../detail/health_events.md)") + sections.append("") + + # Worker 可用性表格 + sections.append("### Worker 可用性") + sections.append("") + table_data = [] + for url, w in 
sorted(result["workers"].items()): + avg_down = "" + if w["avg_down_duration_s"] > 0: + avg_down = ( + f'{w["avg_down_duration_s"]/60:.1f}min' + if w["avg_down_duration_s"] >= 60 + else f'{w["avg_down_duration_s"]:.0f}s' + ) + table_data.append( + { + "Worker": _strip_scheme(url), + "在线率": f'{w["uptime_pct"]}%', + "下线次数": str(w["down_count"]), + "平均下线时长": avg_down or "-", + "检测延迟": (f'{w["avg_detect_latency_s"]}s' if w["avg_detect_latency_s"] != "-" else "-"), + "恢复": "是" if w["recovered"] else ("否" if w["down_count"] > 0 else "-"), + "inflight保留": str(w["inflight_preserved"]) if w["inflight_preserved"] > 0 else "-", + } + ) + sections.append( + render_table( + table_data, + columns=["Worker", "在线率", "下线次数", "平均下线时长", "检测延迟", "恢复", "inflight保留"], + right_align={"在线率", "下线次数", "平均下线时长", "检测延迟", "inflight保留"}, + ) + ) + sections.append("") + + # 时间线 + if result["time_range"]["start"] and result["time_range"]["end"]: + sections.append("### Worker 时间线") + sections.append("") + timeline_data = _build_timeline_data(result) + if timeline_data: + sections.append(render_timeline(timeline_data, width=40)) + sections.append("") + + # 事件详情 → 拆分到 detail_text + detail_parts = ["# Worker 健康事件详情", ""] + has_events = False + if result.get("diagnoses"): + detail_parts.append("## 诊断") + detail_parts.append("") + for d in result["diagnoses"]: + detail_parts.append(f'[{d["severity"]}] [{d["source_layer"]}] {d["message"]}') + detail_parts.append("") + for url, w in sorted(result["workers"].items()): + if w["events"]: + has_events = True + detail_parts.append(f"## {_strip_scheme(url)}") + detail_parts.append("") + for evt in w["events"]: + detail_parts.append(f' [{evt["ts"]}] {evt["type"]}') + detail_parts.append("") + + detail_text = "\n".join(detail_parts) if has_events else "" + + # 主报告中添加引用 + if has_events: + sections.append("> 完整事件详情: [detail/health_events.md](../detail/health_events.md)") + sections.append("") + + return "\n".join(sections), detail_text + + +def 
_build_timeline_data(result): + """构建 render_timeline 需要的数据格式。""" + tr = result["time_range"] + if not tr["start"] or not tr["end"]: + return None + workers_data = {} + for url, w in result["workers"].items(): + periods = [] + status, start = "up", tr["start"] + for evt in w["events"]: + if not evt["ts"]: + continue + if evt["type"] in ("NOT_HEALTHY", "REMOVED") and status == "up": + periods.append({"from": start, "to": evt["ts"], "status": "up"}) + status, start = "down", evt["ts"] + elif evt["type"] in ("HEALTHY", "RE-REGISTERED") and status == "down": + periods.append({"from": start, "to": evt["ts"], "status": "down"}) + status, start = "up", evt["ts"] + periods.append({"from": start, "to": tr["end"], "status": status}) + workers_data[url] = periods + return {"start": tr["start"], "end": tr["end"], "workers": workers_data} + + +# ════════════════════════════════════════════════════════════════ +# Grep 工具 +# ════════════════════════════════════════════════════════════════ + + +def _grep_lines(log_file, pattern, tail=None): + """用 grep 从日志文件提取匹配行。""" + try: + if tail: + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + return "'" + s.replace("'", "'\\''") + "'" diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/latency.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/latency.py new file mode 100644 index 00000000000..508cf3824d9 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/latency.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python3 +""" +Latency Analyzer — 延迟分析 + +分析 
#!/usr/bin/env python3
"""Latency analyzer.

Computes request-latency percentiles, latency distribution, throughput
trend, scheduling overhead, and slow requests from Router logs. Only
inference paths (/v1/chat/completions, /v1/completions) are counted.
"""

import os
import subprocess
import sys
from collections import defaultdict

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from chart import render_bar, render_sparkline, render_table
from log_parser import TS_MS_RE, extract_tags, parse_http_line
from stats import compute_statistics, time_bucket

# ════════════════════════════════════════════════════════════════
# Scheduling-latency parsing
# ════════════════════════════════════════════════════════════════


def _parse_scheduling_ms(ts_ms_lines):
    """Derive per-request scheduling latency from ts_ms log lines.

    Two or more ts_ms lines sharing a request_id bracket the scheduling
    phase; the max-min timestamp spread for that request_id is its
    scheduling duration. Returns a list of durations in milliseconds.
    """
    from datetime import datetime

    stamps_by_rid = defaultdict(list)
    for line in ts_ms_lines:
        m = TS_MS_RE.search(line)
        if not m:
            continue
        rid = extract_tags(line).get("request_id", "")
        if not rid:
            continue
        try:
            stamps_by_rid[rid].append(datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S.%f"))
        except ValueError:
            pass

    durations = []
    for stamps in stamps_by_rid.values():
        if len(stamps) >= 2:
            spread_ms = (max(stamps) - min(stamps)).total_seconds() * 1000
            durations.append(round(spread_ms, 3))
    return durations


# ════════════════════════════════════════════════════════════════
# Main analysis entry point
# ════════════════════════════════════════════════════════════════

LATENCY_DIST_SPEC = "<100,100-500,500-1000,1000-5000,5000-10000,>10000"


def analyze_latency(log_file, tail=None):
    """Analyze request latency in the given log file.

    Args:
        log_file: path to the router log.
        tail: optional limit to the last N lines.

    Returns:
        dict with keys: stats, latency_trend, throughput_trend,
        slow_top10, scheduling_stats (or None), diagnoses.
    """
    # Phase 1: grep extraction.
    http_lines = _grep_lines(log_file, r"\[(POST|GET)\] /", tail)
    ts_ms_lines = _grep_lines(log_file, "ts_ms=", tail)

    # Phase 2: parse HTTP lines, keeping inference paths only.
    http_records = [rec for rec in (parse_http_line(line, inference_only=True) for line in http_lines) if rec]

    # Phase 3.1: latency percentiles and distribution.
    latency_values = [rec["latency_ms"] for rec in http_records]
    stats = compute_statistics(
        latency_values,
        percentiles_list=[50, 90, 95, 99],
        distribution_spec=LATENCY_DIST_SPEC,
    )

    # 3.2: p50 latency trend over time buckets.
    latency_trend = time_bucket(http_records, window="auto", agg_specs=[("latency_ms", "p50")])

    # 3.3: throughput trend (request count per bucket).
    throughput_trend = time_bucket(http_records, window="auto")

    # 3.4: slowest 10 requests.
    slow_top10 = [
        {
            "ts": rec["ts"],
            "path": rec["path"],
            "status": rec["status"],
            "latency_ms": rec["latency_ms"],
            "client_ip": rec["client_ip"],
        }
        for rec in sorted(http_records, key=lambda r: -r["latency_ms"])[:10]
    ]

    # 3.5: scheduling-latency percentiles, when ts_ms lines exist.
    scheduling_stats = None
    if ts_ms_lines:
        durations = _parse_scheduling_ms(ts_ms_lines)
        if durations:
            raw = compute_statistics(durations, percentiles_list=[50, 90, 99])
            scheduling_stats = {
                "p50": raw["p50"],
                "p90": raw["p90"],
                "p99": raw["p99"],
                "count": raw["count"],
            }

    # 3.6: diagnostic rules.
    diagnoses = _run_diagnostics(stats, scheduling_stats)

    return {
        "stats": stats,
        "latency_trend": latency_trend,
        "throughput_trend": throughput_trend,
        "slow_top10": slow_top10,
        "scheduling_stats": scheduling_stats,
        "diagnoses": diagnoses,
    }
+ if scheduling_stats and scheduling_stats["p99"] < 100: + diagnoses.append( + { + "message": f'p99={p99:.0f}ms 但调度仅 {scheduling_stats["p99"]:.0f}ms → 延迟在后端推理层', + "severity": "HIGH", + } + ) + elif scheduling_stats and scheduling_stats["p99"] >= 100: + diagnoses.append( + { + "message": f'p99={p99:.0f}ms 且调度 p99={scheduling_stats["p99"]:.0f}ms → 调度层瓶颈', + "severity": "CRITICAL", + } + ) + else: + diagnoses.append( + { + "message": f"p99={p99:.0f}ms (>10s),后端推理延迟高", + "severity": "HIGH", + } + ) + + # 尾延迟 + if p50 > 0 and p99 / p50 > 10: + diagnoses.append( + { + "message": f"p99/p50={p99/p50:.1f}x → 尾延迟严重", + "severity": "MEDIUM", + } + ) + + if not diagnoses: + diagnoses.append( + { + "message": f"延迟正常 (p50={p50:.0f}ms, p99={p99:.0f}ms)", + "severity": "INFO", + } + ) + + return diagnoses + + +# ════════════════════════════════════════════════════════════════ +# Grep 工具 +# ════════════════════════════════════════════════════════════════ + + +def _grep_lines(log_file, pattern, tail=None): + """用 grep 从日志文件提取匹配行。""" + try: + if tail: + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + return "'" + s.replace("'", "'\\''") + "'" + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def format_latency_report(result): + """将分析结果格式化为终端报告。""" + sections = [] + stats = result["stats"] + + sections.append("## 延迟分析") + sections.append("") + + if stats["count"] == 0: + sections.append(" 未找到推理请求 (/v1/chat/completions, /v1/completions)") + return "\n".join(sections) + + # 百分位数概览 + 
sections.append( + f' 推理请求: {stats["count"]} | ' + f'p50={_fmt_ms(stats["p50"])} p90={_fmt_ms(stats["p90"])} ' + f'p95={_fmt_ms(stats["p95"])} p99={_fmt_ms(stats["p99"])} ' + f'max={_fmt_ms(stats["max"])}' + ) + sections.append(" 指标口径: pXX=延迟分位数;吞吐量=每个时间桶内请求数(count);调度耗时=同 request_id 的 ts_ms(max-min)。") + sections.append("") + + # 延迟分布 + if stats.get("distribution"): + sections.append("### 延迟分布") + sections.append("") + bar_data = [] + for d in stats["distribution"]: + bar_data.append( + { + "label": d["range"], + "value": d["pct"], + "count": d["count"], + } + ) + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + + # 延迟趋势 + if result["latency_trend"] and len(result["latency_trend"]) > 1: + sections.append("### 延迟趋势 (p50)") + sections.append("") + sections.append( + render_sparkline( + result["latency_trend"], + value_field="latency_ms_p50", + title="p50 Latency", + y_label="ms", + ) + ) + sections.append("") + + # 吞吐量趋势 + if result["throughput_trend"] and len(result["throughput_trend"]) > 1: + sections.append("### 吞吐量趋势") + sections.append("") + sections.append( + render_sparkline( + result["throughput_trend"], + value_field="count", + title="Throughput", + y_label="req", + ) + ) + sections.append("") + + # 调度耗时 + if result["scheduling_stats"]: + ss = result["scheduling_stats"] + sections.append(f'### 调度耗时 ({ss["count"]} samples)') + sections.append(f' p50={_fmt_ms(ss["p50"])} p90={_fmt_ms(ss["p90"])} p99={_fmt_ms(ss["p99"])}') + sections.append("") + + # 慢请求 Top 10 + if result["slow_top10"]: + sections.append("### 慢请求 Top 10") + sections.append("") + table_data = [] + for r in result["slow_top10"]: + table_data.append( + { + "时间": r["ts"][-8:] if len(r["ts"]) > 8 else r["ts"], + "延迟": _fmt_ms(r["latency_ms"]), + "状态": str(r["status"]), + "路径": r["path"], + "Client": r["client_ip"], + } + ) + sections.append( + render_table( + table_data, + columns=["时间", "延迟", "状态", "路径", "Client"], + ) + ) + sections.append("") + + # 诊断(仅在 detail 输出) 
+ if result["diagnoses"]: + sections.append("### 诊断") + sections.append(" 诊断见详情: [detail/latency_diagnoses.md](../detail/latency_diagnoses.md)") + sections.append("") + + return "\n".join(sections) + + +def _fmt_ms(ms): + """格式化毫秒值为人类可读字符串。""" + if ms >= 60000: + return f"{ms/60000:.1f}min" + elif ms >= 1000: + return f"{ms/1000:.2f}s" + elif ms >= 1: + return f"{ms:.1f}ms" + else: + return f"{ms*1000:.0f}µs" diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load.py new file mode 100644 index 00000000000..83b9c8a05e1 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +""" +Load Analyzer — 负载与计数器分析 + +分析 Worker 负载分布、计数器异常、请求堆积检测、token 计数器。 +""" + +import os +import re +import subprocess +import sys +from collections import defaultdict + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from log_parser import extract_ts, match_select_release, parse_stats_line +from stats import compute_statistics, time_bucket + +# ════════════════════════════════════════════════════════════════ +# Counter 异常检测正则 +# ════════════════════════════════════════════════════════════════ + +URL_RE = r"((?:https?://)?[A-Za-z0-9.-]+(?::\d+)?)" +DOUBLE_RELEASE_RE = re.compile(rf"release worker:\s*{URL_RE}\s+skipped.*?double-release") +COUNTER_CLEANED_RE = re.compile(rf"release worker:\s*{URL_RE}\s+skipped.*?counter already cleaned up") +COUNTER_PRESERVED_RE = re.compile(rf"counter preserved.*?{URL_RE}") +TOKEN_PRESERVED_RE = re.compile(rf"token counter preserved.*?{URL_RE}") + +# Token 事件 +SELECT_TOKENS_RE = re.compile(rf"select worker \((\w+)\):\s*{URL_RE},\s*tokens:\s*(\d+)") +RELEASE_TOKENS_RE = re.compile(rf"release (?:([a-zA-Z_]+)\s+)?tokens:\s*{URL_RE},\s*tokens:\s*(\d+)") +SELECT_REQ_COUNT_RE = re.compile(rf"select worker 
#!/usr/bin/env python3
"""Load analyzer.

Analyzes worker load distribution, counter anomalies, request pile-up
detection, and token counters from Router logs.
"""

import os
import re
import subprocess
import sys
from collections import defaultdict

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from log_parser import extract_ts, match_select_release, parse_stats_line
from stats import compute_statistics, time_bucket

# ════════════════════════════════════════════════════════════════
# Counter-anomaly detection patterns
# ════════════════════════════════════════════════════════════════

URL_RE = r"((?:https?://)?[A-Za-z0-9.-]+(?::\d+)?)"
DOUBLE_RELEASE_RE = re.compile(rf"release worker:\s*{URL_RE}\s+skipped.*?double-release")
COUNTER_CLEANED_RE = re.compile(rf"release worker:\s*{URL_RE}\s+skipped.*?counter already cleaned up")
COUNTER_PRESERVED_RE = re.compile(rf"counter preserved.*?{URL_RE}")
TOKEN_PRESERVED_RE = re.compile(rf"token counter preserved.*?{URL_RE}")

# Token / request-count events
SELECT_TOKENS_RE = re.compile(rf"select worker \((\w+)\):\s*{URL_RE},\s*tokens:\s*(\d+)")
RELEASE_TOKENS_RE = re.compile(rf"release (?:([a-zA-Z_]+)\s+)?tokens:\s*{URL_RE},\s*tokens:\s*(\d+)")
SELECT_REQ_COUNT_RE = re.compile(rf"select worker \((\w+)\):\s*{URL_RE},\s*count:\s*(\d+)")
RELEASE_REQ_COUNT_RE = re.compile(rf"release worker:\s*{URL_RE},\s*count:\s*(\d+)")


def _strip_scheme(url):
    """Drop a leading http:// or https:// scheme."""
    return re.sub(r"^https?://", "", url)


def _normalize_worker_type(worker_type):
    """Map a raw worker-type string to prefill/decode/mixed/unknown."""
    t = (worker_type or "unknown").lower()
    if t in ("prefill", "decode", "mixed"):
        return t
    return "unknown"


def parse_counter_anomaly(line):
    """Parse an H5 counter-anomaly line.

    Returns {ts, worker, anomaly_type} or None.

    Fix: TOKEN_PRESERVED_RE must be tried before COUNTER_PRESERVED_RE —
    a "token counter preserved ..." line also contains the substring
    "counter preserved", so the original ordering made the
    "token-preserved" classification unreachable.
    """
    ts = extract_ts(line)
    m = DOUBLE_RELEASE_RE.search(line)
    if m:
        return {"ts": ts, "worker": m.group(1), "anomaly_type": "double-release"}
    m = COUNTER_CLEANED_RE.search(line)
    if m:
        return {"ts": ts, "worker": m.group(1), "anomaly_type": "counter-cleaned-up"}
    m = TOKEN_PRESERVED_RE.search(line)
    if m:
        return {"ts": ts, "worker": m.group(1), "anomaly_type": "token-preserved"}
    m = COUNTER_PRESERVED_RE.search(line)
    if m:
        return {"ts": ts, "worker": m.group(1), "anomaly_type": "counter-preserved"}
    return None


# ════════════════════════════════════════════════════════════════
# Main analysis entry point
# ════════════════════════════════════════════════════════════════


def analyze_load(log_file, tail=None):
    """Analyze load and counters.

    Returns:
        dict: {load_stats, worker_load, load_trend, counter_anomalies,
        select_release, token_stats, counter_last_state, pileup_detected,
        diagnoses, summary}
    """
    h7_lines = _grep_lines(log_file, r"\[stats\]", tail)
    h3_lines = _grep_lines(log_file, r"select worker|release worker|Failed to select", tail)
    h5_lines = _grep_lines(
        log_file,
        r"counter preserved|cleanup unhealthy|removed counters|counter already|double-release|preserved counters",
        tail,
    )
    h11_lines = _grep_lines(log_file, r"release [a-zA-Z_]+ tokens:", tail)

    # Parse periodic [stats] sample lines.
    stats_records = [rec for rec in (parse_stats_line(line) for line in h7_lines) if rec]

    # Aggregate total_running statistics.
    total_running_vals = [r["total_running"] for r in stats_records if "total_running" in r]
    load_stats = compute_statistics(total_running_vals) if total_running_vals else {}

    # Per-worker running distribution.
    worker_running = defaultdict(list)
    for r in stats_records:
        for w_url, running in r.get("workers", {}).items():
            worker_running[w_url].append(running)

    worker_load = []
    for w_url in sorted(worker_running.keys()):
        vals = worker_running[w_url]
        avg = sum(vals) / len(vals) if vals else 0
        worker_load.append(
            {
                "worker": _strip_scheme(w_url),
                "avg_running": round(avg, 1),
                "max_running": max(vals) if vals else 0,
                "samples": len(vals),
            }
        )

    # Load trend over time buckets.
    load_trend = (
        time_bucket(stats_records, window="auto", agg_specs=[("total_running", "mean")]) if stats_records else []
    )

    # Counter anomalies grouped by type and worker.
    counter_anomalies = defaultdict(lambda: defaultdict(int))
    for line in h5_lines:
        evt = parse_counter_anomaly(line)
        if evt:
            counter_anomalies[evt["anomaly_type"]][evt["worker"]] += 1

    anomaly_summary = [
        {"type": atype, "total": sum(workers.values()), "workers": dict(workers)}
        for atype, workers in counter_anomalies.items()
    ]

    # Select/release matching (empty scaffold when no H3 lines exist).
    sr_result = (
        match_select_release(h3_lines + h11_lines)
        if h3_lines
        else {
            "matched": [],
            "unmatched_selects": [],
            "unmatched_releases": [],
            "failed_selects": [],
            "per_worker": {},
            "id_coverage": {},
            "type_summary": {},
            "worker_type_profile": {},
        }
    )

    # Token statistics and last-known counter state.
    token_stats = _analyze_tokens(h3_lines, h11_lines)
    counter_last_state = _analyze_counter_last_state(h3_lines + h11_lines)

    # Request pile-up detection.
    pileup = _detect_pileup(stats_records)

    diagnoses = _diagnose(load_stats, worker_load, anomaly_summary, sr_result, token_stats, pileup)

    return {
        "load_stats": load_stats,
        "worker_load": worker_load,
        "load_trend": load_trend,
        "counter_anomalies": anomaly_summary,
        "select_release": sr_result,
        "token_stats": token_stats,
        "counter_last_state": counter_last_state,
        "pileup_detected": pileup,
        "diagnoses": diagnoses,
        "summary": f"{len(stats_records)} stats 采样, {len(worker_running)} Worker(s)",
    }
+def _analyze_tokens(h3_lines, h11_lines): + """分析 token 分配与释放。""" + token_alloc = defaultdict(list) + token_release = defaultdict(list) + + for line in h3_lines: + m = SELECT_TOKENS_RE.search(line) + if m: + token_alloc[m.group(2)].append(int(m.group(3))) + + for line in h11_lines: + m = RELEASE_TOKENS_RE.search(line) + if m: + token_release[m.group(2)].append(int(m.group(3))) + + result = [] + all_workers = set(token_alloc.keys()) | set(token_release.keys()) + for w in sorted(all_workers): + allocs = token_alloc.get(w, []) + releases = token_release.get(w, []) + result.append( + { + "worker": _strip_scheme(w), + "alloc_count": len(allocs), + "alloc_avg": round(sum(allocs) / len(allocs), 0) if allocs else 0, + "release_count": len(releases), + } + ) + return result + + +def _analyze_counter_last_state(lines): + """统计每个 worker 的 request/token counter 最后一条计数日志值与动作类型。""" + state = defaultdict( + lambda: { + "req_last_action": "-", + "req_last_value": "-", + "token_last_action": "-", + "token_last_value": "-", + "last_ts": "", + } + ) + for line in lines: + ts = extract_ts(line) or "" + m = SELECT_REQ_COUNT_RE.search(line) + if m: + w = m.group(2) + state[w]["req_last_action"] = "select" + state[w]["req_last_value"] = m.group(3) + state[w]["last_ts"] = ts + continue + m = RELEASE_REQ_COUNT_RE.search(line) + if m: + w = m.group(1) + state[w]["req_last_action"] = "release" + state[w]["req_last_value"] = m.group(2) + state[w]["last_ts"] = ts + continue + m = SELECT_TOKENS_RE.search(line) + if m: + w = m.group(2) + state[w]["token_last_action"] = "select" + state[w]["token_last_value"] = m.group(3) + state[w]["last_ts"] = ts + continue + m = RELEASE_TOKENS_RE.search(line) + if m: + w = m.group(2) + state[w]["token_last_action"] = "release" + state[w]["token_last_value"] = m.group(3) + state[w]["last_ts"] = ts + continue + + result = [] + for w in sorted(state.keys()): + s = state[w] + result.append({"worker": _strip_scheme(w), **s}) + return result + + +def 
_detect_pileup(stats_records): + """检测请求堆积:total_running 连续上升 >5 个采样点。""" + if len(stats_records) < 5: + return False + vals = [r.get("total_running", 0) for r in stats_records] + max_consecutive = 0 + current = 0 + for i in range(1, len(vals)): + if vals[i] > vals[i - 1]: + current += 1 + max_consecutive = max(max_consecutive, current) + else: + current = 0 + return max_consecutive >= 5 + + +def _diagnose(load_stats, worker_load, anomaly_summary, sr_result, token_stats, pileup): + """生成负载诊断。""" + diagnoses = [] + + if pileup: + diagnoses.append( + {"severity": "HIGH", "message": "total_running 持续上升,疑似请求堆积", "source_layer": "FD 后端"} + ) + + # 空闲 Worker + for w in worker_load: + if w["avg_running"] == 0 and w["samples"] > 3: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'{w["worker"]} running 持续 =0(空闲或故障未移除)', + "source_layer": "Router", + } + ) + + # 负载严重不均 + if load_stats.get("stddev", 0) > 3: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'负载标准差 {load_stats["stddev"]},分布不均衡', + "source_layer": "Router", + } + ) + + # Counter 异常 + for a in anomaly_summary: + if a["type"] == "double-release" and a["total"] > 0: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'double-release {a["total"]} 次(计数器逻辑 bug)', + "source_layer": "Router", + } + ) + + id_cov = sr_result.get("id_coverage", {}) + has_correlatable_ids = (id_cov.get("with_request_id", 0) + id_cov.get("with_alt_id", 0)) > 0 + + # Select/Release 不一致(仅在存在可关联 ID 时启用,避免无 ID 场景误报) + if has_correlatable_ids: + for w_url, pw in sr_result.get("per_worker", {}).items(): + delta = pw.get("delta", 0) + if delta >= 3: + diagnoses.append( + { + "severity": "MEDIUM", + "message": f"{_strip_scheme(w_url)} select-release 差值 {delta}(可能存在在途请求堆积)", + "source_layer": "FD 后端", + } + ) + + # Token 计数器潜在泄漏 + for t in token_stats: + if t.get("alloc_count", 0) > t.get("release_count", 0): + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'{t["worker"]} token alloc/release 不平衡 
({t["alloc_count"]}/{t["release_count"]})', + "source_layer": "Router", + } + ) + + return diagnoses + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +# ════════════════════════════════════════════════════════════════ +# Grep 工具 +# ════════════════════════════════════════════════════════════════ + + +def _grep_lines(log_file, pattern, tail=None): + try: + if tail: + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + return "'" + s.replace("'", "'\\''") + "'" diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load_report.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load_report.py new file mode 100644 index 00000000000..5cbdc829bf6 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/load_report.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +"""Load report formatter.""" + +from chart import render_bar, render_sparkline, render_table + + +def _strip_scheme(url): + import re + return re.sub(r"^https?://", "", url) + + +def format_load_report(result): + """将分析结果格式化为终端报告。 + + Returns: + tuple: (summary_text, detail_text) + """ + sections = ["## 负载与计数器分析", ""] + sections.append(f' {result["summary"]}') + sections.append("") + detail_sections = ["# 负载与计数器详情", ""] + detail_sections.append(f'总结: {result["summary"]}') + detail_sections.append("") + + if result["diagnoses"]: + sections.append("### 诊断") + sections.append("") + sections.append(f' 共 {len(result["diagnoses"])} 条诊断,见详情: 
[detail/load_diagnoses.md](../detail/load_diagnoses.md)') + sections.append("") + detail_sections.append("## 诊断") + detail_sections.append("") + for d in result["diagnoses"]: + detail_sections.append(f'[{d["severity"]}] [{d["source_layer"]}] {d["message"]}') + detail_sections.append("") + + # 负载概览 + ls = result.get("load_stats", {}) + if ls: + sections.append("### 负载概览 (total_running)") + sections.append("") + sections.append(" 说明: stats 采样来自 `[stats]` 周期日志(通常每 5s 一条),用于观察当前并发与负载变化趋势。") + sections.append( + f' mean={ls.get("mean",0)} p50={ls.get("p50",0)} p90={ls.get("p90",0)} ' + f'p99={ls.get("p99",0)} max={ls.get("max",0)} stddev={ls.get("stddev",0)}' + ) + sections.append("") + + # Per-Worker 负载 + if result["worker_load"]: + sections.append("### Per-Worker 负载") + sections.append("") + bar_data = [ + {"label": w["worker"][:25], "value": min(100, w["avg_running"] * 5), "count": w["avg_running"]} + for w in result["worker_load"] + ] + sections.append(render_bar(bar_data, show_count=True)) + sections.append("") + + # 负载趋势 + if result["load_trend"] and len(result["load_trend"]) > 1: + sections.append("### 负载趋势") + sections.append("") + sections.append( + render_sparkline( + result["load_trend"], value_field="total_running_mean", title="Total Running", y_label="req" + ) + ) + sections.append("") + + # Counter 异常 + if result["counter_anomalies"]: + sections.append("### 计数器异常") + sections.append("") + for a in result["counter_anomalies"]: + workers_str = ", ".join(f'{_strip_scheme(w)}({c})' for w, c in a["workers"].items()) + sections.append(f' {a["type"]}: {a["total"]} 次 [{workers_str}]') + sections.append("") + detail_sections.append("## 计数器异常") + detail_sections.append("") + for a in result["counter_anomalies"]: + workers_str = ", ".join(f'{_strip_scheme(w)}({c})' for w, c in a["workers"].items()) + detail_sections.append(f'- {a["type"]}: {a["total"]} 次 [{workers_str}]') + detail_sections.append("") + + # 按 prefill / decode / mixed 分类统计 + type_summary = 
result.get("select_release", {}).get("type_summary", {}) + if type_summary: + sections.append("### 按类型统计(prefill / decode / mixed)") + sections.append("") + type_rows = [] + for t in ("prefill", "decode", "mixed", "unknown"): + s = type_summary.get(t) + if not s: + continue + token_display = "-" + if t == "prefill": + token_display = f'{s.get("token_selects",0)}/{s.get("token_releases",0)}' + elif t == "mixed" and (s.get("token_selects", 0) > 0 or s.get("token_releases", 0) > 0): + token_display = f'{s.get("token_selects",0)}/{s.get("token_releases",0)}' + type_rows.append( + { + "type": t, + "counter(S/R)": f'{s.get("counter_selects",0)}/{s.get("counter_releases",0)}', + "token(S/R)": token_display, + } + ) + if type_rows: + sections.append(render_table(type_rows, columns=["type", "counter(S/R)", "token(S/R)"])) + sections.append("") + sections.append(" 说明: prefill/mixed 的 token-select 同时表示 request counter + token counter 增加;decode 仅 request counter。") + sections.append(" 说明: `release prefill tokens` 会被识别为 token-release;worker type 按该 worker URL 在 select 中的类型映射(prefill/decode/mixed)。") + if type_summary.get("unknown"): + sections.append(" 说明: unknown 表示日志里缺少 worker type,且无法从邻近 select/release 关系推断。") + sections.append("") + detail_sections.append("## 按类型统计") + detail_sections.append("") + detail_sections.append(render_table(type_rows, columns=["type", "counter(S/R)", "token(S/R)"])) + detail_sections.append("") + + id_cov = result.get("select_release", {}).get("id_coverage", {}) + if id_cov: + sections.append("### 请求标识覆盖(基于 select 近似请求数)") + sections.append("") + sections.append( + " total={total} | with_request_id={with_rid} | without_request_id={without_rid} | " + "with_alt_id={with_alt} | without_any_id={without_any}".format( + total=id_cov.get("total_requests_estimated", 0), + with_rid=id_cov.get("with_request_id", 0), + without_rid=id_cov.get("without_request_id", 0), + with_alt=id_cov.get("with_alt_id", 0), + without_any=id_cov.get("without_any_id", 0), + ) + 
) + if id_cov.get("without_any_id", 0) > 0: + sections.append(" ℹ 无 request/session/trace/req_id 时,不做退化匹配,仅统计为 untracked。") + sections.append(" 字段说明: total=select 事件总数估算;with_request_id=含 request_id;without_request_id=不含 request_id;with_alt_id=含 req_id/trace_id/session_id;without_any_id=四类 ID 都缺失。") + sections.append("") + detail_sections.append("## 请求标识覆盖字段说明") + detail_sections.append("") + detail_sections.append( + "- total: select 事件总数(近似请求数)\n" + "- with_request_id: 携带 request_id 的 select 数\n" + "- without_request_id: 未携带 request_id 的 select 数\n" + "- with_alt_id: 无 request_id 但携带 req_id/trace_id/session_id 的 select 数\n" + "- without_any_id: 四类 ID 都没有,无法做请求级关联" + ) + detail_sections.append("") + + # Select/Release 匹配 + sr = result.get("select_release", {}) + if sr.get("per_worker"): + sections.append("### Select/Release 匹配") + sections.append("") + id_cov = sr.get("id_coverage", {}) + no_correlatable_id = (id_cov.get("with_request_id", 0) + id_cov.get("with_alt_id", 0)) == 0 + table_data = [] + for w_url, pw in sorted(sr["per_worker"].items()): + delta_display = "N/A" if no_correlatable_id else str(pw["delta"]) + table_data.append( + { + "Worker": _strip_scheme(w_url), + "ReqSelect": str(pw["selects"]), + "ReqRelease": str(pw["releases"]), + "ReqDelta": delta_display, + "TokenSelect": str(pw.get("token_selects", 0)), + "TokenRelease": str(pw.get("token_releases", 0)), + } + ) + sections.append( + render_table( + table_data, + columns=["Worker", "ReqSelect", "ReqRelease", "ReqDelta", "TokenSelect", "TokenRelease"], + right_align={"ReqSelect", "ReqRelease", "ReqDelta", "TokenSelect", "TokenRelease"}, + ) + ) + sections.append("") + if no_correlatable_id: + sections.append(" ℹ 当前样本无可关联 ID,Delta 不用于请求泄漏结论。") + sections.append("") + sections.append(" ℹ ReqDelta>0 可能仅表示仍有在途请求(尚未完成推理),需结合时间窗口观察。") + sections.append("") + sections.append(" 说明: TokenSelect 按 worker type 统计(prefill + mixed 的 select 都计入),不依赖日志里是否出现 tokens 字段。") + sections.append("") + 
detail_sections.append("## Select/Release Per-Worker") + detail_sections.append("") + + if sr.get("worker_type_profile"): + sections.append("### Worker URL 类型画像(基于 select)") + sections.append("") + rows = [] + for w, p in sorted(sr["worker_type_profile"].items()): + rows.append( + { + "Worker": _strip_scheme(w), + "Dominant": p.get("dominant_type", "unknown"), + "Prefill": p.get("prefill", 0), + "Decode": p.get("decode", 0), + "Mixed": p.get("mixed", 0), + } + ) + sections.append( + render_table( + rows, + columns=["Worker", "Dominant", "Prefill", "Decode", "Mixed"], + right_align={"Prefill", "Decode", "Mixed"}, + ) + ) + sections.append("") + detail_sections.append( + render_table( + table_data, + columns=["Worker", "ReqSelect", "ReqRelease", "ReqDelta", "TokenSelect", "TokenRelease"], + right_align={"ReqSelect", "ReqRelease", "ReqDelta", "TokenSelect", "TokenRelease"}, + ) + ) + detail_sections.append("") + + if sr.get("failed_selects"): + sections.append(f' ⚠ Failed to select: {len(sr["failed_selects"])} 次') + sections.append(" 解释: 路由在该时刻未能选出可用 worker,通常意味着可用池不足或健康状态异常。") + sections.append("") + detail_sections.append("## Failed to select") + detail_sections.append("") + for f in sr["failed_selects"]: + detail_sections.append(f'- [{f.get("ts","")}] line={f.get("line","")}') + detail_sections.append("") + + # Token 统计 + if result.get("token_stats"): + sections.append("### Token 计数器") + sections.append("") + sections.append( + render_table( + result["token_stats"], + columns=["worker", "alloc_count", "alloc_avg", "release_count"], + right_align={"alloc_count", "alloc_avg", "release_count"}, + ) + ) + sections.append("") + + if result.get("counter_last_state"): + sections.append("### 计数器末状态") + sections.append("") + sections.append(" 末状态详情见: [detail/load_counter_state.md](../detail/load_counter_state.md)") + sections.append("") + detail_sections.append("## Counter / Token Counter 末状态(最后一条计数日志)") + detail_sections.append("") + detail_sections.append( + render_table( 
+ result["counter_last_state"], + columns=["worker", "req_last_action", "req_last_value", "token_last_action", "token_last_value", "last_ts"], + right_align={"req_last_value", "token_last_value"}, + ) + ) + detail_sections.append("") + + return "\n".join(sections), "\n".join(detail_sections) diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/trace.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/trace.py new file mode 100644 index 00000000000..ba4c7bd1051 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/analyzers/trace.py @@ -0,0 +1,616 @@ +#!/usr/bin/env python3 +""" +Trace Analyzer — 请求追踪 + +通过 session_id / trace_id / request_id / req_id 追踪单个或多个请求的 +完整生命周期,重建事件链,检测不完整生命周期。 +""" + +import os +import re +import subprocess +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from log_parser import ( + extract_tags, + extract_ts, + match_select_release, + parse_cache_strategy_line, + parse_http_line, +) + +# ════════════════════════════════════════════════════════════════ +# 事件识别正则 +# ════════════════════════════════════════════════════════════════ + +PARSING_COMPLETE_RE = re.compile(r"Parsing completed.*worker selection") +URL_RE = r"((?:https?://)?[A-Za-z0-9.-]+(?::\d+)?)" +SELECT_WORKER_RE = re.compile(rf"select worker\s*(?:\((\w+)\))?:\s*{URL_RE}") +RELEASE_WORKER_RE = re.compile(rf"release worker\s*(?:\((\w+)\))?:\s*{URL_RE}") +RELEASE_TOKENS_RE = re.compile(rf"release prefill tokens:\s*{URL_RE},\s*tokens:\s*(\d+)") +REQUEST_COMPLETE_RE = re.compile(r"Request completed successfully") +TS_MS_RE = re.compile(r"ts_ms=(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)") + +# Prefill 事件 +PREFILL_FIRST_CHUNK_RE = re.compile(rf"\[prefill\] first chunk received.*?{URL_RE}") +PREFILL_DONE_RE = re.compile(rf"\[prefill\] non-stream prefill response done.*?{URL_RE}") +PREFILL_ERROR_RE = re.compile(rf"\[prefill\] (scanner error|copy 
error).*?{URL_RE}") +PREFILL_DEFER_RE = re.compile(rf"\[prefill\] release in defer.*?{URL_RE}") +PREFILL_ERR_PATH_RE = re.compile(rf"\[prefill\] release in CommonCompletions defer \(error path\).*?{URL_RE}") +FAILED_SELECT_RE = re.compile(r"Failed to select") + + +def _strip_scheme(url): + return re.sub(r"^https?://", "", url) + + +# ════════════════════════════════════════════════════════════════ +# 主分析函数 +# ════════════════════════════════════════════════════════════════ + + +def analyze_trace(log_file, trace_ids, tail=None): + """追踪指定 ID 的请求生命周期。 + + Args: + log_file: 日志文件路径 + trace_ids: ID 列表(逗号分隔的字符串或列表) + tail: 尾部行数限制 + + Returns: + dict: {traces: {id: {events, lifecycle_complete, diagnoses}}, summary} + """ + auto_discovery_summary = "" + if isinstance(trace_ids, str): + normalized = trace_ids.strip().lower() + if normalized in ("all", "full", "all_ids", "全部", "全量"): + trace_ids, auto_discovery_summary = _discover_full_trace_targets(log_file, tail=tail) + else: + trace_ids = [tid.strip() for tid in trace_ids.split(",") if tid.strip()] + + if not trace_ids: + return {"traces": {}, "summary": "未指定追踪 ID"} + + traces = {} + for tid in trace_ids: + # Grep 搜索四种 context tag + pattern = f"session_id:{tid}|trace_id:{tid}|request_id:{tid}|req_id:{tid}" + matching_lines = _grep_lines(log_file, pattern, tail) + + if not matching_lines: + traces[tid] = { + "events": [], + "lifecycle_complete": False, + "diagnoses": [{"severity": "INFO", "message": f"未找到 ID={tid} 的匹配行"}], + "matched_tag": None, + "related_ids": {}, + } + continue + + # 识别匹配到的 tag 类型,并展开 session 下所有 request_id + first_tags = extract_tags(matching_lines[0]) + is_session = tid in [first_tags.get("session_id", "")] + + # 如果是 session_id,收集所有关联的 request_id + related_request_ids = set() + if is_session: + for line in matching_lines: + tags = extract_tags(line) + rid = tags.get("request_id", "") + if rid: + related_request_ids.add(rid) + + # 为每个 request_id 额外搜索行 + extra_lines = [] + for rid in 
related_request_ids: + rid_lines = _grep_lines(log_file, f"request_id:{rid}", tail) + extra_lines.extend(rid_lines) + all_lines = list(set(matching_lines + extra_lines)) + else: + all_lines = matching_lines + + # 解析事件链 + events = _parse_event_chain(all_lines) + lifecycle_complete = _check_lifecycle_complete(events) + sr_check = match_select_release(all_lines) + diagnoses = _diagnose_trace(events, lifecycle_complete, sr_check) + + tag_coverage = _build_id_coverage_stats(all_lines) + tag_combos = _build_id_combo_stats(all_lines) + matched_tags = _detect_matched_tags(all_lines, tid) + traces[tid] = { + "events": events, + "lifecycle_complete": lifecycle_complete, + "diagnoses": diagnoses, + "sr_check": sr_check, + "matched_tag": _format_matched_tag(matched_tags), + "matched_tags": matched_tags, + "related_ids": { + "request_ids": sorted(related_request_ids) if is_session else [], + }, + "id_coverage": tag_coverage, + "id_combos": tag_combos, + } + + total_traced = len(traces) + complete = sum(1 for t in traces.values() if t["lifecycle_complete"]) + + summary = f"{total_traced} ID(s) 追踪, {complete} 生命周期完整" + if auto_discovery_summary: + summary += f" | {auto_discovery_summary}" + + return {"traces": traces, "summary": summary} + + +def _discover_full_trace_targets(log_file, tail=None): + """全量追踪目标发现。 + + 规则: + 1) 有 session_id 的优先按 session_id 追踪 + 2) 无 session 但有 trace_id 的按 trace_id 追踪 + 3) 剩余“孤立”的 request_id/req_id 单独追踪 + """ + lines = _grep_lines(log_file, r"session_id:|trace_id:|request_id:|req_id:", tail=tail) + if not lines: + return [], "全量追踪未发现任何可用 ID" + + session_ids = set() + trace_ids = set() + all_request_ids = set() + request_ids_with_session_or_trace = set() + + for line in lines: + tags = extract_tags(line) + sid = tags.get("session_id") + tid = tags.get("trace_id") + rid = tags.get("request_id") or tags.get("req_id") + has_session = bool(sid) + has_trace = bool(tid) + has_request = bool(rid) + + if has_session: + session_ids.add(sid) + if has_trace: + 
trace_ids.add(tid) + if has_request: + all_request_ids.add(rid) + if has_session or has_trace: + request_ids_with_session_or_trace.add(rid) + + standalone_request_ids = all_request_ids - request_ids_with_session_or_trace + + targets = [] + chosen = set() + for bucket in (sorted(session_ids), sorted(trace_ids), sorted(standalone_request_ids)): + for _id in bucket: + if _id and _id not in chosen: + chosen.add(_id) + targets.append(_id) + + summary = ( + "全量ID发现: " + f"session={len(session_ids)}, trace={len(trace_ids)}, " + f"standalone_request={len(standalone_request_ids)}, total_targets={len(targets)}" + ) + return targets, summary + + +def _parse_event_chain(lines): + """从匹配行重建事件链,按时间排序。""" + events = [] + + for line in lines: + ts = extract_ts(line) + tags = extract_tags(line) + + # HTTP 请求进入/完成 + http = parse_http_line(line) + if http: + events.append( + { + "ts": ts, + "type": "HTTP", + "tags": tags, + "method": http["method"], + "path": http["path"], + "status": http["status"], + "latency_ms": http["latency_ms"], + "raw": line.strip(), + } + ) + continue + + # Parsing completed + if PARSING_COMPLETE_RE.search(line): + events.append({"ts": ts, "type": "PARSING_COMPLETE", "tags": tags, "raw": line.strip()}) + continue + + # Cache-aware strategy + strategy = parse_cache_strategy_line(line) + if strategy: + events.append( + { + "ts": ts, + "type": "CACHE_STRATEGY", + "tags": tags, + "strategy": strategy.get("strategy"), + "selected": strategy.get("selected", ""), + "selected_hitRatio": strategy.get("selected_hitRatio", 0), + "raw": line.strip(), + } + ) + continue + + # Select worker + m = SELECT_WORKER_RE.search(line) + if m: + events.append( + { + "ts": ts, + "type": "SELECT_WORKER", + "tags": tags, + "worker_type": m.group(1) or "unknown", + "worker": m.group(2), + "raw": line.strip(), + } + ) + continue + + # Release worker + m = RELEASE_WORKER_RE.search(line) + if m: + events.append( + { + "ts": ts, + "type": "RELEASE_WORKER", + "tags": tags, + "worker_type": 
m.group(1) or "unknown", + "worker": m.group(2), + "raw": line.strip(), + } + ) + continue + + # Release tokens + m = RELEASE_TOKENS_RE.search(line) + if m: + events.append( + { + "ts": ts, + "type": "RELEASE_TOKENS", + "tags": tags, + "worker": m.group(1), + "tokens": int(m.group(2)), + "raw": line.strip(), + } + ) + continue + + # Prefill events + m = PREFILL_FIRST_CHUNK_RE.search(line) + if m: + events.append( + {"ts": ts, "type": "PREFILL_FIRST_CHUNK", "tags": tags, "worker": m.group(1), "raw": line.strip()} + ) + continue + m = PREFILL_DONE_RE.search(line) + if m: + events.append({"ts": ts, "type": "PREFILL_DONE", "tags": tags, "worker": m.group(1), "raw": line.strip()}) + continue + m = PREFILL_ERROR_RE.search(line) + if m: + events.append( + { + "ts": ts, + "type": "PREFILL_ERROR", + "tags": tags, + "error": m.group(1), + "worker": m.group(2), + "raw": line.strip(), + } + ) + continue + m = PREFILL_DEFER_RE.search(line) + if m: + events.append( + {"ts": ts, "type": "PREFILL_DEFER_RELEASE", "tags": tags, "worker": m.group(1), "raw": line.strip()} + ) + continue + m = PREFILL_ERR_PATH_RE.search(line) + if m: + events.append( + { + "ts": ts, + "type": "PREFILL_ERROR_PATH_RELEASE", + "tags": tags, + "worker": m.group(1), + "raw": line.strip(), + } + ) + continue + + # Request completed + if REQUEST_COMPLETE_RE.search(line): + events.append({"ts": ts, "type": "REQUEST_COMPLETE", "tags": tags, "raw": line.strip()}) + continue + + # ts_ms + m = TS_MS_RE.search(line) + if m: + events.append({"ts": ts, "type": "TS_MS", "tags": tags, "ts_ms": m.group(1), "raw": line.strip()}) + continue + + # Failed to select + if FAILED_SELECT_RE.search(line): + events.append({"ts": ts, "type": "FAILED_SELECT", "tags": tags, "raw": line.strip()}) + continue + + # 按时间排序 + events.sort(key=lambda e: e.get("ts") or "") + return events + + +def _check_lifecycle_complete(events): + """检查生命周期是否完整。""" + types = {e["type"] for e in events} + has_entry = "HTTP" in types or "PARSING_COMPLETE" 
in types + has_exit = "REQUEST_COMPLETE" in types or ( + "HTTP" in types and any(e["type"] == "HTTP" and e.get("status") for e in events) + ) + has_select = "SELECT_WORKER" in types + has_release = "RELEASE_WORKER" in types + + return has_entry and has_exit and (not has_select or has_release) + + +def _diagnose_trace(events, lifecycle_complete, sr_check=None): + """生成追踪诊断。""" + diagnoses = [] + types = [e["type"] for e in events] + + if not lifecycle_complete: + if "SELECT_WORKER" in types and "RELEASE_WORKER" not in types: + diagnoses.append({"severity": "HIGH", "message": "有 select 但无 release — 疑似请求卡住"}) + elif "HTTP" not in types and "PARSING_COMPLETE" not in types: + diagnoses.append({"severity": "MEDIUM", "message": "未找到 HTTP 入口事件"}) + elif "REQUEST_COMPLETE" not in types: + diagnoses.append({"severity": "MEDIUM", "message": "未检测到请求完成事件 — 疑似异常退出"}) + + if "PREFILL_ERROR" in types: + for e in events: + if e["type"] == "PREFILL_ERROR": + diagnoses.append( + {"severity": "HIGH", "message": f'Prefill 错误: {e.get("error","")} @ {e.get("worker","")}'} + ) + + if "FAILED_SELECT" in types: + diagnoses.append({"severity": "HIGH", "message": "Failed to select worker — 无可用 Worker"}) + + if sr_check: + if sr_check.get("unmatched_selects"): + diagnoses.append( + { + "severity": "HIGH", + "message": f'match-select-release 检测到 {len(sr_check["unmatched_selects"])} 个 unmatched select', + } + ) + if sr_check.get("unmatched_releases"): + diagnoses.append( + { + "severity": "MEDIUM", + "message": f'match-select-release 检测到 {len(sr_check["unmatched_releases"])} 个 unmatched release', + } + ) + + return diagnoses + + +# ════════════════════════════════════════════════════════════════ +# 报告格式化 +# ════════════════════════════════════════════════════════════════ + + +def format_trace_report(result): + """将追踪结果格式化为终端报告。 + + Returns: + tuple: (summary_text, detail_dict) + summary_text: 总结部分(概览 + 诊断 + 生命周期状态) + detail_dict: {trace_id: event_chain_text} 各 ID 的完整事件链 + """ + sections = ["## 
请求追踪", ""] + sections.append(f' {result["summary"]}') + sections.append("") + + detail_dict = {} + + for tid, trace in result["traces"].items(): + sections.append(f"### ID: {tid}") + if trace.get("matched_tag"): + sections.append(f' 匹配类型: {trace["matched_tag"]}') + if trace.get("id_coverage"): + c = trace["id_coverage"] + sections.append( + " ID统计: " + f'request_only={c["request_only"]}, session_only={c["session_only"]}, trace_only={c["trace_only"]}' + ) + if trace.get("related_ids", {}).get("request_ids"): + sections.append(f' 关联 request_ids: {", ".join(trace["related_ids"]["request_ids"])}') + + status = "完整" if trace["lifecycle_complete"] else "不完整" + sections.append(f" 生命周期: {status}") + sections.append("") + + # 诊断 + if trace["diagnoses"]: + for d in trace["diagnoses"]: + sections.append(f' [{d["severity"]}] {d["message"]}') + sections.append("") + + # 事件链 → 拆分到 detail_dict + if trace["events"]: + detail_lines = [f"# 请求追踪事件链: {tid}", ""] + if trace.get("matched_tag"): + detail_lines.append(f'匹配类型: {trace["matched_tag"]}') + if trace.get("id_coverage"): + c = trace["id_coverage"] + detail_lines.append("ID覆盖统计:") + detail_lines.append( + f'- only_request_id: {c["request_only"]} | only_session_id: {c["session_only"]} | only_trace_id: {c["trace_only"]}' + ) + if trace.get("id_combos"): + detail_lines.append("") + detail_lines.append("标签组合明细(按唯一ID计数):") + for item in trace["id_combos"]: + detail_lines.append( + f'- combo={item["combo"]} | count={item["count"]} | ids={", ".join(item["ids"])}' + ) + if trace.get("related_ids", {}).get("request_ids"): + detail_lines.append(f'关联 request_ids: {", ".join(trace["related_ids"]["request_ids"])}') + detail_lines.append(f"生命周期: {status}") + detail_lines.append("") + detail_lines.append("## 事件链(整理)") + detail_lines.append("") + for evt in trace["events"]: + line = f' [{evt.get("ts","")}] {evt["type"]}' + if evt.get("worker"): + line += f' → {_strip_scheme(evt["worker"])}' + if evt.get("status"): + line += f' [{evt["status"]}]' 
+ if evt.get("latency_ms"): + line += f' {evt["latency_ms"]}ms' + if evt.get("strategy"): + line += f' strategy={evt["strategy"]}' + if evt.get("selected_hitRatio"): + line += f' hitRatio={evt["selected_hitRatio"]}' + if evt.get("tokens"): + line += f' tokens={evt["tokens"]}' + if evt.get("error"): + line += f' error={evt["error"]}' + if evt.get("ts_ms"): + line += f' ts_ms={evt["ts_ms"]}' + detail_lines.append(line) + detail_lines.append("") + detail_lines.append("## 原始日志 RAW") + detail_lines.append("") + for evt in trace["events"]: + if evt.get("raw"): + detail_lines.append(evt["raw"]) + detail_lines.append("") + detail_dict[tid] = "\n".join(detail_lines) + + # 主报告中添加引用和摘要 + safe_tid = tid.replace("/", "_") + sections.append(f' 事件数: {len(trace["events"])}') + sections.append( + f" > 完整事件链: [detail/trace/trace_{safe_tid}.md](../detail/trace/trace_{safe_tid}.md)" + ) + sections.append("") + + return "\n".join(sections), detail_dict + + +# ════════════════════════════════════════════════════════════════ +# Grep 工具 +# ════════════════════════════════════════════════════════════════ + + +def _grep_lines(log_file, pattern, tail=None): + try: + if tail: + cmd = f"tail -n {tail} {_shell_quote(log_file)} | grep -E {_shell_quote(pattern)}" + else: + cmd = f"grep -E {_shell_quote(pattern)} {_shell_quote(log_file)}" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + if result.returncode > 1: + return [] + return [line for line in result.stdout.split("\n") if line.strip()] + except (subprocess.TimeoutExpired, FileNotFoundError): + return [] + + +def _shell_quote(s): + return "'" + s.replace("'", "'\\''") + "'" + + +def _detect_matched_tags(lines, target_id): + matched = set() + for line in lines: + tags = extract_tags(line) + for key in ("request_id", "trace_id", "session_id", "req_id"): + if tags.get(key) == target_id: + matched.add(key) + return sorted(matched) + + +def _format_matched_tag(matched_tags): + if not matched_tags: + return 
"unknown" + if len(matched_tags) == 1: + return matched_tags[0] + return "+".join(matched_tags) + + +def _build_id_coverage_stats(lines): + request_only_ids = set() + session_only_ids = set() + trace_only_ids = set() + + for line in lines: + tags = extract_tags(line) + req_val = tags.get("request_id") or tags.get("req_id") + session_val = tags.get("session_id") + trace_val = tags.get("trace_id") + has_request = bool(req_val) + has_session = bool(session_val) + has_trace = bool(trace_val) + + if has_request and not has_session and not has_trace: + request_only_ids.add(req_val) + if has_session and not has_request and not has_trace: + session_only_ids.add(session_val) + if has_trace and not has_request and not has_session: + trace_only_ids.add(trace_val) + + return { + "request_only": len(request_only_ids), + "session_only": len(session_only_ids), + "trace_only": len(trace_only_ids), + } + + +def _build_id_combo_stats(lines): + combo_to_ids = {} + for line in lines: + tags = extract_tags(line) + keys = [] + if tags.get("request_id"): + keys.append("request_id") + if tags.get("req_id"): + keys.append("req_id") + if tags.get("session_id"): + keys.append("session_id") + if tags.get("trace_id"): + keys.append("trace_id") + combo = "+".join(keys) if keys else "no_id_tag" + + ids = [] + if tags.get("request_id"): + ids.append(tags["request_id"]) + if tags.get("req_id"): + ids.append(tags["req_id"]) + if tags.get("session_id"): + ids.append(tags["session_id"]) + if tags.get("trace_id"): + ids.append(tags["trace_id"]) + id_key = "|".join(ids) if ids else "" + + combo_to_ids.setdefault(combo, set()).add(id_key) + + rows = [] + for combo, ids in combo_to_ids.items(): + rows.append({"combo": combo, "count": len(ids), "ids": sorted(ids)}) + rows.sort(key=lambda x: x["count"], reverse=True) + return rows diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/chart.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/chart.py new file mode 100644 
index 00000000000..1eaea1369f8 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/chart.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 +""" +Chart — 终端可视化渲染工具 + +提供 sparkline 折线图、Unicode 柱状图、Markdown 表格、Worker 时间线的渲染函数。 +所有函数返回字符串(不直接打印),方便组装到报告中。 + +Python 3 stdlib only,零依赖。 +""" + +from datetime import datetime + +# ════════════════════════════════════════════════════════════════ +# Sparkline 折线图 +# ════════════════════════════════════════════════════════════════ + +BLOCK_CHARS = " ▁▂▃▄▅▆▇█" + + +def render_sparkline( + records, value_field="value", bucket_field="bucket", title=None, y_label=None, y_range=None, width=60 +): + """渲染 8 级 Unicode sparkline 折线图。 + + Args: + records: dict 列表,每个 dict 包含 bucket_field 和 value_field + value_field: 数值字段名 + bucket_field: 时间桶字段名 + title: 图表标题 + y_label: Y 轴标签(如 '%') + y_range: Y 轴范围 (min, max) 元组,None 则自动 + width: 图表宽度(字符数) + + Returns: + str: 渲染后的图表文本 + """ + if not records: + return " (no data)" + + all_values = [] + for r in records: + v = r.get(value_field) + if v is not None: + all_values.append(float(v)) + + if not all_values: + return " (no numeric data)" + + # Y 轴范围 + if y_range: + y_min, y_max = y_range + else: + y_min = min(all_values) + y_max = max(all_values) + if y_max == y_min: + y_min = 0 if y_max > 0 else y_max - 1 + y_max = max(y_max, 1) + + y_span = y_max - y_min if y_max != y_min else 1 + + # 降采样 + n = len(records) + if n > width: + step = n / width + sampled = [] + for i in range(width): + start_idx = int(i * step) + end_idx = int((i + 1) * step) + chunk = records[start_idx:end_idx] + vals = [float(r.get(value_field, 0)) for r in chunk if r.get(value_field) is not None] + avg_record = { + bucket_field: chunk[0].get(bucket_field, ""), + value_field: sum(vals) / len(vals) if vals else 0, + } + sampled.append(avg_record) + records = sampled + + lines = [] + + # 标题行 + def fmt_val(v): + if abs(v) >= 1000: + return f"{v:.0f}" + elif abs(v) >= 10: + return f"{v:.1f}" + return 
f"{v:.2f}" + + header_parts = [] + if title: + header_parts.append(title) + header_parts.append(f"min={fmt_val(min(all_values))}") + header_parts.append(f"max={fmt_val(max(all_values))}") + if y_label: + header_parts.append(f"({y_label})") + lines.append(" " + " ".join(header_parts)) + + # Sparkline 字符 + spark_chars = [] + for r in records: + v = r.get(value_field) + if v is None: + spark_chars.append(" ") + continue + v = float(v) + normalized = (v - y_min) / y_span + level = max(0, min(8, round(normalized * 8))) + spark_chars.append(BLOCK_CHARS[level]) + lines.append(" " + "".join(spark_chars)) + + # X 轴标签 + data_width = len(records) + if data_width > 0: + + def short_bucket(r): + b = str(r.get(bucket_field, "")) + if " " in b: + b = b.split(" ")[-1] + return b[:5] if len(b) >= 5 else b + + lbl_width = 6 + max_labels = max(1, data_width // lbl_width) + n_records = len(records) + + if n_records <= 2: + indices = list(range(n_records)) + elif n_records <= max_labels: + indices = [0, n_records - 1] + else: + n_labels = min(5, max(2, max_labels)) + indices = [int(i * (n_records - 1) / (n_labels - 1)) for i in range(n_labels)] + + label_line = [" "] * (data_width + lbl_width + 2) + last_end = -1 + for idx in indices: + lbl = short_bucket(records[idx]) + pos = idx + if pos < last_end: + continue + for ci, c in enumerate(lbl): + p = pos + ci + if p < len(label_line): + label_line[p] = c + last_end = pos + len(lbl) + 1 + lines.append(" " + "".join(label_line).rstrip()) + + return "\n".join(lines) + + +# ════════════════════════════════════════════════════════════════ +# Unicode 柱状图 +# ════════════════════════════════════════════════════════════════ + + +def render_bar(data, bar_width=20, show_count=False): + """渲染 Unicode 柱状图。 + + Args: + data: dict 列表,每个 dict 包含 label, value(百分比 0-100), 可选 count + bar_width: 柱状图宽度(字符数) + show_count: 是否显示绝对数量 + + Returns: + str: 渲染后的图表文本 + """ + if not data: + return " (no data)" + + max_label_len = max(len(str(d.get("label", ""))) for d 
in data) + max_label_len = max(max_label_len, 4) + + lines = [] + for d in data: + label = str(d.get("label", "")) + value = float(d.get("value", 0)) + count = d.get("count") + + filled = round(value / 100 * bar_width) if value > 0 else 0 + filled = max(1, filled) if value > 0 else 0 + filled = min(bar_width, filled) + empty = bar_width - filled + bar = "█" * filled + "░" * empty + + line = f" {label:<{max_label_len}} {bar} {value:>5.1f}%" + if show_count and count is not None: + line += f" (N={count})" + lines.append(line) + + return "\n".join(lines) + + +# ════════════════════════════════════════════════════════════════ +# Markdown 表格 +# ════════════════════════════════════════════════════════════════ + + +def render_table(data, columns=None, right_align=None): + """渲染 Markdown 表格。 + + Args: + data: dict 列表 + columns: 列名列表,None 则用第一条记录的所有 key + right_align: 右对齐的列名集合 + + Returns: + str: 渲染后的表格文本 + """ + if not data: + return " (no data)" + + if columns is None: + columns = list(data[0].keys()) + if right_align is None: + right_align = set() + + # 计算列宽 + col_widths = {} + for col in columns: + col_widths[col] = len(col) + for row in data: + val = str(row.get(col, "")) + col_widths[col] = max(col_widths[col], len(val)) + + # 表头 + header_parts = [] + sep_parts = [] + for col in columns: + w = col_widths[col] + if col in right_align: + header_parts.append(f" {col:>{w}} ") + sep_parts.append("-" * (w + 1) + ":") + else: + header_parts.append(f" {col:<{w}} ") + sep_parts.append(":" + "-" * (w + 1)) + + lines = [] + lines.append("|" + "|".join(header_parts) + "|") + lines.append("|" + "|".join(sep_parts) + "|") + + # 数据行 + for row in data: + row_parts = [] + for col in columns: + val = str(row.get(col, "")) + w = col_widths[col] + if col in right_align: + row_parts.append(f" {val:>{w}} ") + else: + row_parts.append(f" {val:<{w}} ") + lines.append("|" + "|".join(row_parts) + "|") + + return "\n".join(lines) + + +# 
════════════════════════════════════════════════════════════════ +# Worker 可用性时间线 +# ════════════════════════════════════════════════════════════════ + + +def render_timeline(data, width=40): + """渲染 Worker 可用性时间线。 + + Args: + data: dict,结构为: + { + 'start': 'YYYY/MM/DD HH:MM:SS', + 'end': 'YYYY/MM/DD HH:MM:SS', + 'workers': { + 'http://ip:port': [ + {'from': 'ts', 'to': 'ts', 'status': 'up'|'down'}, + ... + ], + ... + } + } + width: 时间线宽度(字符数) + + Returns: + str: 渲染后的时间线文本 + """ + if not data: + return " (no data)" + + start_str = data.get("start", "") + end_str = data.get("end", "") + workers = data.get("workers", {}) + + if not workers or not start_str or not end_str: + return " (insufficient data)" + + # Parse time range + try: + if "/" in start_str: + fmt = "%Y/%m/%d %H:%M:%S" + else: + fmt = "%H:%M:%S" + t_start = datetime.strptime(start_str, fmt) + t_end = datetime.strptime(end_str, fmt) + except ValueError: + return f" (cannot parse time range: {start_str} ~ {end_str})" + + total_seconds = (t_end - t_start).total_seconds() + if total_seconds <= 0: + total_seconds = 1 + + lines = [] + + for worker_url, periods in workers.items(): + # Short label: just IP:PORT + label = worker_url.replace("http://", "") + bar = ["░"] * width + + for period in periods: + p_start_str = period.get("from", start_str) + p_end_str = period.get("to", end_str) + status = period.get("status", "up") + + try: + if "/" in p_start_str: + p_start = datetime.strptime(p_start_str, "%Y/%m/%d %H:%M:%S") + p_end = datetime.strptime(p_end_str, "%Y/%m/%d %H:%M:%S") + else: + p_start = datetime.strptime(p_start_str, "%H:%M:%S") + p_end = datetime.strptime(p_end_str, "%H:%M:%S") + except ValueError: + continue + + start_pos = int((p_start - t_start).total_seconds() / total_seconds * width) + end_pos = int((p_end - t_start).total_seconds() / total_seconds * width) + start_pos = max(0, min(width - 1, start_pos)) + end_pos = max(0, min(width, end_pos)) + + char = "█" if status == "up" else "░" + for i 
in range(start_pos, end_pos): + bar[i] = char + + up_count = bar.count("█") + uptime_pct = round(up_count / width * 100, 1) + + max_label_len = max(len(w.replace("http://", "")) for w in workers) + lines.append(f' {label:<{max_label_len}} {"".join(bar)} {uptime_pct}%') + + # Time axis + if lines: + max_label_len = max(len(w.replace("http://", "")) for w in workers) + padding = " " * (max_label_len + 4) + start_lbl = start_str.split(" ")[-1] if " " in start_str else start_str + end_lbl = end_str.split(" ")[-1] if " " in end_str else end_str + gap = width - len(start_lbl) - len(end_lbl) + lines.append(f'{padding}{start_lbl}{" " * max(1, gap)}{end_lbl}') + lines.append(f"{padding}█ = online ░ = offline") + + return "\n".join(lines) diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/log_parser.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/log_parser.py new file mode 100644 index 00000000000..99864e1de16 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/log_parser.py @@ -0,0 +1,1116 @@ +#!/usr/bin/env python3 +""" +Router Log Parser — FastDeploy Go Router 日志解析原语 + +支持两种调用方式: +1. 作为模块导入:from log_parser import parse_http_line, parse_cache_strategy_line, ... +2. 作为 CLI 工具:grep 'pattern' logfile | python3 log_parser.py parse-http + +Python 3 stdlib only,零依赖。 +""" + +import argparse +import json +import re +import sys +from collections import defaultdict +from datetime import datetime, timedelta + +# ════════════════════════════════════════════════════════════════ +# 通用解析原语 +# ════════════════════════════════════════════════════════════════ + +# Go time.Duration.String() parser: handles 1h2m3.456s, 500µs, 150.5ms, etc. 
DURATION_RE = re.compile(r"(\d+(?:\.\d+)?)(h|m(?!s)|s|ms|[µu]s|ns)")


def parse_go_duration_ms(s):
    """Convert the output of Go's time.Duration.String() to milliseconds.

    Examples: '1.5s' -> 1500.0, '500µs' -> 0.5, '1m30s' -> 90000.0
    """
    total_ms = 0.0
    for part in DURATION_RE.finditer(s):
        value = float(part.group(1))
        unit = part.group(2)
        if unit == "h":
            total_ms += value * 3600000
        elif unit == "m":
            total_ms += value * 60000
        elif unit == "s":
            total_ms += value * 1000
        elif unit == "ms":
            total_ms += value
        elif unit in ("µs", "us"):
            total_ms += value / 1000
        elif unit == "ns":
            total_ms += value / 1000000
    return total_ms


def parse_go_map(s):
    """Parse Go's fmt.Sprintf('%v', map) output: map[key1:val1 key2:val2 ...].

    Handles the clash between colons inside URLs and the Go map key/value
    separator by splitting on the last colon of each token.  An empty map
    'map[]' yields an empty dict.
    """
    body = re.search(r"map\[(.*?)\]", s)
    if body is None:
        return {}
    content = body.group(1).strip()
    out = {}
    for token in content.split():
        sep = token.rfind(":")
        if sep <= 0:
            continue
        key, raw = token[:sep], token[sep + 1 :]
        try:
            out[key] = float(raw) if "." in raw else int(raw)
        except ValueError:
            out[key] = raw
    return out


# Timestamp: YYYY/MM/DD HH:MM:SS
TS_RE = re.compile(r"(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2})")

# ts_ms: 2025-01-15 18:25:33.123
TS_MS_RE = re.compile(r"ts_ms=(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)")


def extract_ts(line):
    """Pull a YYYY/MM/DD HH:MM:SS timestamp out of a log line, or None."""
    found = TS_RE.search(line)
    return found.group(1) if found else None


def parse_ts(ts_str):
    """Parse a YYYY/MM/DD HH:MM:SS timestamp into a datetime."""
    return datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S")


# ════════════════════════════════════════════════════════════════
# Time-range filtering
# ════════════════════════════════════════════════════════════════

import os
import subprocess
import tempfile

_FULL_DT_RE = re.compile(r"^(\d{4})[/-](\d{1,2})[/-](\d{1,2})\s+(\d{1,2}):(\d{2})(?::(\d{2}))?$")
_DATE_ONLY_RE = re.compile(r"^(\d{4})[/-](\d{1,2})[/-](\d{1,2})$")
_SHORT_DATE_RE = re.compile(r"^(\d{1,2})[/-](\d{1,2})(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?$")
_TIME_ONLY_RE = re.compile(r"^(\d{1,2}):(\d{2})(?::(\d{2}))?$")


def _get_log_boundary_ts(log_file, which="first"):
    """Read the timestamp on the first or last line of a log file, or None."""
    tool = "head" if which == "first" else "tail"
    try:
        proc = subprocess.run([tool, "-1", log_file], capture_output=True, text=True, timeout=5)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None
    return extract_ts(proc.stdout) if proc.returncode == 0 else None


def complete_time_arg(time_str, log_file, is_end=False):
    """Normalize a flexible time argument to 'YYYY/MM/DD HH:MM:SS'.

    Accepted shapes:
      'YYYY/MM/DD HH:MM:SS', 'YYYY-MM-DD HH:MM:SS', 'YYYY/MM/DD',
      'MM/DD', 'MM/DD HH:MM', 'HH:MM:SS', 'HH:MM'

    Completion rules:
      - missing year: taken from the log's first line
      - missing date: taken from the log's last line
      - missing time: start -> 00:00:00, end -> 23:59:59

    Returns the completed 'YYYY/MM/DD HH:MM:SS' string (or the stripped
    input unchanged when no shape matches).
    """
    if time_str is None:
        return None
    text = time_str.strip()
    default_time = "23:59:59" if is_end else "00:00:00"

    # Case 1: full date + time.
    m = _FULL_DT_RE.match(text)
    if m:
        year, month, day = m.group(1), m.group(2).zfill(2), m.group(3).zfill(2)
        hour, minute = m.group(4).zfill(2), m.group(5)
        second = (m.group(6) or "00").zfill(2)
        return f"{year}/{month}/{day} {hour}:{minute}:{second}"

    # Case 2: date only, YYYY/MM/DD.
    m = _DATE_ONLY_RE.match(text)
    if m:
        year, month, day = m.group(1), m.group(2).zfill(2), m.group(3).zfill(2)
        return f"{year}/{month}/{day} {default_time}"

    # Case 3: short date MM/DD, optionally with HH:MM[:SS].
    m = _SHORT_DATE_RE.match(text)
    if m:
        month, day = m.group(1).zfill(2), m.group(2).zfill(2)
        first_ts = _get_log_boundary_ts(log_file, "first")
        year = first_ts[:4] if first_ts else str(datetime.now().year)
        if m.group(3):  # a time part is present
            hour, minute = m.group(3).zfill(2), m.group(4)
            second = (m.group(5) or "00").zfill(2)
            return f"{year}/{month}/{day} {hour}:{minute}:{second}"
        return f"{year}/{month}/{day} {default_time}"

    # Case 4: time only, HH:MM[:SS].
    m = _TIME_ONLY_RE.match(text)
    if m:
        hour, minute = m.group(1).zfill(2), m.group(2)
        second = (m.group(3) or "00").zfill(2)
        last_ts = _get_log_boundary_ts(log_file, "last")
        date_part = last_ts[:10] if last_ts else f"{datetime.now().year}/01/01"
        return f"{date_part} {hour}:{minute}:{second}"

    # Fallback: pass the (stripped) input through unchanged.
    return text


def filter_file_by_time_range(log_file, start_str=None, end_str=None):
    """Pre-filter a log file by time range using awk.

    'YYYY/MM/DD HH:MM:SS' timestamps compare correctly as plain strings, so
    the awk script uses lexicographic comparison.  Lines without a timestamp
    (e.g. panic stack continuations) are kept.

    Args:
        log_file: path of the original log file
        start_str: inclusive start 'YYYY/MM/DD HH:MM:SS', or None
        end_str: inclusive end 'YYYY/MM/DD HH:MM:SS', or None

    Returns:
        tuple: (file_path, is_temp) — when is_temp is True the caller is
        responsible for deleting the file.
    """
    if not start_str and not end_str:
        return (log_file, False)

    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, prefix="router_filtered_")
    tmp.close()

    awk_script = r"""{
    ts = ""
    if (match($0, /[0-9]{4}\/[0-9]{2}\/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/)) {
        ts = substr($0, RSTART, RLENGTH)
    }
    if (ts == "") { print; next }
    if ((start == "" || ts >= start) && (end == "" || ts <= end)) print
    }"""

    cmd = ["awk", "-v", f'start={start_str or ""}', "-v", f'end={end_str or ""}', awk_script, log_file]

    try:
        with open(tmp.name, "w") as outf:
            result = subprocess.run(cmd, stdout=outf, stderr=subprocess.PIPE, text=True, timeout=120)
        if result.returncode != 0:
            os.unlink(tmp.name)
            return (log_file, False)
    except (subprocess.TimeoutExpired, OSError):
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
        return (log_file, False)

    return (tmp.name, True)


def filter_file_by_recent_minutes(log_file, minutes):
    """Filter to the last N minutes, anchored at the log's final timestamp.

    Returns:
        tuple: (file_path, is_temp) — when is_temp is True the caller is
        responsible for deleting the file.
    """
    if minutes is None or minutes <= 0:
        return (log_file, False)

    last_ts = _get_log_boundary_ts(log_file, "last")
    if not last_ts:
        return (log_file, False)

    try:
        end_dt = parse_ts(last_ts)
    except ValueError:
        return (log_file, False)

    start_dt = end_dt - timedelta(minutes=minutes)
    return filter_file_by_time_range(
        log_file,
        start_str=start_dt.strftime("%Y/%m/%d %H:%M:%S"),
        end_str=end_dt.strftime("%Y/%m/%d %H:%M:%S"),
    )


# Context tags: [session_id:...], [request_id:...], [trace_id:...], [req_id:...]
TAG_RE = re.compile(r"\[(session_id|request_id|trace_id|req_id):([^\]]+)\]")


def extract_tags(line):
    """Extract the context tags present on a log line as a dict."""
    return {m.group(1): m.group(2) for m in TAG_RE.finditer(line)}


# Log level
LEVEL_RE = re.compile(r"\[(INFO|ERROR|WARN|DEBUG)\]")


def extract_level(line):
    """Extract the log level of a line, or None when absent."""
    found = LEVEL_RE.search(line)
    return found.group(1) if found else None


# ════════════════════════════════════════════════════════════════
# HTTP request-line parsing (category H1)
# ════════════════════════════════════════════════════════════════

# H1 pattern: [METHOD] /path HTTP/1.1 STATUS LATENCY CLIENT_IP
HTTP_RE = re.compile(
    r"\[(POST|GET|PUT|DELETE|PATCH|HEAD|OPTIONS)\]\s+"
    r"(/\S*)\s+HTTP/\d\.\d\s+"
    r"(\d{3})\s+"
    r"(\S+)\s+"  # latency (Go duration string)
    r"(\d+\.\d+\.\d+\.\d+)"  # client IPv4
)


def parse_http_line(line, inference_only=False):
    """Parse an H1 HTTP request line.

    Example input:
        [INFO] 2025/01/15 18:25:33 logger.go:45: [POST] /v1/chat/completions HTTP/1.1 200 1.234567s 10.0.0.1

    Args:
        line: the raw log line
        inference_only: when True, keep only the inference paths
            (/v1/chat/completions, /v1/completions)

    Returns a dict, or None when the line does not match.
    """
    m = HTTP_RE.search(line)
    if m is None:
        return None

    method, path, status, latency_raw, client_ip = m.groups()
    if inference_only and path not in ("/v1/chat/completions", "/v1/completions"):
        return None

    record = {
        "ts": extract_ts(line) or "",
        "method": method,
        "path": path,
        "status": int(status),
        "latency_ms": round(parse_go_duration_ms(latency_raw), 3),
        "client_ip": client_ip,
    }

    tags = extract_tags(line)
    if tags:
        record["tags"] = tags

    return record


# ════════════════════════════════════════════════════════════════
# Cache-Aware strategy-line parsing (category H6)
# ════════════════════════════════════════════════════════════════

URL_RE = r"(?:https?://)?[A-Za-z0-9.-]+(?::\d+)?"
+STRATEGY_RE = re.compile(r"final strategy:\s*(\w+)") +SELECTED_RE = re.compile(rf"selected=({URL_RE})(?:,|\s|$)") +REASON_RE = re.compile(r"reason:\s*(.+?)(?:,\s*loads=|\.?\s*ts_ms=|$)") + + +def parse_cache_strategy_line(line): + """解析 cache-aware prefill 策略行。 + + 返回 dict 或 None(如果不是策略行)。 + """ + sm = STRATEGY_RE.search(line) + if not sm: + return None + + ts = extract_ts(line) + strategy = sm.group(1) + record = {"ts": ts or "", "strategy": strategy} + + sel_m = SELECTED_RE.search(line) + if sel_m: + record["selected"] = sel_m.group(1) + + reason_m = REASON_RE.search(line) + if reason_m and strategy == "process_tokens": + record["reason"] = reason_m.group(1).strip() + + hr_match = re.search(r"hitRatios=(map\[.*?\])", line) + if hr_match: + hit_ratios = parse_go_map(hr_match.group(1)) + record["hitRatios"] = hit_ratios + if "selected" in record: + record["selected_hitRatio"] = hit_ratios.get(record["selected"], 0) + else: + record["hitRatios"] = {} + if "selected" in record: + record["selected_hitRatio"] = 0 + + loads_match = re.search(r"loads=(map\[.*?\])", line) + if loads_match: + record["loads"] = parse_go_map(loads_match.group(1)) + + ts_ms_m = TS_MS_RE.search(line) + if ts_ms_m: + record["ts_ms"] = ts_ms_m.group(1) + + tags = extract_tags(line) + if tags: + record["tags"] = tags + + return record + + +# ════════════════════════════════════════════════════════════════ +# Stats 行解析(类别 H7) +# ════════════════════════════════════════════════════════════════ + +TOTAL_RUNNING_RE = re.compile(r"total_running=(\d+)") +WORKER_RUNNING_RE = re.compile(rf"({URL_RE}): running=(\d+)") +CACHE_HR_RE = re.compile(r"cache_hit_rate=([\d.]+)%\s*\(hits=(\d+)/total=(\d+)\)") + + +def parse_stats_line(line): + """解析 [stats] 统计行。 + + 注意:hits 和 total 是 per-interval 的(每 5s 重置),累计值必须 sum 所有行。 + + 返回 dict 或 None(如果不是 stats 行)。 + """ + if "[stats]" not in line: + return None + + ts = extract_ts(line) + record = {"ts": ts or ""} + + tr_m = TOTAL_RUNNING_RE.search(line) + if tr_m: + 
record["total_running"] = int(tr_m.group(1)) + + workers = {} + for wm in WORKER_RUNNING_RE.finditer(line): + workers[wm.group(1)] = int(wm.group(2)) + record["workers"] = workers + + chr_m = CACHE_HR_RE.search(line) + if chr_m: + record["cache_hit_rate"] = float(chr_m.group(1)) + record["hits"] = int(chr_m.group(2)) + record["total"] = int(chr_m.group(3)) + + return record + + +# ════════════════════════════════════════════════════════════════ +# 错误消息模板归一化 +# ════════════════════════════════════════════════════════════════ + +NORMALIZE_PATTERNS = [ + (re.compile(r"https?://[\w.:]+"), "{url}"), + (re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I), "{uuid}"), + (re.compile(r"\d+\.\d+\.\d+\.\d+:\d+"), "{ip:port}"), + (re.compile(r"\b\d+\b"), "{N}"), +] + +# Message extraction: everything after "logger.go:NN: " (and optional context tags) +MSG_RE = re.compile(r"logger\.go:\d+:\s*(?:\[[^\]]*\]\s*)*(.+)") + + +def normalize_message(msg): + """将错误消息中的变量替换为占位符模板。""" + for pat, repl in NORMALIZE_PATTERNS: + msg = pat.sub(repl, msg) + return msg + + +def parse_error_line(line): + """解析 ERROR/WARN 行并进行模板归一化。 + + 返回 dict: {ts, level, original, template, tags} + """ + ts = extract_ts(line) + level = extract_level(line) + tags = extract_tags(line) + + mm = MSG_RE.search(line) + original = mm.group(1).strip() if mm else line + + template = normalize_message(original) + + record = { + "ts": ts or "", + "level": level or "", + "original": original, + "template": template, + } + if tags: + record["tags"] = tags + + return record + + +# ════════════════════════════════════════════════════════════════ +# Select/Release 事件匹配 +# ════════════════════════════════════════════════════════════════ + +SELECT_RE = re.compile(rf"select worker\s*(?:\((\w+)\))?:\s*({URL_RE})") +RELEASE_RE = re.compile(rf"release worker\s*(?:\((\w+)\))?:\s*({URL_RE})") +FAILED_SELECT_RE = re.compile(r"Failed to select") +SELECT_TOKENS_RE = re.compile(rf"select worker 
\((\w+)\):\s*({URL_RE}),\s*tokens:\s*(\d+)") +RELEASE_TOKENS_RE = re.compile(rf"release (?:([a-zA-Z_]+)\s+)?tokens:\s*({URL_RE}),\s*tokens:\s*(\d+)") + + +def _parse_ts_safe(ts): + if not ts: + return None + try: + return parse_ts(ts) + except ValueError: + return None + + +def _select_match_key(tags): + """构建请求关联 key,优先 request_id,其次 req_id/trace_id/session_id。""" + if not tags: + return (None, None) + rid = tags.get("request_id") + if rid: + return ("request_id", f"request_id:{rid}") + for k in ("req_id", "trace_id", "session_id"): + v = tags.get(k) + if v: + return ("alt_id", f"{k}:{v}") + return (None, None) + + +def _normalize_worker_type(worker_type): + """归一化 worker type。""" + t = (worker_type or "unknown").lower() + if t in ("prefill", "decode", "mixed"): + return t + return "unknown" + + +def _normalize_worker_url_key(url): + if not url: + return "" + return re.sub(r"^https?://", "", str(url).strip().rstrip("/")) + + +def _infer_release_worker_type(release, selects, fallback_window_s=120): + """为未显式标注 type 的 release 近似推断 worker type。 + + 优先级: + 1) 同 worker、时间上最近且不晚于 release 的 select type + 2) 若无可解析时间戳,则使用同 worker 的最后一个 select type + 3) 推断失败返回 unknown + """ + worker = release.get("worker") + if not worker: + return "unknown" + + r_ts = _parse_ts_safe(release.get("ts")) + candidates = [s for s in selects if s.get("worker") == worker] + if not candidates: + return "unknown" + + if r_ts: + best = None + best_delta = None + for s in candidates: + s_ts = _parse_ts_safe(s.get("ts")) + if not s_ts: + continue + delta = (r_ts - s_ts).total_seconds() + if delta < 0 or delta > fallback_window_s: + continue + if best_delta is None or delta < best_delta: + best = s + best_delta = delta + if best is not None: + return _normalize_worker_type(best.get("type")) + + # 回退:按出现顺序取同 worker 的最近 select + return _normalize_worker_type(candidates[-1].get("type")) + + +def _infer_token_release_worker_type(release, selects, fallback_window_s=120): + """为 token release 推断 worker 
type(prefill/mixed)。 + + 注意:日志文本通常固定为 `release prefill tokens`,即使 mixed 也可能走这条日志。 + 因此 token release 的类型优先依据同 worker 的邻近 select 推断。 + """ + worker = release.get("worker") + if not worker: + return "unknown" + + r_ts = _parse_ts_safe(release.get("ts")) + candidates = [ + s + for s in selects + if s.get("worker") == worker and _normalize_worker_type(s.get("type")) in ("prefill", "mixed") + ] + if not candidates: + return "unknown" + + if r_ts: + best = None + best_delta = None + for s in candidates: + s_ts = _parse_ts_safe(s.get("ts")) + if not s_ts: + continue + delta = (r_ts - s_ts).total_seconds() + if delta < 0 or delta > fallback_window_s: + continue + if best_delta is None or delta < best_delta: + best = s + best_delta = delta + if best is not None: + return _normalize_worker_type(best.get("type")) + + return _normalize_worker_type(candidates[-1].get("type")) + + +def match_select_release(lines, fallback_window_s=120): + """匹配 select/release worker 事件对。 + + Args: + lines: 日志行列表(字符串) + + Returns: + dict: {matched, unmatched_selects, failed_selects, per_worker} + """ + selects = [] + releases = [] + failed_selects = [] + + for line_no, line in enumerate(lines, 1): + ts = extract_ts(line) + tags = extract_tags(line) + + # Token-bearing select + tm = SELECT_TOKENS_RE.search(line) + if tm: + selects.append( + { + "ts": ts, + "worker": tm.group(2), + "worker_key": _normalize_worker_url_key(tm.group(2)), + "type": _normalize_worker_type(tm.group(1)), + "tags": tags, + "tokens": int(tm.group(3)), + "line": line_no, + } + ) + continue + + # Token-bearing release + trm = RELEASE_TOKENS_RE.search(line) + if trm: + token_type = trm.group(1) + releases.append( + { + "ts": ts, + "worker": trm.group(2), + "worker_key": _normalize_worker_url_key(trm.group(2)), + # 文本默认按 prefill 记,再结合同 worker 邻近 select 做纠偏(mixed 场景) + "type": f'{_normalize_worker_type(token_type or "prefill")}_tokens', + "raw_token_type": token_type or "", + "tags": tags, + "tokens": int(trm.group(3)), + 
"line": line_no, + } + ) + continue + + sm = SELECT_RE.search(line) + if sm: + selects.append( + { + "ts": ts, + "worker": sm.group(2), + "worker_key": _normalize_worker_url_key(sm.group(2)), + "type": _normalize_worker_type(sm.group(1)), + "tags": tags, + "tokens": None, + "line": line_no, + } + ) + continue + + rm = RELEASE_RE.search(line) + if rm: + releases.append( + { + "ts": ts, + "worker": rm.group(2), + "worker_key": _normalize_worker_url_key(rm.group(2)), + "type": _normalize_worker_type(rm.group(1)), + "tags": tags, + "tokens": None, + "line": line_no, + } + ) + continue + + if FAILED_SELECT_RE.search(line): + failed_selects.append({"ts": ts, "tags": tags, "line": line_no}) + + # Match by worker FIFO(select -> 同 worker 下一条 release) + matched = [] + unmatched_selects = [] + release_used = set() + + # 请求生命周期匹配只使用 request counter release(排除 token release) + # 说明:request_id 只用于覆盖率观测,不参与 select/release 配对条件。 + counter_release_indexes = [i for i, r in enumerate(releases) if not str(r.get("type", "")).endswith("_tokens")] + # 请求 ID 覆盖(按 select 事件近似请求数) + total_req_est = len(selects) + with_request_id = 0 + with_alt_id = 0 + without_any_id = 0 + + pending_selects = [] + for s in selects: + key_type, key = _select_match_key(s.get("tags", {})) + if key_type == "request_id": + with_request_id += 1 + elif key_type == "alt_id": + with_alt_id += 1 + else: + without_any_id += 1 + + pending_selects.append(s) + + for s in pending_selects: + sdt = _parse_ts_safe(s.get("ts")) + best_idx = None + best_ts = None + for ri in counter_release_indexes: + if ri in release_used: + continue + r = releases[ri] + if r.get("worker_key") != s.get("worker_key"): + continue + rdt = _parse_ts_safe(r.get("ts")) + # 优先选择时间不早于 select 的最早 release;解析失败则按出现顺序 + if sdt and rdt and rdt < sdt: + continue + if best_idx is None: + best_idx = ri + best_ts = rdt + elif rdt and best_ts and rdt < best_ts: + best_idx = ri + best_ts = rdt + + if best_idx is not None: + r = releases[best_idx] + s_key_type, 
s_key = _select_match_key(s.get("tags", {})) + r_key_type, r_key = _select_match_key(r.get("tags", {})) + if s_key and r_key: + if s_key == r_key: + id_check = "match" + else: + id_check = "mismatch" + elif s_key and not r_key: + id_check = "select_only" + elif (not s_key) and r_key: + id_check = "release_only" + else: + id_check = "both_missing" + + matched.append( + { + "request_id": s["tags"].get("request_id", ""), + "worker": s["worker"], + "select_ts": s["ts"], + "release_ts": r["ts"], + "type": s["type"], + "match_method": "worker_fifo", + "id_check": id_check, + } + ) + release_used.add(best_idx) + else: + unmatched_selects.append( + { + "worker": s["worker"], + "select_ts": s["ts"], + "type": s["type"], + "tags": s["tags"], + "note": "no matching release found (worker FIFO)", + } + ) + + # Per-worker summary(按 worker type 统计,不依赖日志中的 tokens 字段) + # 规则:prefill/mixed 的 select 均计入 token_selects。 + per_worker = defaultdict(lambda: {"selects": 0, "releases": 0, "token_selects": 0, "token_releases": 0}) + for s in selects: + s_type = _normalize_worker_type(s.get("type")) + wkey = s.get("worker_key") or _normalize_worker_url_key(s.get("worker")) + per_worker[wkey]["selects"] += 1 + if s_type in ("prefill", "mixed"): + per_worker[wkey]["token_selects"] += 1 + for r in releases: + wkey = r.get("worker_key") or _normalize_worker_url_key(r.get("worker")) + if str(r.get("type", "")).endswith("_tokens"): + per_worker[wkey]["token_releases"] += 1 + else: + per_worker[wkey]["releases"] += 1 + + pw_result = {} + for w, counts in per_worker.items(): + pw_result[w] = { + "selects": counts["selects"], + "releases": counts["releases"], + "delta": counts["selects"] - counts["releases"], + "token_selects": counts["token_selects"], + "token_releases": counts["token_releases"], + } + + # 基于 select 构建 worker URL -> dominant type 映射 + per_worker_type_counts = defaultdict(lambda: defaultdict(int)) + for s in selects: + wkey = s.get("worker_key") or 
_normalize_worker_url_key(s.get("worker")) + per_worker_type_counts[wkey][_normalize_worker_type(s.get("type"))] += 1 + worker_dominant_type = {} + for w, counts in per_worker_type_counts.items(): + worker_dominant_type[w] = sorted(counts.items(), key=lambda kv: -kv[1])[0][0] if counts else "unknown" + + # 为未显式标注 type 的 release 推断 worker type(避免大量 unknown) + inferred_release_types = {} + for i, r in enumerate(releases): + r_type_raw = str(r.get("type", "")) + if r_type_raw.endswith("_tokens"): + base_t = _normalize_worker_type(r_type_raw.replace("_tokens", "")) + # token release 按 worker URL 对应的 select 类型映射,不做邻近时间纠偏 + mapped_t = worker_dominant_type.get( + r.get("worker_key") or _normalize_worker_url_key(r.get("worker")), "unknown" + ) + if mapped_t in ("prefill", "decode", "mixed"): + base_t = mapped_t + inferred_release_types[i] = f"{base_t}_tokens" + continue + base_t = _normalize_worker_type(r_type_raw) + if base_t != "unknown": + inferred_release_types[i] = base_t + continue + inferred_release_types[i] = _infer_release_worker_type(r, selects, fallback_window_s=fallback_window_s) + + # 按 worker type 分类统计(prefill/decode/mixed,必要时保留 unknown) + type_summary = defaultdict( + lambda: { + "counter_selects": 0, + "counter_releases": 0, + "token_selects": 0, + "token_releases": 0, + } + ) + for s in selects: + s_type = _normalize_worker_type(s.get("type")) + type_summary[s_type]["counter_selects"] += 1 + if s_type in ("prefill", "mixed"): + type_summary[s_type]["token_selects"] += 1 + for i, r in enumerate(releases): + inferred = inferred_release_types.get(i, _normalize_worker_type(str(r.get("type", "")))) + r_type = _normalize_worker_type(str(inferred).replace("_tokens", "")) + if str(inferred).endswith("_tokens"): + type_summary[r_type]["token_releases"] += 1 + else: + type_summary[r_type]["counter_releases"] += 1 + + # 每个 worker URL 的类型画像(基于 select) + worker_type_profile = {} + for w, counts in per_worker_type_counts.items(): + dominant = "unknown" + if counts: + 
dominant = sorted(counts.items(), key=lambda kv: -kv[1])[0][0] + worker_type_profile[w] = { + "dominant_type": dominant, + "prefill": counts.get("prefill", 0), + "decode": counts.get("decode", 0), + "mixed": counts.get("mixed", 0), + "unknown": counts.get("unknown", 0), + } + + return { + "matched": matched, + "unmatched_selects": unmatched_selects, + "unmatched_releases": [], + "failed_selects": failed_selects, + "per_worker": pw_result, + "id_coverage": { + "total_requests_estimated": total_req_est, + "with_request_id": with_request_id, + "without_request_id": total_req_est - with_request_id, + "with_alt_id": with_alt_id, + "without_any_id": without_any_id, + }, + "type_summary": dict(type_summary), + "worker_type_profile": worker_type_profile, + } + + +# ════════════════════════════════════════════════════════════════ +# 不支持请求检测 +# ════════════════════════════════════════════════════════════════ + +# Router 已知路由白名单 (method, path) +KNOWN_ROUTES = { + ("POST", "/v1/chat/completions"), + ("POST", "/v1/completions"), + ("POST", "/register"), + ("GET", "/registered_number"), + ("GET", "/registered"), + ("GET", "/health_generate"), + ("GET", "/metrics"), +} + + +def find_unsupported_requests(lines): + """从 HTTP 日志行中筛选不匹配任何已知路由的请求。 + + Returns: + dict: {details: [...], summary: {total, unique_paths: {path: count}}} + """ + details = [] + path_counts = defaultdict(int) + + for line in lines: + record = parse_http_line(line) + if not record: + continue + key = (record["method"], record["path"]) + if key not in KNOWN_ROUTES: + details.append( + { + "ts": record["ts"], + "method": record["method"], + "path": record["path"], + "status": record["status"], + "client_ip": record["client_ip"], + } + ) + path_counts[f"{record['method']} {record['path']}"] += 1 + + return { + "details": details, + "summary": { + "total": len(details), + "unique_paths": dict(path_counts), + }, + } + + +def _cli_unsupported_requests(args): + """CLI: 检测不支持的请求。""" + lines = [line.rstrip("\n") for 
line in sys.stdin] + result = find_unsupported_requests(lines) + + if args.summary_only: + print(json.dumps(result["summary"], ensure_ascii=False)) + else: + print(json.dumps(result, ensure_ascii=False)) + + +# ════════════════════════════════════════════════════════════════ +# CLI 入口 +# ════════════════════════════════════════════════════════════════ + + +def _cli_parse_stream(parse_fn): + """通用 CLI 流式解析:从 stdin 读入日志行,输出 JSON Lines 到 stdout。""" + parsed = 0 + skipped = 0 + for line in sys.stdin: + line = line.rstrip("\n") + record = parse_fn(line) + if record: + print(json.dumps(record, ensure_ascii=False)) + parsed += 1 + else: + skipped += 1 + print(f"Parsed {parsed} lines, skipped {skipped}", file=sys.stderr) + + +def _cli_parse_http(args): + """CLI: 解析 HTTP 请求行。""" + parsed = 0 + skipped = 0 + for line in sys.stdin: + line = line.rstrip("\n") + record = parse_http_line(line, inference_only=args.inference_only) + if record: + print(json.dumps(record, ensure_ascii=False)) + parsed += 1 + else: + skipped += 1 + print(f"Parsed {parsed} lines, skipped {skipped}", file=sys.stderr) + + +def _cli_normalize_errors(args): + """CLI: 归一化错误消息。""" + parsed = 0 + for line in sys.stdin: + line = line.rstrip("\n") + record = parse_error_line(line) + print(json.dumps(record, ensure_ascii=False)) + parsed += 1 + print(f"Normalized {parsed} lines", file=sys.stderr) + + +def _cli_match_select_release(args): + """CLI: 匹配 select/release 事件。""" + lines = [line.rstrip("\n") for line in sys.stdin] + result = match_select_release(lines) + print(json.dumps(result, ensure_ascii=False)) + + +def _cli_self_test(args): + """运行内置测试。""" + passed = 0 + failed = 0 + + def check(name, got, expected): + nonlocal passed, failed + if got == expected: + print(f" PASS: {name}") + passed += 1 + else: + print(f" FAIL: {name}") + print(f" expected: {expected}") + print(f" got: {got}") + failed += 1 + + print("=== Testing parse_go_duration_ms ===") + check("simple seconds", parse_go_duration_ms("1.5s"), 
1500.0) + check("milliseconds", parse_go_duration_ms("150ms"), 150.0) + check("fractional ms", parse_go_duration_ms("150.5ms"), 150.5) + check("microseconds µs", parse_go_duration_ms("500µs"), 0.5) + check("microseconds us", parse_go_duration_ms("500us"), 0.5) + check("nanoseconds", parse_go_duration_ms("500ns"), 0.0005) + check("composite m+s", parse_go_duration_ms("1m30s"), 90000.0) + check("composite h+m+s", parse_go_duration_ms("1h2m3s"), 3723000.0) + check("composite h+m+fractional_s", parse_go_duration_ms("1h2m3.456s"), 3723456.0) + check("pure minutes", parse_go_duration_ms("2m"), 120000.0) + check("zero", parse_go_duration_ms("0s"), 0.0) + check("sub-ms decimal", parse_go_duration_ms("2.798235ms"), 2.798235) + + print("\n=== Testing parse_go_map ===") + check("single entry", parse_go_map("map[http://10.0.0.1:9263:100]"), {"http://10.0.0.1:9263": 100}) + check( + "multi entry", + parse_go_map("map[http://10.0.0.1:9263:100 http://10.0.0.2:9867:50]"), + {"http://10.0.0.1:9263": 100, "http://10.0.0.2:9867": 50}, + ) + check("empty map", parse_go_map("map[]"), {}) + check("float values", parse_go_map("map[http://10.0.0.1:9263:0.85]"), {"http://10.0.0.1:9263": 0.85}) + + print("\n=== Testing extract_ts ===") + check("standard", extract_ts("[INFO] 2025/01/15 18:25:33 logger.go:45: msg"), "2025/01/15 18:25:33") + check("no timestamp", extract_ts("no timestamp here"), None) + + print("\n=== Testing extract_tags ===") + check( + "session+request", + extract_tags("[session_id:abc] [request_id:def]"), + {"session_id": "abc", "request_id": "def"}, + ) + check( + "all four", + extract_tags("[trace_id:t1] [req_id:r1] [session_id:s1] [request_id:rq1]"), + {"trace_id": "t1", "req_id": "r1", "session_id": "s1", "request_id": "rq1"}, + ) + check("no tags", extract_tags("no tags here"), {}) + + print("\n=== Testing parse_http_line ===") + http_line = "[INFO] 2025/01/15 18:25:33 logger.go:45: [POST] /v1/chat/completions HTTP/1.1 200 2.798235ms 10.0.0.1" + r = 
parse_http_line(http_line) + check("http method", r["method"], "POST") + check("http path", r["path"], "/v1/chat/completions") + check("http status", r["status"], 200) + check("http latency", r["latency_ms"], 2.798) + check("http client_ip", r["client_ip"], "10.0.0.1") + + r_infer = parse_http_line( + "[INFO] 2025/01/15 18:25:33 logger.go:45: [GET] /health HTTP/1.1 200 1ms 10.0.0.1", inference_only=True + ) + check("inference_only filters health", r_infer, None) + + print("\n=== Testing normalize_message ===") + check("url", normalize_message("Failed to connect to http://10.0.0.1:9965"), "Failed to connect to {url}") + check("uuid", normalize_message("request abc12345-1234-5678-9012-abcdef123456 failed"), "request {uuid} failed") + check( + "ip:port", + normalize_message("dial tcp 10.0.0.1:9965: connection refused"), + "dial tcp {ip:port}: connection refused", + ) + + print("\n=== Testing match_select_release (token release type inference) ===") + sample_lines = [ + "[INFO] 2026/04/12 10:00:00 logger.go:1: [request_id:r1] select worker (mixed): http://10.0.0.1:9965, count: 1", + "[INFO] 2026/04/12 10:00:01 logger.go:1: [request_id:r1] release prefill tokens: http://10.0.0.1:9965, tokens: 10", + "[INFO] 2026/04/12 10:00:02 logger.go:1: [request_id:r1] release worker: http://10.0.0.1:9965, count: 0", + ] + msr = match_select_release(sample_lines) + check("mixed token_releases inferred", msr["type_summary"].get("mixed", {}).get("token_releases", 0), 1) + check("prefill token_releases remains 0", msr["type_summary"].get("prefill", {}).get("token_releases", 0), 0) + + print(f'\n{"=" * 40}') + print(f"Results: {passed} passed, {failed} failed") + if failed: + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser( + description="FastDeploy Go Router Log Parser", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + sub = parser.add_subparsers(dest="command") + + p = sub.add_parser("parse-http", help="解析 HTTP 请求行 (H1) → JSON Lines") + 
p.add_argument("--inference-only", action="store_true", help="仅保留推理路径") + + sub.add_parser("parse-cache-strategy", help="解析 cache-aware 策略行 (H6) → JSON Lines") + sub.add_parser("parse-stats", help="解析 [stats] 统计行 (H7) → JSON Lines") + sub.add_parser("normalize-errors", help="ERROR/WARN 行模板归一化 → JSON Lines") + sub.add_parser("match-select-release", help="匹配 select/release worker 事件") + p = sub.add_parser("unsupported-requests", help="检测不匹配已知路由的请求") + p.add_argument("--summary-only", action="store_true", help="仅输出汇总(不含详细列表)") + sub.add_parser("self-test", help="运行内置测试") + + args = parser.parse_args() + + if args.command == "parse-http": + _cli_parse_http(args) + elif args.command == "parse-cache-strategy": + _cli_parse_stream(parse_cache_strategy_line) + elif args.command == "parse-stats": + _cli_parse_stream(parse_stats_line) + elif args.command == "normalize-errors": + _cli_normalize_errors(args) + elif args.command == "match-select-release": + _cli_match_select_release(args) + elif args.command == "unsupported-requests": + _cli_unsupported_requests(args) + elif args.command == "self-test": + _cli_self_test(args) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/stats.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/stats.py new file mode 100644 index 00000000000..a197ee7aff0 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/stats.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +Stats — 通用统计计算工具 + +提供百分位数、分布、时间窗口聚合、分组计数等通用统计函数。 +不含任何业务逻辑或日志格式依赖。 + +Python 3 stdlib only,零依赖。 +""" + +import math +from collections import defaultdict +from datetime import datetime, timedelta + +# ════════════════════════════════════════════════════════════════ +# 百分位数与基础统计 +# ════════════════════════════════════════════════════════════════ + + +def percentile(sorted_vals, p): + """从已排序列表计算第 p 百分位数(线性插值)。""" + if not sorted_vals: 
+ return 0.0 + n = len(sorted_vals) + k = (p / 100.0) * (n - 1) + f = math.floor(k) + c = math.ceil(k) + if f == c: + return sorted_vals[int(k)] + return sorted_vals[f] * (c - k) + sorted_vals[c] * (k - f) + + +def compute_statistics(values, percentiles_list=None, distribution_spec=None): + """计算一组数值的统计量。 + + Args: + values: 数值列表 + percentiles_list: 要计算的百分位数列表,默认 [50, 90, 95, 99] + distribution_spec: 分布区间规格字符串,如 '0-20,20-40,40-60,60-80,80-100' + + Returns: + dict: {count, min, max, mean, sum, stddev, p50, p90, ..., distribution} + """ + if percentiles_list is None: + percentiles_list = [50, 90, 95, 99] + + if not values: + result = {"count": 0, "min": 0, "max": 0, "mean": 0, "sum": 0, "stddev": 0} + for p in percentiles_list: + result[f"p{p}"] = 0 + if distribution_spec is not None: + result["distribution"] = [] + return result + + sorted_vals = sorted(values) + n = len(sorted_vals) + total = sum(sorted_vals) + mean = total / n + variance = sum((x - mean) ** 2 for x in sorted_vals) / n + stddev = math.sqrt(variance) + + result = { + "count": n, + "min": round(sorted_vals[0], 3), + "max": round(sorted_vals[-1], 3), + "mean": round(mean, 3), + "sum": round(total, 3), + "stddev": round(stddev, 3), + } + + for p in percentiles_list: + result[f"p{p}"] = round(percentile(sorted_vals, p), 3) + + if distribution_spec is not None: + result["distribution"] = compute_distribution(sorted_vals, distribution_spec) + + return result + + +def compute_distribution(sorted_vals, spec_str): + """根据区间规格计算分布直方图。 + + spec_str 示例:'0-20,20-40,40-60,60-80,80-100' + 每个区间是左闭右开 [lo, hi)。 + """ + buckets = _parse_distribution_spec(spec_str) + n = len(sorted_vals) + result = [] + for b in buckets: + if b[0] == "lt": + count = sum(1 for v in sorted_vals if v < b[1]) + label = b[2] + elif b[0] == "gt": + count = sum(1 for v in sorted_vals if v > b[1]) + label = b[2] + elif b[0] == "range": + count = sum(1 for v in sorted_vals if b[1] <= v < b[2]) + label = b[3] + else: + continue + 
result.append({"range": label, "count": count, "pct": round(count / n * 100, 1) if n else 0}) + return result + + +def _parse_distribution_spec(spec_str): + """解析分布区间规格:'<100,100-500,>1000' → bucket 定义列表。""" + buckets = [] + for part in spec_str.split(","): + part = part.strip() + if part.startswith("<"): + buckets.append(("lt", float(part[1:]), part)) + elif part.startswith(">"): + buckets.append(("gt", float(part[1:]), part)) + elif "-" in part: + lo, hi = part.split("-", 1) + buckets.append(("range", float(lo), float(hi), part)) + return buckets + + +# ════════════════════════════════════════════════════════════════ +# 时间窗口聚合 +# ════════════════════════════════════════════════════════════════ + + +def time_bucket(records, window="auto", agg_specs=None, ts_field="ts"): + """按时间窗口聚合记录。 + + Args: + records: dict 列表,每个 dict 必须有 ts_field 字段 + window: 窗口大小 '5s'/'1m'/'5m'/'auto' + agg_specs: 聚合规格列表 [(field, func), ...],如 [('selected_hitRatio', 'mean')] + func 支持:count, sum, mean, min, max, pNN + ts_field: 时间戳字段名 + + Returns: + list[dict]: 每个窗口一条记录 {bucket, count, field_func, ...} + """ + if agg_specs is None: + agg_specs = [("_", "count")] + + if not records: + return [] + + window_td = _parse_window(window, records, ts_field) + + # 按窗口分组 + buckets = defaultdict(list) + for r in records: + ts_str = r.get(ts_field, "") + if not ts_str: + continue + try: + dt = datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S") + except ValueError: + continue + bucket_dt = _align_to_bucket(dt, window_td) + bucket_key = bucket_dt.strftime("%Y/%m/%d %H:%M:%S") + buckets[bucket_key].append(r) + + # 按时间排序并聚合 + result = [] + for bucket_key in sorted(buckets.keys()): + bucket_records = buckets[bucket_key] + entry = {"bucket": bucket_key, "count": len(bucket_records)} + + for field, func in agg_specs: + if field == "_": + if func == "count": + entry["count"] = len(bucket_records) + continue + + values = [] + for r in bucket_records: + v = r.get(field) + if v is not None: + try: + 
values.append(float(v)) + except (ValueError, TypeError): + pass + + out_key = f"{field}_{func}" + entry[out_key] = _aggregate_values(values, func) + + result.append(entry) + + return result + + +def _parse_window(window_str, records, ts_field): + """解析窗口字符串为 timedelta。'auto' 根据数据跨度自动选择。""" + if window_str == "auto": + timestamps = [] + for r in records: + ts_str = r.get(ts_field, "") + if ts_str: + try: + timestamps.append(datetime.strptime(ts_str, "%Y/%m/%d %H:%M:%S")) + except ValueError: + pass + if len(timestamps) < 2: + return timedelta(minutes=1) + span = max(timestamps) - min(timestamps) + if span < timedelta(minutes=30): + return timedelta(seconds=5) + elif span < timedelta(hours=3): + return timedelta(minutes=1) + else: + return timedelta(minutes=5) + elif window_str.endswith("s"): + return timedelta(seconds=int(window_str[:-1])) + elif window_str.endswith("m"): + return timedelta(minutes=int(window_str[:-1])) + elif window_str.endswith("h"): + return timedelta(hours=int(window_str[:-1])) + return timedelta(minutes=1) + + +def _align_to_bucket(dt, window_td): + """将 datetime 对齐到窗口边界。""" + secs = max(1, int(window_td.total_seconds())) + epoch = datetime(dt.year, dt.month, dt.day) + offset = int((dt - epoch).total_seconds()) + aligned = (offset // secs) * secs + return epoch + timedelta(seconds=aligned) + + +def _aggregate_values(values, func): + """用指定函数聚合一组数值。""" + if not values: + return 0 + if func == "count": + return len(values) + elif func == "sum": + return round(sum(values), 3) + elif func == "mean": + return round(sum(values) / len(values), 3) + elif func == "min": + return round(min(values), 3) + elif func == "max": + return round(max(values), 3) + elif func.startswith("p"): + p = int(func[1:]) + return round(percentile(sorted(values), p), 3) + return 0 + + +# ════════════════════════════════════════════════════════════════ +# 分组计数 +# ════════════════════════════════════════════════════════════════ + + +def count_by(records, field, top_n=None): + 
"""按指定字段分组计数。 + + Args: + records: dict 列表 + field: 分组字段名 + top_n: 只返回前 N 个(按计数降序) + + Returns: + list[dict]: [{value, count, pct}],按计数降序排列 + """ + counts = defaultdict(int) + total = 0 + for r in records: + val = r.get(field) + if val is not None: + counts[str(val)] += 1 + total += 1 + + result = [] + for val, count in sorted(counts.items(), key=lambda x: -x[1]): + result.append({"value": val, "count": count, "pct": round(count / total * 100, 1) if total else 0}) + + if top_n: + result = result[:top_n] + + return result diff --git a/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/troubleshoot.py b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/troubleshoot.py new file mode 100644 index 00000000000..b00521e6b01 --- /dev/null +++ b/fastdeploy/golang_router/.claude/skills/troubleshoot/scripts/troubleshoot.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python3 +""" +Troubleshoot — FastDeploy Go Router 综合问题排查主编排器 + +Usage: + python3 troubleshoot.py [options] + +Options: + --errors 仅分析错误日志 + --latency 仅分析延迟 + --health 仅分析 Worker 健康 + --cache 仅分析 Cache 调度 + --load 仅分析负载与计数器 + --trace ID 追踪指定请求(支持逗号分隔多 ID;传 all 可全量追踪) + --tail N 仅分析尾部 N 行(支持 5000/1k/1w 等行数写法) + --start TIME 起始时间(如 "16:00:00"、"03/31 16:00") + --end TIME 结束时间(如 "17:00:00"、"2026/03/31 17:00:00") + --output DIR 详细报告导出目录(默认: skill_output/troubleshoot//) + +支持维度:errors, latency, health, cache, load, trace +""" + +import argparse +import re +import os +import sys +from datetime import datetime +from pathlib import Path + +# 确保能 import 同级模块 +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from analyzers.cache import analyze_cache, format_cache_report +from analyzers.errors import analyze_errors, format_errors_report +from analyzers.health import analyze_health, format_health_report +from analyzers.latency import analyze_latency, format_latency_report +from analyzers.load import analyze_load +from analyzers.load_report import format_load_report +from analyzers.trace import 
analyze_trace, format_trace_report +from log_parser import ( + complete_time_arg, + filter_file_by_time_range, +) + + +def determine_log_file(user_path=None): + """确定日志文件路径。 + + 搜索顺序: + 1. 用户指定路径(直接使用,不质疑) + 2. logs/router.log + 3. fd-router.log(golang_router 根目录) + """ + if user_path: + p = Path(user_path).expanduser() + if p.is_file(): + return str(p) + print(f"ERROR: 文件不存在: {user_path}", file=sys.stderr) + print( + "提示: 若路径含空格/括号,请使用引号,例如: " + "python3 scripts/troubleshoot.py 'fastdeploy/golang_router/logs/fd-router (2).log' --load", + file=sys.stderr, + ) + sys.exit(1) + + # 统一基于脚本位置与当前工作目录搜索,避免 CWD 差异导致找不到日志。 + script_dir = Path(__file__).resolve().parent + golang_router_dir = script_dir.parents[2] # .../fastdeploy/golang_router + cwd = Path.cwd() + + # 精确候选(优先常见命名) + exact_candidates = [ + golang_router_dir / "logs" / "router.log", + golang_router_dir / "fd-router.log", + cwd / "logs" / "router.log", + cwd / "fd-router.log", + cwd / "fastdeploy" / "golang_router" / "logs" / "router.log", + cwd / "fastdeploy" / "golang_router" / "fd-router.log", + ] + for p in exact_candidates: + if p.is_file(): + return str(p) + + # 模糊候选:支持 fd-router (2).log 等命名 + pattern_roots = [ + golang_router_dir / "logs", + golang_router_dir, + cwd / "logs", + cwd, + cwd / "fastdeploy" / "golang_router" / "logs", + cwd / "fastdeploy" / "golang_router", + ] + dynamic_candidates = [] + for root in pattern_roots: + if not root.is_dir(): + continue + dynamic_candidates.extend(sorted(root.glob("fd-router*.log"))) + dynamic_candidates.extend(sorted(root.glob("router*.log"))) + + if dynamic_candidates: + return str(dynamic_candidates[0]) + + print("ERROR: 未找到日志文件。请指定路径或检查 logs/ 目录。", file=sys.stderr) + print("已搜索: logs/router.log, fd-router.log, fd-router*.log, router*.log", file=sys.stderr) + sys.exit(1) + + +def parse_tail_arg(tail_str): + """解析 --tail 参数:支持数字及 k/w 缩写。""" + if tail_str is None: + return None + s = str(tail_str).strip().lower() + m = re.fullmatch(r"(\d+)([kw])?", s) + if not 
m: + raise ValueError("--tail 仅支持行数(如 5000、1k、1w)。按时间请改用 --start/--end") + value = int(m.group(1)) + unit = m.group(2) + if unit == "k": + value *= 1000 + elif unit == "w": + value *= 10000 + if value <= 0: + raise ValueError("--tail 行数必须 > 0") + return {"type": "lines", "value": value} + + +def determine_status(results): + """根据分析结果判定全局状态。""" + reasons = [] + + # Errors 维度 + errors_result = results.get("errors") + if errors_result: + if errors_result["panic_list"]: + return "CRITICAL", f'{len(errors_result["panic_list"])} Panic 事件' + if errors_result["error_rate"] > 20: + return "CRITICAL", f'错误率 {errors_result["error_rate"]}%' + if errors_result["error_rate"] > 5: + reasons.append(f'错误率 {errors_result["error_rate"]}%') + for s in errors_result["status_code_dist"]: + code = str(s["value"]) + if code in ("502", "503") and s["count"] > 0: + reasons.append(f'{code}: {s["count"]}') + + # Latency 维度 + latency_result = results.get("latency") + if latency_result: + for d in latency_result.get("diagnoses", []): + if d["severity"] == "CRITICAL": + return "CRITICAL", d["message"] + if d["severity"] == "HIGH": + reasons.append(d["message"]) + + # Health 维度 + health_result = results.get("health") + if health_result: + for d in health_result.get("diagnoses", []): + if d["severity"] == "CRITICAL": + return "CRITICAL", d["message"] + if d["severity"] == "HIGH": + reasons.append(d["message"]) + + # Load 维度 + load_result = results.get("load") + if load_result: + for d in load_result.get("diagnoses", []): + if d["severity"] == "CRITICAL": + return "CRITICAL", d["message"] + if d["severity"] == "HIGH": + reasons.append(d["message"]) + + # Cache 维度 + cache_result = results.get("cache") + if cache_result: + for d in cache_result.get("diagnoses", []): + if d["severity"] == "HIGH": + reasons.append(d["message"]) + + if reasons: + # 去重但保留完整信息 + deduped = [] + seen = set() + for r in reasons: + if r not in seen: + deduped.append(r) + seen.add(r) + return "DEGRADED", ";".join(deduped) + + 
if not results: + return "HEALTHY", "无分析数据" + + return "HEALTHY", "无严重问题" + + +def format_full_report(results, status, status_reason): + """组装完整报告。 + + Returns: + tuple: (report_text, details) + report_text: 主报告文本(总结 + 可视化) + details: dict 包含需要拆分到独立文件的详情数据 + - 'health_events': str 或 None + - 'load_select_release': str 或 None + - 'trace_files': {trace_id: text} 或 {}(写入 detail/trace/) + """ + parts = [] + details = { + "health_events": None, + "load_select_release": None, + "latency_diagnoses": None, + "cache_diagnosis": None, + "load_diagnoses": None, + "load_counter_state": None, + "cache_session_stickiness": None, + "cache_suboptimal": None, + "cache_eviction": None, + "cache_fallback": None, + "cache_cross": None, + "errors_topn": None, + "trace_files": {}, + } + + # 状态行 + parts.append(f"STATUS: {status} — {status_reason}") + parts.append( + "状态定义: HEALTHY=无明显异常;DEGRADED=服务可用但存在性能/稳定性问题(需关注);CRITICAL=服务不可用或高风险故障。" + ) + parts.append("=" * 60) + parts.append("") + + # 各维度报告 + if "errors" in results: + parts.append(format_errors_report(results["errors"])) + if results["errors"].get("error_top_n"): + lines = [ + "# Errors TopN 详情", + "", + "| 模板 | 数量 | 级别 | 来源层 | 影响 |", + "|:--|--:|:--|:--|:--|", + ] + for e in results["errors"]["error_top_n"]: + lines.append( + f'| {e.get("template","")} | {e.get("count",0)} | {e.get("level","")} | {e.get("source_layer","")} | {e.get("impact","-")} |' + ) + lines.append("") + lines.append("## 涉及 URLs") + lines.append("") + for e in results["errors"]["error_top_n"]: + urls = e.get("urls") or [] + if not urls: + continue + lines.append(f'- 模板: {e.get("template","")}') + for u in urls: + lines.append(f" - {u}") + lines.append("") + details["errors_topn"] = "\n".join(lines) + + if "latency" in results: + parts.append(format_latency_report(results["latency"])) + if results["latency"].get("diagnoses"): + lines = ["# 延迟诊断详情", ""] + for d in results["latency"]["diagnoses"]: + lines.append(f'[{d.get("severity","")}] {d.get("message","")}') 
+ lines.append("") + details["latency_diagnoses"] = "\n".join(lines) + + if "health" in results: + summary, detail = format_health_report(results["health"]) + parts.append(summary) + if detail: + details["health_events"] = detail + + if "load" in results: + summary, detail = format_load_report(results["load"]) + parts.append(summary) + if detail: + details["load_select_release"] = detail + if results["load"].get("diagnoses"): + lines = ["# Load 诊断详情", ""] + for d in results["load"]["diagnoses"]: + lines.append(f'[{d.get("severity","")}] [{d.get("source_layer","")}] {d.get("message","")}') + lines.append("") + details["load_diagnoses"] = "\n".join(lines) + if results["load"].get("counter_last_state"): + rows = results["load"]["counter_last_state"] + lines = [ + "# Load Counter 末状态", + "", + "| worker | req_last_action | req_last_value | token_last_action | token_last_value | last_ts |", + "|:--|:--|--:|:--|--:|:--|", + ] + for r in rows: + lines.append( + f'| {r.get("worker","")} | {r.get("req_last_action","-")} | {r.get("req_last_value","-")} | {r.get("token_last_action","-")} | {r.get("token_last_value","-")} | {r.get("last_ts","")} |' + ) + lines.append("") + details["load_counter_state"] = "\n".join(lines) + + if "cache" in results: + summary, detail = format_cache_report(results["cache"]) + parts.append(summary) + if detail: + details["cache_diagnosis"] = detail + c = results["cache"] + lines = ["# Cache Session 粘性详情", ""] + if c.get("session_stickiness"): + for sid, s in c["session_stickiness"].items(): + lines.append( + f'- {sid}: req={s.get("total_requests",0)}, stickiness={s.get("stickiness_pct",0)}%, switches={s.get("switches",0)}' + ) + else: + lines.append("- 无可用样本(需要同一 session 至少 2 次请求)。") + lines.append("") + details["cache_session_stickiness"] = "\n".join(lines) + + lines = ["# Cache 非最优选择详情", ""] + if c.get("suboptimal_selections"): + for x in c["suboptimal_selections"][:200]: + lines.append( + f'- [{x.get("ts","")}] selected={x.get("selected","")} 
best={x.get("best_hr_worker","")} reason={x.get("reason","")}' + ) + else: + lines.append("- 未发现非最优选择。") + lines.append("") + details["cache_suboptimal"] = "\n".join(lines) + + lines = ["# Cache 驱逐影响详情", ""] + if c.get("eviction_impact"): + for x in c["eviction_impact"][:200]: + lines.append( + f'- session={x.get("session_id","")} interval={x.get("interval_mins",0)}m hitRatio_after={x.get("hitRatio_after",0)} evicted={x.get("evicted",False)}' + ) + else: + lines.append("- 未检测到超时驱逐样本。") + lines.append("") + details["cache_eviction"] = "\n".join(lines) + + lines = ["# Cache Fallback 原因详情", ""] + if c.get("fallback_reasons"): + for x in c["fallback_reasons"]: + lines.append(f'- {x.get("value","")}: {x.get("count",0)} ({x.get("pct",0)}%)') + else: + lines.append("- 未出现 fallback 记录。") + lines.append("") + details["cache_fallback"] = "\n".join(lines) + + lines = ["# Cache 交叉诊断详情", ""] + if c.get("cross_diagnosis"): + for x in c["cross_diagnosis"]: + lines.append( + f'- diagnosis={x.get("diagnosis","")}, action={x.get("action","")}, avg_stickiness={x.get("avg_stickiness_pct",0)}%' + ) + else: + lines.append("- 样本不足,未生成交叉诊断。") + lines.append("") + details["cache_cross"] = "\n".join(lines) + + if "trace" in results: + summary, detail_dict = format_trace_report(results["trace"]) + parts.append(summary) + if detail_dict: + details["trace_files"] = detail_dict + + return "\n".join(parts), details + + +def save_detailed_report(report_text, output_dir, details=None): + """保存报告到文件。 + + Args: + report_text: 主报告文本 + output_dir: 输出目录 + details: 详情数据 dict(来自 format_full_report) + """ + summary_dir = os.path.join(output_dir, "summary") + detail_dir = os.path.join(output_dir, "detail") + os.makedirs(summary_dir, exist_ok=True) + os.makedirs(detail_dir, exist_ok=True) + filepath = os.path.join(summary_dir, "troubleshoot_report.md") + + with open(filepath, "w", encoding="utf-8") as f: + f.write("# Router Troubleshooting Report\n") + f.write(f'> Generated at 
{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n\n') + f.write(report_text) + + # 保存详情到 detail/ 子目录 + if details: + if details.get("health_events"): + health_path = os.path.join(detail_dir, "health_events.md") + with open(health_path, "w", encoding="utf-8") as f: + f.write(details["health_events"]) + + if details.get("load_select_release"): + load_path = os.path.join(detail_dir, "load_select_release.md") + with open(load_path, "w", encoding="utf-8") as f: + f.write(details["load_select_release"]) + + if details.get("latency_diagnoses"): + latency_path = os.path.join(detail_dir, "latency_diagnoses.md") + with open(latency_path, "w", encoding="utf-8") as f: + f.write(details["latency_diagnoses"]) + + if details.get("cache_diagnosis"): + cache_path = os.path.join(detail_dir, "cache_diagnosis.md") + with open(cache_path, "w", encoding="utf-8") as f: + f.write(details["cache_diagnosis"]) + if details.get("load_diagnoses"): + with open(os.path.join(detail_dir, "load_diagnoses.md"), "w", encoding="utf-8") as f: + f.write(details["load_diagnoses"]) + if details.get("load_counter_state"): + with open(os.path.join(detail_dir, "load_counter_state.md"), "w", encoding="utf-8") as f: + f.write(details["load_counter_state"]) + if details.get("cache_session_stickiness") is not None: + with open(os.path.join(detail_dir, "cache_session_stickiness.md"), "w", encoding="utf-8") as f: + f.write(details["cache_session_stickiness"]) + if details.get("cache_suboptimal") is not None: + with open(os.path.join(detail_dir, "cache_suboptimal.md"), "w", encoding="utf-8") as f: + f.write(details["cache_suboptimal"]) + if details.get("cache_eviction") is not None: + with open(os.path.join(detail_dir, "cache_eviction.md"), "w", encoding="utf-8") as f: + f.write(details["cache_eviction"]) + if details.get("cache_fallback") is not None: + with open(os.path.join(detail_dir, "cache_fallback.md"), "w", encoding="utf-8") as f: + f.write(details["cache_fallback"]) + if details.get("cache_cross") is not 
None: + with open(os.path.join(detail_dir, "cache_cross.md"), "w", encoding="utf-8") as f: + f.write(details["cache_cross"]) + if details.get("errors_topn"): + with open(os.path.join(detail_dir, "errors_topn.md"), "w", encoding="utf-8") as f: + f.write(details["errors_topn"]) + + trace_detail_dir = os.path.join(detail_dir, "trace") + if details.get("trace_files"): + os.makedirs(trace_detail_dir, exist_ok=True) + for trace_id, trace_text in details.get("trace_files", {}).items(): + safe_id = trace_id.replace("/", "_") + trace_path = os.path.join(trace_detail_dir, f"trace_{safe_id}.md") + with open(trace_path, "w", encoding="utf-8") as f: + f.write(trace_text) + + return filepath + + +def main(): + parser = argparse.ArgumentParser( + description="FastDeploy Go Router Troubleshooting", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("log_file", nargs="?", help="日志文件路径") + parser.add_argument("--errors", action="store_true", help="仅分析错误日志") + parser.add_argument("--latency", action="store_true", help="仅分析延迟") + parser.add_argument("--health", action="store_true", help="仅分析 Worker 健康") + parser.add_argument("--cache", action="store_true", help="仅分析 Cache 调度") + parser.add_argument("--load", action="store_true", help="仅分析负载与计数器") + parser.add_argument("--trace", metavar="ID", help="追踪指定请求(逗号分隔多 ID;传 all 可全量追踪)") + parser.add_argument("--tail", help="尾部行数(如 5000、1k、1w)。按时间请使用 --start/--end") + parser.add_argument( + "--start", default=None, help='起始时间(如 "16:00:00"、"03/31 16:00"、"2026/03/31 16:00:00")' + ) + parser.add_argument("--end", default=None, help='结束时间(如 "17:00:00"、"03/31 17:00"、"2026/03/31 17:00:00")') + parser.add_argument("--output", help="详细报告导出目录(默认:skill_output/troubleshoot//)") + + args = parser.parse_args() + + # 确定日志文件 + log_file = determine_log_file(args.log_file) + print(f"日志文件: {log_file}", file=sys.stderr) + + # --tail 与 --start/--end 不能混用(两者是不同的范围选择方式) + if args.tail and (args.start or args.end): + 
print("Error: --tail 与 --start/--end 不能同时使用,请选择其一", file=sys.stderr) + sys.exit(1) + + # 时间范围预过滤(--start 和 --end 可单独或同时指定) + import atexit + + start_ts = None + end_ts = None + if args.start or args.end: + start_ts = complete_time_arg(args.start, log_file, is_end=False) if args.start else None + end_ts = complete_time_arg(args.end, log_file, is_end=True) if args.end else None + filtered_path, is_temp = filter_file_by_time_range(log_file, start_ts, end_ts) + if is_temp: + atexit.register(lambda p=filtered_path: os.unlink(p) if os.path.exists(p) else None) + log_file = filtered_path + print(f'时间范围过滤: {start_ts or "..."} ~ {end_ts or "..."}', file=sys.stderr) + + tail_arg = parse_tail_arg(args.tail) + tail = None + if tail_arg and tail_arg["type"] == "lines": + tail = tail_arg["value"] + + # 确定分析模式 + any_mode = args.errors or args.latency or args.health or args.cache or args.load or args.trace + run_errors = args.errors or (not any_mode) + run_latency = args.latency or (not any_mode) + run_health = args.health or (not any_mode) + run_load = args.load or (not any_mode) + run_cache = args.cache or (not any_mode) + run_trace = bool(args.trace) # trace 需要指定 ID(支持 all),全量扫描不自动调用 + + results = {} + step = 0 + total_steps = sum([run_errors, run_latency, run_health, run_cache, run_load, run_trace]) + + # 执行分析 + if run_errors: + step += 1 + print(f"[{step}/{total_steps}] 分析错误日志...", file=sys.stderr) + results["errors"] = analyze_errors(log_file, tail=tail) + + if run_latency: + step += 1 + print(f"[{step}/{total_steps}] 分析请求延迟...", file=sys.stderr) + results["latency"] = analyze_latency(log_file, tail=tail) + + if run_health: + step += 1 + print(f"[{step}/{total_steps}] 分析 Worker 健康...", file=sys.stderr) + results["health"] = analyze_health(log_file, tail=tail) + + if run_cache: + step += 1 + print(f"[{step}/{total_steps}] 分析 Cache 调度...", file=sys.stderr) + results["cache"] = analyze_cache(log_file, tail=tail) + + if run_load: + step += 1 + print(f"[{step}/{total_steps}] 
分析负载与计数器...", file=sys.stderr) + results["load"] = analyze_load(log_file, tail=tail) + + if run_trace: + step += 1 + print(f"[{step}/{total_steps}] 追踪请求...", file=sys.stderr) + results["trace"] = analyze_trace(log_file, args.trace, tail=tail) + + # 判定状态 + status, status_reason = determine_status(results) + + # 输出报告 + report, details = format_full_report(results, status, status_reason) + print(report) + + # 保存详细报告 + run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if args.output: + output_base = args.output + else: + script_dir = os.path.dirname(os.path.abspath(__file__)) + golang_router_root = os.path.normpath(os.path.join(script_dir, "..", "..", "..", "..")) + output_base = os.path.join(golang_router_root, "skill_output", "troubleshoot") + output_dir = os.path.join(output_base, run_timestamp) + filepath = save_detailed_report(report, output_dir, details=details) + print(f"\n详细报告已保存到: {filepath}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/fastdeploy/golang_router/.gitignore b/fastdeploy/golang_router/.gitignore new file mode 100644 index 00000000000..58b5c84d190 --- /dev/null +++ b/fastdeploy/golang_router/.gitignore @@ -0,0 +1,2 @@ +# Generated skill analysis outputs +skill_output/ diff --git a/fastdeploy/golang_router/cmd/main.go b/fastdeploy/golang_router/cmd/main.go index e0e8c98e137..c3670622ab2 100644 --- a/fastdeploy/golang_router/cmd/main.go +++ b/fastdeploy/golang_router/cmd/main.go @@ -41,7 +41,15 @@ func main() { } // Initialize logger - logger.Init(cfg.Log.Level, cfg.Log.Output) + logCfg := logger.Config{ + Level: cfg.Log.Level, + Output: cfg.Log.Output, + Dir: cfg.Log.Dir, + MaxAgeDays: cfg.Log.MaxAgeDays, + MaxTotalSizeMB: cfg.Log.MaxTotalSizeMB, + CleanupIntervalSecs: cfg.Log.CleanupIntervalSecs, + } + logger.Init(logCfg) defer logger.CloseLogFile() // Initialize manager @@ -59,6 +67,7 @@ func main() { go scheduler_handler.StartBackupCleanupTask(context.Background(), intervalCleanupSecs) statsIntervalSecs := 
cfg.Scheduler.StatsIntervalSecs go scheduler_handler.StartStatsReporter(context.Background(), statsIntervalSecs) + go logger.StartLogCleanup(context.Background(), logCfg) // Start server addr := ":" + cfg.Server.Port diff --git a/fastdeploy/golang_router/examples/run_with_config/config/config.example.yaml b/fastdeploy/golang_router/examples/run_with_config/config/config.example.yaml index be4b11227d2..075d8eec5fd 100644 --- a/fastdeploy/golang_router/examples/run_with_config/config/config.example.yaml +++ b/fastdeploy/golang_router/examples/run_with_config/config/config.example.yaml @@ -29,3 +29,7 @@ manager: log: level: "info" # debug, info, warn, error output: "file" # stdout, file + dir: "logs" # log directory; default: logs + max-age-days: 7 # max days to keep log files; default: 7 + max-total-size-mb: 500 # max total log size in MB; default: 500 + cleanup-interval-secs: 3600 # cleanup check interval in seconds; default: 3600 diff --git a/fastdeploy/golang_router/examples/run_with_default_workers/config/config.example.yaml b/fastdeploy/golang_router/examples/run_with_default_workers/config/config.example.yaml index be4b11227d2..5e1091b0eef 100644 --- a/fastdeploy/golang_router/examples/run_with_default_workers/config/config.example.yaml +++ b/fastdeploy/golang_router/examples/run_with_default_workers/config/config.example.yaml @@ -29,3 +29,6 @@ manager: log: level: "info" # debug, info, warn, error output: "file" # stdout, file + max-age-days: 7 # max days to keep log files; default: 7 + max-total-size-mb: 500 # max total log size in MB; default: 500 + cleanup-interval-secs: 3600 # cleanup check interval in seconds; default: 3600 diff --git a/fastdeploy/golang_router/internal/config/config.go b/fastdeploy/golang_router/internal/config/config.go index 2cb8226961d..7a6dc3fc504 100644 --- a/fastdeploy/golang_router/internal/config/config.go +++ b/fastdeploy/golang_router/internal/config/config.go @@ -49,8 +49,12 @@ type SchedulerConfig struct { } type LogConfig 
struct { - Level string `yaml:"level"` // debug, info, warn, error - Output string `yaml:"output"` // stdout, file + Level string `yaml:"level"` // debug, info, warn, error + Output string `yaml:"output"` // stdout, file + Dir string `yaml:"dir"` // log directory; defaults to "logs" + MaxAgeDays int `yaml:"max-age-days"` // max days to keep log files; 0 = use default (7) + MaxTotalSizeMB int `yaml:"max-total-size-mb"` // max total log size in MB; 0 = use default (500) + CleanupIntervalSecs float64 `yaml:"cleanup-interval-secs"` // cleanup check interval in seconds; 0 = use default (3600) } func Load(configPath, listenPort string, isSplitwise bool) (*Config, error) { @@ -81,6 +85,15 @@ func Load(configPath, listenPort string, isSplitwise bool) (*Config, error) { if cfg.Log.Level == "" { cfg.Log.Level = "info" } + if cfg.Log.MaxAgeDays == 0 { + cfg.Log.MaxAgeDays = 7 + } + if cfg.Log.MaxTotalSizeMB == 0 { + cfg.Log.MaxTotalSizeMB = 500 + } + if cfg.Log.CleanupIntervalSecs == 0 { + cfg.Log.CleanupIntervalSecs = 3600 + } if cfg.Manager.HealthCheckEndpoint == "" { cfg.Manager.HealthCheckEndpoint = "/health" } diff --git a/fastdeploy/golang_router/internal/gateway/completions_test.go b/fastdeploy/golang_router/internal/gateway/completions_test.go index 825544ff5e3..4fea9736ad6 100644 --- a/fastdeploy/golang_router/internal/gateway/completions_test.go +++ b/fastdeploy/golang_router/internal/gateway/completions_test.go @@ -20,7 +20,7 @@ import ( ) func TestMain(m *testing.M) { - logger.Init("info", "stdout") + logger.Init(logger.Config{Level: "info", Output: "stdout"}) gin.SetMode(gin.TestMode) os.Exit(m.Run()) } diff --git a/fastdeploy/golang_router/internal/manager/health_test.go b/fastdeploy/golang_router/internal/manager/health_test.go index bc42031d85f..f50ea2d00b2 100644 --- a/fastdeploy/golang_router/internal/manager/health_test.go +++ b/fastdeploy/golang_router/internal/manager/health_test.go @@ -15,7 +15,7 @@ import ( func init() { // Initialize logger for all 
tests - logger.Init("info", "stdout") + logger.Init(logger.Config{Level: "info", Output: "stdout"}) } func TestCheckServiceHealth(t *testing.T) { diff --git a/fastdeploy/golang_router/internal/middleware/logger_test.go b/fastdeploy/golang_router/internal/middleware/logger_test.go index da9c7290567..47b63742547 100644 --- a/fastdeploy/golang_router/internal/middleware/logger_test.go +++ b/fastdeploy/golang_router/internal/middleware/logger_test.go @@ -12,7 +12,7 @@ import ( func init() { // Initialize logger to avoid nil pointer dereference in recovery middleware - logger.Init("info", "stdout") + logger.Init(logger.Config{Level: "info", Output: "stdout"}) } func TestLoggerMiddleware(t *testing.T) { diff --git a/fastdeploy/golang_router/internal/scheduler/handler/prefill_cache_aware.go b/fastdeploy/golang_router/internal/scheduler/handler/prefill_cache_aware.go index 48737c03c72..2259087d619 100644 --- a/fastdeploy/golang_router/internal/scheduler/handler/prefill_cache_aware.go +++ b/fastdeploy/golang_router/internal/scheduler/handler/prefill_cache_aware.go @@ -384,6 +384,9 @@ func (c *radixPrefixCache) Record(tokens []int, worker string) { // evictionWorker periodically evicts inactive nodes func (c *radixPrefixCache) evictionWorker(interval time.Duration) { + if interval <= 0 { + return + } ticker := time.NewTicker(interval) defer ticker.Stop() for { diff --git a/fastdeploy/golang_router/internal/scheduler/handler/tokenizer_test.go b/fastdeploy/golang_router/internal/scheduler/handler/tokenizer_test.go index d3b6dacfdc4..e1155e3686b 100644 --- a/fastdeploy/golang_router/internal/scheduler/handler/tokenizer_test.go +++ b/fastdeploy/golang_router/internal/scheduler/handler/tokenizer_test.go @@ -586,13 +586,13 @@ func TestParseTokensFromBody(t *testing.T) { name: "invalid JSON format", input: []byte(`invalid json`), expected: nil, - err: errors.New("tokenizer response missing tokens"), + err: errors.New("tokenizer response unmarshal failed"), }, { name: "empty body", 
input: []byte(``), expected: nil, - err: errors.New("tokenizer response missing tokens"), + err: errors.New("tokenizer response unmarshal failed"), }, { name: "large array of tokens", @@ -610,13 +610,13 @@ func TestParseTokensFromBody(t *testing.T) { name: "non-array input_ids", input: []byte(`{"input_ids": "not an array"}`), expected: nil, - err: errors.New("tokenizer response missing tokens"), + err: errors.New("tokenizer response unmarshal failed"), }, { name: "malformed array", input: []byte(`{"input_ids": [1, "two", 3]}`), expected: nil, - err: errors.New("tokenizer response missing tokens"), + err: errors.New("tokenizer response unmarshal failed"), }, } @@ -629,8 +629,8 @@ func TestParseTokensFromBody(t *testing.T) { t.Errorf("parseTokensFromBody() error = %v, wantErr %v", err, tt.err) return } - if err != nil && tt.err != nil && err.Error() != tt.err.Error() { - t.Errorf("parseTokensFromBody() error message = %v, want %v", err.Error(), tt.err.Error()) + if err != nil && tt.err != nil && !strings.Contains(err.Error(), tt.err.Error()) { + t.Errorf("parseTokensFromBody() error message = %v, want containing %v", err.Error(), tt.err.Error()) return } diff --git a/fastdeploy/golang_router/pkg/logger/logger.go b/fastdeploy/golang_router/pkg/logger/logger.go index 8e213fc0c9f..daa23d55450 100644 --- a/fastdeploy/golang_router/pkg/logger/logger.go +++ b/fastdeploy/golang_router/pkg/logger/logger.go @@ -1,12 +1,27 @@ package logger import ( + "context" + "fmt" "log" "os" + "path/filepath" + "sort" + "strings" "sync" - "context" + "time" ) +// Config holds logger configuration. 
+type Config struct { + Level string + Output string + Dir string // log directory; defaults to "logs" + MaxAgeDays int + MaxTotalSizeMB int + CleanupIntervalSecs float64 +} + var ( infoLogger *log.Logger errorLogger *log.Logger @@ -14,37 +29,260 @@ var ( debugLogger *log.Logger level string once sync.Once - logFile *os.File + writer *rotatingWriter // nil when output is stdout ) +// nowFunc is overridable in tests for time-dependent logic. +var nowFunc = time.Now + type contextKey string + const TraceIDKey contextKey = "trace_id" const ReqIDKey contextKey = "req_id" const RequestIDKey contextKey = "request_id" const SessionIDKey contextKey = "session_id" -// Init initialize logger -func Init(logLevel, output string) { - once.Do(func() { - level = logLevel +// gracePeriod is how long we keep the previous day's file open after rotation. +const gracePeriod = 5 * time.Minute + +// rotatingWriter implements io.Writer with day-level rotation and dual-file writes. +// Current day's log is written to "router-YYYY-MM-DD.log" and "router.log" is a +// symlink pointing to the current day's file. On day change a new date file is +// created and the symlink is updated. During a short grace period after rotation, +// log lines whose timestamp belongs to the previous day are written to the old file. 
+type rotatingWriter struct {
+	mu          sync.Mutex
+	currentFile *os.File // today's router-<date>.log
+	prevFile    *os.File // previous day's router-<date>.log during grace period (may be nil)
+	currentDate string   // "2006-01-02"
+	prevDate    string   // previous date during grace period
+	graceUntil  time.Time // when to close prevFile
+	retryAfter  time.Time // earliest time to retry a failed rotation (backoff)
+	logDir      string
+}
+
+func newRotatingWriter(logDir string) (*rotatingWriter, error) {
+	today := nowFunc().Format("2006-01-02")
+	datePath := filepath.Join(logDir, "router-"+today+".log")
+	symlinkPath := filepath.Join(logDir, "router.log")
+
+	// Migration: if router.log is a regular file (legacy), rename it to the date file.
+	if info, err := os.Lstat(symlinkPath); err == nil && info.Mode().IsRegular() {
+		os.Rename(symlinkPath, datePath)
+	}
+
+	// Open the date file (append mode).
+	f, err := os.OpenFile(datePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		return nil, err
+	}
+
+	// Create/update symlink: router.log -> router-<date>.log
+	if err := updateSymlink(symlinkPath, "router-"+today+".log"); err != nil {
+		fmt.Fprintf(os.Stderr, "[WARN] Symlink %s may be stale: %v\n", symlinkPath, err)
+	}
+
+	return &rotatingWriter{
+		currentFile: f,
+		currentDate: today,
+		logDir:      logDir,
+	}, nil
+}
+
+// needsRotate checks if rotation is needed under the lock.
+func (w *rotatingWriter) needsRotate(today string) (bool, string) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	needs := today != w.currentDate && (w.retryAfter.IsZero() || !nowFunc().Before(w.retryAfter))
+	return needs, w.logDir
+}
+
+// tryOpenRotateFile checks if rotation is needed and pre-opens the new log file
+// outside the lock to avoid blocking other writers on slow file I/O.
+func (w *rotatingWriter) tryOpenRotateFile(today string) *os.File { + needs, logDir := w.needsRotate(today) + if !needs { + return nil + } + + datePath := filepath.Join(logDir, "router-"+today+".log") + f, err := os.OpenFile(datePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "[ERROR] Failed to open new log file %s: %v, keeping current file\n", datePath, err) + return nil + } + return f +} + +func (w *rotatingWriter) Write(p []byte) (n int, err error) { + today := nowFunc().Format("2006-01-02") + + // Pre-open new file outside the lock to reduce lock-held I/O time. + preOpened := w.tryOpenRotateFile(today) + + w.mu.Lock() + defer w.mu.Unlock() + + // Authoritative rotation check under lock. + if today != w.currentDate && (w.retryAfter.IsZero() || !nowFunc().Before(w.retryAfter)) { + if preOpened != nil { + w.commitRotate(today, preOpened) + preOpened = nil // ownership transferred + } else { + // File open failed; set backoff so we don't retry on every Write. + w.retryAfter = nowFunc().Add(30 * time.Second) + } + } + // If another goroutine already rotated, close the unused pre-opened file. + if preOpened != nil { + preOpened.Close() + } + + // Close previous file if grace period expired. + if w.prevFile != nil && nowFunc().After(w.graceUntil) { + w.prevFile.Close() + w.prevFile = nil + w.prevDate = "" + } + + // During grace period, route log lines to the correct file based on timestamp. + target := w.currentFile + if w.prevFile != nil { + if logDate := parseLogDate(p); logDate == w.prevDate { + target = w.prevFile + } + } + + return target.Write(p) +} + +func (w *rotatingWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + if w.prevFile != nil { + w.prevFile.Close() + w.prevFile = nil + } + if w.currentFile != nil { + return w.currentFile.Close() + } + return nil +} + +// commitRotate finalises the rotation with a pre-opened file. Must be called with w.mu held. 
+func (w *rotatingWriter) commitRotate(newDate string, f *os.File) {
+	// Rotation succeeded — clear any retry backoff.
+	w.retryAfter = time.Time{}
+
+	// Close any lingering previous file.
+	if w.prevFile != nil {
+		w.prevFile.Close()
+		w.prevFile = nil
+	}
+
+	// Keep the old date file open for grace period writes.
+	w.prevFile = w.currentFile
+	w.prevDate = w.currentDate
+	w.graceUntil = nowFunc().Add(gracePeriod)
+
+	w.currentFile = f
+	w.currentDate = newDate
+
+	// Update symlink: router.log -> router-<date>.log
+	symlinkPath := filepath.Join(w.logDir, "router.log")
+	if err := updateSymlink(symlinkPath, "router-"+newDate+".log"); err != nil {
+		fmt.Fprintf(os.Stderr, "[WARN] Symlink %s may be stale (points to old date): %v\n", symlinkPath, err)
+	}
+}
+
+// updateSymlink atomically replaces symlinkPath to point to target.
+// It tries os.Remove + os.Symlink first; if remove fails (e.g. permission denied)
+// it falls back to a temp-symlink + os.Rename for an atomic swap attempt.
+func updateSymlink(symlinkPath, target string) error {
+	// Fast path: remove old, create new.
+	if err := os.Remove(symlinkPath); err != nil && !os.IsNotExist(err) {
+		// Remove failed (e.g. permission issue). Try atomic rename as fallback.
+		tmp := symlinkPath + ".tmp"
+		if err2 := os.Symlink(target, tmp); err2 != nil {
+			return fmt.Errorf("remove old symlink: %w; create temp symlink: %v", err, err2)
+		}
+		if err2 := os.Rename(tmp, symlinkPath); err2 != nil {
+			os.Remove(tmp) // best-effort cleanup
+			return fmt.Errorf("remove old symlink: %w; rename temp symlink: %v", err, err2)
+		}
+		return nil
+	}
+	if err := os.Symlink(target, symlinkPath); err != nil {
+		return fmt.Errorf("create symlink: %w", err)
+	}
+	return nil
+}
+
+// parseLogDate extracts the date from a log line produced by log.LstdFlags.
+// Format: "[LEVEL] 2006/01/02 15:04:05 ..."
+// Returns "2006-01-02" or empty string on parse failure.
+func parseLogDate(p []byte) string {
+	// Find the date pattern "YYYY/MM/DD" in the log prefix.
+	// log.LstdFlags produces: "2006/01/02 15:04:05" after the logger prefix.
+	// The prefix is like "[INFO] " (7 chars), so the date starts around index 7.
+	s := string(p)
+	for i := 0; i+10 <= len(s); i++ {
+		c := s[i]
+		if c >= '0' && c <= '9' && i+10 <= len(s) && s[i+4] == '/' && s[i+7] == '/' {
+			// Found a candidate "YYYY/MM/DD" — validate it.
+			year := s[i : i+4]
+			month := s[i+5 : i+7]
+			day := s[i+8 : i+10]
+			if !isAllDigits(month) || !isAllDigits(day) {
+				continue
+			}
+			m := (month[0]-'0')*10 + (month[1] - '0')
+			d := (day[0]-'0')*10 + (day[1] - '0')
+			if m < 1 || m > 12 || d < 1 || d > 31 {
+				continue
+			}
+			_ = year // NOTE: only the first year byte is verified to be a digit; good enough for our own log lines
+			return year + "-" + month + "-" + day
+		}
+	}
+	return ""
+}
+
+// isAllDigits returns true if every byte in s is an ASCII digit.
+func isAllDigits(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] < '0' || s[i] > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// Init initializes the logger.
+func Init(cfg Config) { + once.Do(func() { + level = cfg.Level flags := log.LstdFlags | log.Lshortfile - if output == "file" { - // Check if logs directory exists - if _, err := os.Stat("logs"); os.IsNotExist(err) { - if err := os.MkdirAll("logs", 0755); err != nil { + if cfg.Output == "file" { + logDir := cfg.Dir + if logDir == "" { + logDir = "logs" + } + if _, err := os.Stat(logDir); os.IsNotExist(err) { + if err := os.MkdirAll(logDir, 0755); err != nil { log.Fatalln("Failed to create logs directory:", err) } } - logFile, err := os.OpenFile("logs/router.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + var err error + writer, err = newRotatingWriter(logDir) if err != nil { - log.Fatalln("Failed to open log file:", err) + log.Fatalln("Failed to create rotating log writer:", err) } - infoLogger = log.New(logFile, "[INFO] ", flags) - errorLogger = log.New(logFile, "[ERROR] ", flags) - warnLogger = log.New(logFile, "[WARN] ", flags) - debugLogger = log.New(logFile, "[DEBUG] ", flags) + infoLogger = log.New(writer, "[INFO] ", flags) + errorLogger = log.New(writer, "[ERROR] ", flags) + warnLogger = log.New(writer, "[WARN] ", flags) + debugLogger = log.New(writer, "[DEBUG] ", flags) } else { infoLogger = log.New(os.Stdout, "[INFO] ", flags) errorLogger = log.New(os.Stderr, "[ERROR] ", flags) @@ -54,9 +292,134 @@ func Init(logLevel, output string) { }) } +// CloseLogFile closes the log file if in file output mode. func CloseLogFile() { - if logFile != nil { - logFile.Close() + if writer != nil { + writer.Close() + } +} + +// StartLogCleanup blocks running periodic log cleanup; call it in a goroutine. +// It deletes archived log files older than MaxAgeDays and trims total log size +// to stay under MaxTotalSizeMB. 
+func StartLogCleanup(ctx context.Context, cfg Config) { + if cfg.Output != "file" { + return + } + if cfg.CleanupIntervalSecs <= 0 { + return + } + + logDir := cfg.Dir + if logDir == "" { + logDir = "logs" + } + + ticker := time.NewTicker(time.Duration(cfg.CleanupIntervalSecs * float64(time.Second))) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + cleanupLogs(logDir, cfg.MaxAgeDays, cfg.MaxTotalSizeMB) + } + } +} + +type logFileInfo struct { + name string + path string + date time.Time + size int64 +} + +// cleanupLogs removes archived log files based on age and total size limits. +func cleanupLogs(logDir string, maxAgeDays, maxTotalSizeMB int) { + entries, err := os.ReadDir(logDir) + if err != nil { + fmt.Fprintf(os.Stderr, "[WARN] Failed to read log directory for cleanup: %v\n", err) + return + } + + now := nowFunc() + today := now.Format("2006-01-02") + var archives []logFileInfo + + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + + // router.log is now a symlink; skip it. + if name == "router.log" { + continue + } + + // Match archived files: router-YYYY-MM-DD.log + if !strings.HasPrefix(name, "router-") || !strings.HasSuffix(name, ".log") { + continue + } + dateStr := strings.TrimPrefix(name, "router-") + dateStr = strings.TrimSuffix(dateStr, ".log") + fileDate, err := time.Parse("2006-01-02", dateStr) + if err != nil { + continue + } + // Never delete today's active date file. + if dateStr == today { + continue + } + info, err := entry.Info() + if err != nil { + continue + } + archives = append(archives, logFileInfo{ + name: name, + path: filepath.Join(logDir, name), + date: fileDate, + size: info.Size(), + }) + } + + // Sort by date ascending (oldest first). + sort.Slice(archives, func(i, j int) bool { + return archives[i].date.Before(archives[j].date) + }) + + // Phase 1: Age-based cleanup. 
+ if maxAgeDays > 0 { + cutoff := now.AddDate(0, 0, -maxAgeDays) + remaining := archives[:0] + for _, f := range archives { + if f.date.Before(cutoff) { + if err := os.Remove(f.path); err != nil { + fmt.Fprintf(os.Stderr, "[ERROR] Failed to remove log file %s: %v\n", f.path, err) + } + } else { + remaining = append(remaining, f) + } + } + archives = remaining + } + + // Phase 2: Size-based cleanup. + if maxTotalSizeMB > 0 { + maxBytes := int64(maxTotalSizeMB) * 1024 * 1024 + var totalSize int64 + for _, f := range archives { + totalSize += f.size + } + for len(archives) > 0 && totalSize > maxBytes { + oldest := archives[0] + if err := os.Remove(oldest.path); err != nil { + fmt.Fprintf(os.Stderr, "[ERROR] Failed to remove log file %s: %v\n", oldest.path, err) + } + totalSize -= oldest.size + archives = archives[1:] + } } } diff --git a/fastdeploy/golang_router/pkg/logger/logger_test.go b/fastdeploy/golang_router/pkg/logger/logger_test.go index 59faeee2a4d..1d9874ded6f 100644 --- a/fastdeploy/golang_router/pkg/logger/logger_test.go +++ b/fastdeploy/golang_router/pkg/logger/logger_test.go @@ -4,13 +4,15 @@ import ( "bytes" "context" "os" + "path/filepath" "strings" "testing" + "time" ) func TestLoggerInit(t *testing.T) { t.Run("stdout output", func(t *testing.T) { - Init("debug", "stdout") + Init(Config{Level: "debug", Output: "stdout"}) if infoLogger == nil || errorLogger == nil || warnLogger == nil || debugLogger == nil { t.Error("Loggers should be initialized") @@ -24,7 +26,7 @@ func TestLoggerInit(t *testing.T) { defer os.RemoveAll("logs") // sync.Once prevents re-init, so manually verify file creation logic - f, err := os.OpenFile("logs/router.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + f, err := os.OpenFile("logs/router.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) if err != nil { t.Fatalf("Failed to create log file: %v", err) } @@ -117,7 +119,7 @@ func TestLogLevels(t *testing.T) { func TestLogFunctions(t *testing.T) { var buf bytes.Buffer - 
Init("debug", "stdout") + Init(Config{Level: "debug", Output: "stdout"}) level = "debug" // Redirect output @@ -132,7 +134,7 @@ func TestLogFunctions(t *testing.T) { } func TestContextPrefix(t *testing.T) { - Init("debug", "stdout") + Init(Config{Level: "debug", Output: "stdout"}) level = "debug" t.Run("nil context produces no prefix", func(t *testing.T) { @@ -151,7 +153,7 @@ func TestContextPrefix(t *testing.T) { } }) - t.Run("context without request_id produces [request_id:null]", func(t *testing.T) { + t.Run("context without request_id produces no request_id prefix", func(t *testing.T) { var buf bytes.Buffer oldOutput := infoLogger.Writer() defer func() { infoLogger.SetOutput(oldOutput) }() @@ -160,8 +162,11 @@ func TestContextPrefix(t *testing.T) { ctx := context.Background() Info(ctx, "mixed mode log") output := buf.String() - if !strings.Contains(output, "[request_id:null]") { - t.Errorf("context without request_id should produce [request_id:null], got: %s", output) + if strings.Contains(output, "[request_id:") { + t.Errorf("context without request_id should not produce request_id prefix, got: %s", output) + } + if !strings.Contains(output, "mixed mode log") { + t.Errorf("message should be present, got: %s", output) } }) @@ -179,3 +184,176 @@ func TestContextPrefix(t *testing.T) { } }) } + +func TestParseLogDate(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + {"standard INFO log line", "[INFO] 2024/03/15 10:30:45 some message", "2024-03-15"}, + {"standard ERROR log line", "[ERROR] 2024/01/02 09:00:00 error occurred", "2024-01-02"}, + {"standard WARN log line", "[WARN] 2025/12/31 23:59:59 warning msg", "2025-12-31"}, + {"standard DEBUG log line", "[DEBUG] 2024/06/01 00:00:00 debug info", "2024-06-01"}, + {"empty string", "", ""}, + {"no date pattern", "no date here at all", ""}, + {"incomplete date - only year", "2024/", ""}, + {"incomplete date - year and month", "[INFO] 2024/03", ""}, + {"short input", "abc", ""}, 
+ {"date without log prefix", "2024/03/15 10:30:45 message", "2024-03-15"}, + {"date at different position", "prefix 2024/11/20 rest", "2024-11-20"}, + {"slash but not date", "path/to/file is not a date", ""}, + {"single character input", "x", ""}, + {"exactly 10 chars non-date", "abcdefghij", ""}, + {"boundary - first day of year", "[INFO] 2024/01/01 00:00:00 new year", "2024-01-01"}, + {"boundary - last day of year", "[INFO] 2024/12/31 23:59:59 year end", "2024-12-31"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseLogDate([]byte(tt.input)) + if got != tt.expected { + t.Errorf("parseLogDate(%q) = %q, want %q", tt.input, got, tt.expected) + } + }) + } +} + +func TestStartLogCleanup(t *testing.T) { + t.Run("cleanup runs for file output and respects cancellation", func(t *testing.T) { + tmpDir := t.TempDir() + + originalNowFunc := nowFunc + fixedNow := time.Date(2026, 4, 10, 12, 0, 0, 0, time.UTC) + nowFunc = func() time.Time { return fixedNow } + defer func() { nowFunc = originalNowFunc }() + + // Create archived logs: one older than 1 day and one recent. 
+ oldLog := filepath.Join(tmpDir, "router-2026-04-07.log") + recentLog := filepath.Join(tmpDir, "router-2026-04-09.log") + todayLog := filepath.Join(tmpDir, "router-2026-04-10.log") + for _, p := range []string{oldLog, recentLog, todayLog} { + if err := os.WriteFile(p, []byte("test"), 0644); err != nil { + t.Fatalf("failed to create test log %s: %v", p, err) + } + } + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + go func() { + defer close(done) + StartLogCleanup(ctx, Config{ + Output: "file", + Dir: tmpDir, + MaxAgeDays: 2, + CleanupIntervalSecs: 0.01, + }) + }() + + waitForCondition(t, 500*time.Millisecond, func() bool { + _, err := os.Stat(oldLog) + return os.IsNotExist(err) + }, "old log should be removed by StartLogCleanup") + + if _, err := os.Stat(recentLog); err != nil { + t.Fatalf("recent log should be kept, stat err: %v", err) + } + if _, err := os.Stat(todayLog); err != nil { + t.Fatalf("today log should be kept, stat err: %v", err) + } + + cancel() + select { + case <-done: + case <-time.After(500 * time.Millisecond): + t.Fatal("StartLogCleanup did not stop after context cancellation") + } + }) + + t.Run("non-file output returns immediately", func(t *testing.T) { + done := make(chan struct{}) + go func() { + defer close(done) + StartLogCleanup(context.Background(), Config{Output: "stdout", CleanupIntervalSecs: 1}) + }() + select { + case <-done: + case <-time.After(200 * time.Millisecond): + t.Fatal("StartLogCleanup should return immediately for non-file output") + } + }) +} + +func TestRotatingWriterCrossDayGracePeriodIntegration(t *testing.T) { + tmpDir := t.TempDir() + + originalNowFunc := nowFunc + defer func() { nowFunc = originalNowFunc }() + + current := time.Date(2026, 4, 10, 23, 59, 59, 0, time.UTC) + nowFunc = func() time.Time { return current } + + w, err := newRotatingWriter(tmpDir) + if err != nil { + t.Fatalf("failed to create rotating writer: %v", err) + } + defer w.Close() + + if _, err = 
w.Write([]byte("[INFO] 2026/04/10 23:59:59 first day line\n")); err != nil { + t.Fatalf("failed to write day-1 line: %v", err) + } + + current = time.Date(2026, 4, 11, 0, 0, 1, 0, time.UTC) + if _, err = w.Write([]byte("[INFO] 2026/04/11 00:00:01 second day line\n")); err != nil { + t.Fatalf("failed to write day-2 line: %v", err) + } + + if _, err = w.Write([]byte("[INFO] 2026/04/10 23:59:58 late previous-day line\n")); err != nil { + t.Fatalf("failed to write late previous-day line: %v", err) + } + + day1Bytes, err := os.ReadFile(filepath.Join(tmpDir, "router-2026-04-10.log")) + if err != nil { + t.Fatalf("failed to read day-1 log: %v", err) + } + day1Content := string(day1Bytes) + if !strings.Contains(day1Content, "first day line") { + t.Fatalf("day-1 log missing initial line, content: %s", day1Content) + } + if !strings.Contains(day1Content, "late previous-day line") { + t.Fatalf("day-1 log missing late previous-day line, content: %s", day1Content) + } + + day2Bytes, err := os.ReadFile(filepath.Join(tmpDir, "router-2026-04-11.log")) + if err != nil { + t.Fatalf("failed to read day-2 log: %v", err) + } + day2Content := string(day2Bytes) + if !strings.Contains(day2Content, "second day line") { + t.Fatalf("day-2 log missing day-2 line, content: %s", day2Content) + } + if strings.Contains(day2Content, "late previous-day line") { + t.Fatalf("late previous-day line should not be in day-2 file, content: %s", day2Content) + } + + symlinkTarget, err := os.Readlink(filepath.Join(tmpDir, "router.log")) + if err != nil { + t.Fatalf("failed to read symlink: %v", err) + } + if symlinkTarget != "router-2026-04-11.log" { + t.Fatalf("router.log symlink target = %s, want router-2026-04-11.log", symlinkTarget) + } +} + +func waitForCondition(t *testing.T, timeout time.Duration, cond func() bool, msg string) { + t.Helper() + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if cond() { + return + } + time.Sleep(10 * time.Millisecond) + } + t.Fatal(msg) +}