Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions astrbot/core/provider/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,20 +601,18 @@ async def load_provider(self, provider_config: dict) -> None:
f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。",
exc_info=True,
)
return
raise

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question (bug_risk): Re-raising here changes the behavior from ‘best-effort load all providers’ to ‘fail on first problematic provider’.

Previously, a failed provider import/instantiation was logged and skipped so other providers could still load. Raising here means one bad provider can now abort the entire load (and possibly app startup). If this change is intentional, verify that callers expect and handle this exception. If not, consider keeping the failure scoped to the single provider while still surfacing enough detail for diagnostics/reporting.

except Exception as e:
logger.critical(
f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。未知原因",
exc_info=True,
)
return
raise

if provider_config["type"] not in provider_cls_map:
logger.error(
f"Provider adapter not found: {provider_config['type']}({provider_config['id']}). Skipped.",
exc_info=True,
)
return
msg = f"Provider adapter not found: {provider_config['type']}({provider_config['id']})."
logger.error(msg, exc_info=True)
raise ValueError(msg)

provider_metadata = provider_cls_map[provider_config["type"]]
try:
Expand Down
108 changes: 105 additions & 3 deletions astrbot/core/provider/sources/sensevoice_selfhosted_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import asyncio
import os
import re
from typing import cast

Expand All @@ -22,6 +23,11 @@
"sensevoice_stt_selfhost",
"SenseVoice 自托管语音识别 模型部署",
provider_type=ProviderType.SPEECH_TO_TEXT,
default_config_tmpl={
"id": "sensevoice",
"stt_model": "iic/SenseVoiceSmall",
"is_emotion": False,
},
)
class ProviderSenseVoiceSTTSelfHost(STTProvider):
def __init__(
Expand All @@ -37,14 +43,110 @@ def __init__(
async def initialize(self) -> None:
logger.info("下载或者加载 SenseVoice 模型中,这可能需要一些时间 ...")

# 将模型加载放到线程池中执行
def _load_model():
try:
return SenseVoiceSmall(self.model_name, quantize=True, batch_size=16)
except Exception as e:
err_str = str(e)
if "Type parameter (T) of Optype (Less) bound to different types" in err_str:
logger.info("检测到 ONNX 导出类型不匹配,正在修复导出的模型文件 ...")
self._fix_onnx_less_type_mismatch()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

在调用 _fix_onnx_less_type_mismatch 时,建议传入 self.model_name,以便将修复范围限制在当前模型的缓存目录下,避免扫描整个 ModelScope 缓存目录导致性能低下或误伤其他模型。

Suggested change
self._fix_onnx_less_type_mismatch()
self._fix_onnx_less_type_mismatch(self.model_name)

# 重试加载,此时 model_quant.onnx 已被修复
return SenseVoiceSmall(
self.model_name, quantize=True, batch_size=16
)
raise

self.model = await asyncio.get_running_loop().run_in_executor(
None,
lambda: SenseVoiceSmall(self.model_name, quantize=True, batch_size=16),
None, _load_model,
)

logger.info("SenseVoice 模型加载完成。")

@staticmethod
def _fix_onnx_less_type_mismatch() -> None:
"""修复 ONNX 导出时 Less 节点类型不匹配的问题。

在 model_quant.onnx 中,arange 输出 FLOAT,但 Less 的第二个输入
convert_element_type_default 输出 INT64,导致 Less 的 T 参数冲突。
在 arange 后插入 Cast 节点转为 INT64。
"""
import onnx
from onnx import helper, TensorProto

cache_dir = os.path.expanduser(
os.path.join("~", ".cache", "modelscope", "hub")
)
model_quant_path = None
Comment on lines +66 to +80

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

建议将 _fix_onnx_less_type_mismatch 改为接收 model_name 参数,并将 cache_dir 限制在 ~/.cache/modelscope/hub/{model_name} 下。

原因

  1. 性能提升:如果用户缓存了大量其他模型,扫描整个 hub 目录会非常缓慢。
  2. 安全性/正确性:避免误扫描并修改其他模型中可能存在的 model_quant.onnx 文件,导致其他模型损坏。
Suggested change
@staticmethod
def _fix_onnx_less_type_mismatch() -> None:
"""修复 ONNX 导出时 Less 节点类型不匹配的问题
在 model_quant.onnx 中,arange 输出 FLOAT,但 Less 的第二个输入
convert_element_type_default 输出 INT64,导致 Less 的 T 参数冲突。
在 arange 后插入 Cast 节点转为 INT64。
"""
import onnx
from onnx import helper, TensorProto
cache_dir = os.path.expanduser(
os.path.join("~", ".cache", "modelscope", "hub")
)
model_quant_path = None
@staticmethod
def _fix_onnx_less_type_mismatch(model_name: str) -> None:
"""修复 ONNX 导出时 Less 节点类型不匹配的问题
在 model_quant.onnx 中,arange 输出 FLOAT,但 Less 的第二个输入
convert_element_type_default 输出 INT64,导致 Less 的 T 参数冲突。
在 arange 后插入 Cast 节点转为 INT64。
"""
import onnx
from onnx import helper, TensorProto
cache_dir = os.path.expanduser(
os.path.join("~", ".cache", "modelscope", "hub", model_name)
)

for root, _dirs, files in os.walk(cache_dir):
Comment on lines +77 to +81

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): The ONNX fix searches the entire modelscope cache and patches the first model_quant.onnx, which might not belong to this provider.

Because this walks the entire ~/.cache/modelscope/hub tree and takes the first model_quant.onnx, it may patch an unrelated model when multiple models (or SenseVoice variants) are cached. Please restrict the search to a path derived from self.model_name / known SenseVoice locations, or add a verification step (e.g., checking metadata or filename) before applying the patch.

if "model_quant.onnx" in files:
model_quant_path = os.path.join(root, "model_quant.onnx")
break

if not model_quant_path or not os.path.exists(model_quant_path):
logger.error(
"未找到 model_quant.onnx,无法修复 ONNX 类型不匹配。"
)
return

model = onnx.load(model_quant_path)
graph = model.graph

# 找到 arange 输出节点和 Less 节点
less_node = None
arange_output = None
for node in graph.node:
if node.op_type == "Less":
less_node = node
# Less 的第二个输入是 arange 输出
arange_output = node.input[1]
break

if less_node is None:
logger.info("未找到 Less 节点,无需修复。")
return
Comment on lines +95 to +107

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

直接获取图中的第一个 Less 节点是非常脆弱的。如果模型中存在其他 Less 节点,这可能会导致定位错误,甚至因插入错误的 Cast 节点而损坏模型。

建议通过检查 Less 节点的输入来源(例如其第二个输入是否由 Range/Arange 节点生成)来精确、安全地定位需要修复的节点。同时,在此处记录 less_node_idx,以便后续直接插入,避免重复查找。

Suggested change
# 找到 arange 输出节点和 Less 节点
less_node = None
arange_output = None
for node in graph.node:
if node.op_type == "Less":
less_node = node
# Less 的第二个输入是 arange 输出
arange_output = node.input[1]
break
if less_node is None:
logger.info("未找到 Less 节点,无需修复。")
return
# 找到 arange 输出节点和 Less 节点
less_node_idx = -1
arange_output = None
for idx, node in enumerate(graph.node):
if node.op_type == "Less":
second_input = node.input[1]
# 检查该输入的生成节点是否为 Range/Arange,确保定位准确
producer = next((n for n in graph.node if second_input in n.output), None)
if producer and producer.op_type in ("Range", "Arange"):
less_node_idx = idx
arange_output = second_input
break
if less_node_idx == -1:
logger.info("未找到需要修复的 Less 节点。")
return


# 检查 arange_output 的类型
arange_output_tensor = None
for vi in graph.value_info:
if vi.name == arange_output:
arange_output_tensor = vi
break

if arange_output_tensor is None:
# 也可能是 graph.input
for vi in graph.input:
if vi.name == arange_output:
arange_output_tensor = vi
break

if arange_output_tensor is None:
logger.info("无法找到 arange 输出 tensor 信息,跳过修复。")
return

# 创建 cast_name
cast_output_name = arange_output + "_cast_int64"

# 插入 Cast 节点:将 FLOAT 转为 INT64
cast_node = helper.make_node(
"Cast",
inputs=[arange_output],
outputs=[cast_output_name],
name=arange_output + "_to_int64",
to=TensorProto.INT64,
)

# 修改 Less 节点的第二个输入为 cast 后的输出
less_node.input[1] = cast_output_name

# 将 Cast 节点插入到 Less 节点之前
graph.node.insert(
list(graph.node).index(less_node), cast_node
)
Comment on lines +142 to +145

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

使用先前记录的 less_node_idx 直接插入 Cast 节点,避免使用 list(graph.node).index(less_node) 进行昂贵的列表转换和线性查找。

        # 将 Cast 节点插入到 Less 节点之前
        graph.node.insert(less_node_idx, cast_node)


onnx.save(model, model_quant_path)
logger.info("ONNX 模型文件已修复并保存。")

async def get_text(self, audio_url: str) -> str:
try:
# 使用 run_in_executor 来调用模型进行识别
Expand Down
15 changes: 14 additions & 1 deletion astrbot/dashboard/services/config_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,20 @@ async def delete_provider_from_dashboard_payload(self, payload: object) -> str:
async def test_provider(self, provider_id: str) -> dict:
target = self.provider_manager.inst_map.get(provider_id)
if not target:
raise ValueError(f"Provider {provider_id} not found")
# 检查配置中是否存在,帮助用户区分"从未添加"和"加载失败"
config_exists = any(
p.get("id") == provider_id
for p in self.config.get("provider", [])
)
if config_exists:
raise ValueError(
f"Provider {provider_id} 已在配置中但加载失败,"
"请检查 AstrBot 启动日志中的错误信息。"
)
raise ValueError(
f"Provider {provider_id} 未在配置中找到,"
"请先在 Provider 页面添加该服务提供商。"
)
meta = target.meta()
provider_type = getattr(meta, "provider_type", None)
result = {
Expand Down
127 changes: 127 additions & 0 deletions sensevoice_issue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
## SenseVoice STT Provider: 配置后显示"not found"且缺少依赖处理

### 问题描述

在 Dashboard 中启用 SenseVoice STT 语音识别后,测试/检查 Provider 状态时显示:

```
Provider with id 'sensevoice' not found in provider_manager.
```

但实际上 `sensevoice_selfhosted_source.py` 源码文件是存在的。

### 复现步骤

**场景一(仅设置 STT 开关,未在 Provider 页添加):**

1. 打开 Dashboard → 设置页 → 语音识别选项卡
2. 启用 STT,填写 `provider_id: "sensevoice"`
3. 切换到 Provider 页 → Speech-to-Text 选项卡
4. Provider 列表中不存在 sensevoice
5. 点击 Test → 显示 "not found"

**场景二(在 Provider 页添加了,但缺依赖):**

1. 打开 Dashboard → Provider 页 → Speech-to-Text 选项卡
2. 点击 + 添加 Provider,选择 SenseVoice(Local)
3. 填写配置并保存 → 前端显示"添加成功"
4. 点击 Test → 仍显示 "not found"

### 根因分析

#### 1. 关键依赖未声明(需要手动安装)

`sensevoice_selfhosted_source.py` 顶部直接 import:

```python
from funasr_onnx import SenseVoiceSmall
```

但以下依赖均未列在项目的 `requirements.txt` / `pyproject.toml` 中:

| 依赖 | 用途 |
|------|------|
| `funasr_onnx` | ONNX 推理引擎 |
| `torch` | PyTorch 模型加载(ONNX 导出步骤需要) |
| `modelscope` | 从 ModelScope 下载模型 |
| `funasr` | 完整 funasr 库(ONNX 导出依赖) |
| `torchaudio` | funasr 间接依赖 |
| `onnxscript` | `torch.onnx` 导出需要 |

用户安装 AstrBot 时不会安装这些包,需要用户自行猜测并手动 pip install。

#### 2. `load_provider()` 加载失败时静默吞异常

`create_provider()` 流程:

```
用户点"添加" → 配置写入 cmd_config.json ✅ → load_provider() → import funasr_onnx 失败 ❌
异常被捕获,只打了一行 log
provider 不加入 inst_map
前端显示"添加成功"(返回了 200 OK)
```

`load_provider()` 内部的 import 错误被捕获后既不向上抛异常,也不给前端返回错误信息。用户看到的是"添加成功",但 provider 实际上没有被加载到内存。

`post_new_provider` 的代码路径:

```python
async def post_new_provider(self):
new_provider_config = await request.json
try:
await self.core_lifecycle.provider_manager.create_provider(new_provider_config)
except Exception as e:
return Response().error(str(e)).__dict__ # ← 只有这里会报错
return Response().ok(None, "新增服务提供商配置成功").__dict__
```

但 `create_provider()` 调用的 `load_provider()` 内部捕获了异常却没有 re-raise,所以 `post_new_provider` 永远走不到 except 分支。

#### 3. `check_one_provider_status()` 无法区分失败原因

`check_one_provider_status()` 只查 `inst_map.get(provider_id)`:

```python
target = prov_mgr.inst_map.get(provider_id)
if not target:
return Response().error(f"Provider with id '{provider_id}' not found").__dict__
```

它无法区分三种情况:
- Provider 从未被添加(配置里就没有)
- Provider 添加了但加载失败(import error / 缺依赖)
- Provider 初始化失败(模型下载失败、ONNX 导出错误等)

统一报 "not found",对用户没有任何排查帮助。

#### 4. ONNX 导出模型类型不匹配(依赖齐全后仍会遇到)

安装完所有依赖后,`SenseVoiceSmall(model_name, quantize=True)` 初始化时执行 ONNX 导出会出现:

```
Type Error: Type parameter (T) of Optype (Less) bound to different types
```

根因:导出的 `model_quant.onnx` 中有一个 `Less` 节点,它的两个输入类型不一致:`arange` 的输出为 FLOAT(elem_type 1),而 `convert_element_type_default` 的输出为 INT64(elem_type 7),导致 `Less` 节点的类型参数 `T` 绑定冲突。需要在 ONNX 图中插入 Cast 节点,把 `arange` 的输出转换为 INT64 来修复。

#### 5. Provider 配置流程存在断裂

STT 设置页的 `provider_stt_settings.provider_id` 和 Provider 页的 `provider` 列表是两个独立的功能。用户可能在设置页直接填写了 `provider_id: "sensevoice"`,但从未在 Provider 页添加过对应的 provider 条目。两者之间缺少联动检查或引导。

### 建议修复

1. **`sensevoice_selfhosted_source.py`**: 补充 `default_config_tmpl` 参数
2. **`pyproject.toml` / `requirements.txt`**: 将 `funasr_onnx` 及其依赖列为可选依赖(extra / optional)
3. **`provider/manager.py` `load_provider`**: 加载失败时向上抛异常或通过回调通知前端,而不是静默吞掉
4. **`provider/manager.py` / `check_one_provider_status`**: 在 provider 记录中保存加载错误信息,`check_one` 时一并返回,而不是笼统报 "not found"
5. **`sensevoice_selfhosted_source.py` `initialize()`**: ONNX 导出后自动修复类型不匹配(或改为直接使用 PyTorch 推理跳过 ONNX 导出)
6. **Dashboard 交互**: 配置页的 provider 选择器和 Provider 页之间增加联动,provider 不存在时给出明确引导

### 环境

- AstrBot 版本: v4.25.5
- 操作系统: Windows 11
- Python: 3.12