Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ class TextApiConfig(BaseModel):
base_url: str = Field(..., description="文本模型 API 基础 URL")
model: str = Field("gemini-3-pro-preview", description="文本模型名称")
format: Literal["gemini", "openai"] = Field("gemini", description="API 格式")
thinking_level: Optional[Literal["low", "high"]] = Field(None, description="思考深度")
thinking: Literal["enabled", "disabled"] = Field("disabled", description="思考模式")
thinking_level: Optional[Literal["low", "high"]] = Field(None, description="旧版 Gemini 思考深度")


class ModelProfileConfig(BaseModel):
Expand All @@ -43,6 +44,7 @@ class ModelProfileConfig(BaseModel):
base_url: str = Field(..., description="OpenAI-compatible Base URL")
api_key: str = Field(..., description="API Key")
adapter: str = Field("openai_chat", description="适配器")
thinking: Optional[Literal["enabled", "disabled"]] = Field(None, description="OpenAI-compatible thinking mode")


class ModelProfilesConfig(BaseModel):
Expand Down Expand Up @@ -121,6 +123,12 @@ def get_thinking_level(self) -> Optional[str]:
return self.text.thinking_level
return None

def get_thinking(self) -> str:
    """Return ``self.text.thinking`` when ``self.text`` is set, else ``"disabled"``."""
    text_config = self.text
    return text_config.thinking if text_config else "disabled"


class GenerationRequest(BaseModel):
"""生成请求"""
Expand Down
2 changes: 2 additions & 0 deletions api/profile_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def profiles_from_generation_config(config: Any) -> ModelProfileSet:
"base_url": config.text.base_url,
"api_key": config.text.api_key,
"adapter": "openai_chat",
"thinking": getattr(config.text, "thinking", "disabled"),
},
"image_model": {
"model": config.image.model,
Expand All @@ -72,6 +73,7 @@ def profiles_from_generation_config(config: Any) -> ModelProfileSet:
"base_url": config.get_text_base_url(),
"api_key": config.get_text_api_key(),
"adapter": "openai_chat",
"thinking": config.get_thinking(),
},
"image_model": {
"model": config.get_image_model(),
Expand Down
1 change: 1 addition & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ api:
base_url: "https://api.example.com/v1"
model: "DeepSeek-V4-Pro"
adapter: "openai_chat"
thinking: "disabled" # 可选: "enabled", "disabled"
image_model:
api_key: "your-image-api-key"
base_url: "https://api.example.com/v1"
Expand Down
Binary file modified docs/assets/aippt-demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/assets/aippt-demo.webm
Binary file not shown.
12 changes: 8 additions & 4 deletions src/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@ def _generate_text_openai(self, prompt: str, system_instruction: str = None) ->

messages.append({"role": "user", "content": prompt})

response = self._openai_client.chat.completions.create(
model=self.config.text_model,
messages=messages
)
kwargs = {
"model": self.config.text_model,
"messages": messages,
}
if self.config.text_thinking in {"enabled", "disabled"}:
kwargs["extra_body"] = {"thinking": {"type": self.config.text_thinking}}
Comment on lines +103 to +104

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Passing extra_body with "thinking": {"type": "disabled"} by default when thinking is disabled will break compatibility with standard OpenAI-compatible endpoints that do not support the thinking parameter (resulting in a 400 Bad Request). We should only pass the thinking parameter in extra_body when it is explicitly set to "enabled".

Suggested change
if self.config.text_thinking in {"enabled", "disabled"}:
kwargs["extra_body"] = {"thinking": {"type": self.config.text_thinking}}
if self.config.text_thinking == "enabled":
kwargs["extra_body"] = {"thinking": {"type": "enabled"}}


response = self._openai_client.chat.completions.create(**kwargs)

return response.choices[0].message.content

Expand Down
5 changes: 4 additions & 1 deletion src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class APIConfig:
text_model: str = None
text_base_url: str = None # 可选,单独的文本 API 地址
text_api_key: str = None # 可选,单独的文本 API 密钥
text_thinking_level: Optional[Literal["low", "high"]] = None # 思考深度
text_thinking: Optional[Literal["enabled", "disabled"]] = None # OpenAI-compatible 思考模式
text_thinking_level: Optional[Literal["low", "high"]] = None # 旧版 Gemini 思考深度

def __post_init__(self):
"""从配置文件加载默认值"""
Expand All @@ -80,6 +81,8 @@ def __post_init__(self):
self.text_base_url = text_config.get("base_url") # 可选
if self.text_api_key is None:
self.text_api_key = text_config.get("api_key") # 可选
if self.text_thinking is None:
self.text_thinking = text_config.get("thinking", "disabled")
if self.text_thinking_level is None:
self.text_thinking_level = text_config.get("thinking_level") # 可选

Expand Down
2 changes: 2 additions & 0 deletions src/config_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ def _clean_profile_data(profile_data: Dict[str, Any], existing_models: Dict[str,
"api_key": api_key,
"adapter": profile.get("adapter", "openai_chat" if role == "prompt_model" else "raw_chat_multimodal"),
}
if role == "prompt_model":
cleaned[role]["thinking"] = profile.get("thinking", existing_profile.get("thinking", "disabled"))
if profile.get("id"):
cleaned[role]["id"] = profile["id"]
if profile.get("label"):
Expand Down
29 changes: 27 additions & 2 deletions src/document_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
text stay lightweight and do not require Docling.
"""

import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
Expand Down Expand Up @@ -51,7 +52,7 @@ def _parse_pdf_text(self, path: Path) -> Optional[ParsedDocument]:
try:
from pypdf import PdfReader
except ImportError:
return None
return self._parse_literal_pdf_text(path)
Comment on lines 52 to +55

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The fallback literal PDF text extractor is extremely fragile. It does not handle FlateDecode stream compression (which is standard for almost all modern PDFs), hexadecimal strings, TJ operators, or escaped parentheses correctly. If pypdf is not available, it is much safer to return None rather than attempting a naive regex-based binary parse that will almost always fail or return garbage.

Suggested change
try:
from pypdf import PdfReader
except ImportError:
return None
return self._parse_literal_pdf_text(path)
try:
from pypdf import PdfReader
except ImportError:
return None


try:
reader = PdfReader(str(path))
Expand All @@ -60,15 +61,39 @@ def _parse_pdf_text(self, path: Path) -> Optional[ParsedDocument]:
text = page.extract_text() or ""
if text.strip():
pages.append(f"<!-- page: {index} -->\n{text.strip()}")
if not pages:
fallback_text = self._extract_literal_pdf_text(path)
if fallback_text:
pages.append(f"<!-- page: 1 -->\n{fallback_text}")
except Exception:
return None
return self._parse_literal_pdf_text(path)

return ParsedDocument(
filename=path.name,
normalized_markdown="\n\n".join(pages),
metadata={"parser": "pypdf", "extension": ".pdf", "pages": len(reader.pages)},
)

def _parse_literal_pdf_text(self, path: Path) -> Optional[ParsedDocument]:
    """Wrap the literal-string fallback text of *path* in a one-page document.

    Returns ``None`` when ``_extract_literal_pdf_text`` yields no text;
    otherwise the text is emitted under a single ``<!-- page: 1 -->`` marker.
    """
    literal_text = self._extract_literal_pdf_text(path)
    if literal_text:
        # NOTE(review): the metadata reports parser "pypdf" and a page count
        # of 1 even though the text came from the raw-bytes fallback; kept
        # as-is so downstream consumers see the same values.
        return ParsedDocument(
            filename=path.name,
            normalized_markdown=f"<!-- page: 1 -->\n{literal_text}",
            metadata={"parser": "pypdf", "extension": ".pdf", "pages": 1},
        )
    return None
Comment on lines +77 to +85

def _extract_literal_pdf_text(self, path: Path) -> str:
    """Best-effort extraction of literal strings shown with the ``Tj`` operator.

    Reads the raw bytes of *path* and collects every ``(...) Tj`` literal
    string, one per output line. This only sees uncompressed content
    streams; hex strings and ``TJ`` arrays are not handled.

    Compared with a lazy ``\\((.*?)\\)`` scan, the pattern below only accepts
    a well-formed literal string (escaped characters, plus one level of
    balanced nested parentheses), so a match can no longer begin at an
    unrelated ``(`` and swallow everything up to the next ``) Tj``.

    Args:
        path: Location of the PDF file to scan.

    Returns:
        The decoded strings joined with newlines, or ``""`` when none are found.
    """
    simple_escapes = {
        b"n": b"\n",
        b"r": b"\r",
        b"t": b"\t",
        b"b": b"\b",
        b"f": b"\f",
        b"(": b"(",
        b")": b")",
        b"\\": b"\\",
    }

    def unescape(match):
        token = match.group(1)
        if token in simple_escapes:
            return simple_escapes[token]
        if token[:1] in b"01234567":
            # Octal character code; keep only the low byte on overflow.
            return bytes([int(token, 8) & 0xFF])
        if token in (b"\r\n", b"\r", b"\n"):
            # Backslash followed by end-of-line is a line continuation.
            return b""
        # Unknown escape: the backslash itself is dropped.
        return token

    # Alternatives start with distinct bytes, so matching stays linear.
    string_body = rb"(?:\\.|[^\\()])*"
    show_text = re.compile(
        rb"\(((?:\\.|[^\\()]|\(" + string_body + rb"\))*)\)\s*Tj",
        flags=re.DOTALL,
    )
    # Single pass over escapes, so the result does not depend on the order
    # in which individual replacements would otherwise be applied.
    escape = re.compile(rb"\\([0-7]{1,3}|\r\n|.)", flags=re.DOTALL)

    chunks = []
    for literal in show_text.findall(path.read_bytes()):
        decoded = escape.sub(unescape, literal).decode("utf-8", errors="ignore").strip()
        if decoded:
            chunks.append(decoded)
    return "\n".join(chunks)

def _parse_with_docling(self, path: Path) -> ParsedDocument:
try:
from docling.document_converter import DocumentConverter
Expand Down
4 changes: 4 additions & 0 deletions src/model_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class ModelProfile:
adapter: str = ""
id: str = ""
label: str = ""
thinking: str = "disabled"

def __post_init__(self):
if self.base_url:
Expand All @@ -40,6 +41,8 @@ def __post_init__(self):
self.id = self.role
if not self.label:
self.label = self.model
if self.thinking not in {"enabled", "disabled"}:
self.thinking = "disabled"

def to_public_dict(self) -> Dict[str, Any]:
data = asdict(self)
Expand Down Expand Up @@ -195,6 +198,7 @@ def _profile_from_dict(role: str, data: Dict[str, Any]) -> ModelProfile:
base_url=base_url,
api_key=api_key,
adapter=data.get("adapter", DEFAULT_ADAPTERS.get(role, "openai_chat")),
thinking=data.get("thinking", "disabled"),
)


Expand Down
8 changes: 7 additions & 1 deletion src/model_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ def generate_text(self, prompt: str, system_instruction: Optional[str] = None) -
if system_instruction:
messages.append({"role": "system", "content": system_instruction})
messages.append({"role": "user", "content": prompt})
response = client.chat.completions.create(model=profile.model, messages=messages)
kwargs: Dict[str, Any] = {
"model": profile.model,
"messages": messages,
}
if profile.thinking in {"enabled", "disabled"}:
kwargs["extra_body"] = {"thinking": {"type": profile.thinking}}
Comment on lines +37 to +38

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Similar to the client configuration, passing extra_body with "thinking": {"type": "disabled"} by default will break compatibility with standard OpenAI-compatible endpoints. We should only pass it when thinking is explicitly set to "enabled".

Suggested change
if profile.thinking in {"enabled", "disabled"}:
kwargs["extra_body"] = {"thinking": {"type": profile.thinking}}
if profile.thinking == "enabled":
kwargs["extra_body"] = {"thinking": {"type": "enabled"}}

response = client.chat.completions.create(**kwargs)
if not response.choices:
return ""
return response.choices[0].message.content or ""
Expand Down
2 changes: 2 additions & 0 deletions tests/test_config_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def test_saves_model_profiles_to_config_yaml(self):
"base_url": "https://text.example/v1",
"api_key": "text-key",
"adapter": "openai_chat",
"thinking": "enabled",
},
"image_model": {
"model": "image",
Expand All @@ -36,6 +37,7 @@ def test_saves_model_profiles_to_config_yaml(self):
saved = yaml.safe_load(config_path.read_text())
self.assertEqual(saved["ppt"]["num_pages"], 3)
self.assertEqual(saved["api"]["models"]["prompt_model"]["api_key"], "text-key")
self.assertEqual(saved["api"]["models"]["prompt_model"]["thinking"], "enabled")
self.assertEqual(saved["api"]["models"]["image_model"]["model"], "image")

def test_empty_api_key_preserves_existing_secret(self):
Expand Down
54 changes: 54 additions & 0 deletions tests/test_model_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import unittest
from types import SimpleNamespace
from unittest.mock import patch

from src.model_profiles import ModelProfile, ModelProfileSet
from src.model_router import ModelRouter


class ModelRouterTest(unittest.TestCase):
    """Checks the arguments ModelRouter passes to the chat-completions call."""

    def test_generate_text_passes_thinking_extra_body(self):
        # The image and edit roles share the same endpoint settings.
        image_settings = {
            "model": "image-model",
            "base_url": "https://image.example/v1",
            "api_key": "image-key",
            "adapter": "raw_chat_multimodal",
        }
        prompt_profile = ModelProfile(
            role="prompt",
            model="text-model",
            base_url="https://text.example/v1",
            api_key="text-key",
            thinking="enabled",
        )
        profile_set = ModelProfileSet(
            prompt=prompt_profile,
            image=ModelProfile(role="image", **image_settings),
            edit=ModelProfile(role="edit", **image_settings),
        )

        # Minimal stand-in for a chat completion with one choice.
        fake_message = SimpleNamespace(content="ok")
        fake_response = SimpleNamespace(choices=[SimpleNamespace(message=fake_message)])

        with patch("src.model_router.OpenAI") as mocked_openai:
            mocked_create = mocked_openai.return_value.chat.completions.create
            mocked_create.return_value = fake_response

            output = ModelRouter(profile_set).generate_text("hello")

        self.assertEqual(output, "ok")
        mocked_create.assert_called_once()
        sent_kwargs = mocked_create.call_args.kwargs
        self.assertEqual(sent_kwargs["extra_body"], {"thinking": {"type": "enabled"}})


if __name__ == "__main__":
    unittest.main()
11 changes: 11 additions & 0 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"eslint": "^8.55.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.5",
"fake-indexeddb": "^6.2.5",
"fast-check": "^3.14.0",
"postcss": "^8.4.32",
"prettier": "^3.1.1",
Expand Down
Loading