diff --git a/api/models.py b/api/models.py index dc0a531..f0db170 100644 --- a/api/models.py +++ b/api/models.py @@ -32,7 +32,8 @@ class TextApiConfig(BaseModel): base_url: str = Field(..., description="文本模型 API 基础 URL") model: str = Field("gemini-3-pro-preview", description="文本模型名称") format: Literal["gemini", "openai"] = Field("gemini", description="API 格式") - thinking_level: Optional[Literal["low", "high"]] = Field(None, description="思考深度") + thinking: Literal["enabled", "disabled"] = Field("disabled", description="思考模式") + thinking_level: Optional[Literal["low", "high"]] = Field(None, description="旧版 Gemini 思考深度") class ModelProfileConfig(BaseModel): @@ -43,6 +44,7 @@ class ModelProfileConfig(BaseModel): base_url: str = Field(..., description="OpenAI-compatible Base URL") api_key: str = Field(..., description="API Key") adapter: str = Field("openai_chat", description="适配器") + thinking: Optional[Literal["enabled", "disabled"]] = Field(None, description="OpenAI-compatible thinking mode") class ModelProfilesConfig(BaseModel): @@ -121,6 +123,12 @@ def get_thinking_level(self) -> Optional[str]: return self.text.thinking_level return None + def get_thinking(self) -> str: + """获取思考模式""" + if self.text: + return self.text.thinking + return "disabled" + class GenerationRequest(BaseModel): """生成请求""" diff --git a/api/profile_resolver.py b/api/profile_resolver.py index 03a1954..c6491b4 100644 --- a/api/profile_resolver.py +++ b/api/profile_resolver.py @@ -46,6 +46,7 @@ def profiles_from_generation_config(config: Any) -> ModelProfileSet: "base_url": config.text.base_url, "api_key": config.text.api_key, "adapter": "openai_chat", + "thinking": getattr(config.text, "thinking", "disabled"), }, "image_model": { "model": config.image.model, @@ -72,6 +73,7 @@ def profiles_from_generation_config(config: Any) -> ModelProfileSet: "base_url": config.get_text_base_url(), "api_key": config.get_text_api_key(), "adapter": "openai_chat", + "thinking": config.get_thinking(), }, "image_model": { "model": config.get_image_model(), diff --git a/config.example.yaml b/config.example.yaml index 6282d55..021a8b2 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -10,6 +10,7 @@ api: base_url: "https://api.example.com/v1" model: "DeepSeek-V4-Pro" adapter: "openai_chat" + thinking: "disabled" # 可选: "enabled", "disabled" image_model: api_key: "your-image-api-key" base_url: "https://api.example.com/v1" diff --git a/docs/assets/aippt-demo.gif b/docs/assets/aippt-demo.gif index de6a22c..13f550a 100644 Binary files a/docs/assets/aippt-demo.gif and b/docs/assets/aippt-demo.gif differ diff --git a/docs/assets/aippt-demo.webm b/docs/assets/aippt-demo.webm index 6d7c4b3..314e2e2 100644 Binary files a/docs/assets/aippt-demo.webm and b/docs/assets/aippt-demo.webm differ diff --git a/src/client.py b/src/client.py index 164f34b..c562da5 100644 --- a/src/client.py +++ b/src/client.py @@ -96,10 +96,14 @@ def _generate_text_openai(self, prompt: str, system_instruction: str = None) -> messages.append({"role": "user", "content": prompt}) - response = self._openai_client.chat.completions.create( - model=self.config.text_model, - messages=messages - ) + kwargs = { + "model": self.config.text_model, + "messages": messages, + } + if self.config.text_thinking in {"enabled", "disabled"}: + kwargs["extra_body"] = {"thinking": {"type": self.config.text_thinking}} + + response = self._openai_client.chat.completions.create(**kwargs) return response.choices[0].message.content diff --git a/src/config.py b/src/config.py index 53dc1ab..f88eae9 100644 --- a/src/config.py +++ b/src/config.py @@ -55,7 +55,8 @@ class APIConfig: text_model: str = None text_base_url: str = None # 可选,单独的文本 API 地址 text_api_key: str = None # 可选,单独的文本 API 密钥 - text_thinking_level: Optional[Literal["low", "high"]] = None # 思考深度 + text_thinking: Optional[Literal["enabled", "disabled"]] = None # OpenAI-compatible 思考模式 + text_thinking_level: Optional[Literal["low", "high"]] = None # 旧版 Gemini 思考深度 def __post_init__(self): """从配置文件加载默认值""" @@ -80,6 +81,8 @@ def __post_init__(self): self.text_base_url = text_config.get("base_url") # 可选 if self.text_api_key is None: self.text_api_key = text_config.get("api_key") # 可选 + if self.text_thinking is None: + self.text_thinking = text_config.get("thinking", "disabled") if self.text_thinking_level is None: self.text_thinking_level = text_config.get("thinking_level") # 可选 diff --git a/src/config_writer.py b/src/config_writer.py index b196c18..2e37e1a 100644 --- a/src/config_writer.py +++ b/src/config_writer.py @@ -38,6 +38,8 @@ def _clean_profile_data(profile_data: Dict[str, Any], existing_models: Dict[str, "api_key": api_key, "adapter": profile.get("adapter", "openai_chat" if role == "prompt_model" else "raw_chat_multimodal"), } + if role == "prompt_model": + cleaned[role]["thinking"] = profile.get("thinking", existing_profile.get("thinking", "disabled")) if profile.get("id"): cleaned[role]["id"] = profile["id"] if profile.get("label"): diff --git a/src/document_parser.py b/src/document_parser.py index 0370a61..6f8ac63 100644 --- a/src/document_parser.py +++ b/src/document_parser.py @@ -5,6 +5,7 @@ text stay lightweight and do not require Docling. """ +import re from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, Optional @@ -51,7 +52,7 @@ def _parse_pdf_text(self, path: Path) -> Optional[ParsedDocument]: try: from pypdf import PdfReader except ImportError: - return None + return self._parse_literal_pdf_text(path) try: reader = PdfReader(str(path)) @@ -60,8 +61,12 @@ def _parse_pdf_text(self, path: Path) -> Optional[ParsedDocument]: text = page.extract_text() or "" if text.strip(): pages.append(f"\n{text.strip()}") + if not pages: + fallback_text = self._extract_literal_pdf_text(path) + if fallback_text: + pages.append(f"\n{fallback_text}") except Exception: - return None + return self._parse_literal_pdf_text(path) return ParsedDocument( filename=path.name, @@ -69,6 +74,26 @@ def _parse_pdf_text(self, path: Path) -> Optional[ParsedDocument]: metadata={"parser": "pypdf", "extension": ".pdf", "pages": len(reader.pages)}, ) + def _parse_literal_pdf_text(self, path: Path) -> Optional[ParsedDocument]: + fallback_text = self._extract_literal_pdf_text(path) + if not fallback_text: + return None + return ParsedDocument( + filename=path.name, + normalized_markdown=f"\n{fallback_text}", + metadata={"parser": "pypdf", "extension": ".pdf", "pages": 1}, + ) + + def _extract_literal_pdf_text(self, path: Path) -> str: + raw = path.read_bytes() + chunks = [] + for match in re.findall(rb"\((.*?)\)\s*Tj", raw, flags=re.DOTALL): + text = match.replace(rb"\(", b"(").replace(rb"\)", b")").replace(rb"\\", b"\\") + decoded = text.decode("utf-8", errors="ignore").strip() + if decoded: + chunks.append(decoded) + return "\n".join(chunks) + def _parse_with_docling(self, path: Path) -> ParsedDocument: try: from docling.document_converter import DocumentConverter diff --git a/src/model_profiles.py b/src/model_profiles.py index bbc0051..44dc278 100644 --- a/src/model_profiles.py +++ b/src/model_profiles.py @@ -28,6 +28,7 @@ class ModelProfile: adapter: str = "" id: str = "" label: str = "" + thinking: str = "disabled" def __post_init__(self): if self.base_url: @@ -40,6 +41,8 @@ def __post_init__(self): self.id = self.role if not self.label: self.label = self.model + if self.thinking not in {"enabled", "disabled"}: + self.thinking = "disabled" def to_public_dict(self) -> Dict[str, Any]: data = asdict(self) @@ -195,6 +198,7 @@ def _profile_from_dict(role: str, data: Dict[str, Any]) -> ModelProfile: base_url=base_url, api_key=api_key, adapter=data.get("adapter", DEFAULT_ADAPTERS.get(role, "openai_chat")), + thinking=data.get("thinking", "disabled"), ) diff --git a/src/model_router.py b/src/model_router.py index 85ea282..66709aa 100644 --- a/src/model_router.py +++ b/src/model_router.py @@ -30,7 +30,13 @@ def generate_text(self, prompt: str, system_instruction: Optional[str] = None) - if system_instruction: messages.append({"role": "system", "content": system_instruction}) messages.append({"role": "user", "content": prompt}) - response = client.chat.completions.create(model=profile.model, messages=messages) + kwargs: Dict[str, Any] = { + "model": profile.model, + "messages": messages, + } + if profile.thinking in {"enabled", "disabled"}: + kwargs["extra_body"] = {"thinking": {"type": profile.thinking}} + response = client.chat.completions.create(**kwargs) if not response.choices: return "" return response.choices[0].message.content or "" diff --git a/tests/test_config_writer.py b/tests/test_config_writer.py index 126701f..48616b3 100644 --- a/tests/test_config_writer.py +++ b/tests/test_config_writer.py @@ -22,6 +22,7 @@ def test_saves_model_profiles_to_config_yaml(self): "base_url": "https://text.example/v1", "api_key": "text-key", "adapter": "openai_chat", + "thinking": "enabled", }, "image_model": { "model": "image", @@ -36,6 +37,7 @@ def test_saves_model_profiles_to_config_yaml(self): saved = yaml.safe_load(config_path.read_text()) self.assertEqual(saved["ppt"]["num_pages"], 3) self.assertEqual(saved["api"]["models"]["prompt_model"]["api_key"], "text-key") + self.assertEqual(saved["api"]["models"]["prompt_model"]["thinking"], "enabled") self.assertEqual(saved["api"]["models"]["image_model"]["model"], "image") def test_empty_api_key_preserves_existing_secret(self): diff --git a/tests/test_model_router.py b/tests/test_model_router.py new file mode 100644 index 0000000..60c9052 --- /dev/null +++ b/tests/test_model_router.py @@ -0,0 +1,54 @@ +import unittest +from types import SimpleNamespace +from unittest.mock import patch + +from src.model_profiles import ModelProfile, ModelProfileSet +from src.model_router import ModelRouter + + +class ModelRouterTest(unittest.TestCase): + def test_generate_text_passes_thinking_extra_body(self): + profiles = ModelProfileSet( + prompt=ModelProfile( + role="prompt", + model="text-model", + base_url="https://text.example/v1", + api_key="text-key", + thinking="enabled", + ), + image=ModelProfile( + role="image", + model="image-model", + base_url="https://image.example/v1", + api_key="image-key", + adapter="raw_chat_multimodal", + ), + edit=ModelProfile( + role="edit", + model="image-model", + base_url="https://image.example/v1", + api_key="image-key", + adapter="raw_chat_multimodal", + ), + ) + + response = SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))] + ) + + with patch("src.model_router.OpenAI") as openai_cls: + create = openai_cls.return_value.chat.completions.create + create.return_value = response + + result = ModelRouter(profiles).generate_text("hello") + + self.assertEqual(result, "ok") + create.assert_called_once() + self.assertEqual( + create.call_args.kwargs["extra_body"], + {"thinking": {"type": "enabled"}}, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/web/package-lock.json b/web/package-lock.json index ba2bf7e..25c3fd5 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -25,6 +25,7 @@ "eslint": "^8.55.0", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-refresh": "^0.4.5", + "fake-indexeddb": "^6.2.5", "fast-check": "^3.14.0", "jsdom": "^23.0.1", "postcss": "^8.4.32", @@ -3318,6 +3319,16 @@ "url": "https://github.com/sindresorhus/execa?sponsor=1" } }, + "node_modules/fake-indexeddb": { + "version": "6.2.5", + "resolved": "https://registry.npmjs.org/fake-indexeddb/-/fake-indexeddb-6.2.5.tgz", + "integrity": "sha512-CGnyrvbhPlWYMngksqrSSUT1BAVP49dZocrHuK0SvtR0D5TMs5wP0o3j7jexDJW01KSadjBp1M/71o/KR3nD1w==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, "node_modules/fast-check": { "version": "3.23.2", "resolved": "https://registry.npmjs.org/fast-check/-/fast-check-3.23.2.tgz", diff --git a/web/package.json b/web/package.json index 6588a0e..bac6959 100644 --- a/web/package.json +++ b/web/package.json @@ -28,6 +28,7 @@ "eslint": "^8.55.0", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-refresh": "^0.4.5", + "fake-indexeddb": "^6.2.5", "fast-check": "^3.14.0", "postcss": "^8.4.32", "prettier": "^3.1.1", diff --git a/web/src/components/ApiConfigForm.tsx b/web/src/components/ApiConfigForm.tsx index ef5a2a7..c8745c9 100644 --- a/web/src/components/ApiConfigForm.tsx +++ b/web/src/components/ApiConfigForm.tsx @@ -27,20 +27,20 @@ interface ModelCardProps { const toneStyles = { amber: { - card: 'border-amber-200 bg-amber-50/70', - icon: 'bg-amber-100 text-amber-700', - badge: 'bg-amber-100 text-amber-800 border-amber-200' + card: 'border-amber-200 bg-amber-50/75', + icon: 'bg-white text-amber-700 shadow-sm', + badge: 'bg-white/80 text-amber-800 border-amber-200', }, violet: { - card: 'border-violet-200 bg-violet-50/60', - icon: 'bg-violet-100 text-violet-700', - badge: 'bg-violet-100 text-violet-800 border-violet-200' + card: 'border-violet-200 bg-violet-50/55', + icon: 'bg-white text-violet-700 shadow-sm', + badge: 'bg-white/80 text-violet-800 border-violet-200', }, emerald: { - card: 'border-emerald-200 bg-emerald-50/60', - icon: 'bg-emerald-100 text-emerald-700', - badge: 'bg-emerald-100 text-emerald-800 border-emerald-200' - } + card: 'border-emerald-200 bg-emerald-50/55', + icon: 'bg-white text-emerald-700 shadow-sm', + badge: 'bg-white/80 text-emerald-800 border-emerald-200', + }, } function ModelCard({ @@ -54,49 +54,72 @@ function ModelCard({ errorLabel, emptyModelLabel, onToggle, - children + children, }: ModelCardProps) { const styles = toneStyles[tone] return ( -
+
) @@ -130,7 +151,7 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { return { image: !loaded.image.baseUrl, edit: !(loaded.edit || loaded.image).baseUrl, - text: !loaded.text.baseUrl + text: !loaded.text.baseUrl, } }) const [showImageApiKey, setShowImageApiKey] = useState(false) @@ -147,32 +168,32 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { useEffect(() => { let cancelled = false loadBackendModelProfiles() - .then(response => { + .then((response) => { if (cancelled || !response.success || !response.profiles) return const { prompt_model, image_model, edit_model } = response.profiles - setConfig(prev => ({ + setConfig((prev) => ({ image: { apiKey: prev.image.apiKey, baseUrl: image_model.base_url, - model: image_model.model + model: image_model.model, }, text: { apiKey: prev.text.apiKey, baseUrl: prompt_model.base_url, model: prompt_model.model, format: 'openai', - thinkingLevel: null + thinking: prompt_model.thinking || prev.text.thinking || 'disabled', }, edit: { apiKey: prev.edit?.apiKey || '', baseUrl: edit_model.base_url, - model: edit_model.model - } + model: edit_model.model, + }, })) setOpenSections({ image: !image_model.base_url, edit: !edit_model.base_url, - text: !prompt_model.base_url + text: !prompt_model.base_url, }) }) .catch(() => { @@ -189,20 +210,24 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { useEffect(() => { if (useSharedConfig) { - setConfig(prev => ({ + setConfig((prev) => ({ ...prev, text: { ...prev.text, apiKey: prev.image.apiKey, baseUrl: prev.image.baseUrl }, - edit: { ...(prev.edit || prev.image), apiKey: prev.image.apiKey, baseUrl: prev.image.baseUrl } + edit: { + ...(prev.edit || prev.image), + apiKey: prev.image.apiKey, + baseUrl: prev.image.baseUrl, + }, })) } }, [useSharedConfig]) const toggleSection = (section: ModelSection) => { - setOpenSections(prev => ({ ...prev, [section]: !prev[section] })) + setOpenSections((prev) => ({ ...prev, [section]: !prev[section] })) } const handleImageConfigChange = (field: keyof ImageApiConfig, value: string) => { - setConfig(prev => { + setConfig((prev) => { const newConfig = { ...prev, image: { ...prev.image, [field]: value } } if (useSharedConfig && (field === 'apiKey' || field === 'baseUrl')) { newConfig.text = { ...newConfig.text, [field]: value } @@ -214,26 +239,26 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { }) setSaved(false) if (errors.image?.[field]) { - setErrors(prev => ({ ...prev, image: { ...prev.image, [field]: undefined } })) + setErrors((prev) => ({ ...prev, image: { ...prev.image, [field]: undefined } })) } } const handleTextConfigChange = (field: keyof TextApiConfig, value: string | null) => { - setConfig(prev => ({ ...prev, text: { ...prev.text, [field]: value } })) + setConfig((prev) => ({ ...prev, text: { ...prev.text, [field]: value } })) setSaved(false) if (errors.text?.[field as keyof typeof errors.text]) { - setErrors(prev => ({ ...prev, text: { ...prev.text, [field]: undefined } })) + setErrors((prev) => ({ ...prev, text: { ...prev.text, [field]: undefined } })) } } const handleEditConfigChange = (field: keyof ImageApiConfig, value: string) => { - setConfig(prev => ({ + setConfig((prev) => ({ ...prev, - edit: { ...(prev.edit || prev.image), [field]: value } + edit: { ...(prev.edit || prev.image), [field]: value }, })) setSaved(false) if (errors.edit?.[field]) { - setErrors(prev => ({ ...prev, edit: { ...prev.edit, [field]: undefined } })) + setErrors((prev) => ({ ...prev, edit: { ...prev.edit, [field]: undefined } })) } } @@ -242,10 +267,10 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { if (!validation.isValid) { setErrors(validation.errors) setIsOpen(true) - setOpenSections(prev => ({ + setOpenSections((prev) => ({ image: prev.image || Boolean(validation.errors.image), edit: prev.edit || Boolean(validation.errors.edit), - text: prev.text || Boolean(validation.errors.text) + text: prev.text || Boolean(validation.errors.text), })) return } @@ -262,9 +287,9 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { } const inputClass = (hasError: boolean) => ` - w-full px-4 py-2.5 text-sm border rounded-lg transition-all duration-200 - focus:outline-none focus:ring-2 focus:ring-cyan-400 focus:border-transparent - ${hasError ? 'border-red-400 bg-red-50' : 'border-slate-200 bg-white'} + w-full px-4 py-2.5 text-sm border rounded-xl transition-all duration-200 + focus:outline-none focus:ring-2 focus:ring-primary-300 focus:border-transparent + ${hasError ? 'border-red-400 bg-red-50' : 'border-[var(--border-soft)] bg-white/85'} ` const keyPlaceholder = (hasBaseUrl: boolean, label: string) => hasBaseUrl ? t('api.keyConfiguredPlaceholder') : t('api.keyPlaceholder', { label }) @@ -278,7 +303,9 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { hasError: boolean ) => (
- +
setShowKey(!showKey)} - className="absolute inset-y-0 right-0 px-3 flex items-center text-slate-400 hover:text-slate-700" + className="absolute inset-y-0 right-0 px-3 flex items-center text-[var(--text-faint)] hover:text-[var(--text-strong)]" aria-label={showKey ? t('api.hideKey') : t('api.showKey')} > {showKey ? ( - + ) : ( - + )} @@ -307,20 +344,31 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { const editConfig = config.edit || config.image const configuredModels = [config.text.model, config.image.model, editConfig.model].filter(Boolean) - const modelSummary = configuredModels.length > 0 ? configuredModels.join(' / ') : t('api.unsetModel') + const modelSummary = + configuredModels.length > 0 ? configuredModels.join(' / ') : t('api.unsetModel') return ( -
+
- {config.text.format === 'gemini' && ( -
- -
- {[ - { value: null, label: t('api.thinking.off') }, - { value: 'low', label: t('api.thinking.low') }, - { value: 'high', label: t('api.thinking.high') } - ].map(option => ( - - ))} -
+
+ +
+ {( + [ + { value: 'disabled', label: t('api.thinking.disabled') }, + { value: 'enabled', label: t('api.thinking.enabled') }, + ] as const + ).map((option) => ( + + ))}
- )} +
@@ -552,8 +637,18 @@ function ApiConfigForm({ onConfigChange, initialConfig }: ApiConfigFormProps) { {saved && ( - - + + {t('common.saved')} diff --git a/web/src/components/CenterPanel.tsx b/web/src/components/CenterPanel.tsx index 8a2c5b5..6caaf5b 100644 --- a/web/src/components/CenterPanel.tsx +++ b/web/src/components/CenterPanel.tsx @@ -32,7 +32,7 @@ function CenterPanel({
{isEditMode && editSession ? ( // 编辑模式 -
+
) : ( // 设置模式 -
+
{/* Section Header */} -
-
+
+
@@ -62,8 +62,8 @@ function CenterPanel({ {/* 设置表单区域 */}
{children || ( -
-
+
+
diff --git a/web/src/components/EditHistory.tsx b/web/src/components/EditHistory.tsx index 1c5b2a2..fcff85a 100644 --- a/web/src/components/EditHistory.tsx +++ b/web/src/components/EditHistory.tsx @@ -22,24 +22,27 @@ function EditHistory({ history, onRevert }: EditHistoryProps) { return date.toLocaleTimeString(language === 'zh' ? 'zh-CN' : 'en-US', { hour: '2-digit', minute: '2-digit', - second: '2-digit' + second: '2-digit', }) } return ( -
-

+
+

{t('edit.historyTitle', { count: history.length })}

-
+
{history.map((item, index) => (
{/* 缩略图 */} -
+
-

- {formatTimestamp(item.timestamp)} -

+

{formatTimestamp(item.timestamp)}

{item.instruction} @@ -67,7 +68,7 @@ function EditHistory({ history, onRevert }: EditHistoryProps) { {/* 回退按钮 */}

- {/* 当前图片 */} -
-

- {hasChanges ? t('edit.current') : t('edit.original')} -

-
- Current slide - {isEditing && ( -
-
- - + {/* 生成结果 */} + {shouldShowResult && ( +
+

+ {t('edit.result')} +

+
-
+ + {isEditing && ( + + + + + + + {t('edit.generating')} + + + )} + + + {hasChanges && ( +

+ {t('edit.resultHint')} +

)}
-
+ )}
- {/* 编辑历史 */} - {editSession.history.length > 0 && ( -
- -
- )} - - {/* 修改指令输入区域 */} -
- -
+
+ {/* 修改指令输入区域 */} + +