diff --git a/.gitignore b/.gitignore index 0be8af1..6e426c2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ manifest.ini *.nvda-addon .sconsign.dblite /[0-9]*.[0-9]*.[0-9]*.json +venv +.env +.python-version diff --git a/addon/globalPlugins/WordBridge/__init__.py b/addon/globalPlugins/WordBridge/__init__.py index 0bf7655..19a7e7b 100644 --- a/addon/globalPlugins/WordBridge/__init__.py +++ b/addon/globalPlugins/WordBridge/__init__.py @@ -31,7 +31,7 @@ from .dictionary.dialog import DictionaryEntryDialog from .lib.coseeing import obtain_openai_key from .lib.decimalUtils import decimal_to_str_0 -from .lib.typo_corrector import ChineseTypoCorrector, ChineseTypoCorrectorLite +from .lib.typo_corrector import ChineseTypoCorrector, ChineseTypoCorrectorLite, CorrectionOrchestrator from .lib.utils import strings_diff from .lib.viewHTML import text2template from hanzidentifier import has_chinese @@ -231,7 +231,8 @@ def correctTypo(self, request): try: batch_mode = not DEBUG_MODE - response, _diff_ = corrector.correct_text(request, batch_mode=batch_mode) + orchestrator = CorrectionOrchestrator(corrector) + response, _diff_ = orchestrator.execute(request, batch_mode=batch_mode) except Exception as e: ui.message(_("Sorry, an error occurred during the program execution, the details are: {e}").format(e=e)) log.warning(_("Sorry, an error occurred during the program execution, the details are: {e}").format(e=e)) diff --git a/addon/globalPlugins/WordBridge/configManager.py b/addon/globalPlugins/WordBridge/configManager.py index b17e69a..c618a85 100644 --- a/addon/globalPlugins/WordBridge/configManager.py +++ b/addon/globalPlugins/WordBridge/configManager.py @@ -12,7 +12,7 @@ "Google": _("Google"), "OpenAI": _("OpenAI"), "OpenRouter": _("OpenRouter"), - "claude-3-5-haiku-20241022": _("claude-3-5-haiku"), + "claude-haiku-4-5-20251001": _("claude-4-5-haiku"), "claude-3-7-sonnet-20250219": _("claude-3.7-sonnet"), "claude-sonnet-4-20250514": _("claude-4-sonnet"), "deepseek-v3": 
_("deepseek-v3"), diff --git a/addon/globalPlugins/WordBridge/corrector_config/Anthropic-00002-claude-4-5-haiku.json b/addon/globalPlugins/WordBridge/corrector_config/Anthropic-00002-claude-4-5-haiku.json new file mode 100644 index 0000000..94cd346 --- /dev/null +++ b/addon/globalPlugins/WordBridge/corrector_config/Anthropic-00002-claude-4-5-haiku.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "claude-haiku-4-5-20251001", + "provider": "Anthropic", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} diff --git a/addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro-preview.json b/addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash.json similarity index 85% rename from addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro-preview.json rename to addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash.json index 1f4dc7f..66e9085 100644 --- a/addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro-preview.json +++ b/addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash.json @@ -1,6 +1,6 @@ { "model": { - "model_name": "gemini-2.5-pro-preview-06-05", + "model_name": "gemini-2.5-flash", "provider": "Google", "llm_access_method": "personal_api_key", "require_secret_key": false, @@ -13,4 +13,4 @@ "no_explanation": false } } -} \ No newline at end of file +} diff --git a/addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro.json b/addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro.json new file mode 100644 index 0000000..fa03828 --- /dev/null +++ b/addon/globalPlugins/WordBridge/corrector_config/Google-00002-gemini-2.5-pro.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": 
"gemini-2.5-pro", + "provider": "Google", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} diff --git a/addon/globalPlugins/WordBridge/lib/cost_calculator.py b/addon/globalPlugins/WordBridge/lib/cost_calculator.py new file mode 100644 index 0000000..d3452eb --- /dev/null +++ b/addon/globalPlugins/WordBridge/lib/cost_calculator.py @@ -0,0 +1,44 @@ +""" +Cost Calculator for tracking LLM usage and costs. + +This module provides utilities for: +- Tracking token usage from API responses +- Calculating costs based on model pricing +""" + +from decimal import Decimal +from collections import defaultdict + +class CostCalculator: + def __init__(self, model_entry: dict): + self._model_entry = model_entry + self._pricing = model_entry.get("pricing", {}) + self._usage_key = model_entry.get("usage_key") + + def get_total_usage(self, response_history: list) -> dict: + total_usage = defaultdict(int) + if not self._usage_key: + return total_usage + + for response in response_history: + if isinstance(response, dict) and self._usage_key in response: + for usage_type in self._pricing: + if usage_type == "base_unit": + continue + try: + total_usage[usage_type] += response[self._usage_key][usage_type] + except KeyError: + pass + + return dict(total_usage) + + def get_total_cost(self, response_history: list) -> Decimal: + cost = Decimal("0") + usages = self.get_total_usage(response_history) + for key, value in usages.items(): + cost += ( + Decimal(str(self._pricing[key])) + * Decimal(str(value)) + / Decimal(str(self._pricing["base_unit"])) + ) + return cost diff --git a/addon/globalPlugins/WordBridge/lib/provider.py b/addon/globalPlugins/WordBridge/lib/provider.py index 8e70ff3..03dfe34 100644 --- a/addon/globalPlugins/WordBridge/lib/provider.py +++ 
def chat_completion(self, messages, system_template, retries=2, backoff=1):
    """POST a chat request to the provider and return the decoded JSON reply.

    The request body, URL and headers come from ``get_request_data``,
    ``self.url`` and ``get_headers``. The request is attempted up to
    ``retries`` times. The timeout grows with the attempt number
    (``timeout0 * attempt``, capped at ``timeout_max``), except for the
    provider named "ollama", which always gets 300 seconds.

    Args:
        messages: Conversation passed through to ``get_request_data``.
        system_template: System prompt passed through to ``get_request_data``.
        retries: Maximum number of POST attempts.
        backoff: Initial delay in seconds between attempts. It is multiplied
            by a random factor in [1, 2) after each failure and capped at 3.

    Returns:
        The response body parsed with ``response.json()``.

    Raises:
        Exception: If no attempt produced a response. The last underlying
            error is attached as ``__cause__``. ``handle_errors`` may also
            raise for an error response.
    """
    request_data = self.get_request_data(messages, system_template)
    api_url = self.url
    headers = self.get_headers()

    current_backoff = backoff
    response = None
    request_error = None
    last_exception = None

    for attempt in range(retries):
        if self.name != "ollama":
            timeout = min(self.timeout0 * (attempt + 1), self.timeout_max)
        else:
            timeout = 300
        try:
            response = requests.post(
                api_url,
                headers=headers,
                json=request_data,
                timeout=timeout,
            )
            break
        except Exception as e:
            last_exception = e
            request_error = type(e).__name__
            log.error(
                "Try = {try_index}, {request_error}, an error occurred when sending {provider} request: {e}".format(
                    try_index=(attempt + 1),
                    request_error=request_error,
                    provider=self.name,
                    e=e
                )
            )
            current_backoff = min(current_backoff * (1 + random.random()), 3)
            # Only wait when another attempt follows; sleeping after the
            # final failure would just delay the error report.
            if attempt + 1 < retries:
                time.sleep(current_backoff)

    if response is None:
        raise Exception(
            _("HTTP request error ({request_error}). Please check the network setting.").format(
                request_error=request_error
            )
        ) from last_exception

    self.handle_errors(response)
    return response.json()
class CorrectionOrchestrator:
    """Run a typo corrector over a text: one full pass, then re-correction.

    The wrapped corrector must expose ``correct_segment``,
    ``provider_object``, ``response_history`` and
    ``max_correction_attempts``.
    """

    def __init__(self, corrector: 'BaseTypoCorrector'):
        self.corrector = corrector

    def execute(self, text: str, batch_mode: bool = True, fake_corrected_text: str = None) -> Tuple[str, List]:
        """Orchestrate the full text correction process.

        Args:
            text: The text to correct.
            batch_mode: When true, segments are corrected concurrently via
                ``parallel_map``; otherwise one after another.
            fake_corrected_text: When given, no correction is performed and
                this value is returned as the corrected text.

        Returns:
            A tuple containing the corrected text and the list of
            differences produced by ``strings_diff``.
        """
        if fake_corrected_text is not None:
            return fake_corrected_text, strings_diff(text, fake_corrected_text)

        corrector = self.corrector
        corrector.provider_object.try_connection()

        # Initial correction pass over coarse segments.
        segments = text_segmentation(text, max_length=100)
        if batch_mode:
            results = parallel_map(corrector.correct_segment, segments)
        else:
            results = [corrector.correct_segment(segment) for segment in segments]

        pieces = []
        for result in results:
            pieces.append(result.corrected_text)
            corrector.response_history.append(result.response_json)
        text_corrected = "".join(pieces)

        # Iterative refinement loop.
        recorrection_history = None
        max_attempts = corrector.max_correction_attempts
        for i in range(max_attempts):
            text_corrected_revised, typo_indices = find_correction_errors(text, text_corrected)

            # Nothing left to revise: the result is stable.
            if text_corrected_revised == text_corrected:
                break

            segments_revised = text_segmentation(text_corrected_revised, max_length=20)
            if recorrection_history is None:
                recorrection_history = [[] for _ in segments_revised]
            # Segmentation is recomputed on every pass, so the segment count
            # can grow after the history was first sized. Pad it so indexing
            # and pairing below never run short.
            while len(recorrection_history) < len(segments_revised):
                recorrection_history.append([])

            segments_to_recorrect = get_segments_to_recorrect(segments_revised, typo_indices)

            # Earlier answers are only fed back after the first third of the
            # attempts; before that every segment starts with empty history.
            if i >= max_attempts / 3:
                history_for_correction = recorrection_history
            else:
                history_for_correction = [[] for _ in segments_revised]

            if batch_mode:
                results = parallel_map(
                    corrector.correct_segment,
                    segments_to_recorrect,
                    iterable_kwargs=[{"previous_results": h} for h in history_for_correction]
                )
            else:
                results = [
                    corrector.correct_segment(seg, h)
                    for seg, h in zip(segments_to_recorrect, history_for_correction)
                ]

            pieces = []
            for j, original_segment in enumerate(segments_revised):
                result = results[j]
                res_text = result.corrected_text
                if res_text:
                    pieces.append(res_text)
                    # Skip duplicates and implausibly long answers.
                    if res_text not in recorrection_history[j] and len(res_text) < len(text) * 2:
                        recorrection_history[j].append(res_text)
                    corrector.response_history.append(result.response_json)
                else:
                    # Empty result: keep the segment as it was.
                    pieces.append(original_segment)
            text_corrected = "".join(pieces)

        final_text = review_correction_errors(text, text_corrected)
        diff = strings_diff(text, final_text)

        return final_text, diff
"completion_tokens": "2.19", - "base_unit": "1000000" - }, - "deepseek/deepseek-chat:free": {}, - "deepseek/deepseek-chat-v3-0324:free": {}, - "deepseek/deepseek-r1-0528:free": {}, - "deepseek/deepseek-r1-0528-qwen3-8b:free": {}, - "gemini-2.5-flash-preview-05-20": { - "usage_key": "usageMetadata", - "promptTokenCount": "0.15", - "candidatesTokenCount": "0.6", - "base_unit": "1000000" - }, - "gemini-2.5-pro-preview-06-05": { - "usage_key": "usageMetadata", - "promptTokenCount": "1.25", - "candidatesTokenCount": "10", - "base_unit": "1000000" - }, - "gpt-4o-2024-08-06": { - "usage_key": "usage", - "prompt_tokens": "2.5", - "completion_tokens": "10", - "base_unit": "1000000" - }, - "gpt-4o-mini-2024-07-18": { - "usage_key": "usage", - "prompt_tokens": "0.15", - "completion_tokens": "0.6", - "base_unit": "1000000" - }, - "gpt-4.1-2025-04-14": { - "usage_key": "usage", - "prompt_tokens": "2", - "completion_tokens": "8", - "base_unit": "1000000" - }, - "gpt-4.1-mini-2025-04-14": { - "usage_key": "usage", - "prompt_tokens": "0.4", - "completion_tokens": "1.6", - "base_unit": "1000000" - }, - "gpt-4.1-nano-2025-04-14": { - "usage_key": "usage", - "prompt_tokens": "0.1", - "completion_tokens": "0.4", - "base_unit": "1000000" - }, - "o4-mini-2025-04-16": { - "usage_key": "usage", - "prompt_tokens": "1.1", - "completion_tokens": "4.4", - "base_unit": "1000000" - }, - "gpt-5-chat-latest": { - "usage_key": "usage", - "prompt_tokens": "1.25", - "completion_tokens": "10", - "base_unit": "1000000", - }, - "gpt-5": { - "usage_key": "usage", - "prompt_tokens": "1.25", - "completion_tokens": "10", - "base_unit": "1000000", - }, - "gpt-5-mini": { - "usage_key": "usage", - "prompt_tokens": "0.25", - "completion_tokens": "2", - "base_unit": "1000000", - }, - "gpt-5-nano": { - "usage_key": "usage", - "prompt_tokens": "0.05", - "completion_tokens": "0.4", - "base_unit": "1000000", - }, - "ernie-4.0-turbo-8k": {}, - } def __init__( self, @@ -189,7 +130,7 @@ def __init__( ): self.model = 
model - self.provider_object = self.PROVIDER[provider.lower()](credential, model) + self.provider_object = get_provider(provider, credential, model, llm_settings) self.max_correction_attempts = max_correction_attempts self.httppost_retries = httppost_retries @@ -210,78 +151,15 @@ def __init__( self.question_string = "" self.answer_string = "" - def correct_text(self, text: str, batch_mode: bool = True, fake_corrected_text: str = None) -> Tuple: - """ - Analyze typos of text using self.segment_corrector. It also analyzes the difference between the original - text and corrected text. - - Parameters: - text (str): The text to be analyzed for typos. - batch_mode (bool): If specified, enable multithread for typo correction - fake_corrected_text (str, optional): If specified, return input text without correction steps. - - Returns: - A tuple containing the corrected text and a list of differences between the original and corrected text. - """ - if fake_corrected_text is not None: - return fake_corrected_text, strings_diff(text, fake_corrected_text) - - base_url = self.provider_object.base_url - self._try_internet_connection(base_url) - - text_corrected = "" - segments = text_segmentation(text, max_length=100) - - # Typo correction - if batch_mode: - corrector_result_list = self.correct_segment_batch(segments) - else: - corrector_result_list = [self.correct_segment(segment) for segment in segments] - for corrector_result in corrector_result_list: - text_corrected += corrector_result.corrected_text - self.response_history.append(corrector_result.response_json) - - # Find typo and keep correcting - recorrection_history = None - for i in range(self.max_correction_attempts): - # Find typo - text_corrected_previous = text_corrected - text_corrected_revised, typo_indices = find_correction_errors(text, text_corrected) - - # No typo, stop correction - if text_corrected_revised == text_corrected: - break - - # Keep correction - text_corrected = "" - segments_revised = 
def _load_model_config(self, config_key: str) -> dict:
    """Look up ``config_key`` in the bundled ``setting/llm_models.json``.

    The file is resolved relative to this module (two directories up, then
    ``setting``). An unknown key yields an empty dict.
    """
    settings_dir = Path(__file__).parent.parent / "setting"
    raw_config = (settings_dir / "llm_models.json").read_text(encoding="utf8")
    all_models = json.loads(raw_config)
    return all_models.get(config_key, {})
fake_operation: bool = False) -> str: if fake_operation or not self._has_target_language(input_text): @@ -310,106 +188,11 @@ def correct_segment(self, input_text: str, previous_results: list = [], fake_ope return corrector_result - def correct_segment_batch(self, input_text_list: list, previous_results_list: list = []) -> list: - assert isinstance(input_text_list, list) - - if not previous_results_list: - previous_results_list = [[] for _ in range(len(input_text_list))] - - if not input_text_list: - return input_text_list - - output_text_list = [None] * len(input_text_list) - - futures = [] - with ThreadPoolExecutor(max_workers=20) as executor: - future_to_index = { - executor.submit( - self._correct_segment_task, - input_text_list[index], - previous_results_list[index], - output_text_list, - index, - ): index for index in range(len(input_text_list)) - } - try: - for future in as_completed(future_to_index): - future.result() - except Exception as e: - executor.shutdown(wait=False) - raise e - - return output_text_list - def get_total_usage(self) -> Dict: - """ - Get the total usage of OpenAI model (in tokens) - - Returns: - The total usage of OpenAI model (in tokens) - """ - usage_key = self.MODEL[self.model].get("usage_key") - total_usage = defaultdict(int) - if not usage_key: - return total_usage - - for response in self.response_history: - if isinstance(response, dict) and usage_key in response: - for usage_type in set(self.MODEL[self.model].keys()): - if usage_type == "base_unit" or usage_type == "usage_key": - continue - try: - total_usage[usage_type] += response[usage_key][usage_type] - except KeyError: - pass - - return total_usage - - def get_total_cost(self) -> int: - """ - Get the total cost of provider model (in USD) - - Returns: - The total cost of provider model (in USDs) - """ - price_info = self.MODEL[self.model] - cost = Decimal("0") - usages = self.get_total_usage() - for key, value in usages.items(): - cost += Decimal(price_info[key]) * 
Decimal(str(value)) / Decimal(price_info["base_unit"]) - - return cost - - def _correct_segment_task( - self, - input_text: str, - previous_results: list, - output_text_list: list, - index: int, - ) -> str: - text = self.correct_segment(input_text, previous_results) - output_text_list[index] = text - - def _try_internet_connection(self, url, timeout=10, try_count=1): - for r in range(try_count): - try: - response = requests.get(url, timeout=timeout) - return - except Exception as e: - request_error = type(e).__name__ - log.error( - "Try = {try_index}, {request_error}, an error occurred when sending request: {e}".format( - try_index=(r + 1), - request_error=request_error, - e=e, - ) - ) + return self._cost_calculator.get_total_usage(self.response_history) - raise Exception( - _("HTTP request error ({request_error}). Please check the network setting.").format( - request_error=request_error - ) - ) + def get_total_cost(self) -> Decimal: + return self._cost_calculator.get_total_cost(self.response_history) def _get_input_info(self, input_text): input_info = { @@ -478,51 +261,12 @@ def _chat_completion(self, input: List, response_text_history: List, input_info: system_template = system_template.replace("\\n", "\n") system_template = self._system_add_guidance(system_template, input_info) - request_data = self.provider_object.get_request_data(messages, system_template) - api_url = self.provider_object.url - headers = self.provider_object.get_headers() - - return self._post_with_retries(request_data, api_url, headers) - - def _post_with_retries(self, request_data, api_url, headers): - backoff = self.backoff - response_json = None - timeout0 = self.provider_object.timeout0 - timeout_max = self.provider_object.timeout_max - for r in range(self.httppost_retries): - timeout = min(timeout0 * (r + 1), timeout_max) if self.provider_object.name != "ollama" else 300 - request_error = None - response = None - try: - response = requests.post( - api_url, - headers=headers, - 
def parallel_map(
    func: Callable,
    iterable: Iterable,
    max_workers: int = 20,
    iterable_kwargs: Iterable[Dict] = None,
    *args,
    **kwargs
) -> List:
    """Execute a function over an iterable in parallel using a thread pool.

    Args:
        func: Callable invoked as ``func(item, *args, **kwargs)``.
        iterable: Items to process. Any iterable is accepted, including
            generators; it is consumed once up front.
        max_workers: Size of the thread pool.
        iterable_kwargs: Optional per-item keyword arguments, paired with
            the items by position. They override same-named ``kwargs``.
            Surplus entries are ignored.
        *args: Extra positional arguments passed to every call.
        **kwargs: Extra keyword arguments passed to every call.

    Returns:
        Results in the same order as the input iterable.

    Raises:
        ValueError: If ``iterable_kwargs`` has fewer entries than
            ``iterable``.
        Exception: The first exception raised by ``func`` is re-raised
            after calls that have not started yet are cancelled.
    """
    # Materialise once: the item count is needed and generators have no len().
    items = list(iterable)

    if iterable_kwargs is None:
        per_item_kwargs = [{} for _ in items]
    else:
        per_item_kwargs = list(iterable_kwargs)
        if len(per_item_kwargs) < len(items):
            raise ValueError(
                "iterable_kwargs has {given} entries but {needed} items were given".format(
                    given=len(per_item_kwargs),
                    needed=len(items),
                )
            )

    if not items:
        return []

    results = [None] * len(items)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(func, item, *args, **{**kwargs, **extra}): index
            for index, (item, extra) in enumerate(zip(items, per_item_kwargs))
        }
        try:
            for future in as_completed(future_to_index):
                results[future_to_index[future]] = future.result()
        except BaseException:
            # Fail fast: drop queued calls so leaving the pool does not wait
            # for work whose result will be discarded anyway.
            for pending in future_to_index:
                pending.cancel()
            raise
    return results
"model_name": "claude-haiku-4-5-20251001", + "provider": "Anthropic", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00001-deepseek-v3.json b/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00001-deepseek-v3.json new file mode 100644 index 0000000..d7f2bbe --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00001-deepseek-v3.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek-v3", + "provider": "Baidu", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00002-ernie-4.0-turbo-8k.json b/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00002-ernie-4.0-turbo-8k.json new file mode 100644 index 0000000..941864a --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Baidu-00002-ernie-4.0-turbo-8k.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "ernie-4.0-turbo-8k", + "provider": "Baidu", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00001-deepseek-chat-v3-0324.json b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00001-deepseek-chat-v3-0324.json new file mode 100644 index 0000000..f2ea380 --- /dev/null +++ 
b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00001-deepseek-chat-v3-0324.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek/deepseek-chat-v3-0324:free", + "provider": "OpenRouter", + "llm_access_method": "coseeing_relay", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00002-deepseek-chat.json b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00002-deepseek-chat.json new file mode 100644 index 0000000..9231b81 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00002-deepseek-chat.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek/deepseek-chat:free", + "provider": "OpenRouter", + "llm_access_method": "coseeing_relay", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00003-gpt-4.1.json b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00003-gpt-4.1.json new file mode 100644 index 0000000..aee94c2 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00003-gpt-4.1.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4.1-2025-04-14", + "provider": "OpenAI", + "llm_access_method": "coseeing_relay", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git 
a/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00004-gpt-4.1-mini.json b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00004-gpt-4.1-mini.json new file mode 100644 index 0000000..1b7cefe --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00004-gpt-4.1-mini.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4.1-mini-2025-04-14", + "provider": "OpenAI", + "llm_access_method": "coseeing_relay", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash-preview.json b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00005-gemini-2.5-flash-preview.json similarity index 87% rename from addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash-preview.json rename to addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00005-gemini-2.5-flash-preview.json index b6eaec1..e22d3f1 100644 --- a/addon/globalPlugins/WordBridge/corrector_config/Google-00001-gemini-2.5-flash-preview.json +++ b/addon/globalPlugins/WordBridge/setting/corrector/Coseeing-00005-gemini-2.5-flash-preview.json @@ -2,7 +2,7 @@ "model": { "model_name": "gemini-2.5-flash-preview-05-20", "provider": "Google", - "llm_access_method": "personal_api_key", + "llm_access_method": "coseeing_relay", "require_secret_key": false, "template_name": { "standard": "Standard_v1.json", diff --git a/addon/globalPlugins/WordBridge/setting/corrector/DeepSeek-00001-deepseek-chat.json b/addon/globalPlugins/WordBridge/setting/corrector/DeepSeek-00001-deepseek-chat.json new file mode 100644 index 0000000..c5c0aec --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/DeepSeek-00001-deepseek-chat.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": 
"deepseek-chat", + "provider": "DeepSeek", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Google-00001-gemini-2.5-flash.json b/addon/globalPlugins/WordBridge/setting/corrector/Google-00001-gemini-2.5-flash.json new file mode 100644 index 0000000..66e9085 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Google-00001-gemini-2.5-flash.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gemini-2.5-flash", + "provider": "Google", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/corrector/Google-00002-gemini-2.5-pro.json b/addon/globalPlugins/WordBridge/setting/corrector/Google-00002-gemini-2.5-pro.json new file mode 100644 index 0000000..fa03828 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/Google-00002-gemini-2.5-pro.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gemini-2.5-pro", + "provider": "Google", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00001-gpt-4.1.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00001-gpt-4.1.json new file mode 100644 index 0000000..e83ec31 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00001-gpt-4.1.json @@ 
-0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4.1-2025-04-14", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00002-gpt-4.1-mini.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00002-gpt-4.1-mini.json new file mode 100644 index 0000000..aa393fd --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00002-gpt-4.1-mini.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4.1-mini-2025-04-14", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00003-gpt-4.1-nano.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00003-gpt-4.1-nano.json new file mode 100644 index 0000000..2cf6d3f --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00003-gpt-4.1-nano.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4.1-nano-2025-04-14", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00004-gpt-4o.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00004-gpt-4o.json new file mode 100644 index 
0000000..e546e32 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00004-gpt-4o.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4o-2024-08-06", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00005-gpt-4o-mini.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00005-gpt-4o-mini.json new file mode 100644 index 0000000..3e8e5b1 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00005-gpt-4o-mini.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-4o-mini-2024-07-18", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00006-o4-mini.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00006-o4-mini.json new file mode 100644 index 0000000..fb8c500 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00006-o4-mini.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "o4-mini-2025-04-16", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00007-gpt-5-chat-latest.json 
b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00007-gpt-5-chat-latest.json new file mode 100644 index 0000000..2c8cd6a --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00007-gpt-5-chat-latest.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-5-chat-latest", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00008-gpt-5.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00008-gpt-5.json new file mode 100644 index 0000000..5928763 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00008-gpt-5.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-5", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00009-gpt-5-mini.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00009-gpt-5-mini.json new file mode 100644 index 0000000..1b9eddb --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00009-gpt-5-mini.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-5-mini", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git 
a/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00010-gpt-5-nano.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00010-gpt-5-nano.json new file mode 100644 index 0000000..673f933 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenAI-00010-gpt-5-nano.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "gpt-5-nano", + "provider": "OpenAI", + "llm_access_method": "personal_api_key", + "require_secret_key": false, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": true, + "no_explanation": false + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00001-deepseek-chat-v3-0324.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00001-deepseek-chat-v3-0324.json new file mode 100644 index 0000000..62e8e27 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00001-deepseek-chat-v3-0324.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek/deepseek-chat-v3-0324:free", + "provider": "OpenRouter", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00002-deepseek-chat.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00002-deepseek-chat.json new file mode 100644 index 0000000..f6c568c --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00002-deepseek-chat.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek/deepseek-chat:free", + "provider": "OpenRouter", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": 
"Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00003-deepseek-chat-v3-0528.json b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00003-deepseek-chat-v3-0528.json new file mode 100644 index 0000000..d2ddc6c --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/corrector/OpenRouter-00003-deepseek-chat-v3-0528.json @@ -0,0 +1,16 @@ +{ + "model": { + "model_name": "deepseek/deepseek-r1-0528:free", + "provider": "OpenRouter", + "llm_access_method": "personal_api_key", + "require_secret_key": true, + "template_name": { + "standard": "Standard_v1.json", + "lite": "Lite_v1.json" + }, + "optional_guidance_enable": { + "keep_non_chinese_char": false, + "no_explanation": true + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/anthropic.json b/addon/globalPlugins/WordBridge/setting/llm_clients/anthropic.json new file mode 100644 index 0000000..78c68ee --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/anthropic.json @@ -0,0 +1,16 @@ +{ + "name": "anthropic", + "url": "https://api.anthropic.com/v1/messages", + "setting": { + "max_tokens": 4096, + "temperature": 0.0 + }, + "timeout0": 10, + "timeout_max": 20, + "usage_schemas": { + "anthropic_standard": { + "usage_key": "usage", + "fields": ["input_tokens", "cache_creation_input_tokens", "cache_read_input_tokens", "output_tokens"] + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/baidu.json b/addon/globalPlugins/WordBridge/setting/llm_clients/baidu.json new file mode 100644 index 0000000..d9874bc --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/baidu.json @@ -0,0 +1,13 @@ +{ + "name": "baidu", + "url": "https://qianfan.baidubce.com/v2/chat/completions", + "setting": { + 
"max_completion_tokens": 4096, + "temperature": 0.0, + "top_p": 0.0, + "stop": ["\n", "&"] + }, + "timeout0": 30, + "timeout_max": 60, + "usage_schemas": {} +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/deepseek.json b/addon/globalPlugins/WordBridge/setting/llm_clients/deepseek.json new file mode 100644 index 0000000..8b24f2f --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/deepseek.json @@ -0,0 +1,18 @@ +{ + "name": "deepseek", + "url": "https://api.deepseek.com/chat/completions", + "setting": { + "max_tokens": 4096, + "temperature": 0.0, + "top_p": 0.0, + "stop": [ " =>" ] + }, + "timeout0": 30, + "timeout_max": 60, + "usage_schemas": { + "deepseek_standard": { + "usage_key": "usage", + "fields": ["prompt_cache_hit_tokens", "prompt_cache_miss_tokens", "completion_tokens"] + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/google.json b/addon/globalPlugins/WordBridge/setting/llm_clients/google.json new file mode 100644 index 0000000..2d51e10 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/google.json @@ -0,0 +1,19 @@ +{ + "name": "google", + "url": "https://generativelanguage.googleapis.com/v1beta", + "setting": { + "max_completion_tokens": 4096, + "seed": 0, + "temperature": 0.0, + "top_p": 0.0, + "stop": [" =>"] + }, + "timeout0": 10, + "timeout_max": 20, + "usage_schemas": { + "google_standard": { + "usage_key": "usageMetadata", + "fields": ["promptTokenCount", "candidatesTokenCount"] + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/ollama.json b/addon/globalPlugins/WordBridge/setting/llm_clients/ollama.json new file mode 100644 index 0000000..e94843d --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/ollama.json @@ -0,0 +1,10 @@ +{ + "name": "openai", + "url": "https://api.openai.com/v1/chat/completions", + "setting": { + "max_tokens": 4096, + 
"stop": [" =>"] + }, + "timeout0": 10, + "timeout_max": 20 +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/openai.json b/addon/globalPlugins/WordBridge/setting/llm_clients/openai.json new file mode 100644 index 0000000..c37bfdf --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/openai.json @@ -0,0 +1,19 @@ +{ + "name": "openai", + "url": "https://api.openai.com/v1/chat/completions", + "setting": { + "max_completion_tokens": 4096, + "seed": 0, + "temperature": 0.0, + "top_p": 0.0, + "stop": [" =>"] + }, + "timeout0": 10, + "timeout_max": 20, + "usage_schemas": { + "openai_standard": { + "usage_key": "usage", + "fields": ["prompt_tokens", "completion_tokens"] + } + } +} \ No newline at end of file diff --git a/addon/globalPlugins/WordBridge/setting/llm_clients/openrouter.json b/addon/globalPlugins/WordBridge/setting/llm_clients/openrouter.json new file mode 100644 index 0000000..0cd0504 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_clients/openrouter.json @@ -0,0 +1,14 @@ +{ + "name": "openrouter", + "url": "https://openrouter.ai/api/v1/chat/completions", + "setting": { + "max_tokens": 4096, + "temperature": 0.0, + "top_p": 0.0, + "stop": [ " =>" ] + }, + "timeout0": 10, + "timeout_max": 20, + "usage_schemas": {} +} + diff --git a/addon/globalPlugins/WordBridge/setting/llm_models.json b/addon/globalPlugins/WordBridge/setting/llm_models.json new file mode 100644 index 0000000..96b55b2 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/llm_models.json @@ -0,0 +1,219 @@ +{ + "claude-haiku-4-5-20251001&anthropic": { + "model": "claude-haiku-4-5-20251001", + "provider": "anthropic", + "pricing": { + "input_tokens": 1, + "cache_creation_input_tokens": 1.25, + "cache_read_input_tokens": 0.10, + "output_tokens": 5, + "base_unit": 1000000 + }, + "usage_schema": "anthropic_standard", + "usage_key": "usage" + }, + "claude-sonnet-4-20250514&anthropic": { + "model": 
"claude-sonnet-4-20250514", + "provider": "anthropic", + "pricing": { + "input_tokens": 3, + "cache_creation_input_tokens": 3.75, + "cache_read_input_tokens": 0.3, + "output_tokens": 15, + "base_unit": 1000000 + }, + "usage_schema": "anthropic_standard", + "usage_key": "usage" + }, + "deepseek-v3&deepseek": { + "model": "deepseek-v3", + "provider": "deepseek", + "pricing": {}, + "usage_schema": null + }, + "deepseek-chat&deepseek": { + "model": "deepseek-chat", + "provider": "deepseek", + "pricing": { + "prompt_cache_hit_tokens": 0.07, + "prompt_cache_miss_tokens": 0.27, + "completion_tokens": 1.1, + "base_unit": 1000000 + }, + "usage_schema": "deepseek_standard" + }, + "deepseek-reasoner&deepseek": { + "model": "deepseek-reasoner", + "provider": "deepseek", + "pricing": { + "prompt_cache_hit_tokens": 0.14, + "prompt_cache_miss_tokens": 0.55, + "completion_tokens": 2.19, + "base_unit": 1000000 + }, + "usage_schema": "deepseek_standard" + }, + "deepseek/deepseek-chat:free&openrouter": { + "model": "deepseek/deepseek-chat:free", + "provider": "openrouter", + "pricing": {}, + "usage_schema": null + }, + "deepseek/deepseek-chat-v3-0324:free&openrouter": { + "model": "deepseek/deepseek-chat-v3-0324:free", + "provider": "openrouter", + "pricing": {}, + "usage_schema": null + }, + "deepseek/deepseek-r1-0528:free&openrouter": { + "model": "deepseek/deepseek-r1-0528:free", + "provider": "openrouter", + "pricing": {}, + "usage_schema": null + }, + "deepseek/deepseek-r1-0528-qwen3-8b:free&openrouter": { + "model": "deepseek/deepseek-r1-0528-qwen3-8b:free", + "provider": "openrouter", + "pricing": {}, + "usage_schema": null + }, + "gemini-2.5-flash&google": { + "model": "gemini-2.5-flash", + "provider": "google", + "pricing": { + "promptTokenCount": 0.3, + "candidatesTokenCount": 2.5, + "base_unit": 1000000 + }, + "usage_schema": "google_standard", + "usage_key": "usageMetadata" + }, + "gemini-2.5-pro&google": { + "model": "gemini-2.5-pro", + "provider": "google", + "pricing": 
{ + "promptTokenCount": 1.25, + "candidatesTokenCount": 10, + "base_unit": 1000000 + }, + "usage_schema": "google_standard", + "usage_key": "usageMetadata" + }, + "gpt-4o-2024-08-06&OpenAI": { + "model": "gpt-4o-2024-08-06", + "provider": "openai", + "pricing": { + "prompt_tokens": 2.5, + "completion_tokens": 10, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-4o-mini-2024-07-18&OpenAI": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai", + "pricing": { + "prompt_tokens": 0.15, + "completion_tokens": 0.6, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-4.1-2025-04-14&OpenAI": { + "model": "gpt-4.1-2025-04-14", + "provider": "openai", + "pricing": { + "prompt_tokens": 2, + "completion_tokens": 8, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-4.1-mini-2025-04-14&OpenAI": { + "model": "gpt-4.1-mini-2025-04-14", + "provider": "openai", + "pricing": { + "prompt_tokens": 0.4, + "completion_tokens": 1.6, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-4.1-nano-2025-04-14&OpenAI": { + "model": "gpt-4.1-nano-2025-04-14", + "provider": "openai", + "pricing": { + "prompt_tokens": 0.1, + "completion_tokens": 0.4, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "o4-mini-2025-04-16&OpenAI": { + "model": "o4-mini-2025-04-16", + "provider": "openai", + "pricing": { + "prompt_tokens": 1.1, + "completion_tokens": 4.4, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-5-chat-latest&OpenAI": { + "model": "gpt-5-chat-latest", + "provider": "openai", + "pricing": { + "prompt_tokens": 1.25, + "completion_tokens": 10, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-5&OpenAI": { + "model": "gpt-5", + "provider": "openai", + 
"pricing": { + "prompt_tokens": 1.25, + "completion_tokens": 10, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-5-mini&OpenAI": { + "model": "gpt-5-mini", + "provider": "openai", + "pricing": { + "prompt_tokens": 0.25, + "completion_tokens": 2, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "gpt-5-nano&OpenAI": { + "model": "gpt-5-nano", + "provider": "openai", + "pricing": { + "prompt_tokens": 0.05, + "completion_tokens": 0.4, + "base_unit": 1000000 + }, + "usage_schema": "openai_standard", + "usage_key": "usage" + }, + "ernie-4.0-turbo-8k&baidu": { + "model": "ernie-4.0-turbo-8k", + "provider": "baidu", + "pricing": {}, + "usage_schema": null, + "usage_key": "usage" + } +} diff --git a/addon/globalPlugins/WordBridge/setting/provider/anthropic.json b/addon/globalPlugins/WordBridge/setting/provider/anthropic.json index 9707ffc..221f26c 100644 --- a/addon/globalPlugins/WordBridge/setting/provider/anthropic.json +++ b/addon/globalPlugins/WordBridge/setting/provider/anthropic.json @@ -3,9 +3,8 @@ "url": "https://api.anthropic.com/v1/messages", "setting": { "max_tokens": 4096, - "temperature": 0.0, - "top_p": 0.0 + "temperature": 0.0 }, "timeout0": 10, "timeout_max": 20 -} \ No newline at end of file +} diff --git a/addon/globalPlugins/WordBridge/setting/templates/Lite_v1.json b/addon/globalPlugins/WordBridge/setting/templates/Lite_v1.json new file mode 100644 index 0000000..6641609 --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/templates/Lite_v1.json @@ -0,0 +1,50 @@ +{ + "zh_traditional": { + "system": "改錯字(避免加減字,或取代原讀音的字):", + "system_tag": "請修正[[]]中的錯字並輸出正確文字(避免加減字,或取代原讀音的字):", + "comment": "'{{response_previous}}'是錯誤答案,請修正重新輸出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}天器真好 => "}, + {"role": "assistant", "content": "{{ANSWER}}天氣真好"}, + {"role": "user", "content": "{{QUESTION}}出去玩 => "}, + {"role": "assistant", "content": 
"{{ANSWER}}出去玩"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}=> "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}天[[器]]真好 => "}, + {"role": "assistant", "content": "{{ANSWER}}天氣真好"}, + {"role": "user", "content": "{{QUESTION}}出去玩 => "}, + {"role": "assistant", "content": "{{ANSWER}}出去玩"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}=> "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿將非漢字用漢字取代", + "no_explanation": "輸出答案即可,後面無須解釋", + "customized_words": "參考詞彙: " + } + }, + "zh_simplified": { + "system": "改错字(避免加减字,或取代原读音的字):", + "system_tag": "请修正[[]]中的错字并输出正确文字(避免加减字,或取代原读音的字):", + "comment": "'{{response_previous}}'是错误答案,请修正重新输出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}天器真好 => "}, + {"role": "assistant", "content": "{{ANSWER}}天气真好"}, + {"role": "user", "content": "{{QUESTION}}出去玩 => "}, + {"role": "assistant", "content": "{{ANSWER}}出去玩"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}} => "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}天[[器]]真好 => "}, + {"role": "assistant", "content": "{{ANSWER}}天气真好"}, + {"role": "user", "content": "{{QUESTION}}出去玩 => "}, + {"role": "assistant", "content": "{{ANSWER}}出去玩"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿将非汉字用汉字取代", + "no_explanation": "输出答案即可,后面无须解释", + "customized_words": "参考词汇: " + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/templates/Standard_v1.json b/addon/globalPlugins/WordBridge/setting/templates/Standard_v1.json new file mode 100644 index 0000000..9a8433b --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/templates/Standard_v1.json @@ -0,0 +1,42 @@ +{ + "zh_traditional": { + "system": "輸入為文字與其正確拼音,請修正錯字並輸出正確文字:\n(文字&拼音) => 文字", + "system_tag": "輸入為文字與其正確拼音,請修正[[]]中的錯字並輸出正確文字:\n(文字&拼音) => 文字", + "comment": "'{{response_previous}}'是錯誤答案,請修正重新輸出文字", + "message": [ + {"role": "user", "content": 
"{{QUESTION}}今天天器真好&jin1 tian1 tian1 qi4 zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}今天天氣真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}今天天[[器]]真好&jin1 tian1 tian1 [[qi4]] zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}今天天氣真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿將非漢字用漢字取代", + "no_explanation": "輸出答案即可,後面無須解釋", + "customized_words": "參考詞彙: " + } + }, + "zh_simplified": { + "system": "输入为文字与其正确拼音,请修正错字并输出正确文字:\n(文字&拼音) => 文字", + "system_tag": "输入为文字与其正确拼音,请修正[[]]中的错字并输出正确文字:\n(文字&拼音) => 文字", + "comment": "'{{response_previous}}'是错误答案,请修正重新输出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}今天天器真好&jin1 tian1 tian1 qi4 zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}今天天气真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}今天天[[器]]真好&jin1 tian1 tian1 [[qi4]] zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}今天天气真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿将非汉字用汉字取代", + "no_explanation": "输出答案即可,后面无须解释", + "customized_words": "参考词汇: " + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/templates/Standard_v2.json b/addon/globalPlugins/WordBridge/setting/templates/Standard_v2.json new file mode 100644 index 0000000..ee7146d --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/templates/Standard_v2.json @@ -0,0 +1,34 @@ +{ + "zh_traditional": { + "system": "輸入為文字與其正確拼音,請修正錯字並輸出正確文字:\n(文字&拼音) => 文字", + "comment": "'{{response_previous}}'是錯誤答案,請修正重新輸出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}天器真好&tian1 qi4 zhen1 hao3 => "}, + {"role": "assistant", 
"content": "{{ANSWER}}天氣真好"}, + {"role": "user", "content": "{{QUESTION}}出去玩&chu1 qv4 wan2 => "}, + {"role": "assistant", "content": "{{ANSWER}}出去玩"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿將非漢字用漢字取代", + "no_explanation": "輸出答案即可,後面無須解釋", + "customized_words": "參考詞彙: " + } + }, + "zh_simplified": { + "system": "输入为文字与其正确拼音,请修正错字并输出正确文字:\n(文字&拼音) => 文字", + "comment": "'{{response_previous}}'是错误答案,请修正重新输出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}今天天器&jin1 tian1 tian1 qi4 => "}, + {"role": "assistant", "content": "{{ANSWER}}今天天气"}, + {"role": "user", "content": "{{QUESTION}}真好&zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿将非汉字用汉字取代", + "no_explanation": "输出答案即可,后面无须解释", + "customized_words": "参考词汇: " + } + } +} diff --git a/addon/globalPlugins/WordBridge/setting/templates/Standard_v3.json b/addon/globalPlugins/WordBridge/setting/templates/Standard_v3.json new file mode 100644 index 0000000..99f540c --- /dev/null +++ b/addon/globalPlugins/WordBridge/setting/templates/Standard_v3.json @@ -0,0 +1,42 @@ +{ + "zh_traditional": { + "system": "輸入為繁體文字與其正確拼音,請修正錯字並輸出正確繁體文字\n1. 不要加字刪字,也不要刪除句尾標點符號\n2. 請不要在句尾加換行或空格\n3. 輸出須為繁體字\n4. 不要用同義詞代換原文字\n範例:\n(文字&拼音) => 文字", + "system_tag": "輸入為文字與其正確拼音,請修正[[]]中的錯字並輸出正確文字\n1. 不要加字刪字,也不要刪除句尾標點符號\n2. 請不要在句尾加換行或空格\n3. 輸出須為繁體字\n4. 
不要用同義詞代換原文字\n範例:\n(文字&拼音) => 文字", + "comment": "錯誤答案,請修正重新輸出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}我說今天天器真好。&wo3 shuo1 jin1 tian1 tian1 qi4 zhen1 hao3 。 => "}, + {"role": "assistant", "content": "{{ANSWER}}我說今天天氣真好。"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}我說今天天[[器]]真好。&wo3 shuo1 jin1 tian1 tian1 [[qi4]] zhen1 hao3 。 => "}, + {"role": "assistant", "content": "{{ANSWER}}我說今天天氣真好。"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿將非漢字用漢字取代", + "no_explanation": "輸出答案即可,後面無須解釋", + "customized_words": "參考詞彙: " + } + }, + "zh_simplified": { + "system": "输入为简体文字与其正确拼音,请修正错字并输出正确简体文字\n1. 不要加字删字,也不要删除句尾标点符号\n2. 请不要在句尾加换行或空格\n3. 输出须为简体字\n4. 不要用同义词代换原文字\n范例:\n(文字&拼音) => 文字", + "system_tag": "输入为文字与其正确拼音,请修正[[]]中的错字并输出正确文字\n1. 不要加字删字,也不要删除句尾标点符号\n2. 请不要在句尾加换行或空格\n3. 输出须为简体字\n4. 不要用同义词代换原文字\n(文字&拼音) => 文字", + "comment": "错误答案,请修正重新输出文字", + "message": [ + {"role": "user", "content": "{{QUESTION}}我说今天天器真好&wo3 shuo1 jin1 tian1 tian1 qi4 zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}我说今天天气真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "message_tag": [ + {"role": "user", "content": "{{QUESTION}}我说今天天[[器]]真好&wo3 shuo1 jin1 tian1 tian1 [[qi4]] zhen1 hao3 => "}, + {"role": "assistant", "content": "{{ANSWER}}我说今天天气真好"}, + {"role": "user", "content": "{{QUESTION}}{{text_input}}&{{phone_input}} => "} + ], + "optional_guidance": { + "keep_non_chinese_char": "勿将非汉字用汉字取代", + "no_explanation": "输出答案即可,后面无须解释", + "customized_words": "参考词汇: " + } + } +} diff --git a/pyproject.toml b/pyproject.toml index 4d76bfe..9c50d82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,3 +38,9 @@ ignore = [ # sconstruct contains many inbuilt functions not recognised by the lint, # so ignore F821. 
"sconstruct" = ["F821"] + +[tool.pytest.ini_options] +markers = [ + "integration: marks tests as integration tests (deselect with '-m \"not integration\"')", + "slow: marks tests as slow (deselect with '-m \"not slow\"')", +] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..f1c25cb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +import sys +import os +from pathlib import Path +from dotenv import load_dotenv +import pytest + +load_dotenv() + +project_root = Path(__file__).parent.parent +tests_path = Path(__file__).parent +addon_path = project_root / "addon" / "globalPlugins" / "WordBridge" +package_path = addon_path / "package" + +sys.path.insert(0, str(tests_path)) +sys.path.insert(0, str(addon_path)) +sys.path.insert(0, str(package_path)) + +@pytest.fixture +def model_config(): + def _make_config(model_name, provider): + return { + "model_name": model_name, + "provider": provider, + "language": "zh_traditional", + "template_name": "Standard_v1.json", + "optional_guidance_enable": { + "keep_non_chinese_char": True, + "no_explanation": False + } + } + return _make_config + +@pytest.fixture +def credentials(): + def _get_credentials(provider): + if provider == "google": + api_key = os.getenv("TEST_GOOGLE_API_KEY") + if not api_key: + pytest.skip("TEST_GOOGLE_API_KEY not found in environment") + elif provider == "OpenAI": + api_key = os.getenv("TEST_OPENAI_API_KEY") + if not api_key: + pytest.skip("TEST_OPENAI_API_KEY not found in environment") + elif provider == "Anthropic": + api_key = os.getenv("TEST_ANTHROPIC_API_KEY") + if not api_key: + pytest.skip("TEST_ANTHROPIC_API_KEY not found in environment") + else: + pytest.skip(f"Unknown provider: {provider}") + + return { + "api_key": api_key, + "secret_key": "" + } + return _get_credentials + +@pytest.fixture +def test_data(): + return { + "basic_text": "這是測試文字", + 
"text_with_typo": "今天天器真好", + "expected_correction": "今天天氣真好" + } diff --git a/tests/requirement.txt b/tests/requirement.txt new file mode 100644 index 0000000..609e70c --- /dev/null +++ b/tests/requirement.txt @@ -0,0 +1,3 @@ +pytest==9.0.2 +requests==2.32.5 +python-dotenv==1.2.1 diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..f551e32 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,14 @@ +import json + +def print_test_results(model_name, test_text, response, diff, corrector): + cost = corrector.get_total_cost() + + print(f"\n=== Test Results ({model_name}) ===") + print(f"Input: {test_text}") + print(f"Output: {response}") + print(f"Cost: ${cost} USD") + print(f"API calls: {len(corrector.response_history)}") + print(f"Diff: {diff}") + + print("\n=== API Response ===") + print(json.dumps(corrector.response_history[0], indent=2, ensure_ascii=False)) diff --git a/tests/test_integration_anthropic.py b/tests/test_integration_anthropic.py new file mode 100644 index 0000000..35a9a41 --- /dev/null +++ b/tests/test_integration_anthropic.py @@ -0,0 +1,70 @@ +import pytest +from lib.typo_corrector import ChineseTypoCorrector, CorrectionOrchestrator +from test_helpers import print_test_results + +# Test representative Anthropic Claude models +ANTHROPIC_MODELS_TO_TEST = [ + "claude-sonnet-4-20250514", + "claude-haiku-4-5-20251001", +] + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("model_name", ANTHROPIC_MODELS_TO_TEST) +def test_anthropic_basic_correction(model_config, credentials, test_data, model_name): + config = model_config(model_name, "Anthropic") + creds = credentials("Anthropic") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = 
CorrectionOrchestrator(corrector) + test_text = test_data["basic_text"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None, "Response should not be None" + assert isinstance(response, str), "Response should be a string" + assert isinstance(diff, list), "Diff should be a list" + + cost = corrector.get_total_cost() + assert cost >= 0, "Cost should be non-negative" + + assert len(corrector.response_history) > 0, "Should have response history" + + print_test_results(model_name, test_text, response, diff, corrector) + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("model_name", ANTHROPIC_MODELS_TO_TEST) +def test_anthropic_with_typo(model_config, credentials, test_data, model_name): + config = model_config(model_name, "Anthropic") + creds = credentials("Anthropic") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = CorrectionOrchestrator(corrector) + test_text = test_data["text_with_typo"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None + assert isinstance(response, str) + + cost = corrector.get_total_cost() + assert cost >= 0 + + print_test_results(model_name, test_text, response, diff, corrector) diff --git a/tests/test_integration_gemini.py b/tests/test_integration_gemini.py new file mode 100644 index 0000000..bfb9eb7 --- /dev/null +++ b/tests/test_integration_gemini.py @@ -0,0 +1,70 @@ +import pytest +from lib.typo_corrector import ChineseTypoCorrector, CorrectionOrchestrator +from test_helpers import print_test_results + +# Test representative Gemini models +GEMINI_MODELS_TO_TEST = [ + "gemini-2.5-flash", + "gemini-2.5-pro", +] + +@pytest.mark.integration +@pytest.mark.slow 
+@pytest.mark.parametrize("model_name", GEMINI_MODELS_TO_TEST) +def test_gemini_basic_correction(model_config, credentials, test_data, model_name): + config = model_config(model_name, "google") + creds = credentials("google") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = CorrectionOrchestrator(corrector) + test_text = test_data["basic_text"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None, "Response should not be None" + assert isinstance(response, str), "Response should be a string" + assert isinstance(diff, list), "Diff should be a list" + + cost = corrector.get_total_cost() + assert cost >= 0, "Cost should be non-negative" + + assert len(corrector.response_history) > 0, "Should have response history" + + print_test_results(model_name, test_text, response, diff, corrector) + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("model_name", GEMINI_MODELS_TO_TEST) +def test_gemini_with_typo(model_config, credentials, test_data, model_name): + config = model_config(model_name, "google") + creds = credentials("google") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = CorrectionOrchestrator(corrector) + test_text = test_data["text_with_typo"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None + assert isinstance(response, str) + + cost = corrector.get_total_cost() + assert cost >= 0 + + print_test_results(model_name, test_text, response, diff, corrector) 
diff --git a/tests/test_integration_openai.py b/tests/test_integration_openai.py new file mode 100644 index 0000000..e5e3bd6 --- /dev/null +++ b/tests/test_integration_openai.py @@ -0,0 +1,73 @@ +import pytest +from lib.typo_corrector import ChineseTypoCorrector, CorrectionOrchestrator +from test_helpers import print_test_results + +# Test representative models from different series +OPENAI_MODELS_TO_TEST = [ + "gpt-4.1-2025-04-14", + "gpt-4o-mini-2024-07-18", + "o4-mini-2025-04-16", + "gpt-5" +] + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("model_name", OPENAI_MODELS_TO_TEST) +def test_openai_basic_correction(model_config, credentials, test_data, model_name): + config = model_config(model_name, "OpenAI") + creds = credentials("OpenAI") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = CorrectionOrchestrator(corrector) + test_text = test_data["basic_text"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None, "Response should not be None" + assert isinstance(response, str), "Response should be a string" + assert isinstance(diff, list), "Diff should be a list" + + cost = corrector.get_total_cost() + assert cost >= 0, "Cost should be non-negative" + + assert len(corrector.response_history) > 0, "Should have response history" + + print_test_results(model_name, test_text, response, diff, corrector) + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("model_name", OPENAI_MODELS_TO_TEST) +def test_openai_with_typo(model_config, credentials, test_data, model_name): + config = model_config(model_name, "OpenAI") + creds = credentials("OpenAI") + + corrector = ChineseTypoCorrector( + model=config["model_name"], + 
provider=config["provider"], + credential=creds, + language=config["language"], + template_name=config["template_name"], + optional_guidance_enable=config["optional_guidance_enable"], + customized_words=[] + ) + + orchestrator = CorrectionOrchestrator(corrector) + test_text = test_data["text_with_typo"] + response, diff = orchestrator.execute(test_text, batch_mode=True) + + assert response is not None + assert isinstance(response, str) + + cost = corrector.get_total_cost() + + assert cost >= 0 + + print_test_results(model_name, test_text, response, diff, corrector)