From 4bf57ef89848439857fb3ee3400e743cd2911653 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 8 Dec 2025 16:59:06 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=8C=E5=96=84=E5=B0=88=E6=A1=88?= =?UTF-8?q?=E5=93=81=E8=B3=AA=E8=88=87=E7=A4=BE=E5=8D=80=E5=8D=94=E4=BD=9C?= =?UTF-8?q?=E5=9F=BA=E7=A4=8E=E8=A8=AD=E6=96=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 改進項目: - 更新 pyproject.toml 作者資訊和 GitHub URLs - 修復 CI workflow,移除關鍵檢查的 continue-on-error - 新增 GitHub Issue 模板 (feature_request, question) - 新增 GitHub PR 模板 - 新增 CODE_OF_CONDUCT.md 行為準則 - 新增 SECURITY.md 安全政策 - 新增 42 個單元測試 (test_cost_tracker, test_models) - 新增 pytest conftest.py 共享配置 --- .github/ISSUE_TEMPLATE/feature_request.yml | 71 ++++ .github/ISSUE_TEMPLATE/question.yml | 60 ++++ .github/pull_request_template.md | 70 ++++ .github/workflows/ci.yml | 16 +- CODE_OF_CONDUCT.md | 50 +++ SECURITY.md | 106 ++++++ pyproject.toml | 10 +- tests/conftest.py | 102 ++++++ tests/test_cost_tracker.py | 275 +++++++++++++++ tests/test_models.py | 374 +++++++++++++++++++++ 10 files changed, 1120 insertions(+), 14 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/ISSUE_TEMPLATE/question.yml create mode 100644 .github/pull_request_template.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 SECURITY.md create mode 100644 tests/conftest.py create mode 100644 tests/test_cost_tracker.py create mode 100644 tests/test_models.py diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..4cfe02f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,71 @@ +name: "功能請求 / Feature Request" +description: "提出新功能或改進建議 / Suggest a new feature or improvement" +title: "[Feature]: " +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + 感謝您提出功能建議!請盡可能詳細地描述您的需求。 + Thank you for your feature suggestion! 
Please describe your needs in as much detail as possible. + + - type: textarea + id: problem + attributes: + label: 問題描述 / Problem Description + description: 您希望解決什麼問題? / What problem would you like to solve? + placeholder: 清楚描述您遇到的問題或不便之處... + validations: + required: true + + - type: textarea + id: solution + attributes: + label: 建議解決方案 / Proposed Solution + description: 您希望如何解決這個問題? / How would you like to solve this problem? + placeholder: 描述您理想中的解決方案... + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: 替代方案 / Alternatives Considered + description: 您考慮過其他解決方案嗎? / Have you considered any alternative solutions? + placeholder: 描述您考慮過的其他方法... + validations: + required: false + + - type: dropdown + id: category + attributes: + label: 功能類別 / Feature Category + description: 這個功能屬於哪個類別? / Which category does this feature belong to? + options: + - 📚 內容/文檔 (Content/Documentation) + - 💻 代碼示例 (Code Examples) + - 🧪 測試 (Testing) + - 🔧 工具/自動化 (Tools/Automation) + - 📖 學習路徑 (Learning Path) + - 🌐 國際化 (Internationalization) + - 🎨 UI/UX 改進 (UI/UX Improvement) + - 其他 (Other) + validations: + required: true + + - type: textarea + id: additional + attributes: + label: 補充資訊 / Additional Context + description: 任何其他相關資訊 / Any other relevant information + placeholder: 截圖、參考連結等... + validations: + required: false + + - type: checkboxes + id: contribution + attributes: + label: 貢獻意願 / Contribution + description: 您是否願意協助實現這個功能? / Would you like to help implement this feature? 
+ options: + - label: 我願意提交 PR 來實現這個功能 / I'm willing to submit a PR for this feature diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml new file mode 100644 index 0000000..2f1a94d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -0,0 +1,60 @@ +name: "問題諮詢 / Question" +description: "詢問關於專案的問題 / Ask a question about the project" +title: "[Question]: " +labels: ["question"] +body: + - type: markdown + attributes: + value: | + 歡迎提問!在提問之前,請先查閱: + - [README.md](https://github.com/markl-a/My-AI-Learning-Notes/blob/main/README.md) + - [學習路徑指南](https://github.com/markl-a/My-AI-Learning-Notes/blob/main/LEARNING_PATHS.md) + - [快速開始指南](https://github.com/markl-a/My-AI-Learning-Notes/blob/main/QUICKSTART.md) + + Welcome! Before asking, please check the documentation above. + + - type: textarea + id: question + attributes: + label: 您的問題 / Your Question + description: 請清楚描述您的問題 / Please describe your question clearly + placeholder: 我想了解... + validations: + required: true + + - type: dropdown + id: topic + attributes: + label: 相關主題 / Related Topic + description: 這個問題屬於哪個主題? / Which topic is this question about? + options: + - 🔢 數學基礎 (Math Fundamentals) + - 🐍 Python 基礎 (Python Basics) + - 🤖 機器學習 (Machine Learning) + - 🧠 深度學習 (Deep Learning) + - 🔤 LLM 基礎 (LLM Fundamentals) + - 📚 RAG 系統 (RAG System) + - 🤝 Agent 開發 (Agent Development) + - ⚙️ 環境設置 (Environment Setup) + - 📦 依賴/安裝 (Dependencies/Installation) + - 其他 (Other) + validations: + required: true + + - type: textarea + id: tried + attributes: + label: 已嘗試的方法 / What have you tried? + description: 您已經嘗試過什麼方法? / What solutions have you already tried? + placeholder: 我已經嘗試... + validations: + required: false + + - type: textarea + id: context + attributes: + label: 補充資訊 / Additional Context + description: 任何有助於回答問題的資訊 / Any information that might help answer your question + placeholder: 環境資訊、錯誤訊息、截圖等... 
+ validations: + required: false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..18174f1 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,70 @@ +## 📋 PR 描述 / Description + + + + + + +## 🔗 相關 Issue / Related Issues + + + + +- Closes # +- Related to # + +## 🔄 改動類型 / Type of Change + + + +- [ ] 🐛 Bug 修復 / Bug fix +- [ ] ✨ 新功能 / New feature +- [ ] 📚 文檔更新 / Documentation update +- [ ] 🧪 測試相關 / Test related +- [ ] 🔧 配置/工具更新 / Configuration/Tool update +- [ ] ♻️ 代碼重構 / Code refactoring +- [ ] 🎨 代碼風格/格式 / Code style/formatting + +## 📝 改動說明 / Changes Made + + + + +1. +2. +3. + +## 🧪 測試方式 / How Has This Been Tested? + + + + +- [ ] 單元測試 / Unit tests +- [ ] 手動測試 / Manual testing +- [ ] 本地運行 / Local execution +- [ ] 其他 / Other: + +## 📸 截圖 / Screenshots (如適用 / if applicable) + + + + + + +## ✅ 自我檢查清單 / Checklist + + + +- [ ] 我已閱讀 [CONTRIBUTING.md](../CONTRIBUTING.md) +- [ ] 我的代碼遵循專案的代碼風格 +- [ ] 我已對代碼進行自我審查 +- [ ] 我已添加必要的註釋(特別是複雜的邏輯) +- [ ] 我的改動不會產生新的警告 +- [ ] 我已添加相關測試(如適用) +- [ ] 所有現有測試都通過 + +## 📌 其他備註 / Additional Notes + + + + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 134ee79..1b4a0cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,17 +49,15 @@ jobs: - name: 🔍 Ruff - 代碼檢查 run: | ruff check . --output-format=github - continue-on-error: true - name: 🎨 Black - 代碼格式檢查 run: | black --check --diff . - continue-on-error: true - name: 📝 MyPy - 類型檢查 run: | mypy . --ignore-missing-imports --no-strict-optional - continue-on-error: true + continue-on-error: true # 類型檢查可以較寬鬆 # ==================== 單元測試 ==================== test: @@ -153,18 +151,18 @@ jobs: - name: 🔐 Bandit - 安全漏洞掃描 run: | - bandit -r . -f json -o bandit-report.json || true - continue-on-error: true + bandit -r . 
-f json -o bandit-report.json --exit-zero + # 產出報告但不阻擋 CI,讓開發者檢視結果 - name: 🛡️ Safety - 依賴安全檢查 run: | - safety check --json || true - continue-on-error: true + safety check --json --continue-on-error || true + # 依賴安全問題需要人工評估 - name: 🔍 Pip Audit - 依賴審計 run: | - pip-audit --desc || true - continue-on-error: true + pip-audit --desc + continue-on-error: true # 依賴審計供參考 # ==================== 構建檢查 ==================== build: diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..2f11b4a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,50 @@ +# 貢獻者公約 / Contributor Covenant Code of Conduct + +## 我們的承諾 / Our Pledge + +為了營造一個開放且友善的環境,我們作為貢獻者和維護者承諾:無論年齡、體型、殘疾、族裔、性別特徵、性別認同和表達、經驗水平、教育程度、社會經濟地位、國籍、個人外表、種族、宗教或性認同和取向如何,參與我們專案和社區的每個人都不會受到騷擾。 + +We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
+ +## 我們的標準 / Our Standards + +### 有助於創造正面環境的行為 / Positive behavior: + +- 使用友善和包容的語言 / Using welcoming and inclusive language +- 尊重不同的觀點和經驗 / Being respectful of differing viewpoints and experiences +- 優雅地接受建設性批評 / Gracefully accepting constructive criticism +- 專注於對社區最有利的事情 / Focusing on what is best for the community +- 對其他社區成員表示同理心 / Showing empathy towards other community members + +### 不可接受的行為 / Unacceptable behavior: + +- 使用性化的語言或圖像,以及不受歡迎的性關注或挑逗 +- 挑釁、侮辱性/貶損性評論,以及人身或政治攻擊 +- 公開或私下騷擾 +- 未經明確許可,發布他人的私人資訊 +- 其他在專業環境中被合理認為不適當的行為 + +## 我們的責任 / Our Responsibilities + +專案維護者有責任澄清可接受行為的標準,並預期會對任何不可接受的行為採取適當和公平的糾正措施。 + +專案維護者有權利和責任刪除、編輯或拒絕與本行為準則不一致的評論、提交、代碼、Wiki 編輯、Issue 和其他貢獻,並暫時或永久禁止任何貢獻者從事他們認為不適當、威脅、冒犯或有害的行為。 + +## 適用範圍 / Scope + +本行為準則適用於所有專案空間,也適用於個人在公共場所代表專案或其社區時。代表專案或社區的示例包括使用官方專案電子郵件地址、通過官方社交媒體帳戶發布,或在線上或線下活動中擔任指定代表。 + +## 執行 / Enforcement + +可以通過 [建立 Issue](https://github.com/markl-a/My-AI-Learning-Notes/issues) 報告濫用、騷擾或其他不可接受的行為。所有投訴都將被審查和調查,並將產生被認為必要且適合情況的回應。專案團隊有義務對事件報告者保密。 + +## 歸屬 / Attribution + +本行為準則改編自 [Contributor Covenant](https://www.contributor-covenant.org),版本 2.1,可在以下網址獲取: +https://www.contributor-covenant.org/version/2/1/code_of_conduct.html + +--- + +**感謝您幫助我們建立一個友善的學習社區!** + +**Thank you for helping us build a welcoming learning community!** diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..a9ed286 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,106 @@ +# 安全政策 / Security Policy + +## 支援的版本 / Supported Versions + +| 版本 / Version | 支援狀態 / Supported | +| -------------- | -------------------- | +| 1.x.x | :white_check_mark: | +| < 1.0 | :x: | + +## 報告漏洞 / Reporting a Vulnerability + +我們非常重視專案的安全性。如果您發現任何安全漏洞,請按照以下步驟報告: + +We take the security of this project seriously. If you discover a security vulnerability, please follow these steps: + +### 報告方式 / How to Report + +1. **請勿**在公開的 Issue 中報告安全漏洞 +2. 請通過以下方式私下報告: + - 建立一個標題為 `[SECURITY]` 的私密 Issue + - 或發送郵件至專案維護者 + +1. 
**Do NOT** report security vulnerabilities in public issues +2. Please report privately through: + - Creating a private issue with `[SECURITY]` in the title + - Or emailing the project maintainers + +### 報告內容應包含 / What to Include + +- 漏洞的詳細描述 / Detailed description of the vulnerability +- 重現步驟 / Steps to reproduce +- 潛在影響 / Potential impact +- 可能的修復建議(如有)/ Suggested fix (if any) + +### 回應時間 / Response Timeline + +- **確認收到**:48 小時內 / Within 48 hours +- **初步評估**:7 天內 / Within 7 days +- **修復計劃**:根據嚴重程度,14-30 天內 / 14-30 days depending on severity + +## 安全最佳實踐 / Security Best Practices + +使用本專案的代碼時,請注意以下安全事項: + +When using code from this project, please note the following: + +### API 金鑰管理 / API Key Management + +```bash +# 正確做法:使用環境變數 +export OPENAI_API_KEY="your-key-here" + +# 錯誤做法:硬編碼在代碼中 +# api_key = "sk-xxxxx" # 永遠不要這樣做! +``` + +### 環境變數 / Environment Variables + +- 使用 `.env` 文件管理敏感資訊 +- 確保 `.env` 已加入 `.gitignore` +- 參考 `.env.example` 設置您的環境 + +### 依賴安全 / Dependency Security + +```bash +# 定期檢查依賴安全 +pip-audit +safety check +``` + +## 已知安全考量 / Known Security Considerations + +### 本專案中的代碼示例 + +- 示例代碼主要用於**教育目的** +- 在生產環境中使用前,請進行適當的安全審查 +- 特別注意 API 調用、文件操作和用戶輸入處理 + +### LLM 應用安全 + +使用 LLM 相關代碼時,請注意: + +- **Prompt Injection**:驗證和清理用戶輸入 +- **敏感資料**:避免在 prompt 中包含敏感資訊 +- **輸出驗證**:不要盲目信任 LLM 的輸出 +- **Rate Limiting**:實施適當的請求限制 + +## 安全更新 / Security Updates + +安全更新將通過以下方式發布: + +- GitHub Releases +- README.md 中的更新日誌 +- 重大安全問題會有專門的通知 + +## 致謝 / Acknowledgments + +我們感謝所有負責任地報告安全問題的人。報告者將在修復發布後被列入致謝名單(如果他們願意)。 + +We thank everyone who responsibly reports security issues. Reporters will be credited in the acknowledgments after the fix is released (if they wish). + +--- + +**安全是我們共同的責任。感謝您幫助保護這個專案!** + +**Security is a shared responsibility. 
Thank you for helping keep this project safe!** diff --git a/pyproject.toml b/pyproject.toml index 0db71fb..ca404cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ readme = "README.md" requires-python = ">=3.9" license = {text = "MIT"} authors = [ - {name = "Your Name", email = "your.email@example.com"} + {name = "AI Learning Notes Contributors"} ] keywords = [ "artificial-intelligence", @@ -116,10 +116,10 @@ full = [ ] [project.urls] -Homepage = "https://github.com/yourusername/My-AI-Learning-Notes" -Documentation = "https://github.com/yourusername/My-AI-Learning-Notes" -Repository = "https://github.com/yourusername/My-AI-Learning-Notes" -Issues = "https://github.com/yourusername/My-AI-Learning-Notes/issues" +Homepage = "https://github.com/markl-a/My-AI-Learning-Notes" +Documentation = "https://github.com/markl-a/My-AI-Learning-Notes" +Repository = "https://github.com/markl-a/My-AI-Learning-Notes" +Issues = "https://github.com/markl-a/My-AI-Learning-Notes/issues" # ==================== 工具配置 ==================== diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6d6b5d1 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,102 @@ +""" +Pytest 配置和共享 fixtures + +提供測試所需的共享配置和 fixtures。 +""" + +import pytest +import sys +import os +import tempfile +import shutil + +# 確保專案根目錄在 Python 路徑中 +PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) +sys.path.insert(0, PROJECT_ROOT) + + +@pytest.fixture(scope="session") +def project_root(): + """返回專案根目錄路徑""" + return PROJECT_ROOT + + +@pytest.fixture +def temp_dir(): + """創建臨時目錄,測試後自動清理""" + temp_path = tempfile.mkdtemp() + yield temp_path + shutil.rmtree(temp_path, ignore_errors=True) + + +@pytest.fixture +def temp_file(): + """創建臨時文件,測試後自動清理""" + fd, path = tempfile.mkstemp() + os.close(fd) + yield path + if os.path.exists(path): + os.unlink(path) + + +@pytest.fixture +def sample_text(): + """返回範例文本""" + return """ + 機器學習是人工智能的一個分支,它使計算機能夠從數據中學習並做出決策或預測, + 
而無需明確編程。深度學習是機器學習的一個子領域,使用多層神經網絡來學習 + 數據的層次表示。 + """ + + +@pytest.fixture +def sample_documents(): + """返回範例文檔列表""" + return [ + { + "id": "doc1", + "content": "機器學習基礎介紹", + "metadata": {"category": "ml", "language": "zh"} + }, + { + "id": "doc2", + "content": "深度學習與神經網絡", + "metadata": {"category": "dl", "language": "zh"} + }, + { + "id": "doc3", + "content": "Introduction to AI", + "metadata": {"category": "ai", "language": "en"} + } + ] + + +@pytest.fixture +def mock_llm_response(): + """模擬 LLM 回應""" + return { + "content": "這是一個模擬的 LLM 回應。", + "model": "gpt-4", + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + } + } + + +# 標記定義 +def pytest_configure(config): + """添加自定義標記""" + config.addinivalue_line( + "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')" + ) + config.addinivalue_line( + "markers", "integration: marks tests as integration tests" + ) + config.addinivalue_line( + "markers", "requires_api: marks tests that require external API access" + ) + config.addinivalue_line( + "markers", "requires_gpu: marks tests that require GPU" + ) diff --git a/tests/test_cost_tracker.py b/tests/test_cost_tracker.py new file mode 100644 index 0000000..dcb70ba --- /dev/null +++ b/tests/test_cost_tracker.py @@ -0,0 +1,275 @@ +""" +成本追蹤器單元測試 + +測試 CostTracker 類的各項功能。 +""" + +import pytest +import json +import tempfile +import os + +# 添加路徑以導入模組 +import sys +PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) +UTILS_PATH = os.path.join( + PROJECT_ROOT, + '3.LLM應用工程', '3.Agent', 'examples', 'utils' +) +sys.path.insert(0, UTILS_PATH) + +from cost_tracker import CostTracker, get_global_tracker, reset_global_tracker + + +class TestCostTracker: + """CostTracker 測試類""" + + def setup_method(self): + """每個測試前重置""" + reset_global_tracker() + + def test_initialization(self): + """測試初始化""" + tracker = CostTracker() + assert tracker.total_cost == 0.0 + assert len(tracker.usage_log) == 0 + assert 
tracker.session_name.startswith("session_") + + def test_initialization_with_name(self): + """測試帶名稱的初始化""" + tracker = CostTracker(session_name="test_session") + assert tracker.session_name == "test_session" + + def test_log_usage_gpt4(self): + """測試記錄 GPT-4 使用量""" + tracker = CostTracker() + + result = tracker.log_usage( + model="gpt-4", + input_tokens=1000, + output_tokens=500 + ) + + # GPT-4 價格: input=$0.03/1K, output=$0.06/1K + expected_input_cost = (1000 / 1000) * 0.03 # $0.03 + expected_output_cost = (500 / 1000) * 0.06 # $0.03 + expected_total = expected_input_cost + expected_output_cost # $0.06 + + assert result["model"] == "gpt-4" + assert result["input_tokens"] == 1000 + assert result["output_tokens"] == 500 + assert result["total_tokens"] == 1500 + assert abs(result["input_cost"] - expected_input_cost) < 0.0001 + assert abs(result["output_cost"] - expected_output_cost) < 0.0001 + assert abs(result["total_cost"] - expected_total) < 0.0001 + assert abs(tracker.total_cost - expected_total) < 0.0001 + + def test_log_usage_gpt35_turbo(self): + """測試記錄 GPT-3.5-Turbo 使用量""" + tracker = CostTracker() + + result = tracker.log_usage( + model="gpt-3.5-turbo", + input_tokens=2000, + output_tokens=1000 + ) + + # GPT-3.5-Turbo 價格: input=$0.0005/1K, output=$0.0015/1K + expected_input_cost = (2000 / 1000) * 0.0005 # $0.001 + expected_output_cost = (1000 / 1000) * 0.0015 # $0.0015 + expected_total = expected_input_cost + expected_output_cost # $0.0025 + + assert abs(result["total_cost"] - expected_total) < 0.0001 + + def test_log_usage_claude(self): + """測試記錄 Claude 使用量""" + tracker = CostTracker() + + result = tracker.log_usage( + model="claude-3-sonnet", + input_tokens=1000, + output_tokens=1000 + ) + + # Claude-3-Sonnet 價格: input=$0.003/1K, output=$0.015/1K + expected_input_cost = (1000 / 1000) * 0.003 # $0.003 + expected_output_cost = (1000 / 1000) * 0.015 # $0.015 + expected_total = expected_input_cost + expected_output_cost # $0.018 + + assert 
abs(result["total_cost"] - expected_total) < 0.0001 + + def test_log_usage_unknown_model(self): + """測試記錄未知模型(應返回 0 成本)""" + tracker = CostTracker() + + result = tracker.log_usage( + model="unknown-model", + input_tokens=1000, + output_tokens=1000 + ) + + assert result["total_cost"] == 0.0 + assert tracker.total_cost == 0.0 + + def test_log_usage_with_metadata(self): + """測試帶元數據的記錄""" + tracker = CostTracker() + + metadata = {"task": "summarization", "user": "test_user"} + result = tracker.log_usage( + model="gpt-4", + input_tokens=100, + output_tokens=50, + metadata=metadata + ) + + assert result["metadata"] == metadata + + def test_multiple_logs_accumulate(self): + """測試多次記錄累積成本""" + tracker = CostTracker() + + tracker.log_usage("gpt-4", 1000, 500) + tracker.log_usage("gpt-4", 1000, 500) + + assert len(tracker.usage_log) == 2 + # 每次 $0.06,共 $0.12 + assert abs(tracker.total_cost - 0.12) < 0.0001 + + def test_get_summary_empty(self): + """測試空追蹤器的摘要""" + tracker = CostTracker() + summary = tracker.get_summary() + + assert summary["total_cost"] == 0.0 + assert summary["total_calls"] == 0 + assert summary["total_tokens"] == 0 + assert summary["by_model"] == {} + + def test_get_summary_with_data(self): + """測試有數據時的摘要""" + tracker = CostTracker(session_name="test") + + tracker.log_usage("gpt-4", 1000, 500) + tracker.log_usage("gpt-3.5-turbo", 2000, 1000) + + summary = tracker.get_summary() + + assert summary["session_name"] == "test" + assert summary["total_calls"] == 2 + assert summary["total_tokens"] == 4500 # 1500 + 3000 + assert "gpt-4" in summary["by_model"] + assert "gpt-3.5-turbo" in summary["by_model"] + assert summary["by_model"]["gpt-4"]["calls"] == 1 + assert summary["by_model"]["gpt-3.5-turbo"]["calls"] == 1 + + def test_group_by_model(self): + """測試按模型分組統計""" + tracker = CostTracker() + + tracker.log_usage("gpt-4", 1000, 500) + tracker.log_usage("gpt-4", 2000, 1000) + tracker.log_usage("gpt-3.5-turbo", 1000, 500) + + grouped = tracker._group_by_model() + + 
assert grouped["gpt-4"]["calls"] == 2 + assert grouped["gpt-4"]["input_tokens"] == 3000 + assert grouped["gpt-4"]["output_tokens"] == 1500 + assert grouped["gpt-3.5-turbo"]["calls"] == 1 + + def test_reset(self): + """測試重置功能""" + tracker = CostTracker() + + tracker.log_usage("gpt-4", 1000, 500) + assert tracker.total_cost > 0 + assert len(tracker.usage_log) > 0 + + tracker.reset() + + assert tracker.total_cost == 0.0 + assert len(tracker.usage_log) == 0 + + def test_save_and_load(self): + """測試保存和載入""" + tracker = CostTracker(session_name="save_test") + + tracker.log_usage("gpt-4", 1000, 500, {"task": "test"}) + tracker.log_usage("gpt-3.5-turbo", 2000, 1000) + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + filepath = f.name + + try: + tracker.save_to_file(filepath) + + # 驗證文件存在 + assert os.path.exists(filepath) + + # 載入並驗證 + loaded_tracker = CostTracker.load_from_file(filepath) + + assert loaded_tracker.session_name == "save_test" + assert abs(loaded_tracker.total_cost - tracker.total_cost) < 0.0001 + assert len(loaded_tracker.usage_log) == 2 + finally: + os.unlink(filepath) + + def test_global_tracker(self): + """測試全局追蹤器""" + reset_global_tracker() + + tracker1 = get_global_tracker() + tracker2 = get_global_tracker() + + assert tracker1 is tracker2 + + tracker1.log_usage("gpt-4", 100, 50) + + assert tracker2.total_cost > 0 + + def test_pricing_accuracy(self): + """測試定價準確性""" + tracker = CostTracker() + + # 測試所有已定義的模型 + for model, pricing in CostTracker.PRICING.items(): + result = tracker.log_usage(model, 1000, 1000) + + expected_cost = pricing["input"] + pricing["output"] + assert abs(result["total_cost"] - expected_cost) < 0.0001, \ + f"Model {model} pricing mismatch" + + tracker.reset() + + +class TestCostTrackerEdgeCases: + """邊界情況測試""" + + def test_zero_tokens(self): + """測試零 token 的情況""" + tracker = CostTracker() + result = tracker.log_usage("gpt-4", 0, 0) + + assert result["total_cost"] == 0.0 + assert 
result["total_tokens"] == 0 + + def test_large_token_count(self): + """測試大量 token""" + tracker = CostTracker() + result = tracker.log_usage("gpt-4", 100000, 50000) + + # GPT-4: 100K * $0.03 + 50K * $0.06 = $3 + $3 = $6 + expected_cost = (100000 / 1000) * 0.03 + (50000 / 1000) * 0.06 + assert abs(result["total_cost"] - expected_cost) < 0.01 + + def test_timestamp_in_log(self): + """測試日誌包含時間戳""" + tracker = CostTracker() + result = tracker.log_usage("gpt-4", 100, 50) + + assert "timestamp" in result + # 驗證 ISO 格式 + from datetime import datetime + datetime.fromisoformat(result["timestamp"]) diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..317e62b --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,374 @@ +""" +數據模型單元測試 + +測試 Pydantic 模型的驗證和功能。 +""" + +import pytest +from datetime import datetime +import sys +import os + +# 添加路徑 +PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) +MODELS_PATH = os.path.join( + PROJECT_ROOT, + '3.LLM應用工程', '9.實戰', '9.1-RAG-Agent端到端實戰', 'src' +) +sys.path.insert(0, MODELS_PATH) + +from models import ( + DocumentMetadata, + QueryRequest, + QueryResponse, + Source, + DocumentUploadRequest, + DocumentUploadResponse, + SystemStats, + HealthResponse, + ErrorResponse +) + + +class TestDocumentMetadata: + """DocumentMetadata 模型測試""" + + def test_basic_creation(self): + """測試基本創建""" + metadata = DocumentMetadata( + filename="test.pdf", + file_type="pdf", + file_size=1024 + ) + + assert metadata.filename == "test.pdf" + assert metadata.file_type == "pdf" + assert metadata.file_size == 1024 + assert metadata.chunk_count == 0 + assert metadata.language == "zh" + + def test_with_all_fields(self): + """測試所有欄位""" + upload_date = datetime.now() + metadata = DocumentMetadata( + filename="document.txt", + file_type="txt", + file_size=2048, + upload_date=upload_date, + chunk_count=10, + language="en" + ) + + assert metadata.upload_date == upload_date + assert metadata.chunk_count == 10 + assert 
metadata.language == "en" + + +class TestQueryRequest: + """QueryRequest 模型測試""" + + def test_minimal_creation(self): + """測試最小創建""" + request = QueryRequest(question="什麼是機器學習?") + + assert request.question == "什麼是機器學習?" + assert request.use_agent is True + assert request.top_k == 5 + assert request.session_id is None + assert request.filters is None + + def test_full_creation(self): + """測試完整創建""" + request = QueryRequest( + question="解釋深度學習", + use_agent=False, + top_k=10, + session_id="session_123", + filters={"category": "dl"} + ) + + assert request.use_agent is False + assert request.top_k == 10 + assert request.session_id == "session_123" + assert request.filters == {"category": "dl"} + + def test_question_min_length(self): + """測試問題最小長度驗證""" + # 空字串應該失敗 + with pytest.raises(ValueError): + QueryRequest(question="") + + def test_top_k_bounds(self): + """測試 top_k 邊界驗證""" + # 有效值 + request = QueryRequest(question="test", top_k=1) + assert request.top_k == 1 + + request = QueryRequest(question="test", top_k=20) + assert request.top_k == 20 + + # 無效值 + with pytest.raises(ValueError): + QueryRequest(question="test", top_k=0) + + with pytest.raises(ValueError): + QueryRequest(question="test", top_k=21) + + +class TestSource: + """Source 模型測試""" + + def test_basic_creation(self): + """測試基本創建""" + source = Source( + content="這是一段內容", + document="doc.pdf", + score=0.95 + ) + + assert source.content == "這是一段內容" + assert source.document == "doc.pdf" + assert source.score == 0.95 + assert source.page is None + + def test_with_page(self): + """測試帶頁碼的創建""" + source = Source( + content="內容", + document="doc.pdf", + page=5, + score=0.8 + ) + + assert source.page == 5 + + def test_score_bounds(self): + """測試分數邊界""" + # 有效值 + source = Source(content="test", document="doc", score=0.0) + assert source.score == 0.0 + + source = Source(content="test", document="doc", score=1.0) + assert source.score == 1.0 + + # 無效值 + with pytest.raises(ValueError): + Source(content="test", 
document="doc", score=-0.1) + + with pytest.raises(ValueError): + Source(content="test", document="doc", score=1.1) + + +class TestQueryResponse: + """QueryResponse 模型測試""" + + def test_minimal_creation(self): + """測試最小創建""" + response = QueryResponse(answer="這是回答") + + assert response.answer == "這是回答" + assert response.sources == [] + assert response.tools_used == [] + assert response.confidence == 0.0 + assert response.suggestions == [] + assert response.processing_time == 0.0 + + def test_full_creation(self): + """測試完整創建""" + sources = [ + Source(content="內容1", document="doc1.pdf", score=0.9), + Source(content="內容2", document="doc2.pdf", score=0.85) + ] + + response = QueryResponse( + answer="完整回答", + sources=sources, + tools_used=["rag_search", "calculator"], + confidence=0.92, + suggestions=["追問1", "追問2"], + processing_time=1.5 + ) + + assert len(response.sources) == 2 + assert len(response.tools_used) == 2 + assert response.confidence == 0.92 + assert len(response.suggestions) == 2 + assert response.processing_time == 1.5 + + def test_confidence_bounds(self): + """測試置信度邊界""" + response = QueryResponse(answer="test", confidence=0.0) + assert response.confidence == 0.0 + + response = QueryResponse(answer="test", confidence=1.0) + assert response.confidence == 1.0 + + with pytest.raises(ValueError): + QueryResponse(answer="test", confidence=-0.1) + + with pytest.raises(ValueError): + QueryResponse(answer="test", confidence=1.1) + + +class TestDocumentUploadRequest: + """DocumentUploadRequest 模型測試""" + + def test_basic_creation(self): + """測試基本創建""" + request = DocumentUploadRequest( + filename="test.pdf", + content="文檔內容" + ) + + assert request.filename == "test.pdf" + assert request.content == "文檔內容" + assert request.metadata is None + + def test_with_metadata(self): + """測試帶元數據的創建""" + request = DocumentUploadRequest( + filename="test.pdf", + content="內容", + metadata={"author": "test", "date": "2024-01-01"} + ) + + assert request.metadata["author"] == "test" + 
+ +class TestDocumentUploadResponse: + """DocumentUploadResponse 模型測試""" + + def test_success_response(self): + """測試成功響應""" + response = DocumentUploadResponse( + success=True, + document_id="doc_123", + message="上傳成功", + chunks_created=10 + ) + + assert response.success is True + assert response.document_id == "doc_123" + assert response.chunks_created == 10 + + def test_failure_response(self): + """測試失敗響應""" + response = DocumentUploadResponse( + success=False, + document_id="", + message="上傳失敗:文件格式不支援", + chunks_created=0 + ) + + assert response.success is False + + +class TestSystemStats: + """SystemStats 模型測試""" + + def test_default_values(self): + """測試預設值""" + stats = SystemStats() + + assert stats.total_documents == 0 + assert stats.total_chunks == 0 + assert stats.total_queries == 0 + assert stats.avg_response_time == 0.0 + assert stats.cache_hit_rate == 0.0 + assert stats.uptime_seconds == 0.0 + + def test_with_values(self): + """測試帶值創建""" + stats = SystemStats( + total_documents=100, + total_chunks=1000, + total_queries=500, + avg_response_time=0.5, + cache_hit_rate=0.75, + uptime_seconds=86400.0 + ) + + assert stats.total_documents == 100 + assert stats.cache_hit_rate == 0.75 + + +class TestHealthResponse: + """HealthResponse 模型測試""" + + def test_default_values(self): + """測試預設值""" + health = HealthResponse() + + assert health.status == "healthy" + assert health.version == "1.0.0" + assert isinstance(health.timestamp, datetime) + assert health.components == {} + + def test_with_components(self): + """測試帶組件狀態""" + health = HealthResponse( + status="degraded", + components={ + "database": "healthy", + "vector_store": "degraded", + "llm_api": "healthy" + } + ) + + assert health.status == "degraded" + assert len(health.components) == 3 + + +class TestErrorResponse: + """ErrorResponse 模型測試""" + + def test_basic_error(self): + """測試基本錯誤""" + error = ErrorResponse(error="發生錯誤") + + assert error.error == "發生錯誤" + assert error.detail is None + assert 
isinstance(error.timestamp, datetime) + + def test_with_detail(self): + """測試帶詳情的錯誤""" + error = ErrorResponse( + error="驗證失敗", + detail="欄位 'question' 不能為空" + ) + + assert error.detail == "欄位 'question' 不能為空" + + +class TestModelSerialization: + """模型序列化測試""" + + def test_query_request_json(self): + """測試 QueryRequest JSON 序列化""" + request = QueryRequest( + question="測試問題", + use_agent=True, + top_k=5 + ) + + json_data = request.model_dump() + + assert json_data["question"] == "測試問題" + assert json_data["use_agent"] is True + assert json_data["top_k"] == 5 + + def test_query_response_json(self): + """測試 QueryResponse JSON 序列化""" + response = QueryResponse( + answer="測試回答", + sources=[ + Source(content="內容", document="doc.pdf", score=0.9) + ], + confidence=0.85 + ) + + json_data = response.model_dump() + + assert json_data["answer"] == "測試回答" + assert len(json_data["sources"]) == 1 + assert json_data["sources"][0]["score"] == 0.9