diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..0f09511 --- /dev/null +++ b/.env.example @@ -0,0 +1,181 @@ +# ============================================================ +# Environment Variables Template +# ============================================================ +# +# 使用方式: +# 1. 複製此文件: cp .env.example .env +# 2. 填入您的 API 金鑰和配置 +# 3. .env 文件已被 .gitignore 忽略,不會被提交到 Git +# +# ============================================================ + +# ==================== LLM API 金鑰 ==================== + +# OpenAI API +OPENAI_API_KEY=sk-your-openai-api-key-here +OPENAI_ORG_ID= # 可選 +OPENAI_BASE_URL=https://api.openai.com/v1 # 可自定義 + +# Anthropic API (Claude) +ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key-here + +# Google Gemini API +GOOGLE_API_KEY=your-google-api-key-here + +# Cohere API +COHERE_API_KEY=your-cohere-api-key-here + +# Hugging Face +HUGGINGFACE_API_KEY=hf_your-huggingface-token-here + +# ==================== 向量數據庫 ==================== + +# Pinecone +PINECONE_API_KEY=your-pinecone-api-key-here +PINECONE_ENVIRONMENT=your-environment + +# Weaviate +WEAVIATE_URL=http://localhost:8080 +WEAVIATE_API_KEY=your-weaviate-api-key-here + +# Qdrant +QDRANT_URL=http://localhost:6333 +QDRANT_API_KEY=your-qdrant-api-key-here + +# ==================== 搜尋 API ==================== + +# SerpAPI (Google Search) +SERPAPI_API_KEY=your-serpapi-key-here + +# Tavily Search +TAVILY_API_KEY=your-tavily-key-here + +# ==================== 監控與追蹤 ==================== + +# LangSmith +LANGCHAIN_TRACING_V2=true +LANGCHAIN_API_KEY=your-langsmith-api-key-here +LANGCHAIN_PROJECT=my-ai-learning-project +LANGCHAIN_ENDPOINT=https://api.smith.langchain.com + +# Weights & Biases +WANDB_API_KEY=your-wandb-api-key-here +WANDB_PROJECT=my-ai-learning +WANDB_ENTITY=your-wandb-username + +# MLflow +MLFLOW_TRACKING_URI=http://localhost:5000 + +# LangFuse +LANGFUSE_PUBLIC_KEY=your-langfuse-public-key +LANGFUSE_SECRET_KEY=your-langfuse-secret-key +LANGFUSE_HOST=https://cloud.langfuse.com + +# ==================== 數據庫 ==================== + +# PostgreSQL (用於向量存儲) +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_DB=vectordb +POSTGRES_USER=postgres +POSTGRES_PASSWORD=your-password + +# MongoDB +MONGODB_URI=mongodb://localhost:27017/ai_learning + +# Redis +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD=your-redis-password + +# ==================== 應用配置 ==================== + +# 環境類型 +ENVIRONMENT=development # development, staging, production + +# 日誌級別 +LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL + +# API 服務配置 +API_HOST=0.0.0.0 +API_PORT=8000 +API_WORKERS=4 + +# ==================== 模型配置 ==================== + +# 默認模型 +DEFAULT_LLM_MODEL=gpt-4o-mini +DEFAULT_EMBEDDING_MODEL=text-embedding-3-small + +# 模型參數 +LLM_TEMPERATURE=0.7 +LLM_MAX_TOKENS=2000 +LLM_TOP_P=0.9 + +# ==================== RAG 配置 ==================== + +# 向量數據庫類型 +VECTOR_DB=chromadb # chromadb, faiss, pinecone, qdrant + +# 檢索配置 +RETRIEVAL_TOP_K=5 +CHUNK_SIZE=1000 +CHUNK_OVERLAP=200 + +# ==================== 安全配置 ==================== + +# JWT 密鑰 +JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production +JWT_ALGORITHM=HS256 +JWT_EXPIRATION_HOURS=24 + +# CORS 設定 +ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8501 + +# ==================== 其他服務 ==================== + +# Ollama (本地 LLM) +OLLAMA_HOST=http://localhost:11434 + +# Elasticsearch +ELASTICSEARCH_URL=http://localhost:9200 +ELASTICSEARCH_API_KEY=your-elasticsearch-api-key + +# ==================== 代理設定 ==================== + +# HTTP 代理 (如需要) 
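The header of this template tells users to copy it to `.env`; the keys are then consumed as environment variables at runtime. As a minimal sketch of that consumption side — assuming the `python-dotenv` package is available, which this diff does not itself install — with variable names mirroring the template above:

```python
# Minimal sketch: load .env and read a few of the keys defined in .env.example.
# Assumes python-dotenv is installed; variable names mirror the template above.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory, if present

openai_key = os.getenv("OPENAI_API_KEY")                      # required for OpenAI calls
default_model = os.getenv("DEFAULT_LLM_MODEL", "gpt-4o-mini")
temperature = float(os.getenv("LLM_TEMPERATURE", "0.7"))

if not openai_key:
    raise RuntimeError("OPENAI_API_KEY is not set; copy .env.example to .env and fill it in")

print(f"Using model={default_model}, temperature={temperature}")
```

Centralizing reads like this makes it easy to fail fast when a required key is missing instead of erroring deep inside an API call.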
+HTTP_PROXY= +HTTPS_PROXY= +NO_PROXY=localhost,127.0.0.1 + +# ==================== 開發工具 ==================== + +# 是否啟用調試模式 +DEBUG=true + +# 是否啟用熱重載 +RELOAD=true + +# ==================== 雲端服務 ==================== + +# AWS +AWS_ACCESS_KEY_ID=your-aws-access-key +AWS_SECRET_ACCESS_KEY=your-aws-secret-key +AWS_REGION=us-east-1 +AWS_S3_BUCKET=your-s3-bucket + +# Google Cloud +GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json +GCP_PROJECT_ID=your-gcp-project-id + +# Azure +AZURE_SUBSCRIPTION_ID=your-azure-subscription-id +AZURE_RESOURCE_GROUP=your-resource-group + +# ==================== 提醒 ==================== +# 🔒 請確保: +# 1. 不要將 .env 文件提交到版本控制 +# 2. 生產環境使用強密碼 +# 3. 定期輪換 API 金鑰 +# 4. 使用環境變量管理敏感信息 +# ============================================================ diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..bef0acd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,92 @@ +name: 🐛 Bug 報告 +description: 報告一個問題或錯誤 +title: "[Bug]: " +labels: ["bug", "needs-triage"] +body: + - type: markdown + attributes: + value: | + 感謝您花時間填寫此錯誤報告! + + - type: textarea + id: what-happened + attributes: + label: 發生了什麼? + description: 請詳細描述這個 bug + placeholder: 告訴我們您看到了什麼! + validations: + required: true + + - type: textarea + id: expected + attributes: + label: 預期行為 + description: 您期望發生什麼? + placeholder: 描述預期的正確行為 + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: 重現步驟 + description: 如何重現這個問題? + placeholder: | + 1. 進入 '...' + 2. 點擊 '....' + 3. 滾動到 '....' + 4. 看到錯誤 + validations: + required: true + + - type: dropdown + id: area + attributes: + label: 問題領域 + description: 這個 bug 屬於哪個領域? + options: + - 數學基礎 + - 機器學習 + - 深度學習 + - LLM 基礎 + - RAG 系統 + - Agent 系統 + - 代碼示例 + - 文檔 + - 其他 + validations: + required: true + + - type: textarea + id: environment + attributes: + label: 環境信息 + description: 請提供您的環境信息 + value: | + - OS: [如 Ubuntu 22.04] + - Python 版本: [如 3.11.5] + - 相關庫版本: [如 PyTorch 2.5.0] + validations: + required: false + + - type: textarea + id: logs + attributes: + label: 錯誤日誌 + description: 請粘貼相關的錯誤日誌或截圖 + render: shell + validations: + required: false + + - type: checkboxes + id: terms + attributes: + label: 檢查清單 + description: 提交前請確認 + options: + - label: 我已搜索過現有的 issues + required: true + - label: 我已閱讀過相關文檔 + required: true + - label: 我提供了足夠的信息來重現此問題 + required: true diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..6374532 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,247 @@ +# ============================================================ +# Benchmark Workflow - 性能基準測試 +# ============================================================ +# +# 觸發條件: +# - 每週定時運行 +# - 手動觸發 +# +# ============================================================ + +name: Benchmark + +on: + schedule: + # 每週一凌晨 2 點運行 + - cron: '0 2 * * 1' + workflow_dispatch: + inputs: + benchmark_type: + description: '基準測試類型' + required: true + default: 'all' + type: choice + options: + - all + - llm + - rag + - agent + +env: + PYTHON_VERSION: '3.11' + +jobs: + # ==================== LLM 基準測試 ==================== + benchmark-llm: + name: LLM 性能測試 + runs-on: ubuntu-latest + if: github.event.inputs.benchmark_type == 'llm' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule' + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python + uses: actions/setup-python@v5 + with: + 
python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install openai asyncio + + - name: 🏃 運行 LLM 基準測試 + run: | + python benchmarks/benchmark_llm.py + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: 📊 上傳測試結果 + uses: actions/upload-artifact@v4 + with: + name: llm-benchmark-results + path: benchmarks/results/* + retention-days: 90 + + # ==================== RAG 基準測試 ==================== + benchmark-rag: + name: RAG 性能測試 + runs-on: ubuntu-latest + if: github.event.inputs.benchmark_type == 'rag' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule' + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install chromadb sentence-transformers + + - name: 🏃 運行 RAG 基準測試 + run: | + python benchmarks/benchmark_rag.py + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: 📊 上傳測試結果 + uses: actions/upload-artifact@v4 + with: + name: rag-benchmark-results + path: benchmarks/results/* + retention-days: 90 + + # ==================== Agent 基準測試 ==================== + benchmark-agent: + name: Agent 性能測試 + runs-on: ubuntu-latest + if: github.event.inputs.benchmark_type == 'agent' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule' + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: 🏃 運行 Agent 基準測試 + run: | + python benchmarks/benchmark_agent.py + + - name: 📊 上傳測試結果 + uses: actions/upload-artifact@v4 + with: + name: agent-benchmark-results + path: benchmarks/results/* + retention-days: 90 + + # ==================== 結果分析和報告 ==================== + analyze-results: + name: 分析測試結果 + runs-on: ubuntu-latest + needs: [benchmark-llm, benchmark-rag, benchmark-agent] + if: always() + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 📥 下載所有測試結果 + uses: actions/download-artifact@v4 + with: + path: all-results/ + + - name: 🐍 設置 Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: 📦 安裝依賴 + run: | + pip install pandas matplotlib seaborn + + - name: 📊 生成分析報告 + run: | + python -c " + import json + import os + from pathlib import Path + + results_dir = Path('all-results') + all_results = {} + + # 收集所有結果 + for result_file in results_dir.rglob('*.json'): + with open(result_file) as f: + data = json.load(f) + all_results[result_file.stem] = data + + # 生成摘要 + print('=== 基準測試摘要 ===') + print(json.dumps(all_results, indent=2)) + + # 保存摘要 + with open('benchmark-summary.json', 'w') as f: + json.dump(all_results, f, indent=2) + " + + - name: 📊 上傳分析報告 + uses: actions/upload-artifact@v4 + with: + name: benchmark-analysis + path: benchmark-summary.json + retention-days: 90 + + - name: 💬 創建問題評論 + if: github.event_name == 'schedule' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const summary = fs.readFileSync('benchmark-summary.json', 'utf8'); + + github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: `基準測試報告 - ${new 
Date().toISOString().split('T')[0]}`, + body: `## 🔥 每週基準測試報告\n\n\`\`\`json\n${summary}\n\`\`\`\n\n查看詳細結果請查看工作流運行。`, + labels: ['benchmark', 'automated'] + }); + + # ==================== 性能趨勢跟蹤 ==================== + track-performance: + name: 跟蹤性能趨勢 + runs-on: ubuntu-latest + needs: analyze-results + if: github.event_name == 'schedule' + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + with: + ref: gh-pages + token: ${{ secrets.GITHUB_TOKEN }} + + - name: 📥 下載分析報告 + uses: actions/download-artifact@v4 + with: + name: benchmark-analysis + path: ./ + + - name: 📈 更新性能趨勢數據 + run: | + # 創建趨勢數據目錄 + mkdir -p performance-trends + + # 添加時間戳並保存 + DATE=$(date +%Y-%m-%d) + cp benchmark-summary.json performance-trends/${DATE}.json + + # 生成趨勢圖表(如果需要) + + - name: 📤 提交更新 + run: | + git config user.name "GitHub Actions" + git config user.email "actions@github.com" + git add performance-trends/ + git commit -m "Add performance benchmark results for $(date +%Y-%m-%d)" || echo "No changes" + git push + continue-on-error: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..134ee79 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,224 @@ +# ============================================================ +# CI Workflow - 持續集成 +# ============================================================ +# +# 觸發條件: +# - Push 到 main 分支 +# - Pull Request 到 main 分支 +# - 手動觸發 +# +# ============================================================ + +name: CI + +on: + push: + branches: [main, develop] + paths-ignore: + - '**.md' + - 'docs/**' + - '.gitignore' + pull_request: + branches: [main, develop] + workflow_dispatch: + +jobs: + # ==================== 代碼質量檢查 ==================== + lint: + name: 代碼質量檢查 + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9', '3.10', '3.11', '3.12'] + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + + - name: 🔍 Ruff - 代碼檢查 + run: | + ruff check . --output-format=github + continue-on-error: true + + - name: 🎨 Black - 代碼格式檢查 + run: | + black --check --diff . + continue-on-error: true + + - name: 📝 MyPy - 類型檢查 + run: | + mypy . --ignore-missing-imports --no-strict-optional + continue-on-error: true + + # ==================== 單元測試 ==================== + test: + name: 單元測試 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ['3.9', '3.11', '3.12'] + exclude: + # 減少測試矩陣,加快速度 + - os: windows-latest + python-version: '3.9' + - os: macos-latest + python-version: '3.9' + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + + - name: 🧪 運行測試 + run: | + pytest -v --cov=. 
--cov-report=xml --cov-report=term-missing + env: + PYTHONPATH: ${{ github.workspace }} + + - name: 📊 上傳覆蓋率報告 + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' + uses: codecov/codecov-action@v4 + with: + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: false + + # ==================== Notebook 測試 ==================== + notebook-test: + name: Notebook 測試 + runs-on: ubuntu-latest + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install nbconvert nbformat jupyter + + - name: 📓 驗證 Notebooks + run: | + # 查找所有 notebook 文件並驗證格式 + find . -name "*.ipynb" -not -path "*/\.*" -print0 | xargs -0 -I {} jupyter nbconvert --to notebook --execute --inplace {} || true + continue-on-error: true + + # ==================== 安全掃描 ==================== + security: + name: 安全掃描 + runs-on: ubuntu-latest + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: 📦 安裝依賴 + run: | + python -m pip install --upgrade pip + pip install bandit safety pip-audit + + - name: 🔐 Bandit - 安全漏洞掃描 + run: | + bandit -r . -f json -o bandit-report.json || true + continue-on-error: true + + - name: 🛡️ Safety - 依賴安全檢查 + run: | + safety check --json || true + continue-on-error: true + + - name: 🔍 Pip Audit - 依賴審計 + run: | + pip-audit --desc || true + continue-on-error: true + + # ==================== 構建檢查 ==================== + build: + name: 構建檢查 + runs-on: ubuntu-latest + needs: [lint, test] + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: 📦 安裝構建工具 + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: 🏗️ 構建包 + run: | + python -m build + + - name: ✅ 檢查包 + run: | + twine check dist/* + + # ==================== 狀態報告 ==================== + status: + name: CI 狀態報告 + runs-on: ubuntu-latest + needs: [lint, test, security, build] + if: always() + steps: + - name: 📊 檢查所有作業狀態 + run: | + echo "Lint: ${{ needs.lint.result }}" + echo "Test: ${{ needs.test.result }}" + echo "Security: ${{ needs.security.result }}" + echo "Build: ${{ needs.build.result }}" + + - name: ✅ 所有檢查通過 + if: | + needs.lint.result == 'success' && + needs.test.result == 'success' && + needs.build.result == 'success' + run: echo "✅ 所有 CI 檢查通過!" 
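The test job earlier in this workflow runs `pytest -v --cov=.` across the repository, so it assumes there are pytest-discoverable test files. A minimal sketch of such a module — the `tests/test_smoke.py` path and the helper it exercises are hypothetical, not part of this diff:

```python
# tests/test_smoke.py -- hypothetical example of a module the `pytest -v --cov=.` step would collect.
import math


def normalize(values: list[float]) -> list[float]:
    """Scale a list of numbers to the range [0, 1] (toy helper for illustration)."""
    lo, hi = min(values), max(values)
    span = hi - lo or 1.0  # avoid division by zero for constant input
    return [(v - lo) / span for v in values]


def test_normalize_bounds():
    result = normalize([3.0, 7.0, 11.0])
    assert math.isclose(min(result), 0.0)
    assert math.isclose(max(result), 1.0)


def test_normalize_constant_input():
    assert normalize([2.0, 2.0]) == [0.0, 0.0]
```

Any file matching pytest's default `test_*.py` pattern is collected by that step and contributes to the coverage report uploaded to Codecov.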
+ + - name: ❌ 部分檢查失敗 + if: | + needs.lint.result != 'success' || + needs.test.result != 'success' || + needs.build.result != 'success' + run: | + echo "❌ 部分 CI 檢查失敗,請查看詳細日誌" + exit 1 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..e7b341a --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,240 @@ +# ============================================================ +# Deploy Workflow - 自動部署 +# ============================================================ +# +# 觸發條件: +# - 推送 tag (v*.*.*) +# - 手動觸發 +# +# ============================================================ + +name: Deploy + +on: + push: + tags: + - 'v*.*.*' + workflow_dispatch: + inputs: + environment: + description: '部署環境' + required: true + default: 'staging' + type: choice + options: + - staging + - production + +env: + DOCKER_REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + # ==================== 構建 Docker 鏡像 ==================== + build-docker: + name: 構建 Docker 鏡像 + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + strategy: + matrix: + project: + - name: rag-chatbot + path: 5.AI研究前沿_2024-2025/實戰項目/RAG-ChatBot + - name: document-analyzer + path: 5.AI研究前沿_2024-2025/實戰項目/AI-Document-Analyzer + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🔐 登錄 Docker Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.DOCKER_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 📋 提取元數據 + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.project.name }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha,prefix={{branch}}- + + - name: 🏗️ 設置 Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: 🐳 構建並推送 Docker 鏡像 + uses: docker/build-push-action@v5 + with: + context: ${{ matrix.project.path }} + file: ${{ matrix.project.path }}/Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + platforms: linux/amd64,linux/arm64 + + - name: 📊 鏡像掃描 + uses: aquasecurity/trivy-action@master + with: + image-ref: ${{ steps.meta.outputs.tags }} + format: 'sarif' + output: 'trivy-results.sarif' + continue-on-error: true + + - name: 📤 上傳掃描結果 + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: 'trivy-results.sarif' + continue-on-error: true + + # ==================== 部署到 Staging ==================== + deploy-staging: + name: 部署到 Staging + runs-on: ubuntu-latest + needs: build-docker + if: github.event.inputs.environment == 'staging' || github.ref == 'refs/heads/develop' + environment: + name: staging + url: https://staging.example.com + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🚀 部署到 Staging 環境 + run: | + echo "部署到 Staging 環境..." + # 這裡添加實際的部署命令 + # 例如:kubectl apply -f k8s/staging/ + # 或:docker-compose -f docker-compose.staging.yml up -d + + - name: 🧪 健康檢查 + run: | + echo "執行健康檢查..." 
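The health-check steps in this workflow probe an `/api/health` endpoint via `curl` (kept commented out as placeholders). A minimal sketch of what such an endpoint could look like, assuming FastAPI and uvicorn as the serving stack — the actual projects' frameworks are not shown in this diff:

```python
# Hypothetical health-check endpoint matching the curl probe used in the deploy workflow.
# Assumes FastAPI and uvicorn are installed; the route path mirrors /api/health above.
from fastapi import FastAPI

app = FastAPI()


@app.get("/api/health")
def health() -> dict:
    # Keep this cheap: it should succeed even when downstream services are degraded.
    return {"status": "ok"}


if __name__ == "__main__":
    import uvicorn

    # API_HOST / API_PORT correspond to the variables defined in .env.example.
    uvicorn.run(app, host="0.0.0.0", port=8000)
```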
+ # curl -f https://staging.example.com/api/health || exit 1 + + - name: 📬 發送通知 + if: always() + run: | + echo "Staging 部署完成" + + # ==================== 部署到 Production ==================== + deploy-production: + name: 部署到 Production + runs-on: ubuntu-latest + needs: build-docker + if: github.event.inputs.environment == 'production' || startsWith(github.ref, 'refs/tags/v') + environment: + name: production + url: https://example.com + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🚀 部署到 Production 環境 + run: | + echo "部署到 Production 環境..." + # 這裡添加實際的部署命令 + + - name: 🧪 健康檢查 + run: | + echo "執行健康檢查..." + # curl -f https://example.com/api/health || exit 1 + + - name: 📊 部署驗證 + run: | + echo "驗證部署狀態..." + + - name: 📬 發送成功通知 + if: success() + run: | + echo "✅ Production 部署成功!" + + - name: 🔄 回滾(如果失敗) + if: failure() + run: | + echo "❌ 部署失敗,執行回滾..." + # 添加回滾邏輯 + + # ==================== 發布到 GitHub Releases ==================== + release: + name: 創建 GitHub Release + runs-on: ubuntu-latest + needs: deploy-production + if: startsWith(github.ref, 'refs/tags/v') + permissions: + contents: write + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: 📝 生成 Release Notes + id: release_notes + run: | + # 提取版本號 + VERSION=${GITHUB_REF#refs/tags/} + echo "VERSION=$VERSION" >> $GITHUB_OUTPUT + + # 生成 changelog + git log --pretty=format:"* %s (%h)" $(git describe --tags --abbrev=0 HEAD^)..HEAD > CHANGELOG.txt + + - name: 📦 打包項目文件 + run: | + mkdir -p release-assets + # 打包實戰項目 + tar -czf release-assets/rag-chatbot.tar.gz 5.AI研究前沿_2024-2025/實戰項目/RAG-ChatBot + tar -czf release-assets/document-analyzer.tar.gz 5.AI研究前沿_2024-2025/實戰項目/AI-Document-Analyzer + + - name: 🎉 創建 Release + uses: softprops/action-gh-release@v1 + with: + name: Release ${{ steps.release_notes.outputs.VERSION }} + body_path: CHANGELOG.txt + files: | + release-assets/* + draft: false + prerelease: false + + # ==================== 更新文檔 ==================== + update-docs: + name: 更新文檔網站 + runs-on: ubuntu-latest + needs: release + if: startsWith(github.ref, 'refs/tags/v') + + steps: + - name: 📥 Checkout 代碼 + uses: actions/checkout@v4 + + - name: 🐍 設置 Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: 📦 安裝依賴 + run: | + pip install mkdocs mkdocs-material + + - name: 🏗️ 構建文檔 + run: | + # mkdocs build + + - name: 🚀 部署到 GitHub Pages + run: | + # mkdocs gh-deploy --force + echo "文檔已更新" diff --git a/.gitignore b/.gitignore index a6278d8..2810ae2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,20 +16,38 @@ parts/ sdist/ var/ wheels/ +pip-wheel-metadata/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # Jupyter Notebook .ipynb_checkpoints +*/.ipynb_checkpoints/* +*.nbconvert.ipynb + +# IPython +profile_default/ +ipython_config.py # pyenv .python-version +# pipenv +Pipfile.lock + +# poetry +poetry.lock + # Virtual environments venv/ ENV/ env/ +.venv +env.bak/ +venv.bak/ # IDEs .vscode/ @@ -37,27 +55,170 @@ env/ *.swp *.swo *~ +.project +.pydevproject +.settings/ # OS .DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db Thumbs.db +*~ -# Model files (optional - 可能想要追蹤某些模型) +# Model files (大型模型文件) *.h5 *.keras *.weights.h5 +*.pt +*.pth +*.ckpt +*.safetensors +*.bin +*.onnx +*.pb +*.tflite +*.mlmodel +*.joblib +*.pkl +*.pickle # Training outputs logs/ -*.png -*.jpg -*.jpeg +runs/ +checkpoints/ +wandb/ +mlruns/ +outputs/ +lightning_logs/ -# Data files (optional - 根據需求調整) -# *.csv -# *.json -# *.txt +# Image outputs (訓練生成的圖片) +# 注意:保留文檔中的示例圖片 +# *.png +# *.jpg +# *.jpeg + +# Large data files +*.hdf5 +*.h5 +*.tfrecord +*.tfrecords + +# Cache directories +.cache/ +.pytest_cache/ +.mypy_cache/ +.dmypy.json +dmypy.json +.ruff_cache/ + +# Coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ + +# Environment variables +.env +.env.local +.env.*.local +*.env + +# Database +*.db +*.sqlite +*.sqlite3 # Log files *.log security_audit.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Temporary files +*.tmp +*.temp +*.swp +*.swo +.~* + +# Docker +.dockerignore +docker-compose.override.yml + +# Data files (根據需求調整) +# 大型數據集不要提交 +# *.csv +# *.json +# *.jsonl +# *.txt +# *.parquet + +# Hugging Face cache +.cache/huggingface/ +transformers_cache/ + +# Vector databases +*.faiss +*.index +chroma_db/ +chromadb/ +vectordb/ + +# Weights & Biases +wandb/ + +# MLflow +mlruns/ +mlartifacts/ + +# DVC +.dvc +.dvc/cache + +# Airflow +airflow.cfg +airflow.db +airflow-webserver.pid +logs/ + +# Streamlit +.streamlit/secrets.toml + +# Gradio +flagged/ + +# Node.js (如果有前端組件) +node_modules/ +npm-debug.log +yarn-error.log + +# macOS +.AppleDouble +.LSOverride + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini +$RECYCLE.BIN/ + +# Linux +*~ +.fuse_hidden* +.directory +.Trash-* + +# Benchmarking results +benchmark_results/ +performance_logs/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ee40417 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,111 @@ +# ============================================================ +# Pre-commit Hooks 配置 +# ============================================================ +# +# 安裝: pre-commit install +# 運行: pre-commit run --all-files +# 更新: pre-commit autoupdate +# +# ============================================================ + +repos: + # ==================== 通用檢查 ==================== + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + # 文件檢查 + - id: check-added-large-files + args: ['--maxkb=10000'] # 10MB + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: check-toml + - id: check-json + - id: end-of-file-fixer + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + + # Python 檢查 + - id: check-ast + - id: check-docstring-first + - id: debug-statements + - id: name-tests-test + args: ['--pytest-test-first'] + + # 憑證檢查 + - id: detect-private-key + + # ==================== Python 代碼格式化 ==================== + - repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black + language_version: python3.11 + args: ['--line-length=100'] + + # ==================== Import 排序 ==================== + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: ['--profile', 'black', '--line-length', '100'] + + # ==================== 代碼檢查 ==================== + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.15 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + + # ==================== 類型檢查 
==================== + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.8.0 + # hooks: + # - id: mypy + # additional_dependencies: [types-all] + # args: [--ignore-missing-imports, --no-strict-optional] + + # ==================== Notebook 清理 ==================== + - repo: https://github.com/kynan/nbstripout + rev: 0.7.1 + hooks: + - id: nbstripout + args: ['--extra-keys', 'metadata.kernelspec metadata.language_info'] + + # ==================== Markdown 檢查 ==================== + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.17 + hooks: + - id: mdformat + additional_dependencies: + - mdformat-gfm + - mdformat-black + args: ['--wrap', '100'] + + # ==================== YAML 格式化 ==================== + - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.12.0 + hooks: + - id: pretty-format-yaml + args: [--autofix, --indent, '2'] + + # ==================== 安全檢查 ==================== + - repo: https://github.com/PyCQA/bandit + rev: 1.7.6 + hooks: + - id: bandit + args: ['-c', 'pyproject.toml'] + additional_dependencies: ['bandit[toml]'] + +# ==================== CI 配置 ==================== +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit hooks + + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/LEARNING_GUIDE.md" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/LEARNING_GUIDE.md" deleted file mode 100644 index 0c0f4f3..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/LEARNING_GUIDE.md" +++ /dev/null @@ -1,559 +0,0 @@ -# 深度學習預備知識 - 學習路徑指南 - -## 🎯 學習目標設定 - -在開始學習之前,明確你的目標: - -### 目標 A:快速入門(1-2週) -適合想快速了解深度學習基礎的學習者。 - -**核心目標**: -- 理解張量操作基本概念 -- 能夠運行簡單的深度學習代碼 -- 了解數據預處理流程 - -**推薦路徑**: 入門路徑 - -### 目標 B:紮實基礎(3-4週) -適合想深入理解數學原理的學習者。 - -**核心目標**: -- 掌握線性代數、微積分基礎 -- 理解自動微分機制 -- 能夠從零實現簡單網絡 - -**推薦路徑**: 進階路徑 - -### 目標 C:完全精通(4-6週) -適合追求深度理解的學習者。 - -**核心目標**: -- 深入理解所有數學細節 -- 能夠推導反向傳播公式 -- 實現自定義優化器 - -**推薦路徑**: 精通路徑 - ---- - -## 📅 學習計劃模板 - -### 第一週:基礎操作 - -#### Day 1-2:環境準備與張量操作 -- [ ] 安裝 PyTorch 
和相關依賴 -- [ ] 完成 `0_index.ipynb` -- [ ] 完成 `1_ndarray.ipynb` -- [ ] 嘗試生成 5 道 ndarray 練習題 - -**學習目標**: -- 熟悉 PyTorch 張量創建 -- 理解張量形狀變換 -- 掌握廣播機制 - -**檢查點**: -```python -# 能夠獨立完成以下任務 -x = torch.randn(3, 4) -y = torch.ones(4) -z = x + y # 理解廣播 -result = torch.mm(x, y.reshape(4, 1)) # 矩陣乘法 -``` - -**時間分配**: -- 視頻/閱讀: 2 小時 -- 動手實踐: 3 小時 -- 練習題: 1 小時 - -#### Day 3-4:數據處理 -- [ ] 完成 `2_pandas.ipynb` -- [ ] 嘗試用 Pandas 處理自己的數據集 -- [ ] 開始項目 1(數據分析基礎) - -**學習目標**: -- 掌握 Pandas 數據讀取 -- 學會數據清洗技巧 -- 理解數據轉換方法 - -**檢查點**: -```python -# 能夠完成數據預處理流程 -df = pd.read_csv('data.csv') -df_clean = df.dropna() -df_normalized = (df_clean - df_clean.mean()) / df_clean.std() -``` - -**時間分配**: -- 學習 Pandas: 2 小時 -- 項目 1 前半部分: 3 小時 -- 總結筆記: 1 小時 - -#### Day 5-7:線性代數 -- [ ] 完成 `3_linear-algebra.ipynb` -- [ ] 使用可視化工具理解線性變換 -- [ ] 完成所有線性代數練習題 - -**學習目標**: -- 理解向量和矩陣運算 -- 掌握點積和矩陣乘法 -- 理解範數的概念 - -**檢查點**: -```python -# 能夠計算和理解 -A = torch.randn(3, 4) -B = torch.randn(4, 2) -C = torch.mm(A, B) # 形狀推導 -norm = torch.norm(A) # L2 範數 -``` - -**時間分配**: -- 理論學習: 3 小時 -- 可視化實驗: 2 小時 -- 練習題: 2 小時 - ---- - -### 第二週:數學基礎 - -#### Day 8-10:微積分 -- [ ] 完成 `4_calculus.ipynb` -- [ ] 可視化梯度下降過程 -- [ ] 理解導數的幾何意義 - -**學習目標**: -- 掌握導數基本概念 -- 理解梯度的含義 -- 學會計算偏導數 - -**檢查點**: -```python -# 能夠手動計算簡單函數的導數 -def f(x): - return x**2 + 2*x + 1 - -# 理解 f'(x) = 2x + 2 -``` - -**時間分配**: -- 微積分復習: 3 小時 -- 可視化工具: 2 小時 -- 練習題: 2 小時 - -#### Day 11-12:自動微分 -- [ ] 完成 `5_autograd.ipynb` -- [ ] 理解計算圖概念 -- [ ] 實驗 PyTorch 自動微分 - -**學習目標**: -- 理解自動微分原理 -- 掌握 PyTorch autograd 使用 -- 理解反向傳播機制 - -**檢查點**: -```python -# 能夠使用自動微分 -x = torch.tensor([2.0], requires_grad=True) -y = x**3 + 2*x**2 -y.backward() -print(x.grad) # 理解梯度含義 -``` - -**時間分配**: -- 理論學習: 2 小時 -- 實驗代碼: 3 小時 -- 練習題: 1 小時 - -#### Day 13-14:概率統計 -- [ ] 完成 `6_probability.ipynb` -- [ ] 可視化不同概率分佈 -- [ ] 完成項目 1 - -**學習目標**: -- 理解常見概率分佈 -- 掌握統計量計算 -- 應用統計方法分析數據 - -**檢查點**: -```python -# 能夠理解和使用概率分佈 -samples = torch.randn(1000) # 標準正態分佈 -mean = samples.mean() # 應接近 0 -std = samples.std() # 應接近 1 -``` - -**時間分配**: -- 概率論復習: 2 小時 -- 完成項目 1: 4 小時 - ---- - -### 第三週:綜合應用 - -#### Day 15-17:項目 2(前半部分) -- [ ] 理解神經網絡結構 -- [ ] 實現前向傳播 -- [ ] 實現激活函數 - -**學習目標**: -- 理解神經網絡的數學結構 -- 掌握矩陣運算在網絡中的應用 -- 理解激活函數的作用 - -**檢查點**: -- 能夠手動實現簡單的前向傳播 -- 理解每一步的矩陣形狀變化 - -**時間分配**: -- 理論學習: 3 小時 -- 代碼實現: 5 小時 -- 調試測試: 2 小時 - -#### Day 18-20:項目 2(後半部分) -- [ ] 實現反向傳播 -- [ ] 實現梯度下降 -- [ ] 訓練並可視化結果 - -**學習目標**: -- 深入理解反向傳播算法 -- 掌握梯度下降優化 -- 學會可視化訓練過程 - -**檢查點**: -- 能夠推導簡單網絡的梯度公式 -- 成功訓練出收斂的模型 - -**時間分配**: -- 反向傳播實現: 4 小時 -- 訓練和調試: 4 小時 -- 可視化分析: 2 小時 - -#### Day 21:總結與複習 -- [ ] 完成所有檢查清單項目 -- [ ] 總結學習筆記 -- [ ] 規劃下一步學習 - -**任務**: -- 回顧所有重點概念 -- 整理錯題和難點 -- 製作知識卡片 - -**時間分配**: -- 知識複習: 3 小時 -- 筆記整理: 2 小時 -- 規劃未來: 1 小時 - ---- - -## ✅ 學習檢查清單 - -### 基礎知識檢查 - -#### 張量操作 -- [ ] 能夠創建各種形狀的張量 -- [ ] 理解並運用廣播機制 -- [ ] 掌握張量索引和切片 -- [ ] 能夠進行張量形狀變換 -- [ ] 理解內存管理(原地操作) - -**自測題**: -```python -# 1. 創建形狀為 (3, 4, 5) 的隨機張量 -# 2. 將其重塑為 (12, 5) -# 3. 計算每列的均值 -# 4. 使用廣播將每個元素減去其列均值 -``` - -#### 線性代數 -- [ ] 理解向量的點積 -- [ ] 掌握矩陣乘法 -- [ ] 理解矩陣轉置 -- [ ] 計算向量範數 -- [ ] 理解線性變換 - -**自測題**: -```python -# 給定 A (3x4) 和 B (4x2) -# 1. 計算 A @ B 的形狀 -# 2. 計算 A 的 Frobenius 範數 -# 3. 驗證 (A.T).T = A -``` - -#### 微積分 -- [ ] 理解導數的定義 -- [ ] 能夠計算簡單函數的導數 -- [ ] 理解偏導數 -- [ ] 掌握鏈式法則 -- [ ] 理解梯度的幾何意義 - -**自測題**: -```python -# 1. 計算 f(x) = x^3 + 2x^2 + 1 在 x=2 的導數 -# 2. 計算 f(x,y) = x^2 + y^2 的梯度 -# 3. 使用鏈式法則計算 h(x) = f(g(x)) 的導數 -``` - -#### 自動微分 -- [ ] 理解計算圖概念 -- [ ] 掌握 requires_grad 的使用 -- [ ] 理解 backward() 的工作原理 -- [ ] 能夠處理非標量輸出 -- [ ] 理解梯度累積 - -**自測題**: -```python -# 1. 使用自動微分計算 y = x^2 + sin(x) 的導數 -# 2. 
實現一個簡單的梯度下降優化 -# 3. 解釋為什麼需要 zero_grad() -``` - -#### 概率統計 -- [ ] 理解常見概率分佈 -- [ ] 能夠計算期望和方差 -- [ ] 理解貝葉斯定理 -- [ ] 掌握採樣方法 -- [ ] 理解最大似然估計 - -**自測題**: -```python -# 1. 從正態分佈採樣並驗證均值和方差 -# 2. 計算兩個變量的協方差 -# 3. 解釋貝葉斯定理在機器學習中的應用 -``` - ---- - -### 實踐能力檢查 - -#### 數據處理 -- [ ] 能夠加載和清洗數據 -- [ ] 掌握數據可視化技巧 -- [ ] 實現數據標準化 -- [ ] 進行特徵工程 -- [ ] 檢測異常值 - -**項目檢驗**: 完成項目 1 的所有任務 - -#### 神經網絡實現 -- [ ] 能夠實現前向傳播 -- [ ] 能夠實現反向傳播 -- [ ] 理解梯度下降原理 -- [ ] 能夠訓練簡單模型 -- [ ] 會可視化訓練過程 - -**項目檢驗**: 完成項目 2 的所有任務 - ---- - -## 📊 學習進度追蹤 - -### 使用進度追蹤器 - -```bash -# 開始學習某個主題 -python ai_tools/progress_tracker.py --update --topic ndarray --time 2 - -# 完成練習後更新分數 -python ai_tools/progress_tracker.py --update --topic ndarray --score 85 --exercises 5 - -# 查看進度報告 -python ai_tools/progress_tracker.py --report - -# 獲取學習建議 -python ai_tools/progress_tracker.py --suggest -``` - -### 每週回顧清單 - -#### 週末回顧(每週日) -- [ ] 生成學習進度報告 -- [ ] 總結本週學到的知識 -- [ ] 列出薄弱環節 -- [ ] 規劃下週重點 -- [ ] 更新學習筆記 - -#### 月度總結(每月最後一天) -- [ ] 回顧所有學習內容 -- [ ] 完成綜合測試 -- [ ] 整理知識體系 -- [ ] 分享學習心得 -- [ ] 設定新的目標 - ---- - -## 💡 學習策略建議 - -### 1. 主動學習策略 - -#### 費曼技巧 -1. **選擇概念**: 選一個你想理解的概念 -2. **教授他人**: 用簡單的語言解釋給別人聽 -3. **識別差距**: 找出你解釋不清楚的地方 -4. **複習簡化**: 回到資料,重新學習,然後簡化解釋 - -**實踐**: -- 每學完一個 notebook,寫一篇博客 -- 向朋友或同學解釋概念 -- 製作教學視頻或幻燈片 - -#### 刻意練習 -1. **設定目標**: 明確你要練習什麼 -2. **集中注意**: 全神貫注地練習 -3. **獲得反饋**: 及時檢查結果 -4. **持續改進**: 根據反饋調整 - -**實踐**: -- 每天至少完成 3 道練習題 -- 使用練習生成器生成新題 -- 對比自己的解法和標準答案 - -### 2. 間隔重複策略 - -#### Anki 卡片製作 -為每個重要概念製作記憶卡片: - -**正面**: -``` -什麼是梯度下降? -``` - -**背面**: -``` -梯度下降是一種優化算法,通過迭代更新參數來最小化損失函數。 -更新規則:θ = θ - η∇L(θ) -其中 η 是學習率,∇L 是損失對參數的梯度。 -``` - -**複習計劃**: -- 第 1 天:學習 -- 第 2 天:複習 -- 第 4 天:複習 -- 第 7 天:複習 -- 第 14 天:複習 -- 第 30 天:複習 - -### 3. 可視化學習策略 - -#### 繪製概念圖 -使用思維導圖連接不同概念: - -``` -深度學習預備知識 -├── 數學基礎 -│ ├── 線性代數 -│ │ ├── 向量 -│ │ ├── 矩陣 -│ │ └── 範數 -│ ├── 微積分 -│ │ ├── 導數 -│ │ ├── 梯度 -│ │ └── 鏈式法則 -│ └── 概率統計 -│ ├── 概率分佈 -│ ├── 期望方差 -│ └── 貝葉斯定理 -└── 編程實踐 - ├── PyTorch 基礎 - ├── 數據處理 - └── 模型訓練 -``` - -#### 使用可視化工具 -```bash -# 可視化梯度下降 -python ai_tools/visualizer.py --concept gradient_descent - -# 可視化激活函數 -python ai_tools/visualizer.py --concept activation_functions - -# 可視化所有概念 -python ai_tools/visualizer.py --concept all -``` - ---- - -## 🎯 學習里程碑 - -### Level 1: 入門 (1-2週) -**達成標準**: -- 完成所有基礎 notebooks -- 能夠運行簡單的深度學習代碼 -- 理解基本概念 - -**獎勵自己**: 休息一天,做點別的事情 - -### Level 2: 進階 (3-4週) -**達成標準**: -- 完成項目 1 和 2 -- 能夠從零實現簡單網絡 -- 理解數學推導 - -**獎勵自己**: 分享學習成果,獲得反饋 - -### Level 3: 精通 (4-6週) -**達成標準**: -- 完成所有進階挑戰 -- 能夠教授他人 -- 開始自己的項目 - -**獎勵自己**: 參加一個深度學習競賽或黑客松 - ---- - -## 📚 額外資源推薦 - -### 在線課程 -1. **Fast.ai**: Practical Deep Learning for Coders -2. **Coursera**: Deep Learning Specialization (Andrew Ng) -3. **CS231n**: Convolutional Neural Networks for Visual Recognition - -### 書籍 -1. **入門**: 《Python 深度學習》- François Chollet -2. **進階**: 《深度學習》- Ian Goodfellow -3. **實戰**: 《動手學深度學習》- 李沐 - -### 社群 -1. **Reddit**: r/MachineLearning, r/learnmachinelearning -2. **Discord**: PyTorch Discord, Fast.ai Discord -3. **論壇**: Discuss.PyTorch.org - -### YouTube 頻道 -1. **3Blue1Brown**: 數學可視化 -2. **StatQuest**: 統計學習 -3. **Sentdex**: Python 和機器學習 - ---- - -## 🤔 常見問題 - -### Q: 我數學基礎很差,能學會嗎? -A: 可以!本課程設計為循序漸進,會從基礎講起。關鍵是: -- 不要畏難,數學只是工具 -- 多動手實踐,通過代碼理解數學 -- 利用可視化工具幫助理解 - -### Q: 每天應該學習多久? -A: 建議: -- **最少**: 1 小時(保持連續性) -- **推薦**: 2-3 小時(高效學習) -- **最多**: 4-5 小時(避免疲勞) - -重要的是持續性,而不是一次性長時間學習。 - -### Q: 學習卡住了怎麼辦? -A: 嘗試以下方法: -1. 換個角度理解(看視頻、讀博客) -2. 使用可視化工具 -3. 向社群求助 -4. 暫時跳過,繼續學習後面的內容 -5. 第二天重新嘗試(睡眠有助於理解) - -### Q: 需要全部掌握才能繼續嗎? 
-A: 不需要!建議: -- 理解 70% 即可繼續 -- 在後續實踐中深化理解 -- 隨時可以回來複習 - ---- - -**記住:學習是一個過程,不是目的地。享受這個過程!🚀** diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/01_data_analysis_basics.ipynb" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/01_data_analysis_basics.ipynb" deleted file mode 100644 index bda23a1..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/01_data_analysis_basics.ipynb" +++ /dev/null @@ -1,469 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 實踐項目 1:數據分析基礎\n", - "\n", - "## 🎯 項目目標\n", - "\n", - "通過分析真實數據集,綜合運用張量操作、Pandas 數據處理和統計分析技能。\n", - "\n", - "## 📚 涵蓋知識點\n", - "\n", - "- PyTorch 張量基礎操作\n", - "- Pandas 數據清洗與轉換\n", - "- 統計分析(均值、方差、分佈)\n", - "- 數據可視化\n", - "- 數據標準化與歸一化\n", - "\n", - "## 🔧 環境準備" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from sklearn.datasets import load_wine\n", - "\n", - "# 設置隨機種子\n", - "torch.manual_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "# 設置繪圖樣式\n", - "plt.style.use('seaborn-v0_8-whitegrid')\n", - "sns.set_palette(\"husl\")\n", - "\n", - "print(\"✅ 環境準備完成!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📊 任務 1:數據加載與探索\n", - "\n", - "我們將使用經典的 Wine 數據集(葡萄酒數據集)。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 加載數據集\n", - "wine = load_wine()\n", - "df = pd.DataFrame(wine.data, columns=wine.feature_names)\n", - "df['target'] = wine.target\n", - "\n", - "print(\"數據集基本信息:\")\n", - "print(f\"樣本數量: {len(df)}\")\n", - "print(f\"特徵數量: {len(wine.feature_names)}\")\n", - "print(f\"類別數量: {len(wine.target_names)}\")\n", - "print(f\"\\n類別名稱: {wine.target_names}\")\n", - "\n", - "# 顯示前幾行\n", - "print(\"\\n數據前5行:\")\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 🎯 練習 1.1:數據基本統計\n", - "\n", - "**任務**:計算每個特徵的基本統計量(均值、標準差、最小值、最大值)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: 使用 Pandas 計算統計量\n", - "stats = df.describe()\n", - "print(\"基本統計量:\")\n", - "stats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: 使用 PyTorch 張量計算同樣的統計量\n", - "data_tensor = torch.tensor(df.drop('target', axis=1).values, dtype=torch.float32)\n", - "\n", - 
"mean = data_tensor.mean(dim=0)\n", - "std = data_tensor.std(dim=0)\n", - "min_val = data_tensor.min(dim=0)[0]\n", - "max_val = data_tensor.max(dim=0)[0]\n", - "\n", - "print(\"\\n使用 PyTorch 計算的統計量:\")\n", - "print(f\"均值: {mean[:3]}...\") # 只顯示前3個\n", - "print(f\"標準差: {std[:3]}...\")\n", - "print(f\"最小值: {min_val[:3]}...\")\n", - "print(f\"最大值: {max_val[:3]}...\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📊 任務 2:數據可視化\n", - "\n", - "可視化是理解數據的重要手段。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 特徵分佈可視化\n", - "fig, axes = plt.subplots(3, 5, figsize=(18, 12))\n", - "axes = axes.flatten()\n", - "\n", - "for idx, col in enumerate(wine.feature_names):\n", - " axes[idx].hist(df[col], bins=30, alpha=0.7, color='skyblue', edgecolor='black')\n", - " axes[idx].set_title(col, fontsize=10, fontweight='bold')\n", - " axes[idx].set_xlabel('Value')\n", - " axes[idx].set_ylabel('Frequency')\n", - " axes[idx].grid(True, alpha=0.3)\n", - "\n", - "# 移除多餘的子圖\n", - "for idx in range(len(wine.feature_names), len(axes)):\n", - " fig.delaxes(axes[idx])\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig('wine_distributions.png', dpi=150, bbox_inches='tight')\n", - "plt.show()\n", - "\n", - "print(\"✅ 特徵分佈圖已保存\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 🎯 練習 2.1:相關性分析\n", - "\n", - "**任務**:計算並可視化特徵之間的相關性" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: 計算相關係數矩陣\n", - "correlation_matrix = df.drop('target', axis=1).corr()\n", - "\n", - "# 可視化相關性矩陣\n", - "plt.figure(figsize=(12, 10))\n", - "sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm',\n", - " square=True, linewidths=0.5, cbar_kws={\"shrink\": 0.8})\n", - "plt.title('Feature Correlation Matrix', fontsize=14, fontweight='bold', pad=20)\n", - "plt.tight_layout()\n", - "plt.savefig('correlation_matrix.png', dpi=150, bbox_inches='tight')\n", - "plt.show()\n", - "\n", - "print(\"✅ 相關性矩陣圖已保存\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🔧 任務 3:數據預處理\n", - "\n", - "### 3.1 數據標準化(Z-score Normalization)\n", - "\n", - "公式:$z = \\frac{x - \\mu}{\\sigma}$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def standardize(data):\n", - " \"\"\"\n", - " 標準化數據(Z-score)\n", - " \n", - " 參數:\n", - " data: torch.Tensor, 形狀為 (n_samples, n_features)\n", - " \n", - " 返回:\n", - " 標準化後的數據\n", - " \"\"\"\n", - " # TODO: 實現標準化\n", - " mean = data.mean(dim=0, keepdim=True)\n", - " std = data.std(dim=0, keepdim=True)\n", - " return (data - mean) / (std + 1e-8) # 加小數避免除以零\n", - "\n", - "# 測試標準化\n", - "data_tensor = torch.tensor(df.drop('target', axis=1).values, dtype=torch.float32)\n", - "standardized_data = standardize(data_tensor)\n", - "\n", - "print(\"標準化前:\")\n", - "print(f\"均值: {data_tensor.mean(dim=0)[:3]}\")\n", - "print(f\"標準差: {data_tensor.std(dim=0)[:3]}\")\n", - "\n", - "print(\"\\n標準化後:\")\n", - "print(f\"均值: {standardized_data.mean(dim=0)[:3]}\")\n", - "print(f\"標準差: {standardized_data.std(dim=0)[:3]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 🎯 練習 3.1:實現 Min-Max 歸一化\n", - "\n", - "**任務**:實現 Min-Max 歸一化,將數據縮放到 [0, 1] 範圍\n", - "\n", - "公式:$x_{\\text{norm}} = \\frac{x - x_{\\min}}{x_{\\max} - x_{\\min}}$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], 
- "source": [ - "def min_max_normalize(data):\n", - " \"\"\"\n", - " Min-Max 歸一化\n", - " \n", - " 參數:\n", - " data: torch.Tensor\n", - " \n", - " 返回:\n", - " 歸一化後的數據(範圍 [0, 1])\n", - " \"\"\"\n", - " # TODO: 實現 Min-Max 歸一化\n", - " min_val = data.min(dim=0, keepdim=True)[0]\n", - " max_val = data.max(dim=0, keepdim=True)[0]\n", - " return (data - min_val) / (max_val - min_val + 1e-8)\n", - "\n", - "# 測試\n", - "normalized_data = min_max_normalize(data_tensor)\n", - "\n", - "print(\"歸一化後的數據範圍:\")\n", - "print(f\"最小值: {normalized_data.min(dim=0)[0][:3]}\")\n", - "print(f\"最大值: {normalized_data.max(dim=0)[0][:3]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📊 任務 4:統計分析\n", - "\n", - "### 4.1 按類別分組分析" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 按類別分組統計\n", - "grouped_stats = df.groupby('target').agg(['mean', 'std'])\n", - "print(\"按類別的統計量:\")\n", - "print(grouped_stats.iloc[:, :6]) # 只顯示部分列\n", - "\n", - "# 可視化不同類別的特徵分佈\n", - "fig, axes = plt.subplots(2, 3, figsize=(15, 10))\n", - "axes = axes.flatten()\n", - "\n", - "# 選擇6個最具代表性的特徵\n", - "features_to_plot = wine.feature_names[:6]\n", - "\n", - "for idx, feature in enumerate(features_to_plot):\n", - " for target in range(3):\n", - " subset = df[df['target'] == target][feature]\n", - " axes[idx].hist(subset, bins=20, alpha=0.5, label=wine.target_names[target])\n", - " \n", - " axes[idx].set_title(feature, fontweight='bold')\n", - " axes[idx].set_xlabel('Value')\n", - " axes[idx].set_ylabel('Frequency')\n", - " axes[idx].legend()\n", - " axes[idx].grid(True, alpha=0.3)\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig('class_distributions.png', dpi=150, bbox_inches='tight')\n", - "plt.show()\n", - "\n", - "print(\"✅ 類別分佈圖已保存\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 🎯 練習 4.1:協方差矩陣計算\n", - "\n", - "**任務**:使用 PyTorch 計算協方差矩陣\n", - "\n", - "協方差公式:$\\text{Cov}(X, Y) = \\frac{1}{n-1}\\sum_{i=1}^{n}(x_i - \\bar{x})(y_i - \\bar{y})$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def compute_covariance_matrix(data):\n", - " \"\"\"\n", - " 計算協方差矩陣\n", - " \n", - " 參數:\n", - " data: torch.Tensor, 形狀為 (n_samples, n_features)\n", - " \n", - " 返回:\n", - " 協方差矩陣, 形狀為 (n_features, n_features)\n", - " \"\"\"\n", - " # TODO: 實現協方差矩陣計算\n", - " # 1. 中心化數據(減去均值)\n", - " centered = data - data.mean(dim=0, keepdim=True)\n", - " \n", - " # 2. 
計算協方差矩陣\n", - " n = data.shape[0]\n", - " cov_matrix = (centered.T @ centered) / (n - 1)\n", - " \n", - " return cov_matrix\n", - "\n", - "# 測試\n", - "cov_matrix = compute_covariance_matrix(data_tensor)\n", - "print(f\"協方差矩陣形狀: {cov_matrix.shape}\")\n", - "print(f\"\\n協方差矩陣(前3×3):\\n{cov_matrix[:3, :3]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🎓 任務 5:綜合應用\n", - "\n", - "### 5.1 實現簡單的數據異常檢測\n", - "\n", - "使用 Z-score 方法檢測異常值(|Z| > 3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def detect_outliers_zscore(data, threshold=3.0):\n", - " \"\"\"\n", - " 使用 Z-score 方法檢測異常值\n", - " \n", - " 參數:\n", - " data: torch.Tensor\n", - " threshold: float, Z-score 閾值\n", - " \n", - " 返回:\n", - " 異常值的布爾掩碼\n", - " \"\"\"\n", - " # TODO: 實現異常檢測\n", - " mean = data.mean(dim=0, keepdim=True)\n", - " std = data.std(dim=0, keepdim=True)\n", - " z_scores = torch.abs((data - mean) / (std + 1e-8))\n", - " \n", - " # 任何特徵的 Z-score > threshold 即為異常\n", - " outliers = (z_scores > threshold).any(dim=1)\n", - " \n", - " return outliers\n", - "\n", - "# 檢測異常值\n", - "outliers = detect_outliers_zscore(data_tensor)\n", - "n_outliers = outliers.sum().item()\n", - "\n", - "print(f\"檢測到 {n_outliers} 個異常樣本(總共 {len(data_tensor)} 個樣本)\")\n", - "print(f\"異常比例: {n_outliers / len(data_tensor) * 100:.2f}%\")\n", - "\n", - "# 顯示異常樣本的索引\n", - "if n_outliers > 0:\n", - " outlier_indices = torch.where(outliers)[0]\n", - " print(f\"\\n異常樣本索引: {outlier_indices[:10].tolist()}...\") # 只顯示前10個" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📝 項目總結\n", - "\n", - "### 你學到了什麼?\n", - "\n", - "✅ PyTorch 張量的基礎操作和統計計算 \n", - "✅ Pandas 數據處理和分組分析 \n", - "✅ 數據可視化技巧 \n", - "✅ 數據預處理方法(標準化、歸一化) \n", - "✅ 協方差矩陣計算 \n", - "✅ 異常值檢測 \n", - "\n", - "### 🎯 進階挑戰\n", - "\n", - "1. **特徵工程**:創建新的組合特徵\n", - "2. **降維分析**:實現 PCA(主成分分析)\n", - "3. **統計檢驗**:使用 t-test 比較不同類別的特徵\n", - "4. 
**交互式可視化**:使用 Plotly 創建交互式圖表\n", - "\n", - "### 📚 推薦閱讀\n", - "\n", - "- [PyTorch 官方文檔 - Tensor Operations](https://pytorch.org/docs/stable/torch.html)\n", - "- [Pandas 數據分析教程](https://pandas.pydata.org/docs/user_guide/index.html)\n", - "- [Statistics for Machine Learning](https://machinelearningmastery.com/statistics_for_machine_learning/)\n", - "\n", - "---\n", - "\n", - "**恭喜完成項目 1!🎉**\n", - "\n", - "接下來嘗試 [項目 2:神經網絡數學基礎](02_neural_network_math.ipynb)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/02_neural_network_math.ipynb" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/02_neural_network_math.ipynb" deleted file mode 100644 index c9badbc..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/02_neural_network_math.ipynb" +++ /dev/null @@ -1,716 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 實踐項目 2:神經網絡數學基礎\n", - "\n", - "## 🎯 項目目標\n", - "\n", - "從零開始實現一個簡單的神經網絡,深入理解:\n", - "- 前向傳播的線性代數運算\n", - "- 激活函數的作用\n", - "- 損失函數的計算\n", - "- 反向傳播的微積分原理\n", - "\n", - "## 📚 涵蓋知識點\n", - "\n", - "- 矩陣乘法與向量運算\n", - "- 導數與鏈式法則\n", - "- 梯度下降優化\n", - "- 自動微分機制\n", - "\n", - "## 🔧 環境準備" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.animation import FuncAnimation\n", - "from IPython.display import HTML\n", - "\n", - "# 設置隨機種子\n", - "torch.manual_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "print(\"✅ 環境準備完成!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📊 任務 1:生成分類數據集\n", - "\n", - "我們將生成一個簡單的二分類數據集(螺旋數據)。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_spiral_data(n_points=100, noise=0.2):\n", - " \"\"\"\n", - " 生成螺旋數據集\n", - " \n", - " 參數:\n", - " n_points: 每個類別的樣本數\n", - " noise: 噪聲強度\n", - " \n", - " 返回:\n", - " X: 特徵 (n_samples, 2)\n", - " y: 標籤 (n_samples,)\n", - " \"\"\"\n", - " n = 
n_points\n", - " X = np.zeros((n * 2, 2))\n", - " y = np.zeros(n * 2, dtype=int)\n", - " \n", - " for class_id in range(2):\n", - " ix = range(n * class_id, n * (class_id + 1))\n", - " r = np.linspace(0.0, 1, n) # 半徑\n", - " t = np.linspace(class_id * 4, (class_id + 1) * 4, n) + np.random.randn(n) * noise # 角度\n", - " X[ix] = np.c_[r * np.sin(t * 2.5), r * np.cos(t * 2.5)]\n", - " y[ix] = class_id\n", - " \n", - " return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)\n", - "\n", - "# 生成數據\n", - "X, y = generate_spiral_data(n_points=100)\n", - "\n", - "# 可視化\n", - "plt.figure(figsize=(8, 8))\n", - "plt.scatter(X[y == 0, 0], X[y == 0, 1], c='skyblue', s=50, alpha=0.8, edgecolors='black', label='Class 0')\n", - "plt.scatter(X[y == 1, 0], X[y == 1, 1], c='salmon', s=50, alpha=0.8, edgecolors='black', label='Class 1')\n", - "plt.xlabel('Feature 1', fontsize=12)\n", - "plt.ylabel('Feature 2', fontsize=12)\n", - "plt.title('Spiral Dataset', fontsize=14, fontweight='bold')\n", - "plt.legend(fontsize=12)\n", - "plt.grid(True, alpha=0.3)\n", - "plt.axis('equal')\n", - "plt.show()\n", - "\n", - "print(f\"數據集大小: {X.shape}\")\n", - "print(f\"類別分佈: Class 0: {(y == 0).sum()}, Class 1: {(y == 1).sum()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🧠 任務 2:手動實現前向傳播\n", - "\n", - "### 2.1 理解神經網絡的數學結構\n", - "\n", - "一個簡單的兩層神經網絡:\n", - "\n", - "$$\n", - "\\begin{align*}\n", - "z_1 &= W_1 x + b_1 \\quad &\\text{(線性變換)} \\\\\n", - "a_1 &= \\sigma(z_1) \\quad &\\text{(激活函數)} \\\\\n", - "z_2 &= W_2 a_1 + b_2 \\quad &\\text{(線性變換)} \\\\\n", - "\\hat{y} &= \\text{softmax}(z_2) \\quad &\\text{(輸出層)}\n", - "\\end{align*}\n", - "$$\n", - "\n", - "其中 $\\sigma$ 是激活函數(如 ReLU)。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class SimpleNeuralNetwork:\n", - " \"\"\"\n", - " 手動實現的簡單神經網絡\n", - " \"\"\"\n", - " \n", - " def __init__(self, input_dim, hidden_dim, output_dim):\n", - " \"\"\"\n", - " 初始化網絡參數\n", - " \n", - " 參數:\n", - " input_dim: 輸入特徵維度\n", - " hidden_dim: 隱藏層維度\n", - " output_dim: 輸出維度(類別數)\n", - " \"\"\"\n", - " # TODO: 初始化權重和偏置\n", - " # 使用 He 初始化\n", - " self.W1 = torch.randn(input_dim, hidden_dim) * np.sqrt(2.0 / input_dim)\n", - " self.b1 = torch.zeros(hidden_dim)\n", - " \n", - " self.W2 = torch.randn(hidden_dim, output_dim) * np.sqrt(2.0 / hidden_dim)\n", - " self.b2 = torch.zeros(output_dim)\n", - " \n", - " # 存儲中間值(用於反向傳播)\n", - " self.cache = {}\n", - " \n", - " def relu(self, x):\n", - " \"\"\"ReLU 激活函數\"\"\"\n", - " return torch.maximum(x, torch.zeros_like(x))\n", - " \n", - " def relu_derivative(self, x):\n", - " \"\"\"ReLU 導數\"\"\"\n", - " return (x > 0).float()\n", - " \n", - " def softmax(self, x):\n", - " \"\"\"Softmax 函數(數值穩定版本)\"\"\"\n", - " # TODO: 實現 softmax\n", - " exp_x = torch.exp(x - x.max(dim=1, keepdim=True)[0]) # 數值穩定\n", - " return exp_x / exp_x.sum(dim=1, keepdim=True)\n", - " \n", - " def forward(self, X):\n", - " \"\"\"\n", - " 前向傳播\n", - " \n", - " 參數:\n", - " X: 輸入數據 (batch_size, input_dim)\n", - " \n", - " 返回:\n", - " predictions: 預測概率 (batch_size, output_dim)\n", - " \"\"\"\n", - " # TODO: 實現前向傳播\n", - " # 第一層\n", - " z1 = X @ self.W1 + self.b1 # 線性變換\n", - " a1 = self.relu(z1) # ReLU 激活\n", - " \n", - " # 第二層\n", - " z2 = a1 @ self.W2 + self.b2 # 線性變換\n", - " predictions = self.softmax(z2) # Softmax 輸出\n", - " \n", - " # 保存中間值\n", - " self.cache = {'X': X, 'z1': z1, 'a1': a1, 'z2': z2, 'predictions': predictions}\n", - " \n", - " return 
predictions\n", - " \n", - " def compute_loss(self, predictions, y):\n", - " \"\"\"\n", - " 計算交叉熵損失\n", - " \n", - " 參數:\n", - " predictions: 預測概率\n", - " y: 真實標籤\n", - " \n", - " 返回:\n", - " loss: 標量損失值\n", - " \"\"\"\n", - " # TODO: 實現交叉熵損失\n", - " n = predictions.shape[0]\n", - " # 選擇正確類別的概率\n", - " correct_log_probs = -torch.log(predictions[range(n), y] + 1e-8)\n", - " loss = correct_log_probs.mean()\n", - " return loss\n", - "\n", - "# 測試前向傳播\n", - "model = SimpleNeuralNetwork(input_dim=2, hidden_dim=10, output_dim=2)\n", - "predictions = model.forward(X)\n", - "loss = model.compute_loss(predictions, y)\n", - "\n", - "print(f\"前向傳播測試:\")\n", - "print(f\"預測形狀: {predictions.shape}\")\n", - "print(f\"初始損失: {loss.item():.4f}\")\n", - "print(f\"預測概率示例(前3個樣本):\\n{predictions[:3]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📐 任務 3:手動實現反向傳播\n", - "\n", - "### 3.1 反向傳播的數學推導\n", - "\n", - "使用鏈式法則計算梯度:\n", - "\n", - "$$\n", - "\\begin{align*}\n", - "\\frac{\\partial L}{\\partial W_2} &= \\frac{\\partial L}{\\partial z_2} \\frac{\\partial z_2}{\\partial W_2} = a_1^T \\delta_2 \\\\\n", - "\\frac{\\partial L}{\\partial W_1} &= \\frac{\\partial L}{\\partial z_1} \\frac{\\partial z_1}{\\partial W_1} = x^T \\delta_1\n", - "\\end{align*}\n", - "$$\n", - "\n", - "其中 $\\delta$ 是誤差項。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def backward(model, y):\n", - " \"\"\"\n", - " 反向傳播\n", - " \n", - " 參數:\n", - " model: SimpleNeuralNetwork 實例\n", - " y: 真實標籤\n", - " \n", - " 返回:\n", - " gradients: 包含所有參數梯度的字典\n", - " \"\"\"\n", - " # 獲取前向傳播的中間值\n", - " X = model.cache['X']\n", - " z1 = model.cache['z1']\n", - " a1 = model.cache['a1']\n", - " predictions = model.cache['predictions']\n", - " \n", - " n = X.shape[0]\n", - " \n", - " # TODO: 計算輸出層的梯度\n", - " # dL/dz2(softmax + 交叉熵的梯度)\n", - " delta2 = predictions.clone()\n", - " delta2[range(n), y] -= 1 # softmax 梯度的簡化形式\n", - " delta2 /= n\n", - " \n", - " # dL/dW2 和 dL/db2\n", - " dW2 = a1.T @ delta2\n", - " db2 = delta2.sum(dim=0)\n", - " \n", - " # TODO: 計算隱藏層的梯度(使用鏈式法則)\n", - " # dL/da1\n", - " delta1 = delta2 @ model.W2.T\n", - " # dL/dz1 = dL/da1 * da1/dz1\n", - " delta1 = delta1 * model.relu_derivative(z1)\n", - " \n", - " # dL/dW1 和 dL/db1\n", - " dW1 = X.T @ delta1\n", - " db1 = delta1.sum(dim=0)\n", - " \n", - " return {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}\n", - "\n", - "# 測試反向傳播\n", - "gradients = backward(model, y)\n", - "\n", - "print(\"反向傳播測試:\")\n", - "for name, grad in gradients.items():\n", - " print(f\"{name} 梯度形狀: {grad.shape}, 範數: {torch.norm(grad):.4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 🎯 練習 3.1:驗證梯度計算\n", - "\n", - "使用數值梯度驗證我們的解析梯度是否正確。\n", - "\n", - "數值梯度公式:\n", - "$$\n", - "\\frac{\\partial L}{\\partial w} \\approx \\frac{L(w + \\epsilon) - L(w - \\epsilon)}{2\\epsilon}\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def numerical_gradient(model, X, y, param_name, epsilon=1e-5):\n", - " \"\"\"\n", - " 計算數值梯度\n", - " \n", - " 參數:\n", - " model: 模型\n", - " X, y: 數據\n", - " param_name: 參數名稱 ('W1', 'b1', 'W2', 'b2')\n", - " epsilon: 微小擾動\n", - " \n", - " 返回:\n", - " 數值梯度\n", - " \"\"\"\n", - " param = getattr(model, param_name)\n", - " grad = torch.zeros_like(param)\n", - " \n", - " # 對每個參數計算數值梯度(只計算一個元素作為示例)\n", - " it = np.nditer(param.numpy(), flags=['multi_index'], op_flags=['readwrite'])\n", - " 
\n", - " while not it.finished:\n", - " idx = it.multi_index\n", - " old_value = param[idx].item()\n", - " \n", - " # f(x + epsilon)\n", - " param[idx] = old_value + epsilon\n", - " pred_plus = model.forward(X)\n", - " loss_plus = model.compute_loss(pred_plus, y)\n", - " \n", - " # f(x - epsilon)\n", - " param[idx] = old_value - epsilon\n", - " pred_minus = model.forward(X)\n", - " loss_minus = model.compute_loss(pred_minus, y)\n", - " \n", - " # 數值梯度\n", - " grad[idx] = (loss_plus - loss_minus) / (2 * epsilon)\n", - " \n", - " # 恢復原值\n", - " param[idx] = old_value\n", - " it.iternext()\n", - " \n", - " return grad\n", - "\n", - "# 驗證梯度(使用小批量數據)\n", - "X_small = X[:10]\n", - "y_small = y[:10]\n", - "\n", - "model_test = SimpleNeuralNetwork(2, 5, 2) # 使用更小的網絡以加快計算\n", - "_ = model_test.forward(X_small)\n", - "analytical_grads = backward(model_test, y_small)\n", - "\n", - "print(\"梯度驗證(數值梯度 vs 解析梯度):\")\n", - "print(\"注意:由於計算量大,我們只驗證幾個元素\")\n", - "\n", - "# 驗證 W1 的幾個元素\n", - "numerical_grad_W1 = numerical_gradient(model_test, X_small, y_small, 'W1')\n", - "print(f\"\\nW1 梯度差異: {torch.norm(numerical_grad_W1 - analytical_grads['dW1']):.6f}\")\n", - "print(\"(差異應該很小,< 1e-5)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🎓 任務 4:實現梯度下降訓練\n", - "\n", - "### 4.1 梯度下降更新規則\n", - "\n", - "$$\n", - "W \\leftarrow W - \\eta \\frac{\\partial L}{\\partial W}\n", - "$$\n", - "\n", - "其中 $\\eta$ 是學習率。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def train(model, X, y, learning_rate=0.1, epochs=1000, verbose=True):\n", - " \"\"\"\n", - " 訓練神經網絡\n", - " \n", - " 參數:\n", - " model: SimpleNeuralNetwork\n", - " X, y: 訓練數據\n", - " learning_rate: 學習率\n", - " epochs: 訓練輪數\n", - " verbose: 是否打印訓練信息\n", - " \n", - " 返回:\n", - " loss_history: 損失歷史\n", - " accuracy_history: 準確率歷史\n", - " \"\"\"\n", - " loss_history = []\n", - " accuracy_history = []\n", - " \n", - " for epoch in range(epochs):\n", - " # 前向傳播\n", - " predictions = model.forward(X)\n", - " loss = model.compute_loss(predictions, y)\n", - " \n", - " # 反向傳播\n", - " grads = backward(model, y)\n", - " \n", - " # TODO: 參數更新(梯度下降)\n", - " model.W1 -= learning_rate * grads['dW1']\n", - " model.b1 -= learning_rate * grads['db1']\n", - " model.W2 -= learning_rate * grads['dW2']\n", - " model.b2 -= learning_rate * grads['db2']\n", - " \n", - " # 計算準確率\n", - " predicted_class = predictions.argmax(dim=1)\n", - " accuracy = (predicted_class == y).float().mean()\n", - " \n", - " # 記錄\n", - " loss_history.append(loss.item())\n", - " accuracy_history.append(accuracy.item())\n", - " \n", - " # 打印進度\n", - " if verbose and (epoch + 1) % 100 == 0:\n", - " print(f\"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}\")\n", - " \n", - " return loss_history, accuracy_history\n", - "\n", - "# 訓練模型\n", - "model = SimpleNeuralNetwork(input_dim=2, hidden_dim=20, output_dim=2)\n", - "loss_history, accuracy_history = train(model, X, y, learning_rate=0.5, epochs=2000)\n", - "\n", - "print(\"\\n✅ 訓練完成!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 可視化訓練過程" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 繪製損失和準確率曲線\n", - "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\n", - "\n", - "# 損失曲線\n", - "ax1.plot(loss_history, linewidth=2, color='royalblue')\n", - "ax1.set_xlabel('Epoch', fontsize=12)\n", - "ax1.set_ylabel('Loss', fontsize=12)\n", 
- "ax1.set_title('Training Loss', fontsize=14, fontweight='bold')\n", - "ax1.grid(True, alpha=0.3)\n", - "\n", - "# 準確率曲線\n", - "ax2.plot(accuracy_history, linewidth=2, color='green')\n", - "ax2.set_xlabel('Epoch', fontsize=12)\n", - "ax2.set_ylabel('Accuracy', fontsize=12)\n", - "ax2.set_title('Training Accuracy', fontsize=14, fontweight='bold')\n", - "ax2.grid(True, alpha=0.3)\n", - "ax2.set_ylim([0, 1.05])\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig('training_curves.png', dpi=150, bbox_inches='tight')\n", - "plt.show()\n", - "\n", - "print(f\"最終損失: {loss_history[-1]:.4f}\")\n", - "print(f\"最終準確率: {accuracy_history[-1]:.4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 可視化決策邊界" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_decision_boundary(model, X, y):\n", - " \"\"\"\n", - " 繪製決策邊界\n", - " \"\"\"\n", - " # 創建網格\n", - " x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\n", - " y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n", - " \n", - " xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),\n", - " np.linspace(y_min, y_max, 200))\n", - " \n", - " # 預測網格點\n", - " grid_points = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)\n", - " with torch.no_grad():\n", - " Z = model.forward(grid_points).argmax(dim=1).numpy()\n", - " Z = Z.reshape(xx.shape)\n", - " \n", - " # 繪圖\n", - " plt.figure(figsize=(10, 8))\n", - " plt.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu', levels=1)\n", - " plt.scatter(X[y == 0, 0], X[y == 0, 1], c='skyblue', s=60, \n", - " alpha=0.8, edgecolors='black', linewidth=1.5, label='Class 0')\n", - " plt.scatter(X[y == 1, 0], X[y == 1, 1], c='salmon', s=60,\n", - " alpha=0.8, edgecolors='black', linewidth=1.5, label='Class 1')\n", - " \n", - " plt.xlabel('Feature 1', fontsize=12)\n", - " plt.ylabel('Feature 2', fontsize=12)\n", - " plt.title('Decision Boundary', fontsize=14, fontweight='bold')\n", - " plt.legend(fontsize=12)\n", - " plt.grid(True, alpha=0.3)\n", - " plt.savefig('decision_boundary.png', dpi=150, bbox_inches='tight')\n", - " plt.show()\n", - "\n", - "plot_decision_boundary(model, X, y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🤖 任務 5:使用 PyTorch 自動微分對比\n", - "\n", - "驗證我們手動實現的反向傳播是否正確。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class PyTorchNN(nn.Module):\n", - " \"\"\"使用 PyTorch 自動微分的神經網絡\"\"\"\n", - " \n", - " def __init__(self, input_dim, hidden_dim, output_dim):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(input_dim, hidden_dim)\n", - " self.fc2 = nn.Linear(hidden_dim, output_dim)\n", - " \n", - " def forward(self, x):\n", - " x = F.relu(self.fc1(x))\n", - " x = self.fc2(x)\n", - " return x\n", - "\n", - "# 訓練 PyTorch 版本\n", - "pytorch_model = PyTorchNN(2, 20, 2)\n", - "criterion = nn.CrossEntropyLoss()\n", - "optimizer = torch.optim.SGD(pytorch_model.parameters(), lr=0.5)\n", - "\n", - "pytorch_loss_history = []\n", - "pytorch_accuracy_history = []\n", - "\n", - "for epoch in range(2000):\n", - " # 前向傳播\n", - " outputs = pytorch_model(X)\n", - " loss = criterion(outputs, y)\n", - " \n", - " # 反向傳播\n", - " optimizer.zero_grad()\n", - " loss.backward()\n", - " optimizer.step()\n", - " \n", - " # 計算準確率\n", - " with torch.no_grad():\n", - " predicted = outputs.argmax(dim=1)\n", - " accuracy = (predicted == y).float().mean()\n", - " \n", - " pytorch_loss_history.append(loss.item())\n", - " 
pytorch_accuracy_history.append(accuracy.item())\n", - " \n", - " if (epoch + 1) % 100 == 0:\n", - " print(f\"Epoch {epoch + 1}/2000, Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}\")\n", - "\n", - "print(\"\\n✅ PyTorch 訓練完成!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 對比兩種實現" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 對比訓練曲線\n", - "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\n", - "\n", - "# 損失對比\n", - "ax1.plot(loss_history, linewidth=2, label='Manual Implementation', alpha=0.8)\n", - "ax1.plot(pytorch_loss_history, linewidth=2, label='PyTorch Autograd', alpha=0.8, linestyle='--')\n", - "ax1.set_xlabel('Epoch', fontsize=12)\n", - "ax1.set_ylabel('Loss', fontsize=12)\n", - "ax1.set_title('Loss Comparison', fontsize=14, fontweight='bold')\n", - "ax1.legend()\n", - "ax1.grid(True, alpha=0.3)\n", - "\n", - "# 準確率對比\n", - "ax2.plot(accuracy_history, linewidth=2, label='Manual Implementation', alpha=0.8)\n", - "ax2.plot(pytorch_accuracy_history, linewidth=2, label='PyTorch Autograd', alpha=0.8, linestyle='--')\n", - "ax2.set_xlabel('Epoch', fontsize=12)\n", - "ax2.set_ylabel('Accuracy', fontsize=12)\n", - "ax2.set_title('Accuracy Comparison', fontsize=14, fontweight='bold')\n", - "ax2.legend()\n", - "ax2.grid(True, alpha=0.3)\n", - "ax2.set_ylim([0, 1.05])\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig('comparison.png', dpi=150, bbox_inches='tight')\n", - "plt.show()\n", - "\n", - "print(\"\\n對比結果:\")\n", - "print(f\"手動實現 - 最終損失: {loss_history[-1]:.4f}, 準確率: {accuracy_history[-1]:.4f}\")\n", - "print(f\"PyTorch - 最終損失: {pytorch_loss_history[-1]:.4f}, 準確率: {pytorch_accuracy_history[-1]:.4f}\")\n", - "print(\"\\n✅ 兩種實現的結果應該非常接近!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 📝 項目總結\n", - "\n", - "### 你學到了什麼?\n", - "\n", - "✅ 神經網絡的數學結構(矩陣乘法、激活函數) \n", - "✅ 前向傳播的完整實現 \n", - "✅ 反向傳播和鏈式法則的應用 \n", - "✅ 梯度下降優化算法 \n", - "✅ 數值梯度驗證方法 \n", - "✅ PyTorch 自動微分機制 \n", - "\n", - "### 關鍵數學概念\n", - "\n", - "1. **矩陣乘法**: $y = Wx + b$\n", - "2. **鏈式法則**: $\\frac{\\partial L}{\\partial W} = \\frac{\\partial L}{\\partial y} \\frac{\\partial y}{\\partial W}$\n", - "3. **梯度下降**: $W \\leftarrow W - \\eta \\nabla_W L$\n", - "4. **Softmax**: $\\sigma(z)_i = \\frac{e^{z_i}}{\\sum_j e^{z_j}}$\n", - "5. **交叉熵**: $L = -\\sum_i y_i \\log(\\hat{y}_i)$\n", - "\n", - "### 🎯 進階挑戰\n", - "\n", - "1. **動量優化器**: 實現帶動量的梯度下降\n", - "2. **批量訓練**: 實現 mini-batch 訓練\n", - "3. **正則化**: 添加 L2 正則化防止過擬合\n", - "4. **學習率調度**: 實現學習率衰減策略\n", - "5. **更深的網絡**: 擴展到 3 層或更多層\n", - "\n", - "### 📚 推薦閱讀\n", - "\n", - "- [CS231n: Backpropagation](http://cs231n.stanford.edu/slides/2022/lecture_4.pdf)\n", - "- [Deep Learning Book - Chapter 6](https://www.deeplearningbook.org/contents/mlp.html)\n", - "- [PyTorch Autograd Tutorial](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html)\n", - "\n", - "---\n", - "\n", - "**恭喜完成項目 2!🎉**\n", - "\n", - "你已經深入理解了神經網絡的數學基礎!\n", - "\n", - "繼續探索更多高級主題吧!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/README.md" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/README.md" deleted file mode 100644 index cff68a8..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/projects/README.md" +++ /dev/null @@ -1,364 +0,0 @@ -# 實踐項目集 - -## 📚 項目概覽 - -本目錄包含兩個精心設計的實踐項目,幫助你將深度學習的數學知識轉化為實際技能。 - -## 🎯 項目列表 - -### 項目 1:數據分析基礎 -**文件**: `01_data_analysis_basics.ipynb` - -**難度**: ⭐⭐ (入門-中級) - -**預計時間**: 3-4 小時 - -**學習目標**: -- 掌握 PyTorch 張量操作 -- 學習 Pandas 數據處理技巧 -- 理解統計分析基礎 -- 實現數據預處理方法 - -**涵蓋知識點**: -- 張量創建與操作 -- 數據可視化 -- 統計量計算(均值、方差、協方差) -- 數據標準化與歸一化 -- 相關性分析 -- 異常值檢測 - -**項目亮點**: -- ✅ 使用真實數據集(Wine 數據集) -- ✅ 完整的數據分析流程 -- ✅ 可視化工具應用 -- ✅ 手動實現與庫函數對比 - ---- - -### 項目 2:神經網絡數學基礎 -**文件**: `02_neural_network_math.ipynb` - -**難度**: ⭐⭐⭐ (中級-進階) - -**預計時間**: 4-6 小時 - -**學習目標**: -- 深入理解神經網絡的數學原理 -- 從零實現前向傳播和反向傳播 -- 掌握梯度下降優化算法 -- 理解自動微分機制 - -**涵蓋知識點**: -- 矩陣乘法與向量運算 -- 激活函數(ReLU, Softmax) -- 損失函數(交叉熵) -- 鏈式法則與反向傳播 -- 梯度驗證方法 -- PyTorch 自動微分 - -**項目亮點**: -- ✅ 手動實現完整的神經網絡 -- ✅ 數值梯度驗證 -- ✅ 與 PyTorch 自動微分對比 -- ✅ 決策邊界可視化 -- ✅ 訓練過程動態展示 - ---- - -## 🚀 快速開始 - -### 環境要求 - -```bash -# 核心依賴 -pip install torch numpy pandas matplotlib seaborn - -# 額外依賴(用於項目 1) -pip install scikit-learn scipy - -# Jupyter 環境 -pip install jupyter notebook -``` - -### 運行項目 - -```bash -# 啟動 Jupyter Notebook -jupyter notebook - -# 打開對應的 .ipynb 文件 -# - 01_data_analysis_basics.ipynb -# - 02_neural_network_math.ipynb -``` - ---- - -## 📖 學習路徑建議 - -### 路徑 1:順序學習(推薦新手) - -``` -預備知識 notebooks - ↓ -項目 1:數據分析基礎 - ↓ -項目 2:神經網絡數學基礎 -``` - -**適合人群**: 完全新手,希望循序漸進 - -**學習策略**: -1. 先完成所有預備知識的 notebooks -2. 逐個完成項目中的任務 -3. 嘗試所有練習題 -4. 閱讀項目總結和推薦資源 - -### 路徑 2:快速實戰(有基礎) - -``` -快速瀏覽預備知識 - ↓ -直接開始項目 2 - ↓ -遇到問題時查閱相關 notebook -``` - -**適合人群**: 有編程基礎,想快速上手 - -**學習策略**: -1. 直接開始項目 2 -2. 遇到不理解的概念時,回到對應的預備知識 notebook -3. 
完成項目後深入研究感興趣的主題 - ---- - -## 🎯 項目詳解 - -### 項目 1 詳細內容 - -#### 任務 1:數據加載與探索 -- 加載 Wine 數據集 -- 基本統計量計算 -- 使用 Pandas 和 PyTorch 兩種方法 - -#### 任務 2:數據可視化 -- 特徵分佈圖 -- 相關性矩陣熱力圖 -- 類別分佈對比 - -#### 任務 3:數據預處理 -- 實現 Z-score 標準化 -- 實現 Min-Max 歸一化 -- 理解不同標準化方法的適用場景 - -#### 任務 4:統計分析 -- 按類別分組統計 -- 計算協方差矩陣 -- 理解數據的內在結構 - -#### 任務 5:異常檢測 -- 使用 Z-score 方法 -- 可視化異常值 -- 理解異常檢測的重要性 - ---- - -### 項目 2 詳細內容 - -#### 任務 1:生成數據集 -- 創建螺旋數據集 -- 理解分類問題的特性 -- 數據可視化 - -#### 任務 2:前向傳播實現 -- 手動實現線性層 -- 實現 ReLU 激活函數 -- 實現 Softmax 輸出層 -- 計算交叉熵損失 - -#### 任務 3:反向傳播實現 -- 推導梯度公式 -- 實現鏈式法則 -- 計算每層的梯度 - -#### 任務 4:梯度驗證 -- 實現數值梯度 -- 對比解析梯度 -- 理解梯度檢查的重要性 - -#### 任務 5:模型訓練 -- 實現梯度下降 -- 訓練神經網絡 -- 可視化訓練過程 -- 繪製決策邊界 - -#### 任務 6:自動微分對比 -- 使用 PyTorch 實現同樣的網絡 -- 對比訓練結果 -- 理解自動微分的優勢 - ---- - -## 📊 項目成果展示 - -完成項目後,你將獲得: - -### 項目 1 成果 -- [ ] Wine 數據集完整分析報告 -- [ ] 特徵分佈圖(15 張) -- [ ] 相關性矩陣熱力圖 -- [ ] 數據預處理函數庫 -- [ ] 異常檢測系統 - -### 項目 2 成果 -- [ ] 手動實現的神經網絡 -- [ ] 訓練曲線圖(損失 + 準確率) -- [ ] 決策邊界可視化 -- [ ] 梯度驗證報告 -- [ ] 對比分析報告 - ---- - -## 💡 學習技巧 - -### 1. 動手實踐 -- **不要只看代碼**: 親自運行每個單元格 -- **修改參數**: 嘗試不同的超參數 -- **觀察變化**: 理解參數對結果的影響 - -### 2. 深入理解 -- **數學推導**: 嘗試自己推導公式 -- **繪製圖表**: 可視化幫助理解 -- **寫筆記**: 用自己的話解釋概念 - -### 3. 問題解決 -- **遇到錯誤**: 仔細閱讀錯誤信息 -- **查閱文檔**: 善用 PyTorch/NumPy 文檔 -- **對比結果**: 驗證自己的實現 - -### 4. 擴展學習 -- **完成進階挑戰**: 每個項目末尾都有 -- **查閱推薦資源**: 深入學習相關主題 -- **分享成果**: 向他人解釋你的理解 - ---- - -## 🎓 進階挑戰 - -### 項目 1 進階挑戰 - -1. **特徵工程** - - 創建多項式特徵 - - 特徵選擇(移除冗餘特徵) - - 特徵重要性分析 - -2. **降維分析** - - 實現 PCA(主成分分析) - - 可視化降維結果 - - 分析特徵貢獻 - -3. **統計檢驗** - - t-test 比較不同類別 - - ANOVA 多組比較 - - 卡方檢驗 - -4. **交互式可視化** - - 使用 Plotly 創建交互圖表 - - 實現數據儀表板 - -### 項目 2 進階挑戰 - -1. **優化器改進** - - 實現 Momentum - - 實現 Adam 優化器 - - 對比不同優化器 - -2. **批量訓練** - - 實現 mini-batch SGD - - 實現數據加載器 - - 對比 batch size 影響 - -3. **正則化** - - 添加 L2 正則化 - - 實現 Dropout - - 分析正則化效果 - -4. **網絡架構** - - 擴展到 3+ 層 - - 嘗試不同的激活函數 - - 實現 Batch Normalization - -5. **學習率調度** - - 實現學習率衰減 - - 嘗試 warmup 策略 - - 對比不同調度方法 - ---- - -## 📚 相關資源 - -### 教程 -- [PyTorch 官方教程](https://pytorch.org/tutorials/) -- [NumPy 快速入門](https://numpy.org/doc/stable/user/quickstart.html) -- [Pandas 用戶指南](https://pandas.pydata.org/docs/user_guide/) - -### 書籍 -- 《深度學習》- Ian Goodfellow -- 《動手學深度學習》- 李沐 -- 《Python 數據分析》- Wes McKinney - -### 課程 -- [CS231n: CNN for Visual Recognition](http://cs231n.stanford.edu/) -- [Fast.ai: Practical Deep Learning](https://course.fast.ai/) -- [3Blue1Brown: Neural Networks](https://www.youtube.com/watch?v=aircAruvnKk&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi) - -### 工具文檔 -- [PyTorch Documentation](https://pytorch.org/docs/) -- [Matplotlib Gallery](https://matplotlib.org/stable/gallery/) -- [Seaborn Tutorial](https://seaborn.pydata.org/tutorial.html) - ---- - -## 🤝 貢獻與反饋 - -### 發現錯誤? -請提交 Issue 或 Pull Request。 - -### 有改進建議? -歡迎分享你的想法! - -### 完成項目? -分享你的成果和學習心得! - ---- - -## 📝 常見問題 - -### Q1: 項目適合什麼水平的學習者? -A: 項目 1 適合有基礎 Python 經驗的初學者,項目 2 適合有一定數學基礎的學習者。 - -### Q2: 必須按順序完成嗎? -A: 建議按順序,但如果你有基礎,可以直接挑戰項目 2。 - -### Q3: 完成項目需要多久? -A: 項目 1 約 3-4 小時,項目 2 約 4-6 小時。但建議不要趕時間,充分理解每個概念。 - -### Q4: 遇到錯誤怎麼辦? -A: -1. 仔細閱讀錯誤信息 -2. 檢查代碼是否完全一致 -3. 查閱相關文檔 -4. 在社群中尋求幫助 - -### Q5: 完成項目後該做什麼? -A: -1. 完成進階挑戰 -2. 閱讀推薦資源 -3. 繼續學習後續章節 -4. 
嘗試自己的項目 - ---- - -**開始你的實踐之旅吧!🚀** - -記住:**理論 + 實踐 = 真正的理解** diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/README.md" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/README.md" deleted file mode 100644 index 9b96937..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/README.md" +++ /dev/null @@ -1,393 +0,0 @@ -# AI 輔助學習工具集 - -## 📚 工具概覽 - -本目錄包含三個強大的 AI 輔助學習工具,幫助你更高效地掌握深度學習的數學基礎。 - -### 🔧 工具列表 - -| 工具 | 文件 | 功能 | 使用場景 | -|------|------|------|----------| -| 練習生成器 | `exercise_generator.py` | 自動生成個性化練習題 | 需要額外練習時 | -| 概念可視化器 | `visualizer.py` | 可視化數學概念 | 理解抽象概念時 | -| 進度追蹤器 | `progress_tracker.py` | 追蹤學習進度 | 規劃學習路徑時 | - ---- - -## 🎯 1. 練習生成器 (exercise_generator.py) - -### 功能特性 - -- ✅ 支持多個主題:張量操作、線性代數、微積分、自動微分、概率統計 -- ✅ 四個難度等級:easy, medium, hard, expert -- ✅ 自動生成隨機參數 -- ✅ 提供詳細的解答和提示 -- ✅ 支持批量生成和導出 - -### 使用方法 - -#### 基本用法 - -```bash -# 生成 5 道中等難度的線性代數練習題 -python exercise_generator.py --topic linear_algebra --difficulty medium --count 5 - -# 生成並保存到文件 -python exercise_generator.py --topic ndarray --difficulty easy --count 10 --output exercises.json -``` - -#### 支持的主題 - -- `ndarray`: 張量操作(創建、變形、索引、廣播) -- `linear_algebra`: 線性代數(向量、矩陣、範數、點積) -- `calculus`: 微積分(導數、梯度、偏導數) -- `autograd`: 自動微分(計算圖、反向傳播) -- `probability`: 概率統計(分佈、期望、方差) - -#### 難度等級 - -- `easy`: 適合初學者,側重基礎概念 -- `medium`: 適合有一定基礎的學習者 -- `hard`: 適合進階學習者,包含複雜計算 -- `expert`: 適合專家級,需要深入理解 - -### 示例輸出 - -``` -📝 練習 1: linear_algebra_medium_1 -難度: medium - -問題: -給定矩陣 A (3×4),計算其轉置並驗證 (A^T)^T = A。 - -概念: 矩陣轉置, 矩陣性質 - -提示: - 💡 使用 .T 屬性 - 💡 使用 torch.equal() 比較 - -參考解答: -```python -import torch -A = torch.randn(3, 4) -A_T = A.T -A_T_T = A_T.T -print(torch.equal(A, A_T_T)) # True -``` - ---- - -## 📊 2. 概念可視化器 (visualizer.py) - -### 功能特性 - -- ✅ 交互式可視化數學概念 -- ✅ 高質量圖表生成 -- ✅ 支持多種概念的可視化 -- ✅ 自動保存圖片 - -### 使用方法 - -#### 可視化梯度下降 - -```bash -python visualizer.py --concept gradient_descent -``` - -生成的圖表包括: -- 函數曲線和優化路徑 -- 收斂曲線 - -#### 可視化線性變換 - -```bash -python visualizer.py --concept linear_transformation -``` - -展示 6 種線性變換: -- 恆等變換 -- 縮放變換 -- 旋轉變換 -- 剪切變換 -- 反射變換 -- 投影變換 - -#### 可視化激活函數 - -```bash -python visualizer.py --concept activation_functions -``` - -展示常用激活函數: -- Sigmoid -- Tanh -- ReLU -- Leaky ReLU -- ELU -- GELU - -#### 可視化概率分佈 - -```bash -python visualizer.py --concept probability_distributions -``` - -展示常見概率分佈: -- 正態分佈 -- 均勻分佈 -- 伯努利分佈 -- 指數分佈 -- 二項分佈 -- 泊松分佈 - -#### 可視化矩陣乘法 - -```bash -python visualizer.py --concept matrix_multiplication -``` - -直觀展示矩陣乘法過程。 - -#### 可視化所有概念 - -```bash -python visualizer.py --concept all -``` - ---- - -## 📈 3. 
進度追蹤器 (progress_tracker.py) - -### 功能特性 - -- ✅ 記錄學習進度(分數、時間、練習數) -- ✅ 生成可視化報告 -- ✅ 分析薄弱環節 -- ✅ 提供個性化學習建議 -- ✅ 里程碑追蹤 -- ✅ 學習筆記功能 - -### 使用方法 - -#### 更新進度 - -```bash -# 更新某個主題的分數 -python progress_tracker.py --update --topic ndarray --score 85 - -# 記錄學習時間 -python progress_tracker.py --update --topic linear_algebra --time 3 - -# 記錄完成的練習數 -python progress_tracker.py --update --topic calculus --exercises 10 - -# 同時更新多項數據 -python progress_tracker.py --update --topic autograd --score 78 --time 2 --exercises 5 -``` - -#### 生成學習報告 - -```bash -python progress_tracker.py --report -``` - -報告包括: -- 整體學習時間統計 -- 各主題的分數和進度 -- 已達成的里程碑 -- 可視化圖表(雷達圖和柱狀圖) - -#### 獲取學習建議 - -```bash -python progress_tracker.py --suggest -``` - -建議包括: -- 需要加強的主題 -- 推薦的學習順序 -- 時間分配建議 -- 學習策略建議 - -#### 添加學習筆記 - -```bash -python progress_tracker.py --update --topic probability --note "理解了貝葉斯定理的應用" -``` - -### 里程碑系統 - -進度追蹤器會自動識別你達成的里程碑: - -- 🥉 **入門** (60分): 掌握基礎概念 -- 🥈 **熟練** (75分): 能夠獨立完成練習 -- 🥇 **精通** (90分): 深入理解原理 -- 🏆 **大師** (100分): 完全掌握並能教授他人 - ---- - -## 🚀 快速開始 - -### 安裝依賴 - -```bash -# 基礎依賴 -pip install torch numpy matplotlib - -# 額外依賴(用於可視化器) -pip install scipy -``` - -### 推薦學習流程 - -1. **第一週**: 使用進度追蹤器規劃學習路徑 -2. **學習過程中**: 使用可視化器理解抽象概念 -3. **每完成一個主題**: 用練習生成器生成額外練習 -4. **每週末**: 更新進度並查看報告 - -### 示例工作流 - -```bash -# 1. 開始學習線性代數 -python progress_tracker.py --update --topic linear_algebra --time 2 - -# 2. 可視化線性變換 -python visualizer.py --concept linear_transformation - -# 3. 生成練習題鞏固 -python exercise_generator.py --topic linear_algebra --difficulty medium --count 5 - -# 4. 完成練習後更新進度 -python progress_tracker.py --update --topic linear_algebra --score 82 --exercises 5 - -# 5. 週末查看報告 -python progress_tracker.py --report -``` - ---- - -## 💡 使用技巧 - -### 練習生成器 - -1. **循序漸進**: 從 easy 開始,逐步提升到 expert -2. **批量生成**: 使用 `--output` 保存練習題,方便離線學習 -3. **針對性練習**: 根據薄弱環節選擇主題 - -### 可視化器 - -1. **多次觀察**: 同一概念從不同角度可視化 -2. **對比學習**: 使用 `--concept all` 生成所有圖表,進行對比 -3. **保存圖片**: 圖片會自動保存,方便製作筆記 - -### 進度追蹤器 - -1. **及時更新**: 每次學習後立即更新進度 -2. **定期回顧**: 每週生成一次報告 -3. **記錄筆記**: 重要的理解和心得及時記錄 -4. **設定目標**: 為每個主題設定分數目標 - ---- - -## 🔧 高級用法 - -### 自定義練習模板 - -編輯 `exercise_generator.py`,在對應的生成函數中添加新的練習模板: - -```python -templates = { - 'medium': [ - { - 'question': '你的問題...', - 'solution': '你的解答...', - 'hints': ['提示1', '提示2'], - 'concepts': ['概念1', '概念2'] - } - ] -} -``` - -### 自定義可視化 - -在 `visualizer.py` 中添加新的可視化函數: - -```python -def visualize_custom_concept(self): - """自定義可視化""" - # 你的可視化代碼 - pass -``` - -### 數據導出 - -進度數據保存在 `progress_data.json`,可以導出用於: -- 生成詳細的學習報告 -- 與他人分享學習經驗 -- 備份學習記錄 - ---- - -## 📝 常見問題 - -### Q1: 練習題的答案都正確嗎? - -A: 練習題基於常見的學習場景設計,但建議你: -1. 親自驗證答案 -2. 嘗試不同的解法 -3. 理解背後的原理 - -### Q2: 如何重置進度追蹤? - -A: 刪除 `progress_data.json` 文件即可重新開始。 - -### Q3: 可以自定義可視化的樣式嗎? - -A: 可以!修改 `visualizer.py` 中的 matplotlib 設置: - -```python -plt.style.use('your_style') # 更改樣式 -self.figsize = (width, height) # 更改圖片大小 -``` - -### Q4: 如何批量生成多個主題的練習? - -A: 可以使用 shell 腳本: - -```bash -#!/bin/bash -for topic in ndarray linear_algebra calculus autograd probability; do - python exercise_generator.py --topic $topic --difficulty medium --count 5 --output "${topic}_exercises.json" -done -``` - ---- - -## 🤝 貢獻 - -歡迎改進這些工具!可以: - -1. 添加新的練習模板 -2. 實現新的可視化 -3. 改進進度追蹤算法 -4. 修復 bug - ---- - -## 📄 授權 - -MIT License - ---- - -## 🌟 致謝 - -這些工具基於深度學習社群的集體智慧開發,感謝所有貢獻者! 
- ---- - -**最後更新**: 2024-11 -**維護者**: AI Learning Community -**版本**: v1.0 - -開始使用這些工具,讓你的學習之旅更高效!🚀 diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/exercise_generator.py" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/exercise_generator.py" deleted file mode 100644 index cce7df8..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/exercise_generator.py" +++ /dev/null @@ -1,349 +0,0 @@ -""" -AI 輔助練習生成器 -================ - -根據學習主題和難度自動生成個性化練習題。 -支持的主題:張量操作、線性代數、微積分、概率統計等。 - -使用方法: - python exercise_generator.py --topic linear_algebra --difficulty medium --count 5 - -作者:AI Learning Community -版本:v1.0 -""" - -import argparse -import random -import json -from typing import List, Dict, Any -from datetime import datetime - - -class ExerciseGenerator: - """練習題生成器""" - - def __init__(self, topic: str, difficulty: str): - self.topic = topic - self.difficulty = difficulty - self.difficulty_levels = { - 'easy': 1, - 'medium': 2, - 'hard': 3, - 'expert': 4 - } - - def generate(self, count: int = 5) -> List[Dict[str, Any]]: - """生成指定數量的練習題""" - exercises = [] - - generators = { - 'ndarray': self._generate_ndarray_exercises, - 'linear_algebra': self._generate_linear_algebra_exercises, - 'calculus': self._generate_calculus_exercises, - 'autograd': self._generate_autograd_exercises, - 'probability': self._generate_probability_exercises, - } - - if self.topic in generators: - exercises = generators[self.topic](count) - else: - raise ValueError(f"不支持的主題: {self.topic}") - - return exercises - - def _generate_ndarray_exercises(self, count: int) -> List[Dict[str, Any]]: - """生成張量操作練習題""" - exercises = [] - templates = { - 'easy': [ - { - 'question': '創建一個形狀為 ({shape}) 的零張量,並將其轉換為形狀 ({reshape})。', - 'solution': 'import torch\nx = torch.zeros({shape})\ny = x.reshape({reshape})', - 'hints': ['使用 torch.zeros() 創建零張量', '使用 .reshape() 改變形狀'], - 'concepts': ['張量創建', '形狀變換'] - }, - { - 'question': '創建一個從 0 到 {n} 的整數張量,並計算其總和。', - 'solution': 'import torch\nx = torch.arange({n})\ntotal = x.sum()', - 'hints': ['使用 torch.arange()', '使用 .sum() 方法'], - 'concepts': ['張量創建', '聚合操作'] - } - ], - 'medium': [ - { - 'question': '創建兩個形狀為 ({m}, {n}) 的隨機張量 A 和 B,執行按元素乘法和矩陣乘法,並比較結果的形狀。', - 'solution': 'import torch\nA = torch.randn({m}, {n})\nB = torch.randn({m}, {n})\nelement_wise = A * B # 形狀: ({m}, {n})\nmatrix_mult = torch.mm(A, B.T) # 形狀: ({m}, {m})', - 'hints': ['按元素乘法使用 *', '矩陣乘法使用 torch.mm()', '注意矩陣乘法的維度要求'], - 'concepts': ['張量運算', '矩陣乘法', '廣播機制'] - }, - { - 'question': '使用廣播機制,將形狀為 ({m}, 1) 的張量與形狀為 (1, {n}) 的張量相加,並解釋結果的形狀。', - 'solution': 'import torch\nA = torch.randn({m}, 1)\nB = torch.randn(1, {n})\nC = A + B # 形狀: ({m}, {n})', - 'hints': ['理解廣播規則', '觀察輸出形狀的變化'], - 'concepts': ['廣播機制', '張量形狀'] - } - ], - 'hard': [ - { - 'question': '實現一個函數,使用 PyTorch 張量操作計算批量數據的標準化(z-score normalization)。輸入形狀為 (batch_size, features)。', - 'solution': 'import torch\n\ndef normalize(x):\n mean = 
x.mean(dim=0, keepdim=True)\n std = x.std(dim=0, keepdim=True)\n return (x - mean) / (std + 1e-8)\n\n# 測試\ndata = torch.randn(100, 10)\nnormalized = normalize(data)', - 'hints': ['使用 .mean() 和 .std()', '注意 keepdim 參數', '避免除以零'], - 'concepts': ['數據標準化', '統計運算', '數值穩定性'] - } - ] - } - - level_templates = templates.get(self.difficulty, templates['easy']) - - for i in range(count): - template = random.choice(level_templates) - exercise = template.copy() - - # 填充隨機參數 - params = { - 'shape': f"({random.randint(2, 5)}, {random.randint(2, 5)})", - 'reshape': f"({random.randint(2, 10)}, -1)", - 'n': random.randint(10, 100), - 'm': random.randint(3, 6), - 'n': random.randint(3, 6) - } - - exercise['question'] = exercise['question'].format(**params) - exercise['solution'] = exercise['solution'].format(**params) - exercise['id'] = f"{self.topic}_{self.difficulty}_{i+1}" - exercise['difficulty'] = self.difficulty - - exercises.append(exercise) - - return exercises - - def _generate_linear_algebra_exercises(self, count: int) -> List[Dict[str, Any]]: - """生成線性代數練習題""" - exercises = [] - templates = { - 'easy': [ - { - 'question': '計算向量 v = [{v}] 的 L2 範數(歐幾里得範數)。', - 'solution': 'import torch\nv = torch.tensor([{v}], dtype=torch.float32)\nnorm = torch.norm(v)', - 'hints': ['使用 torch.norm()', 'L2 範數是元素平方和的平方根'], - 'concepts': ['向量範數', '向量運算'] - } - ], - 'medium': [ - { - 'question': '給定矩陣 A ({m}×{n}),計算其轉置並驗證 (A^T)^T = A。', - 'solution': 'import torch\nA = torch.randn({m}, {n})\nA_T = A.T\nA_T_T = A_T.T\nprint(torch.equal(A, A_T_T)) # True', - 'hints': ['使用 .T 屬性', '使用 torch.equal() 比較'], - 'concepts': ['矩陣轉置', '矩陣性質'] - }, - { - 'question': '計算兩個向量 u = [{u}] 和 v = [{v}] 的點積,並驗證結果。', - 'solution': 'import torch\nu = torch.tensor([{u}], dtype=torch.float32)\nv = torch.tensor([{v}], dtype=torch.float32)\ndot_product = torch.dot(u, v)', - 'hints': ['使用 torch.dot()', '點積等於按元素乘積的和'], - 'concepts': ['向量點積', '內積運算'] - } - ], - 'hard': [ - { - 'question': '實現 Hadamard 積(按元素乘法)和矩陣乘法,並分析它們的計算複雜度差異。', - 'solution': '''import torch -import time - -A = torch.randn(1000, 1000) -B = torch.randn(1000, 1000) - -# Hadamard 積 -start = time.time() -hadamard = A * B -print(f"Hadamard 時間: {time.time() - start:.4f}s") - -# 矩陣乘法 -start = time.time() -matmul = torch.mm(A, B) -print(f"矩陣乘法時間: {time.time() - start:.4f}s")''', - 'hints': ['Hadamard 積: O(n²)', '矩陣乘法: O(n³)', '測量實際執行時間'], - 'concepts': ['計算複雜度', '矩陣運算', '性能分析'] - } - ] - } - - level_templates = templates.get(self.difficulty, templates['easy']) - - for i in range(count): - template = random.choice(level_templates) - exercise = template.copy() - - # 生成隨機參數 - params = { - 'v': ', '.join(str(random.randint(1, 10)) for _ in range(3)), - 'u': ', '.join(str(random.randint(1, 10)) for _ in range(4)), - 'm': random.randint(3, 5), - 'n': random.randint(3, 5) - } - - exercise['question'] = exercise['question'].format(**params) - exercise['solution'] = exercise['solution'].format(**params) - exercise['id'] = f"{self.topic}_{self.difficulty}_{i+1}" - exercise['difficulty'] = self.difficulty - - exercises.append(exercise) - - return exercises - - def _generate_calculus_exercises(self, count: int) -> List[Dict[str, Any]]: - """生成微積分練習題""" - exercises = [] - templates = { - 'easy': [ - { - 'question': '計算函數 f(x) = x² 在 x = {x} 處的導數(使用數值方法)。', - 'solution': '''import torch - -def f(x): - return x ** 2 - -x = torch.tensor([{x}], requires_grad=True) -y = f(x) -y.backward() -derivative = x.grad # 應該接近 2*{x}''', - 'hints': ['使用 requires_grad=True', '調用 .backward()', '從 .grad 獲取梯度'], - 
'concepts': ['導數', '自動微分'] - } - ], - 'medium': [ - { - 'question': '計算函數 f(x, y) = x² + y² 在點 ({x}, {y}) 處的梯度。', - 'solution': '''import torch - -x = torch.tensor([{x}], requires_grad=True) -y = torch.tensor([{y}], requires_grad=True) -f = x**2 + y**2 -f.backward() -grad_x = x.grad # 2*{x} -grad_y = y.grad # 2*{y}''', - 'hints': ['多變量函數的梯度', '偏導數的計算'], - 'concepts': ['梯度', '偏導數', '多元微積分'] - } - ] - } - - level_templates = templates.get(self.difficulty, templates['easy']) - - for i in range(count): - template = random.choice(level_templates) - exercise = template.copy() - - params = { - 'x': random.uniform(1.0, 5.0), - 'y': random.uniform(1.0, 5.0) - } - - exercise['question'] = exercise['question'].format(**params) - exercise['solution'] = exercise['solution'].format(**params) - exercise['id'] = f"{self.topic}_{self.difficulty}_{i+1}" - exercise['difficulty'] = self.difficulty - - exercises.append(exercise) - - return exercises - - def _generate_autograd_exercises(self, count: int) -> List[Dict[str, Any]]: - """生成自動微分練習題""" - exercises = [] - # 實現類似的模板 - return self._generate_calculus_exercises(count) - - def _generate_probability_exercises(self, count: int) -> List[Dict[str, Any]]: - """生成概率統計練習題""" - exercises = [] - templates = { - 'easy': [ - { - 'question': '從標準正態分佈中生成 {n} 個樣本,並計算其均值和標準差。', - 'solution': '''import torch - -samples = torch.randn({n}) -mean = samples.mean() -std = samples.std() -print(f"均值: {{mean:.4f}}, 標準差: {{std:.4f}}")''', - 'hints': ['使用 torch.randn()', '理論均值=0,標準差=1'], - 'concepts': ['正態分佈', '統計量計算'] - } - ] - } - - level_templates = templates.get(self.difficulty, templates['easy']) - - for i in range(count): - template = random.choice(level_templates) - exercise = template.copy() - - params = {'n': random.randint(100, 1000)} - - exercise['question'] = exercise['question'].format(**params) - exercise['solution'] = exercise['solution'].format(**params) - exercise['id'] = f"{self.topic}_{self.difficulty}_{i+1}" - exercise['difficulty'] = self.difficulty - - exercises.append(exercise) - - return exercises - - -def save_exercises(exercises: List[Dict[str, Any]], output_file: str): - """保存練習題到文件""" - with open(output_file, 'w', encoding='utf-8') as f: - json.dump({ - 'generated_at': datetime.now().isoformat(), - 'total_count': len(exercises), - 'exercises': exercises - }, f, ensure_ascii=False, indent=2) - print(f"✅ 已保存 {len(exercises)} 道練習題到 {output_file}") - - -def print_exercises(exercises: List[Dict[str, Any]]): - """打印練習題""" - print(f"\n{'='*80}") - print(f"生成了 {len(exercises)} 道練習題") - print(f"{'='*80}\n") - - for i, ex in enumerate(exercises, 1): - print(f"📝 練習 {i}: {ex['id']}") - print(f"難度: {ex['difficulty']}") - print(f"\n問題:\n{ex['question']}") - print(f"\n概念: {', '.join(ex['concepts'])}") - print(f"\n提示:") - for hint in ex['hints']: - print(f" 💡 {hint}") - print(f"\n參考解答:\n```python\n{ex['solution']}\n```") - print(f"\n{'-'*80}\n") - - -def main(): - parser = argparse.ArgumentParser(description='AI 輔助練習生成器') - parser.add_argument('--topic', type=str, required=True, - choices=['ndarray', 'linear_algebra', 'calculus', 'autograd', 'probability'], - help='練習主題') - parser.add_argument('--difficulty', type=str, default='medium', - choices=['easy', 'medium', 'hard', 'expert'], - help='難度等級') - parser.add_argument('--count', type=int, default=5, - help='生成練習題數量') - parser.add_argument('--output', type=str, default=None, - help='輸出文件路徑(可選)') - - args = parser.parse_args() - - print(f"🤖 正在生成 {args.topic} 主題的 {args.difficulty} 難度練習題...") - - generator = 
ExerciseGenerator(args.topic, args.difficulty) - exercises = generator.generate(args.count) - - print_exercises(exercises) - - if args.output: - save_exercises(exercises, args.output) - - -if __name__ == '__main__': - main() diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/progress_tracker.py" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/progress_tracker.py" deleted file mode 100644 index baf7141..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/progress_tracker.py" +++ /dev/null @@ -1,278 +0,0 @@ -""" -學習進度追蹤器 -============ - -追蹤學習進度,分析薄弱環節,提供個性化學習建議。 - -使用方法: - python progress_tracker.py --update --topic ndarray --score 85 - python progress_tracker.py --report - python progress_tracker.py --suggest - -作者:AI Learning Community -版本:v1.0 -""" - -import argparse -import json -import os -from datetime import datetime -from typing import Dict, List, Any -import matplotlib.pyplot as plt -import numpy as np - - -class ProgressTracker: - """學習進度追蹤器""" - - def __init__(self, data_file='./ai_tools/progress_data.json'): - self.data_file = data_file - self.data = self._load_data() - - def _load_data(self) -> Dict[str, Any]: - """加載進度數據""" - if os.path.exists(self.data_file): - with open(self.data_file, 'r', encoding='utf-8') as f: - return json.load(f) - return { - 'user_id': 'learner_001', - 'start_date': datetime.now().isoformat(), - 'topics': { - 'ndarray': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - 'pandas': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - 'linear_algebra': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - 'calculus': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - 'autograd': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - 'probability': {'score': 0, 'time_spent': 0, 'exercises_completed': 0, 'last_update': None}, - }, - 'total_time': 0, - 'milestones': [], - 'notes': [] - } - - def _save_data(self): - """保存進度數據""" - os.makedirs(os.path.dirname(self.data_file), exist_ok=True) - with open(self.data_file, 'w', encoding='utf-8') as f: - json.dump(self.data, f, ensure_ascii=False, indent=2) - print("✅ 進度已保存") - - def update_progress(self, topic: str, score: int = None, time_spent: int = None, - exercises: int = None): - """更新學習進度""" - if topic not in self.data['topics']: - print(f"❌ 未知主題: {topic}") - return - - topic_data = self.data['topics'][topic] - - if score is not None: - topic_data['score'] = max(topic_data['score'], score) # 保留最高分 - if time_spent is not None: - topic_data['time_spent'] += time_spent - self.data['total_time'] += time_spent - if exercises is not None: - topic_data['exercises_completed'] += exercises - - topic_data['last_update'] = datetime.now().isoformat() - - # 檢查里程碑 - self._check_milestones(topic, score) - - self._save_data() - print(f"✅ {topic} 進度已更新") - - 
def _check_milestones(self, topic: str, score: int): - """檢查是否達成里程碑""" - milestones = [ - (60, "入門"), - (75, "熟練"), - (90, "精通"), - (100, "大師") - ] - - for threshold, level in milestones: - if score >= threshold: - milestone = { - 'topic': topic, - 'level': level, - 'score': score, - 'achieved_at': datetime.now().isoformat() - } - # 避免重複添加 - if not any(m['topic'] == topic and m['level'] == level for m in self.data['milestones']): - self.data['milestones'].append(milestone) - print(f"🎉 恭喜!你在 {topic} 達到了 {level} 水平!") - - def generate_report(self): - """生成學習報告""" - print(f"\n{'='*80}") - print(f"📊 學習進度報告") - print(f"{'='*80}\n") - - print(f"用戶ID: {self.data['user_id']}") - print(f"開始日期: {self.data['start_date'][:10]}") - print(f"總學習時間: {self.data['total_time']} 小時\n") - - print(f"{'主題':<20} {'分數':<10} {'時間(h)':<10} {'練習數':<10} {'最後更新':<20}") - print(f"{'-'*80}") - - for topic, data in self.data['topics'].items(): - last_update = data['last_update'][:10] if data['last_update'] else 'N/A' - print(f"{topic:<20} {data['score']:<10} {data['time_spent']:<10} " - f"{data['exercises_completed']:<10} {last_update:<20}") - - # 計算總體進度 - avg_score = np.mean([d['score'] for d in self.data['topics'].values()]) - print(f"\n平均分數: {avg_score:.1f}") - - # 里程碑 - if self.data['milestones']: - print(f"\n🏆 已達成的里程碑:") - for m in self.data['milestones']: - print(f" - {m['topic']}: {m['level']} (分數: {m['score']})") - - # 可視化 - self._visualize_progress() - - def _visualize_progress(self): - """可視化學習進度""" - topics = list(self.data['topics'].keys()) - scores = [self.data['topics'][t]['score'] for t in topics] - times = [self.data['topics'][t]['time_spent'] for t in topics] - - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) - - # 分數雷達圖 - angles = np.linspace(0, 2 * np.pi, len(topics), endpoint=False).tolist() - scores_plot = scores + [scores[0]] - angles += angles[:1] - - ax1 = plt.subplot(121, projection='polar') - ax1.plot(angles, scores_plot, 'o-', linewidth=2, color='royalblue') - ax1.fill(angles, scores_plot, alpha=0.25, color='royalblue') - ax1.set_xticks(angles[:-1]) - ax1.set_xticklabels(topics, size=10) - ax1.set_ylim(0, 100) - ax1.set_title('各主題掌握程度', size=14, fontweight='bold', pad=20) - ax1.grid(True) - - # 學習時間柱狀圖 - ax2 = plt.subplot(122) - colors = plt.cm.viridis(np.linspace(0, 1, len(topics))) - bars = ax2.bar(topics, times, color=colors, alpha=0.7, edgecolor='black') - ax2.set_ylabel('學習時間 (小時)', fontsize=12) - ax2.set_title('各主題學習時間', fontsize=14, fontweight='bold') - ax2.tick_params(axis='x', rotation=45) - ax2.grid(True, alpha=0.3, axis='y') - - # 添加數值標籤 - for bar in bars: - height = bar.get_height() - ax2.text(bar.get_x() + bar.get_width()/2., height, - f'{height:.1f}h', - ha='center', va='bottom', fontsize=10, fontweight='bold') - - plt.tight_layout() - plt.savefig('./ai_tools/learning_progress.png', dpi=150, bbox_inches='tight') - print("\n✅ 進度圖表已保存至 learning_progress.png") - plt.show() - - def get_suggestions(self): - """獲取個性化學習建議""" - print(f"\n{'='*80}") - print(f"💡 個性化學習建議") - print(f"{'='*80}\n") - - suggestions = [] - - # 分析薄弱環節 - weak_topics = [(topic, data['score']) for topic, data in self.data['topics'].items() - if data['score'] < 70] - - if weak_topics: - weak_topics.sort(key=lambda x: x[1]) - print("📌 需要加強的主題:") - for topic, score in weak_topics: - print(f" - {topic} (當前分數: {score})") - suggestions.append(f"建議複習 {topic},目標提升至 75 分以上") - - # 推薦學習順序 - print("\n📚 推薦學習順序:") - topic_order = ['ndarray', 'pandas', 'linear_algebra', 'calculus', 'autograd', 'probability'] - for i, topic 
in enumerate(topic_order, 1): - status = "✅" if self.data['topics'][topic]['score'] >= 75 else "⬜" - print(f" {i}. {status} {topic}") - - # 時間分配建議 - print("\n⏰ 時間分配建議:") - for topic in weak_topics[:3]: # 前三個薄弱主題 - recommended_time = max(5, (75 - topic[1]) // 10) # 根據分數差距推薦時間 - print(f" - {topic[0]}: 建議再投入 {recommended_time} 小時") - - # 學習策略建議 - print("\n🎯 學習策略建議:") - avg_score = np.mean([d['score'] for d in self.data['topics'].values()]) - - if avg_score < 60: - print(" - 當前處於入門階段,建議:") - print(" 1. 按順序完成每個 notebook") - print(" 2. 務必完成所有練習題") - print(" 3. 使用 AI 輔助工具生成額外練習") - elif avg_score < 80: - print(" - 當前處於進階階段,建議:") - print(" 1. 深入理解數學原理") - print(" 2. 完成實踐項目") - print(" 3. 嘗試實現一些算法") - else: - print(" - 當前處於精通階段,建議:") - print(" 1. 閱讀相關論文") - print(" 2. 參與開源項目") - print(" 3. 分享學習心得,教授他人") - - return suggestions - - def add_note(self, topic: str, note: str): - """添加學習筆記""" - self.data['notes'].append({ - 'topic': topic, - 'content': note, - 'created_at': datetime.now().isoformat() - }) - self._save_data() - print(f"✅ 筆記已添加到 {topic}") - - -def main(): - parser = argparse.ArgumentParser(description='學習進度追蹤器') - parser.add_argument('--update', action='store_true', help='更新進度') - parser.add_argument('--topic', type=str, help='主題名稱') - parser.add_argument('--score', type=int, help='分數 (0-100)') - parser.add_argument('--time', type=int, help='學習時間(小時)') - parser.add_argument('--exercises', type=int, help='完成的練習數') - parser.add_argument('--report', action='store_true', help='生成學習報告') - parser.add_argument('--suggest', action='store_true', help='獲取學習建議') - parser.add_argument('--note', type=str, help='添加學習筆記') - - args = parser.parse_args() - - tracker = ProgressTracker() - - if args.update: - if not args.topic: - print("❌ 請指定主題 (--topic)") - return - tracker.update_progress(args.topic, args.score, args.time, args.exercises) - - if args.report: - tracker.generate_report() - - if args.suggest: - tracker.get_suggestions() - - if args.note and args.topic: - tracker.add_note(args.topic, args.note) - - -if __name__ == '__main__': - main() diff --git "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/visualizer.py" "b/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/visualizer.py" deleted file mode 100644 index 457a678..0000000 --- "a/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/1.\345\276\236AI\345\210\260LLM\345\237\272\347\244\216/4.DL/00.DL_Path/2_\351\240\220\345\202\231\347\237\245\350\255\230/ai_tools/visualizer.py" +++ /dev/null @@ -1,302 +0,0 @@ -""" -數學概念可視化工具 -================ - -交互式可視化深度學習中的數學概念。 -支持:梯度下降、線性變換、激活函數、概率分佈等。 - -使用方法: - python visualizer.py --concept gradient_descent - python visualizer.py --concept activation_functions --interactive - -作者:AI Learning Community -版本:v1.0 -""" - -import argparse -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.animation import FuncAnimation -from typing import Callable, Tuple -import torch - - -class MathVisualizer: - """數學概念可視化器""" - - def __init__(self, figsize=(12, 8)): - self.figsize = figsize - plt.style.use('seaborn-v0_8-darkgrid') - 
- def visualize_gradient_descent(self, interactive=False): - """可視化梯度下降過程""" - print("📊 可視化梯度下降...") - - # 定義目標函數 f(x) = x^2 - def f(x): - return x ** 2 - - def df(x): - return 2 * x - - # 梯度下降 - x_init = 5.0 - learning_rate = 0.1 - iterations = 20 - - x_history = [x_init] - x = x_init - - for _ in range(iterations): - grad = df(x) - x = x - learning_rate * grad - x_history.append(x) - - # 繪圖 - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=self.figsize) - - # 左圖:函數曲線和梯度下降路徑 - x_range = np.linspace(-6, 6, 200) - y_range = f(x_range) - - ax1.plot(x_range, y_range, 'b-', linewidth=2, label='f(x) = x²') - ax1.plot(x_history, [f(x) for x in x_history], 'ro-', - markersize=8, linewidth=1.5, label='梯度下降路徑') - ax1.scatter([x_history[0]], [f(x_history[0])], color='green', - s=200, marker='*', label='起點', zorder=5) - ax1.scatter([x_history[-1]], [f(x_history[-1])], color='red', - s=200, marker='*', label='終點', zorder=5) - ax1.set_xlabel('x', fontsize=12) - ax1.set_ylabel('f(x)', fontsize=12) - ax1.set_title('梯度下降優化過程', fontsize=14, fontweight='bold') - ax1.legend() - ax1.grid(True, alpha=0.3) - - # 右圖:收斂曲線 - ax2.plot(range(len(x_history)), [f(x) for x in x_history], - 'g-o', linewidth=2, markersize=6) - ax2.set_xlabel('迭代次數', fontsize=12) - ax2.set_ylabel('函數值 f(x)', fontsize=12) - ax2.set_title('收斂曲線', fontsize=14, fontweight='bold') - ax2.grid(True, alpha=0.3) - - plt.tight_layout() - plt.savefig('./ai_tools/gradient_descent.png', dpi=150, bbox_inches='tight') - print("✅ 圖片已保存至 gradient_descent.png") - plt.show() - - def visualize_linear_transformation(self): - """可視化線性變換""" - print("📊 可視化線性變換...") - - fig, axes = plt.subplots(2, 3, figsize=(15, 10)) - - # 原始向量 - original_vectors = np.array([[1, 0], [0, 1], [1, 1], [2, 1]]) - - transformations = [ - ("恆等變換", np.array([[1, 0], [0, 1]])), - ("縮放變換", np.array([[2, 0], [0, 2]])), - ("旋轉變換 (45°)", np.array([[np.cos(np.pi/4), -np.sin(np.pi/4)], - [np.sin(np.pi/4), np.cos(np.pi/4)]])), - ("剪切變換", np.array([[1, 0.5], [0, 1]])), - ("反射變換", np.array([[1, 0], [0, -1]])), - ("投影變換", np.array([[1, 0], [0, 0]])), - ] - - for idx, (title, matrix) in enumerate(transformations): - ax = axes[idx // 3, idx % 3] - - # 繪製原始向量 - for vec in original_vectors: - ax.arrow(0, 0, vec[0], vec[1], head_width=0.1, head_length=0.1, - fc='blue', ec='blue', alpha=0.3, linewidth=1.5, label='原始' if vec[0] == 1 and vec[1] == 0 else '') - - # 繪製變換後的向量 - transformed_vectors = (matrix @ original_vectors.T).T - for vec in transformed_vectors: - ax.arrow(0, 0, vec[0], vec[1], head_width=0.1, head_length=0.1, - fc='red', ec='red', linewidth=2, label='變換後' if vec[0] == matrix[0,0] and vec[1] == matrix[1,0] else '') - - ax.set_xlim(-3, 3) - ax.set_ylim(-3, 3) - ax.set_aspect('equal') - ax.grid(True, alpha=0.3) - ax.axhline(y=0, color='k', linewidth=0.5) - ax.axvline(x=0, color='k', linewidth=0.5) - ax.set_title(title, fontsize=12, fontweight='bold') - if idx == 0: - ax.legend(loc='upper right') - - plt.tight_layout() - plt.savefig('./ai_tools/linear_transformations.png', dpi=150, bbox_inches='tight') - print("✅ 圖片已保存至 linear_transformations.png") - plt.show() - - def visualize_activation_functions(self): - """可視化激活函數""" - print("📊 可視化激活函數...") - - x = torch.linspace(-5, 5, 200) - - activations = { - 'Sigmoid': torch.sigmoid(x), - 'Tanh': torch.tanh(x), - 'ReLU': torch.relu(x), - 'Leaky ReLU': torch.nn.functional.leaky_relu(x, 0.1), - 'ELU': torch.nn.functional.elu(x), - 'GELU': torch.nn.functional.gelu(x) - } - - fig, axes = plt.subplots(2, 3, figsize=self.figsize) - axes = axes.flatten() - - 
for idx, (name, y) in enumerate(activations.items()): - ax = axes[idx] - ax.plot(x.numpy(), y.numpy(), linewidth=2.5, color='royalblue') - ax.axhline(y=0, color='k', linewidth=0.5, linestyle='--', alpha=0.3) - ax.axvline(x=0, color='k', linewidth=0.5, linestyle='--', alpha=0.3) - ax.grid(True, alpha=0.3) - ax.set_title(name, fontsize=13, fontweight='bold') - ax.set_xlabel('x') - ax.set_ylabel('f(x)') - - plt.tight_layout() - plt.savefig('./ai_tools/activation_functions.png', dpi=150, bbox_inches='tight') - print("✅ 圖片已保存至 activation_functions.png") - plt.show() - - def visualize_probability_distributions(self): - """可視化概率分佈""" - print("📊 可視化概率分佈...") - - fig, axes = plt.subplots(2, 3, figsize=self.figsize) - axes = axes.flatten() - - # 1. 正態分佈 - x = np.linspace(-5, 5, 200) - for mu, sigma in [(0, 1), (0, 2), (2, 1)]: - y = (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - axes[0].plot(x, y, linewidth=2, label=f'μ={mu}, σ={sigma}') - axes[0].set_title('正態分佈', fontweight='bold') - axes[0].legend() - axes[0].grid(True, alpha=0.3) - - # 2. 均勻分佈 - samples = torch.rand(10000) - axes[1].hist(samples.numpy(), bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black') - axes[1].set_title('均勻分佈', fontweight='bold') - axes[1].grid(True, alpha=0.3) - - # 3. 伯努利分佈 - p_values = [0.3, 0.5, 0.7] - x_bernoulli = [0, 1] - for p in p_values: - y_bernoulli = [1-p, p] - axes[2].plot(x_bernoulli, y_bernoulli, 'o-', linewidth=2, markersize=8, label=f'p={p}') - axes[2].set_title('伯努利分佈', fontweight='bold') - axes[2].set_xticks([0, 1]) - axes[2].legend() - axes[2].grid(True, alpha=0.3) - - # 4. 指數分佈 - x_exp = np.linspace(0, 5, 200) - for lambda_param in [0.5, 1, 2]: - y_exp = lambda_param * np.exp(-lambda_param * x_exp) - axes[3].plot(x_exp, y_exp, linewidth=2, label=f'λ={lambda_param}') - axes[3].set_title('指數分佈', fontweight='bold') - axes[3].legend() - axes[3].grid(True, alpha=0.3) - - # 5. 二項分佈 - n, p = 10, 0.5 - from scipy.stats import binom - x_binom = np.arange(0, n+1) - y_binom = binom.pmf(x_binom, n, p) - axes[4].bar(x_binom, y_binom, alpha=0.7, color='coral', edgecolor='black') - axes[4].set_title(f'二項分佈 (n={n}, p={p})', fontweight='bold') - axes[4].grid(True, alpha=0.3) - - # 6. 
泊松分佈 - from scipy.stats import poisson - x_poisson = np.arange(0, 15) - for lambda_p in [1, 4, 7]: - y_poisson = poisson.pmf(x_poisson, lambda_p) - axes[5].plot(x_poisson, y_poisson, 'o-', linewidth=2, markersize=6, label=f'λ={lambda_p}') - axes[5].set_title('泊松分佈', fontweight='bold') - axes[5].legend() - axes[5].grid(True, alpha=0.3) - - plt.tight_layout() - plt.savefig('./ai_tools/probability_distributions.png', dpi=150, bbox_inches='tight') - print("✅ 圖片已保存至 probability_distributions.png") - plt.show() - - def visualize_matrix_multiplication(self): - """可視化矩陣乘法""" - print("📊 可視化矩陣乘法...") - - fig, axes = plt.subplots(1, 3, figsize=(15, 5)) - - # 創建示例矩陣 - A = np.array([[1, 2, 3], [4, 5, 6]]) - B = np.array([[7, 8], [9, 10], [11, 12]]) - C = A @ B - - matrices = [ - (A, 'Matrix A (2×3)'), - (B, 'Matrix B (3×2)'), - (C, 'Result C = A @ B (2×2)') - ] - - for ax, (matrix, title) in zip(axes, matrices): - im = ax.imshow(matrix, cmap='viridis', aspect='auto') - ax.set_title(title, fontsize=13, fontweight='bold') - - # 添加數值 - for i in range(matrix.shape[0]): - for j in range(matrix.shape[1]): - text = ax.text(j, i, f'{matrix[i, j]:.0f}', - ha="center", va="center", color="white", fontsize=14, fontweight='bold') - - plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04) - - plt.tight_layout() - plt.savefig('./ai_tools/matrix_multiplication.png', dpi=150, bbox_inches='tight') - print("✅ 圖片已保存至 matrix_multiplication.png") - plt.show() - - -def main(): - parser = argparse.ArgumentParser(description='數學概念可視化工具') - parser.add_argument('--concept', type=str, required=True, - choices=['gradient_descent', 'linear_transformation', 'activation_functions', - 'probability_distributions', 'matrix_multiplication', 'all'], - help='要可視化的概念') - parser.add_argument('--interactive', action='store_true', - help='啟用交互模式') - - args = parser.parse_args() - - visualizer = MathVisualizer() - - concepts = { - 'gradient_descent': visualizer.visualize_gradient_descent, - 'linear_transformation': visualizer.visualize_linear_transformation, - 'activation_functions': visualizer.visualize_activation_functions, - 'probability_distributions': visualizer.visualize_probability_distributions, - 'matrix_multiplication': visualizer.visualize_matrix_multiplication, - } - - if args.concept == 'all': - for name, func in concepts.items(): - print(f"\n{'='*60}") - print(f"可視化: {name}") - print(f"{'='*60}") - func() if name != 'gradient_descent' else func(args.interactive) - else: - concepts[args.concept](args.interactive) if args.concept == 'gradient_descent' else concepts[args.concept]() - - -if __name__ == '__main__': - main() diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/README.md" new file mode 100644 index 0000000..135eb02 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/README.md" @@ -0,0 +1,1102 @@ +# 圖片生成 (Image Generation) + +本章節將深入探討AI圖片生成的核心技術,從基礎的Diffusion模型到進階的控制技術。 + +## 📋 目錄 + +1. [Stable Diffusion 基礎](#stable-diffusion-基礎) +2. [ControlNet 控制技術](#controlnet-控制技術) +3. [LoRA 訓練與應用](#lora-訓練與應用) +4. [進階技巧](#進階技巧) +5. 
[實戰案例](#實戰案例) + +--- + +## 🎯 學習目標 + +完成本章節後,你將能夠: + +- ✅ 理解Diffusion模型的工作原理 +- ✅ 使用Stable Diffusion生成高質量圖片 +- ✅ 運用ControlNet精確控制生成內容 +- ✅ 訓練和使用自定義LoRA模型 +- ✅ 掌握提示詞工程技巧 +- ✅ 構建實用的圖片生成應用 + +--- + +## 📚 Stable Diffusion 基礎 + +### 什麼是Stable Diffusion? + +Stable Diffusion是一個基於Latent Diffusion Model (LDM)的文生圖模型,由Stability AI開發。它通過在潛在空間中進行擴散過程,大幅降低了計算成本。 + +### 核心組件 + +``` +┌─────────────────────────────────────────┐ +│ Stable Diffusion 架構 │ +├─────────────────────────────────────────┤ +│ │ +│ Text ┌──────────────┐ │ +│ Prompt → │ Text Encoder │ │ +│ │ (CLIP) │ │ +│ └──────┬───────┘ │ +│ │ │ +│ ↓ │ +│ ┌──────────────┐ │ +│ │ U-Net │ │ +│ Noise → │ (Denoiser) │ → 潛在表示 │ +│ └──────────────┘ │ +│ │ │ +│ ↓ │ +│ ┌──────────────┐ │ +│ │ VAE Decoder │ │ +│ └──────┬───────┘ │ +│ │ │ +│ ↓ │ +│ 生成圖片 │ +│ │ +└─────────────────────────────────────────┘ +``` + +### 基本使用 + +#### 安裝依賴 + +```bash +pip install diffusers transformers accelerate torch torchvision xformers +``` + +#### 基礎圖片生成 + +```python +from diffusers import StableDiffusionPipeline +import torch + +# 加載模型 +model_id = "runwayml/stable-diffusion-v1-5" +pipe = StableDiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + use_safetensors=True +) +pipe = pipe.to("cuda") + +# 啟用記憶體優化 +pipe.enable_attention_slicing() +pipe.enable_vae_slicing() + +# 生成圖片 +prompt = """ +a professional portrait photo of a young woman, +natural lighting, bokeh background, +highly detailed, 8k, photorealistic +""" + +negative_prompt = """ +cartoon, 3d, disfigured, bad art, deformed, +poorly drawn, extra limbs, close up, weird colors +""" + +image = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=50, + guidance_scale=7.5, + width=512, + height=512, + generator=torch.Generator("cuda").manual_seed(42) +).images[0] + +image.save("generated_portrait.png") +``` + +### 重要參數說明 + +| 參數 | 說明 | 推薦值 | +|------|------|--------| +| `num_inference_steps` | 推理步數,步數越多質量越好但速度越慢 | 20-50 | +| `guidance_scale` | CFG scale,控制提示詞遵循程度 | 7-12 | +| `width`, `height` | 圖片尺寸,必須是64的倍數 | 512, 768 | +| `generator` | 隨機數生成器,設定seed可重現結果 | 固定seed | + +--- + +## 🎮 ControlNet 控制技術 + +### 什麼是ControlNet? + +ControlNet是一種條件控制技術,允許你使用邊緣檢測、深度圖、姿態等信息精確控制生成內容。 + +### 支持的控制類型 + +- **Canny Edge** - 邊緣檢測 +- **Depth** - 深度圖 +- **OpenPose** - 人體姿態 +- **Scribble** - 塗鴉線稿 +- **Segmentation** - 語義分割 +- **Normal Map** - 法線貼圖 +- **MLSD** - 直線檢測(建築物) + +### 安裝與使用 + +```python +from diffusers import StableDiffusionControlNetPipeline, ControlNetModel +from diffusers.utils import load_image +import torch +import cv2 +import numpy as np +from PIL import Image + +# 1. 準備控制圖(以Canny邊緣為例) +def get_canny_edge(image_path, low_threshold=100, high_threshold=200): + """從圖片提取Canny邊緣""" + image = load_image(image_path) + image = np.array(image) + + # 轉換為灰度圖 + gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + + # 應用Canny邊緣檢測 + edges = cv2.Canny(gray, low_threshold, high_threshold) + edges = edges[:, :, None] + edges = np.concatenate([edges, edges, edges], axis=2) + + return Image.fromarray(edges) + +# 2. 加載ControlNet模型 +controlnet = ControlNetModel.from_pretrained( + "lllyasviel/sd-controlnet-canny", + torch_dtype=torch.float16 +) + +# 3. 創建Pipeline +pipe = StableDiffusionControlNetPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + controlnet=controlnet, + torch_dtype=torch.float16, + safety_checker=None +) +pipe = pipe.to("cuda") +pipe.enable_attention_slicing() + +# 4. 
生成圖片 +control_image = get_canny_edge("input_image.jpg") +control_image.save("control_canny.png") + +prompt = "a luxury sports car, highly detailed, professional photo" +negative_prompt = "low quality, blurry, distorted" + +generated_image = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + image=control_image, + num_inference_steps=30, + controlnet_conditioning_scale=1.0, # 控制強度 +).images[0] + +generated_image.save("controlnet_output.png") +``` + +### 多重ControlNet組合 + +```python +from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler + +# 加載多個ControlNet +controlnet_canny = ControlNetModel.from_pretrained( + "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16 +) +controlnet_depth = ControlNetModel.from_pretrained( + "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16 +) + +# 創建多控制Pipeline +pipe = StableDiffusionControlNetPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + controlnet=[controlnet_canny, controlnet_depth], + torch_dtype=torch.float16 +) +pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) +pipe = pipe.to("cuda") + +# 準備控制圖 +canny_image = get_canny_edge("input.jpg") +depth_image = get_depth_map("input.jpg") # 需要實現深度估計 + +# 生成 +image = pipe( + prompt="a beautiful landscape", + image=[canny_image, depth_image], + controlnet_conditioning_scale=[0.5, 0.8], # 各自的控制強度 + num_inference_steps=25 +).images[0] + +image.save("multi_controlnet_output.png") +``` + +--- + +## 🔧 LoRA 訓練與應用 + +### 什麼是LoRA? + +LoRA (Low-Rank Adaptation) 是一種高效的模型微調技術,只需訓練少量參數即可定制化模型風格或特定主題。 + +### LoRA 優勢 + +- ✅ **參數少** - 通常只有幾MB到幾十MB +- ✅ **訓練快** - 相比完整微調快10-100倍 +- ✅ **可組合** - 可以疊加多個LoRA +- ✅ **易分享** - 文件小,方便傳播 + +### 使用現有LoRA + +```python +from diffusers import StableDiffusionPipeline +import torch + +# 加載基礎模型 +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16 +) + +# 加載LoRA權重 +pipe.load_lora_weights("path/to/lora_weights.safetensors") + +# 設定LoRA強度(0-1) +pipe.fuse_lora(lora_scale=0.8) + +pipe = pipe.to("cuda") + +# 生成圖片 +prompt = "a cute cat in anime style" # LoRA會影響風格 +image = pipe(prompt, num_inference_steps=30).images[0] +image.save("lora_output.png") +``` + +### 訓練自定義LoRA + +#### 準備訓練數據 + +```bash +# 數據集結構 +dataset/ +├── images/ +│ ├── img_001.jpg +│ ├── img_002.jpg +│ └── ... 
+└── metadata.csv # 包含圖片路徑和對應的caption +``` + +#### 訓練腳本 + +```python +# train_lora.py +from diffusers import StableDiffusionPipeline +from diffusers.loaders import AttnProcsLayers +from diffusers.models.attention_processor import LoRAAttnProcessor +import torch +from torch.utils.data import Dataset, DataLoader +from PIL import Image +import pandas as pd +from tqdm import tqdm + +class ImageCaptionDataset(Dataset): + """圖片-文本對數據集""" + def __init__(self, metadata_file, image_dir, tokenizer, image_processor): + self.df = pd.read_csv(metadata_file) + self.image_dir = image_dir + self.tokenizer = tokenizer + self.image_processor = image_processor + + def __len__(self): + return len(self.df) + + def __getitem__(self, idx): + row = self.df.iloc[idx] + + # 加載圖片 + image_path = f"{self.image_dir}/{row['image']}" + image = Image.open(image_path).convert("RGB") + image = self.image_processor(image) + + # 編碼文本 + caption = row['caption'] + text_inputs = self.tokenizer( + caption, + padding="max_length", + max_length=77, + truncation=True, + return_tensors="pt" + ) + + return { + "pixel_values": image, + "input_ids": text_inputs.input_ids[0] + } + +def train_lora( + model_id="runwayml/stable-diffusion-v1-5", + dataset_path="dataset/metadata.csv", + image_dir="dataset/images", + output_dir="lora_output", + rank=4, # LoRA rank + learning_rate=1e-4, + num_epochs=100, + batch_size=4 +): + """訓練LoRA模型""" + + # 1. 加載預訓練模型 + pipe = StableDiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + # 2. 注入LoRA層 + unet = pipe.unet + lora_attn_procs = {} + for name in unet.attn_processors.keys(): + cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim + if name.startswith("mid_block"): + hidden_size = unet.config.block_out_channels[-1] + elif name.startswith("up_blocks"): + block_id = int(name[len("up_blocks.")]) + hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + elif name.startswith("down_blocks"): + block_id = int(name[len("down_blocks.")]) + hidden_size = unet.config.block_out_channels[block_id] + + lora_attn_procs[name] = LoRAAttnProcessor( + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + rank=rank + ) + + unet.set_attn_processor(lora_attn_procs) + + # 3. 準備訓練參數 + lora_layers = AttnProcsLayers(unet.attn_processors) + lora_layers = lora_layers.to("cuda", dtype=torch.float16) + + optimizer = torch.optim.AdamW( + lora_layers.parameters(), + lr=learning_rate, + betas=(0.9, 0.999), + weight_decay=1e-2 + ) + + # 4. 準備數據集 + dataset = ImageCaptionDataset( + dataset_path, + image_dir, + pipe.tokenizer, + pipe.feature_extractor + ) + dataloader = DataLoader( + dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4 + ) + + # 5. 
訓練循環 + unet.train() + for epoch in range(num_epochs): + progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}") + + for batch in progress_bar: + pixel_values = batch["pixel_values"].to("cuda", dtype=torch.float16) + input_ids = batch["input_ids"].to("cuda") + + # 編碼圖片到潛在空間 + latents = pipe.vae.encode(pixel_values).latent_dist.sample() + latents = latents * pipe.vae.config.scaling_factor + + # 添加噪聲 + noise = torch.randn_like(latents) + timesteps = torch.randint( + 0, pipe.scheduler.config.num_train_timesteps, + (latents.shape[0],), device="cuda" + ).long() + noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps) + + # 獲取文本嵌入 + encoder_hidden_states = pipe.text_encoder(input_ids)[0] + + # 預測噪聲 + noise_pred = unet( + noisy_latents, + timesteps, + encoder_hidden_states + ).sample + + # 計算損失 + loss = torch.nn.functional.mse_loss(noise_pred, noise) + + # 反向傳播 + optimizer.zero_grad() + loss.backward() + optimizer.step() + + progress_bar.set_postfix({"loss": loss.item()}) + + # 每10個epoch保存一次 + if (epoch + 1) % 10 == 0: + save_path = f"{output_dir}/lora_epoch_{epoch+1}.safetensors" + pipe.save_lora_weights(save_path) + print(f"Saved LoRA weights to {save_path}") + + # 保存最終模型 + pipe.save_lora_weights(f"{output_dir}/lora_final.safetensors") + print("Training completed!") + +# 執行訓練 +if __name__ == "__main__": + train_lora( + model_id="runwayml/stable-diffusion-v1-5", + dataset_path="dataset/metadata.csv", + image_dir="dataset/images", + output_dir="my_lora", + rank=8, + learning_rate=1e-4, + num_epochs=100, + batch_size=2 + ) +``` + +### LoRA 訓練最佳實踐 + +#### 數據準備 + +```python +# prepare_dataset.py +from PIL import Image +import os +import pandas as pd +from tqdm import tqdm + +def prepare_training_data( + input_folder, + output_folder, + target_size=512, + caption_prefix="a photo of" +): + """ + 準備訓練數據 + - 調整圖片大小 + - 生成caption文件 + """ + os.makedirs(output_folder, exist_ok=True) + os.makedirs(f"{output_folder}/images", exist_ok=True) + + captions = [] + + for filename in tqdm(os.listdir(input_folder)): + if not filename.lower().endswith(('.png', '.jpg', '.jpeg')): + continue + + # 加載並調整圖片 + img_path = os.path.join(input_folder, filename) + img = Image.open(img_path).convert("RGB") + + # 智能裁剪到正方形 + width, height = img.size + min_dim = min(width, height) + left = (width - min_dim) // 2 + top = (height - min_dim) // 2 + img = img.crop((left, top, left + min_dim, top + min_dim)) + + # 調整大小 + img = img.resize((target_size, target_size), Image.LANCZOS) + + # 保存 + output_path = f"{output_folder}/images/{filename}" + img.save(output_path, quality=95) + + # 生成caption + # 可以使用BLIP等模型自動生成更好的caption + caption = f"{caption_prefix} {os.path.splitext(filename)[0].replace('_', ' ')}" + captions.append({ + "image": filename, + "caption": caption + }) + + # 保存metadata + df = pd.DataFrame(captions) + df.to_csv(f"{output_folder}/metadata.csv", index=False) + print(f"Prepared {len(captions)} images for training") + +# 使用示例 +prepare_training_data( + input_folder="raw_images", + output_folder="dataset", + target_size=512, + caption_prefix="a portrait of person" +) +``` + +--- + +## 💡 進階技巧 + +### 1. 
Prompt Engineering(提示詞工程) + +#### 高質量提示詞結構 + +```python +def build_quality_prompt( + subject, + style="photorealistic", + quality_tags=True, + camera_settings=True, + lighting=True +): + """構建高質量提示詞""" + + prompt_parts = [subject] + + if style: + prompt_parts.append(style) + + if lighting: + lighting_terms = [ + "natural lighting", + "soft shadows", + "golden hour" + ] + prompt_parts.extend(lighting_terms) + + if camera_settings: + camera_terms = [ + "bokeh background", + "shallow depth of field", + "85mm lens", + "f/1.8" + ] + prompt_parts.extend(camera_terms) + + if quality_tags: + quality_terms = [ + "highly detailed", + "sharp focus", + "8k uhd", + "professional photography" + ] + prompt_parts.extend(quality_terms) + + return ", ".join(prompt_parts) + +# 示例 +prompt = build_quality_prompt( + subject="a beautiful woman with long hair", + style="photorealistic portrait", + quality_tags=True, + camera_settings=True, + lighting=True +) +print(prompt) +# 輸出: "a beautiful woman with long hair, photorealistic portrait, +# natural lighting, soft shadows, golden hour, bokeh background, +# shallow depth of field, 85mm lens, f/1.8, highly detailed, +# sharp focus, 8k uhd, professional photography" +``` + +#### Negative Prompt 模板 + +```python +NEGATIVE_PROMPTS = { + "photorealistic": """ + cartoon, anime, 3d render, drawing, painting, + low quality, blurry, distorted, deformed, + bad anatomy, bad proportions, extra limbs, + duplicate, watermark, signature, text + """, + + "portrait": """ + multiple people, crowd, far away, + bad face, ugly face, bad eyes, crossed eyes, + bad hands, extra fingers, missing fingers, + low quality, blurry + """, + + "landscape": """ + people, person, human, character, + low quality, blurry, foggy, + oversaturated, undersaturated, + watermark, text + """, + + "product": """ + person, people, background clutter, + low quality, blurry, distorted, + bad lighting, shadows on product, + watermark, text + """ +} + +# 使用 +negative_prompt = NEGATIVE_PROMPTS["photorealistic"] +``` + +### 2. 
批量生成與網格輸出 + +```python +from diffusers import StableDiffusionPipeline +import torch +from PIL import Image +import os + +def batch_generate_with_grid( + prompts, + output_dir="outputs", + grid_cols=4, + **generation_kwargs +): + """ + 批量生成圖片並創建網格預覽 + + Args: + prompts: 提示詞列表 + output_dir: 輸出目錄 + grid_cols: 網格列數 + **generation_kwargs: 傳遞給pipeline的其他參數 + """ + os.makedirs(output_dir, exist_ok=True) + + # 加載模型 + pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + pipe.enable_attention_slicing() + + # 生成圖片 + generated_images = [] + + for idx, prompt in enumerate(prompts): + print(f"Generating {idx+1}/{len(prompts)}: {prompt[:50]}...") + + image = pipe(prompt, **generation_kwargs).images[0] + + # 保存單張圖片 + image_path = f"{output_dir}/image_{idx:04d}.png" + image.save(image_path) + generated_images.append(image) + + # 創建網格 + create_image_grid(generated_images, f"{output_dir}/grid.png", cols=grid_cols) + print(f"Generated {len(prompts)} images in {output_dir}") + +def create_image_grid(images, output_path, cols=4): + """創建圖片網格""" + n_images = len(images) + rows = (n_images + cols - 1) // cols + + w, h = images[0].size + grid = Image.new('RGB', size=(cols * w, rows * h)) + + for idx, img in enumerate(images): + row = idx // cols + col = idx % cols + grid.paste(img, box=(col * w, row * h)) + + grid.save(output_path) + print(f"Grid saved to {output_path}") + +# 使用示例 +prompts = [ + "a red apple on white background", + "a blue car in the city", + "a cute cat sleeping", + "a mountain landscape at sunset", + "a modern architecture building", + "a delicious pizza close-up", + "a flower in macro photography", + "a sci-fi spaceship design" +] + +batch_generate_with_grid( + prompts, + output_dir="batch_outputs", + grid_cols=4, + num_inference_steps=30, + guidance_scale=7.5, + width=512, + height=512 +) +``` + +### 3. 圖片修復 (Inpainting) + +```python +from diffusers import StableDiffusionInpaintPipeline +from PIL import Image +import torch + +def inpaint_image( + image_path, + mask_path, + prompt, + negative_prompt="", + strength=0.8 +): + """ + 圖片修復/編輯 + + Args: + image_path: 原始圖片路徑 + mask_path: 遮罩圖片路徑(白色區域會被重繪) + prompt: 描述要生成的內容 + strength: 修改強度 (0-1) + """ + # 加載模型 + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "runwayml/stable-diffusion-inpainting", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + # 加載圖片和遮罩 + image = Image.open(image_path).convert("RGB") + mask = Image.open(mask_path).convert("L") # 灰度圖 + + # 執行修復 + result = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + image=image, + mask_image=mask, + num_inference_steps=50, + strength=strength, + guidance_scale=7.5 + ).images[0] + + return result + +# 使用示例 +result = inpaint_image( + image_path="photo.jpg", + mask_path="mask.png", + prompt="a beautiful flower bouquet", + negative_prompt="low quality, blurry" +) +result.save("inpainted_result.png") +``` + +### 4. 
圖片放大 (Upscaling) + +```python +from diffusers import StableDiffusionUpscalePipeline +from PIL import Image +import torch + +def upscale_image(image_path, prompt, scale_factor=4): + """ + 使用Stable Diffusion進行圖片放大 + + Args: + image_path: 輸入圖片路徑 + prompt: 圖片描述(幫助保持細節) + scale_factor: 放大倍數(2或4) + """ + # 加載放大模型 + pipe = StableDiffusionUpscalePipeline.from_pretrained( + "stabilityai/stable-diffusion-x4-upscaler", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + pipe.enable_attention_slicing() + + # 加載低解析度圖片 + low_res_img = Image.open(image_path).convert("RGB") + + # 執行放大 + upscaled_image = pipe( + prompt=prompt, + image=low_res_img, + num_inference_steps=50, + guidance_scale=7.5 + ).images[0] + + return upscaled_image + +# 使用示例 +upscaled = upscale_image( + image_path="low_res.jpg", + prompt="a high quality portrait photo", + scale_factor=4 +) +upscaled.save("upscaled_4x.png") +``` + +--- + +## 🚀 實戰案例 + +### 案例1:AI頭像生成器 + +```python +# avatar_generator.py +import torch +from diffusers import StableDiffusionPipeline +from PIL import Image +import random + +class AvatarGenerator: + """AI頭像生成器""" + + def __init__(self, model_id="runwayml/stable-diffusion-v1-5"): + self.pipe = StableDiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16 + ) + self.pipe = self.pipe.to("cuda") + self.pipe.enable_attention_slicing() + + def generate_avatar( + self, + gender="female", + style="realistic", + age_range="young adult", + hair_color=None, + customization=None + ): + """ + 生成頭像 + + Args: + gender: 性別 (male/female/neutral) + style: 風格 (realistic/anime/cartoon/artistic) + age_range: 年齡範圍 + hair_color: 髮色 + customization: 額外定制 + """ + # 構建提示詞 + prompt_parts = [] + + # 基礎描述 + if style == "realistic": + prompt_parts.append(f"professional portrait photo of a {age_range} {gender}") + elif style == "anime": + prompt_parts.append(f"anime style portrait of a {age_range} {gender}") + elif style == "cartoon": + prompt_parts.append(f"cartoon style avatar of a {age_range} {gender}") + else: + prompt_parts.append(f"artistic portrait of a {age_range} {gender}") + + # 髮色 + if hair_color: + prompt_parts.append(f"{hair_color} hair") + + # 定制化 + if customization: + prompt_parts.append(customization) + + # 質量標籤 + if style == "realistic": + prompt_parts.extend([ + "professional photography", + "studio lighting", + "bokeh background", + "highly detailed face", + "8k uhd", + "sharp focus" + ]) + + prompt = ", ".join(prompt_parts) + + # Negative prompt + negative_prompt = """ + multiple people, full body, far away, + low quality, blurry, distorted face, + bad anatomy, bad eyes, bad hands, + duplicate, watermark + """ + + # 生成 + image = self.pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=50, + guidance_scale=7.5, + width=512, + height=512, + generator=torch.Generator("cuda").manual_seed(random.randint(0, 2**32)) + ).images[0] + + return image, prompt + +# 使用示例 +generator = AvatarGenerator() + +# 生成不同風格的頭像 +avatar1, prompt1 = generator.generate_avatar( + gender="female", + style="realistic", + age_range="young adult", + hair_color="blonde", + customization="smiling, blue eyes" +) +avatar1.save("avatar_realistic.png") + +avatar2, prompt2 = generator.generate_avatar( + gender="male", + style="anime", + age_range="teenager", + hair_color="black", + customization="cool expression, wearing headphones" +) +avatar2.save("avatar_anime.png") + +print("Generated avatars:") +print(f"1. {prompt1}") +print(f"2. 
{prompt2}") +``` + +### 案例2:產品圖生成工具 + +```python +# product_image_generator.py +from diffusers import StableDiffusionPipeline, StableDiffusionControlNetPipeline, ControlNetModel +import torch +from PIL import Image + +class ProductImageGenerator: + """電商產品圖生成工具""" + + def __init__(self): + self.pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16 + ) + self.pipe = self.pipe.to("cuda") + + def generate_product_shot( + self, + product_description, + background="white studio", + angle="front view", + lighting="professional studio lighting", + num_variants=4 + ): + """ + 生成產品展示圖 + + Args: + product_description: 產品描述 + background: 背景設置 + angle: 拍攝角度 + lighting: 光照設置 + num_variants: 生成變體數量 + """ + # 構建提示詞 + prompt = f""" + product photography of {product_description}, + {angle}, {background}, {lighting}, + professional commercial photo, high quality, + sharp focus, detailed, 8k uhd, clean composition, + no people, no text + """ + + negative_prompt = """ + person, people, hands holding product, + low quality, blurry, cluttered background, + shadows on product, poor lighting, + watermark, text, logo + """ + + # 生成多個變體 + images = [] + for i in range(num_variants): + seed = i * 1000 + image = self.pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=50, + guidance_scale=8.0, + width=768, + height=768, + generator=torch.Generator("cuda").manual_seed(seed) + ).images[0] + images.append(image) + + return images, prompt + +# 使用示例 +generator = ProductImageGenerator() + +# 生成手錶產品圖 +watch_images, prompt = generator.generate_product_shot( + product_description="luxury silver watch with leather strap", + background="white marble surface", + angle="45 degree angle", + lighting="soft diffused lighting", + num_variants=4 +) + +# 保存 +for idx, img in enumerate(watch_images): + img.save(f"product_watch_{idx+1}.png") + +print(f"Generated {len(watch_images)} product images") +print(f"Prompt used: {prompt}") +``` + +--- + +## 📚 參考資源 + +### 官方文檔 +- [Hugging Face Diffusers](https://huggingface.co/docs/diffusers/index) +- [Stable Diffusion GitHub](https://github.com/CompVis/stable-diffusion) +- [ControlNet Paper](https://arxiv.org/abs/2302.05543) +- [LoRA Paper](https://arxiv.org/abs/2106.09685) + +### 模型資源 +- [Hugging Face Models](https://huggingface.co/models?pipeline_tag=text-to-image) +- [Civitai](https://civitai.com/) - 社群模型和LoRA分享 +- [Stability AI](https://stability.ai/) + +### 學習資源 +- [Fast.ai Diffusion Course](https://www.fast.ai/) +- [Stable Diffusion Art](https://stable-diffusion-art.com/) + +--- + +## ✅ 檢查清單 + +完成本章節後,你應該能夠: + +- [ ] 理解Diffusion模型的基本原理 +- [ ] 使用Stable Diffusion生成高質量圖片 +- [ ] 掌握重要參數的調節技巧 +- [ ] 使用ControlNet進行精確控制 +- [ ] 組合多個ControlNet +- [ ] 加載和使用LoRA模型 +- [ ] 訓練自定義LoRA +- [ ] 編寫高質量的提示詞 +- [ ] 進行圖片修復和編輯 +- [ ] 實現圖片放大 +- [ ] 構建實用的生成應用 + +--- + +## 下一步 + +完成圖片生成後,建議繼續學習: + +1. **影片生成** - 了解如何將靜態圖片擴展到動態視頻 +2. **音樂生成** - 探索音頻生成技術 +3. 
**實戰項目** - 構建完整的多模態應用 + +--- + +最後更新:2024-11-19 +難度級別:🟡 中級 +預計學習時間:15-20小時 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_diffusion_basic.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_diffusion_basic.py" new file mode 100644 index 0000000..a9c891f --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_diffusion_basic.py" @@ -0,0 +1,137 @@ +""" +Stable Diffusion 基礎圖片生成 +基本的文生圖功能,使用 Stable Diffusion v1.5 +""" + +import torch +from diffusers import StableDiffusionPipeline +from PIL import Image +import os + +def generate_image_basic( + prompt: str, + negative_prompt: str = "low quality, blurry, distorted", + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + width: int = 512, + height: int = 512, + seed: int = None, + output_path: str = "output.png" +): + """ + 使用 Stable Diffusion 生成圖片 + + Args: + prompt: 正向提示詞 + negative_prompt: 負向提示詞 + num_inference_steps: 推理步數(越多質量越好但越慢) + guidance_scale: 引導強度(控制對提示詞的遵循程度) + width: 圖片寬度 + height: 圖片高度 + seed: 隨機種子(用於可重現性) + output_path: 輸出路徑 + """ + + # 載入模型 + print("正在載入 Stable Diffusion 模型...") + model_id = "runwayml/stable-diffusion-v1-5" + + # 檢查是否有 GPU + device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"使用設備: {device}") + + # 建立 pipeline + pipe = StableDiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16 if device == "cuda" else torch.float32, + safety_checker=None # 關閉安全檢查器(可選) + ) + pipe = pipe.to(device) + + # 啟用記憶體優化(如果使用 GPU) + if device == "cuda": + pipe.enable_attention_slicing() + # pipe.enable_xformers_memory_efficient_attention() # 需要安裝 xformers + + # 設定隨機種子 + generator = None + if seed is not None: + generator = torch.Generator(device=device).manual_seed(seed) + print(f"使用種子: {seed}") + + # 生成圖片 + print(f"正在生成圖片...") + print(f"提示詞: {prompt}") + + with torch.autocast(device): + image = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + width=width, + height=height, + generator=generator + ).images[0] + + # 儲存圖片 + image.save(output_path) + print(f"圖片已儲存至: {output_path}") + + return image + + +def batch_generate(prompts: list, output_dir: str = "outputs"): + """ + 批量生成多張圖片 + + Args: + prompts: 提示詞列表 + output_dir: 輸出目錄 + """ + os.makedirs(output_dir, exist_ok=True) + + for i, prompt in enumerate(prompts): + output_path = os.path.join(output_dir, f"image_{i+1:03d}.png") + generate_image_basic( + prompt=prompt, + seed=42 + i, # 每張圖使用不同的種子 + output_path=output_path + ) + + +if __name__ == "__main__": + # 示例 1: 基本使用 + print("=== 示例 1: 基本圖片生成 ===") + generate_image_basic( + prompt="a beautiful landscape with mountains and lake, sunset, photorealistic", + negative_prompt="low quality, blurry, cartoon", + num_inference_steps=50, + guidance_scale=7.5, + seed=42, + output_path="landscape.png" + ) + + # 示例 2: 人物肖像 + print("\n=== 示例 2: 人物肖像 ===") + generate_image_basic( + prompt="professional portrait photo of a smiling woman, natural lighting, high 
quality", + negative_prompt="low quality, blurry, distorted face", + num_inference_steps=50, + guidance_scale=8.0, + seed=123, + output_path="portrait.png" + ) + + # 示例 3: 批量生成 + print("\n=== 示例 3: 批量生成 ===") + prompts = [ + "a cute cat sitting on a windowsill", + "a modern city skyline at night", + "a cozy coffee shop interior", + "a fantasy castle in the clouds", + "a vintage car on a desert road" + ] + batch_generate(prompts, output_dir="batch_outputs") + + print("\n所有圖片生成完成!") diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/02_controlnet_advanced.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/02_controlnet_advanced.py" new file mode 100644 index 0000000..3ba6c88 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/1.\345\234\226\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/02_controlnet_advanced.py" @@ -0,0 +1,262 @@ +""" +ControlNet 進階控制 +使用 ControlNet 實現精確的圖片生成控制 +支持多種控制類型:Canny邊緣、深度圖、姿態等 +""" + +import torch +import cv2 +import numpy as np +from PIL import Image +from diffusers import ( + StableDiffusionControlNetPipeline, + ControlNetModel, + UniPCMultistepScheduler +) +from controlnet_aux import CannyDetector, OpenposeDetector, HEDdetector + + +class ControlNetGenerator: + """ControlNet 圖片生成器""" + + def __init__(self, controlnet_type: str = "canny"): + """ + 初始化 ControlNet + + Args: + controlnet_type: 控制類型 (canny, openpose, hed, depth 等) + """ + self.device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"使用設備: {self.device}") + + # ControlNet 模型映射 + controlnet_models = { + "canny": "lllyasviel/sd-controlnet-canny", + "openpose": "lllyasviel/sd-controlnet-openpose", + "hed": "lllyasviel/sd-controlnet-hed", + "depth": "lllyasviel/sd-controlnet-depth", + "normal": "lllyasviel/sd-controlnet-normal", + "scribble": "lllyasviel/sd-controlnet-scribble", + "seg": "lllyasviel/sd-controlnet-seg" + } + + if controlnet_type not in controlnet_models: + raise ValueError(f"不支持的控制類型: {controlnet_type}") + + # 載入 ControlNet + print(f"正在載入 ControlNet ({controlnet_type})...") + controlnet = ControlNetModel.from_pretrained( + controlnet_models[controlnet_type], + torch_dtype=torch.float16 if self.device == "cuda" else torch.float32 + ) + + # 載入 Stable Diffusion Pipeline + print("正在載入 Stable Diffusion Pipeline...") + self.pipe = StableDiffusionControlNetPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + controlnet=controlnet, + torch_dtype=torch.float16 if self.device == "cuda" else torch.float32, + safety_checker=None + ) + + # 使用更快的調度器 + self.pipe.scheduler = UniPCMultistepScheduler.from_config( + self.pipe.scheduler.config + ) + + self.pipe = self.pipe.to(self.device) + + # 記憶體優化 + if self.device == "cuda": + self.pipe.enable_attention_slicing() + self.pipe.enable_model_cpu_offload() + + self.controlnet_type = controlnet_type + + def preprocess_canny(self, image: Image.Image, low_threshold: int = 100, + high_threshold: int = 200) -> Image.Image: + """ + Canny 邊緣檢測預處理 + + Args: + image: 輸入圖片 + low_threshold: Canny 低閾值 + high_threshold: Canny 高閾值 + """ + # 轉換為 numpy 陣列 + image_np = np.array(image) + 
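+        # Note: low_threshold/high_threshold form Canny's hysteresis thresholds; a
+        # high:low ratio of roughly 2:1 to 3:1 is the usual starting point. Lower
+        # thresholds keep more edges and constrain the ControlNet output more tightly.
+        # Optionally convert to grayscale first (cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)),
+        # as the README's get_canny_edge example does, for a cleaner edge map.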
+ # Canny 邊緣檢測 + edges = cv2.Canny(image_np, low_threshold, high_threshold) + + # 轉換回 PIL Image + edges = Image.fromarray(edges) + return edges + + def preprocess_openpose(self, image: Image.Image) -> Image.Image: + """OpenPose 姿態檢測預處理""" + detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet") + pose = detector(image) + return pose + + def preprocess_hed(self, image: Image.Image) -> Image.Image: + """HED 邊緣檢測預處理""" + detector = HEDdetector.from_pretrained("lllyasviel/ControlNet") + edges = detector(image) + return edges + + def generate( + self, + control_image: Image.Image, + prompt: str, + negative_prompt: str = "low quality, blurry", + num_inference_steps: int = 30, + guidance_scale: float = 7.5, + controlnet_conditioning_scale: float = 1.0, + seed: int = None + ) -> Image.Image: + """ + 生成圖片 + + Args: + control_image: 控制圖片(已預處理) + prompt: 提示詞 + negative_prompt: 負向提示詞 + num_inference_steps: 推理步數 + guidance_scale: 引導強度 + controlnet_conditioning_scale: ControlNet 控制強度(0-2,越高控制越強) + seed: 隨機種子 + """ + # 設定種子 + generator = None + if seed is not None: + generator = torch.Generator(device=self.device).manual_seed(seed) + + # 生成圖片 + print(f"正在生成圖片...") + print(f"提示詞: {prompt}") + print(f"ControlNet 強度: {controlnet_conditioning_scale}") + + with torch.autocast(self.device): + output = self.pipe( + prompt=prompt, + image=control_image, + negative_prompt=negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + controlnet_conditioning_scale=controlnet_conditioning_scale, + generator=generator + ) + + return output.images[0] + + +def example_canny_edge(): + """示例 1: Canny 邊緣控制""" + print("=== 示例 1: Canny 邊緣控制 ===") + + # 載入參考圖片(這裡假設有一張參考圖) + # 實際使用時請替換為您的圖片路徑 + reference_image = Image.new("RGB", (512, 512), color="white") + # reference_image = Image.open("your_reference_image.jpg") + + # 初始化生成器 + generator = ControlNetGenerator(controlnet_type="canny") + + # 預處理:提取邊緣 + canny_image = generator.preprocess_canny(reference_image) + canny_image.save("canny_edges.png") + print("邊緣圖已儲存: canny_edges.png") + + # 生成新圖片 + result = generator.generate( + control_image=canny_image, + prompt="a beautiful anime girl, high quality, detailed", + negative_prompt="low quality, blurry, ugly", + num_inference_steps=30, + controlnet_conditioning_scale=1.0, + seed=42 + ) + + result.save("canny_result.png") + print("生成結果已儲存: canny_result.png") + + +def example_style_transfer(): + """示例 2: 風格轉換""" + print("\n=== 示例 2: 保持構圖的風格轉換 ===") + + # 載入原圖 + original = Image.new("RGB", (512, 512), color="white") + # original = Image.open("original_photo.jpg") + + # 初始化 + generator = ControlNetGenerator(controlnet_type="canny") + + # 提取邊緣 + edges = generator.preprocess_canny(original, low_threshold=50, high_threshold=150) + + # 不同風格的提示詞 + styles = [ + ("oil painting style, vibrant colors", "oil_painting.png"), + ("watercolor style, soft colors", "watercolor.png"), + ("anime style, cell shading", "anime.png"), + ("cyberpunk style, neon lights", "cyberpunk.png") + ] + + for prompt, filename in styles: + result = generator.generate( + control_image=edges, + prompt=prompt, + num_inference_steps=30, + controlnet_conditioning_scale=0.8, + seed=42 + ) + result.save(filename) + print(f"已生成: {filename}") + + +def example_multiple_strength(): + """示例 3: 不同控制強度的效果""" + print("\n=== 示例 3: 不同 ControlNet 強度 ===") + + # 載入參考圖 + reference = Image.new("RGB", (512, 512), color="white") + + # 初始化 + generator = ControlNetGenerator(controlnet_type="canny") + edges = generator.preprocess_canny(reference) + 
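+    # Note: the plain white placeholder image contains no edges, so the strength
+    # comparison only becomes meaningful after swapping in a real photo (see the
+    # note at the bottom of this script). Lower conditioning scales give the model
+    # more freedom; higher values follow the edge map's composition more strictly.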
+ prompt = "a beautiful landscape with mountains and lake" + + # 測試不同的控制強度 + strengths = [0.3, 0.5, 0.8, 1.0, 1.5] + + for strength in strengths: + result = generator.generate( + control_image=edges, + prompt=prompt, + controlnet_conditioning_scale=strength, + seed=42 + ) + result.save(f"strength_{strength}.png") + print(f"已生成 (強度={strength}): strength_{strength}.png") + + +if __name__ == "__main__": + print("ControlNet 進階控制示例") + print("=" * 50) + + # 運行示例 + # 注意: 這些示例需要有實際的參考圖片才能正常運行 + # 請將 Image.new() 替換為 Image.open("your_image.jpg") + + example_canny_edge() + # example_style_transfer() + # example_multiple_strength() + + print("\n所有示例完成!") + print("\n使用提示:") + print("1. 替換示例中的 Image.new() 為實際圖片") + print("2. 調整 controlnet_conditioning_scale 控制強度") + print("3. 嘗試不同的預處理參數獲得最佳效果") diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/README.md" new file mode 100644 index 0000000..7326558 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/README.md" @@ -0,0 +1,1105 @@ +# 影片生成 (Video Generation) + +本章節將深入探討AI影片生成技術,從圖片到視頻的擴展,涵蓋最新的視頻生成模型和技術。 + +## 📋 目錄 + +1. [視頻生成基礎](#視頻生成基礎) +2. [Stable Video Diffusion](#stable-video-diffusion) +3. [AnimateDiff](#animatediff) +4. [Text-to-Video](#text-to-video) +5. [視頻編輯與處理](#視頻編輯與處理) +6. [實戰案例](#實戰案例) + +--- + +## 🎯 學習目標 + +完成本章節後,你將能夠: + +- ✅ 理解視頻生成的基本原理 +- ✅ 使用Stable Video Diffusion生成流暢視頻 +- ✅ 運用AnimateDiff創建動畫 +- ✅ 實現文本到視頻的生成 +- ✅ 編輯和後處理生成的視頻 +- ✅ 構建視頻生成應用 + +--- + +## 📚 視頻生成基礎 + +### 視頻生成的挑戰 + +與圖片生成相比,視頻生成面臨更多挑戰: + +1. **時間一致性** - 幀與幀之間需要保持連貫 +2. **運動合理性** - 物體運動需符合物理規律 +3. **計算成本** - 需要生成多幀圖片 +4. **記憶體需求** - 同時處理多幀數據 + +### 主要技術方案 + +``` +視頻生成技術路線 +├── Image-to-Video (圖片到視頻) +│ ├── Stable Video Diffusion (SVD) +│ └── DynamiCrafter +│ +├── Text-to-Video (文本到視頻) +│ ├── ModelScope +│ ├── ZeroScope +│ └── AnimateDiff +│ +└── Video-to-Video (視頻到視頻) + ├── Runway Gen-2 + └── Pika Labs +``` + +--- + +## 🎬 Stable Video Diffusion + +### 什麼是SVD? 
+ +Stable Video Diffusion (SVD) 是Stability AI推出的圖片到視頻模型,基於Stable Diffusion架構,專門針對視頻生成進行優化。 + +### 核心特點 + +- ✅ **高質量輸出** - 生成流暢自然的視頻 +- ✅ **可控性強** - 可控制運動幅度和方向 +- ✅ **多分辨率** - 支持不同分辨率輸出 +- ✅ **開源免費** - 可本地部署 + +### 安裝與設置 + +```bash +# 安裝依賴 +pip install diffusers transformers accelerate torch torchvision +pip install opencv-python pillow imageio imageio-ffmpeg + +# 可選:安裝xformers加速 +pip install xformers +``` + +### 基本使用 + +```python +from diffusers import StableVideoDiffusionPipeline +from diffusers.utils import load_image, export_to_video +import torch + +# 加載模型 +pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16, + variant="fp16" +) +pipe = pipe.to("cuda") + +# 啟用記憶體優化 +pipe.enable_model_cpu_offload() +pipe.enable_vae_slicing() + +# 加載輸入圖片 +image = load_image("input_image.jpg") +image = image.resize((1024, 576)) # SVD推薦的寬高比 + +# 生成視頻(默認25幀) +frames = pipe( + image=image, + num_frames=25, + decode_chunk_size=8, # 降低記憶體使用 + num_inference_steps=25, + fps=7, + motion_bucket_id=127, # 運動強度 (0-255) + noise_aug_strength=0.02 # 噪聲增強強度 +).frames[0] + +# 導出視頻 +export_to_video(frames, "output_video.mp4", fps=7) +print("Video generated successfully!") +``` + +### 參數調節指南 + +#### motion_bucket_id (運動強度) + +```python +# 測試不同運動強度 +motion_levels = { + "subtle": 20, # 微妙的運動 + "gentle": 60, # 溫和的運動 + "moderate": 127, # 中等運動(默認) + "strong": 180, # 強烈運動 + "extreme": 255 # 極度運動 +} + +for name, motion_id in motion_levels.items(): + frames = pipe( + image=image, + motion_bucket_id=motion_id, + num_frames=25 + ).frames[0] + + export_to_video(frames, f"video_{name}_motion.mp4", fps=7) +``` + +#### 調整視頻長度和幀率 + +```python +def generate_custom_video( + image_path, + duration_seconds=3, + fps=15, + motion_strength=127 +): + """ + 生成自定義時長和幀率的視頻 + + Args: + image_path: 輸入圖片路徑 + duration_seconds: 視頻時長(秒) + fps: 幀率 + motion_strength: 運動強度 (0-255) + """ + # 計算需要的幀數 + num_frames = duration_seconds * fps + + # SVD-XT最多支持25幀,需要分段生成長視頻 + max_frames_per_segment = 25 + + pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + pipe.enable_model_cpu_offload() + + image = load_image(image_path).resize((1024, 576)) + + if num_frames <= max_frames_per_segment: + # 單次生成 + frames = pipe( + image=image, + num_frames=num_frames, + motion_bucket_id=motion_strength, + fps=fps + ).frames[0] + else: + # 分段生成並拼接 + all_frames = [] + segments = (num_frames + max_frames_per_segment - 1) // max_frames_per_segment + + for i in range(segments): + frames_this_segment = min(max_frames_per_segment, num_frames - i * max_frames_per_segment) + + segment_frames = pipe( + image=image if i == 0 else all_frames[-1], # 使用上一段的最後一幀 + num_frames=frames_this_segment, + motion_bucket_id=motion_strength, + fps=fps + ).frames[0] + + all_frames.extend(segment_frames) + + frames = all_frames + + export_to_video(frames, "custom_video.mp4", fps=fps) + return frames + +# 使用示例:生成10秒、30fps的視頻 +frames = generate_custom_video( + "input.jpg", + duration_seconds=10, + fps=30, + motion_strength=100 +) +``` + +### 進階控制技巧 + +#### 1. 
條件引導視頻生成 + +```python +from PIL import Image +import numpy as np + +def generate_with_motion_control( + image_path, + motion_mask_path=None, + camera_motion="zoom_in" +): + """ + 帶運動控制的視頻生成 + + Args: + image_path: 輸入圖片 + motion_mask_path: 運動遮罩(可選) + camera_motion: 相機運動類型 + """ + pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + image = load_image(image_path).resize((1024, 576)) + + # 根據相機運動類型調整參數 + motion_params = { + "zoom_in": {"motion_bucket_id": 180, "noise_aug_strength": 0.01}, + "zoom_out": {"motion_bucket_id": 160, "noise_aug_strength": 0.015}, + "pan_left": {"motion_bucket_id": 140, "noise_aug_strength": 0.02}, + "pan_right": {"motion_bucket_id": 140, "noise_aug_strength": 0.02}, + "static": {"motion_bucket_id": 50, "noise_aug_strength": 0.005} + } + + params = motion_params.get(camera_motion, motion_params["static"]) + + frames = pipe( + image=image, + num_frames=25, + **params + ).frames[0] + + export_to_video(frames, f"video_{camera_motion}.mp4", fps=7) + return frames + +# 生成不同相機運動的視頻 +for motion in ["zoom_in", "pan_left", "static"]: + generate_with_motion_control("landscape.jpg", camera_motion=motion) +``` + +#### 2. 批量圖片到視頻轉換 + +```python +import os +from pathlib import Path + +def batch_image_to_video( + input_folder, + output_folder, + motion_strength=127, + fps=7 +): + """批量將圖片轉換為視頻""" + + os.makedirs(output_folder, exist_ok=True) + + pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + pipe.enable_model_cpu_offload() + + # 支持的圖片格式 + image_extensions = {'.jpg', '.jpeg', '.png', '.webp'} + + for img_file in Path(input_folder).iterdir(): + if img_file.suffix.lower() not in image_extensions: + continue + + print(f"Processing {img_file.name}...") + + try: + image = load_image(str(img_file)).resize((1024, 576)) + + frames = pipe( + image=image, + num_frames=25, + motion_bucket_id=motion_strength, + fps=fps + ).frames[0] + + output_path = f"{output_folder}/{img_file.stem}.mp4" + export_to_video(frames, output_path, fps=fps) + + print(f"✓ Saved to {output_path}") + + except Exception as e: + print(f"✗ Error processing {img_file.name}: {e}") + + print("Batch processing completed!") + +# 使用 +batch_image_to_video( + input_folder="input_images", + output_folder="output_videos", + motion_strength=127, + fps=7 +) +``` + +--- + +## 🎨 AnimateDiff + +### 什麼是AnimateDiff? 
+ +AnimateDiff是一個將任何Stable Diffusion模型變成視頻生成模型的插件,通過添加時間層來實現動畫生成。 + +### 核心優勢 + +- ✅ **兼容性強** - 可與任何SD模型和LoRA組合 +- ✅ **靈活控制** - 支持MotionLoRA精確控制運動 +- ✅ **社群資源豐富** - 大量預訓練運動模塊 + +### 安裝 + +```bash +pip install diffusers transformers accelerate torch +pip install git+https://github.com/guoyww/AnimateDiff.git +``` + +### 基本使用 + +```python +from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler +from diffusers.utils import export_to_gif +import torch + +# 加載運動適配器 +adapter = MotionAdapter.from_pretrained( + "guoyww/animatediff-motion-adapter-v1-5-2", + torch_dtype=torch.float16 +) + +# 創建AnimateDiff Pipeline +pipe = AnimateDiffPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + motion_adapter=adapter, + torch_dtype=torch.float16 +) +pipe.scheduler = DDIMScheduler.from_config( + pipe.scheduler.config, + beta_schedule="linear", + clip_sample=False +) +pipe = pipe.to("cuda") +pipe.enable_vae_slicing() +pipe.enable_model_cpu_offload() + +# 生成動畫 +prompt = "a cat walking on the street, high quality, detailed" +negative_prompt = "low quality, blurry, static" + +frames = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_frames=16, + num_inference_steps=25, + guidance_scale=7.5 +).frames[0] + +# 導出為GIF +export_to_gif(frames, "animated_cat.gif", fps=8) +``` + +### 使用MotionLoRA + +```python +from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler +import torch + +# 加載基礎組件 +adapter = MotionAdapter.from_pretrained( + "guoyww/animatediff-motion-adapter-v1-5-2", + torch_dtype=torch.float16 +) + +pipe = AnimateDiffPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + motion_adapter=adapter, + torch_dtype=torch.float16 +) +pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) +pipe = pipe.to("cuda") + +# 加載MotionLoRA(控制特定運動) +pipe.load_lora_weights( + "guoyww/animatediff-motion-lora-zoom-in", + adapter_name="zoom_in" +) + +# 生成帶特定運動的視頻 +frames = pipe( + prompt="a beautiful landscape, mountains and lake", + num_frames=16, + guidance_scale=7.5, + cross_attention_kwargs={"scale": 0.8} # LoRA強度 +).frames[0] + +export_to_gif(frames, "zoom_in_landscape.gif") +``` + +### 組合多個LoRA + +```python +def generate_with_style_and_motion( + prompt, + style_lora_path, + motion_lora_path, + output_path="output.gif" +): + """ + 組合風格LoRA和運動LoRA生成動畫 + + Args: + prompt: 提示詞 + style_lora_path: 風格LoRA路徑 + motion_lora_path: 運動LoRA路徑 + output_path: 輸出路徑 + """ + adapter = MotionAdapter.from_pretrained( + "guoyww/animatediff-motion-adapter-v1-5-2", + torch_dtype=torch.float16 + ) + + pipe = AnimateDiffPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + motion_adapter=adapter, + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + # 加載風格LoRA + pipe.load_lora_weights(style_lora_path, adapter_name="style") + + # 加載運動LoRA + pipe.load_lora_weights(motion_lora_path, adapter_name="motion") + + # 設置兩個LoRA的權重 + pipe.set_adapters(["style", "motion"], adapter_weights=[0.8, 0.6]) + + frames = pipe( + prompt=prompt, + num_frames=16, + guidance_scale=7.5 + ).frames[0] + + export_to_gif(frames, output_path) + return frames + +# 使用示例 +generate_with_style_and_motion( + prompt="a cyberpunk city at night, neon lights", + style_lora_path="path/to/cyberpunk_style_lora", + motion_lora_path="guoyww/animatediff-motion-lora-pan-left", + output_path="cyberpunk_city.gif" +) +``` + +--- + +## 📝 Text-to-Video + +### 使用ModelScope + +```python +from diffusers import DiffusionPipeline +from diffusers.utils import export_to_video +import torch + +# 加載ModelScope 
Text-to-Video模型 +pipe = DiffusionPipeline.from_pretrained( + "damo-vilab/text-to-video-ms-1.7b", + torch_dtype=torch.float16, + variant="fp16" +) +pipe = pipe.to("cuda") +pipe.enable_model_cpu_offload() +pipe.enable_vae_slicing() + +# 生成視頻 +prompt = "a panda eating bamboo in a bamboo forest" + +video_frames = pipe( + prompt=prompt, + num_inference_steps=25, + num_frames=16, + height=320, + width=576 +).frames[0] + +# 導出視頻 +export_to_video(video_frames, "panda_video.mp4", fps=8) +``` + +### 進階:長視頻生成 + +```python +def generate_long_video( + prompt, + total_frames=64, + frames_per_batch=16, + overlap_frames=4 +): + """ + 生成長視頻(通過分段生成並平滑過渡) + + Args: + prompt: 提示詞 + total_frames: 總幀數 + frames_per_batch: 每批生成的幀數 + overlap_frames: 重疊幀數(用於平滑過渡) + """ + pipe = DiffusionPipeline.from_pretrained( + "damo-vilab/text-to-video-ms-1.7b", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + all_frames = [] + num_batches = (total_frames + frames_per_batch - 1) // frames_per_batch + + for i in range(num_batches): + print(f"Generating batch {i+1}/{num_batches}...") + + # 生成這一批幀 + frames = pipe( + prompt=prompt, + num_frames=frames_per_batch, + num_inference_steps=25 + ).frames[0] + + if i == 0: + # 第一批:全部保留 + all_frames.extend(frames) + else: + # 後續批次:跳過重疊部分 + all_frames.extend(frames[overlap_frames:]) + + # 限制到目標幀數 + all_frames = all_frames[:total_frames] + + export_to_video(all_frames, "long_video.mp4", fps=8) + return all_frames + +# 生成64幀的長視頻 +frames = generate_long_video( + prompt="a beautiful sunset over the ocean, waves gently rolling", + total_frames=64, + frames_per_batch=16, + overlap_frames=4 +) +``` + +--- + +## 🛠️ 視頻編輯與處理 + +### 視頻插幀(提高幀率) + +```python +import cv2 +import numpy as np +from PIL import Image + +def interpolate_frames(video_path, output_path, target_fps=30): + """ + 使用光流法進行視頻插幀 + + Args: + video_path: 輸入視頻路徑 + output_path: 輸出視頻路徑 + target_fps: 目標幀率 + """ + # 讀取視頻 + cap = cv2.VideoCapture(video_path) + source_fps = cap.get(cv2.CAP_PROP_FPS) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 計算插值倍數 + interpolation_factor = target_fps / source_fps + + # 創建視頻寫入器 + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_path, fourcc, target_fps, (width, height)) + + # 讀取第一幀 + ret, prev_frame = cap.read() + if not ret: + return + + out.write(prev_frame) + + while True: + ret, next_frame = cap.read() + if not ret: + break + + # 計算需要插入的幀數 + num_interpolated = int(interpolation_factor) - 1 + + # 簡單線性插值(可以使用更高級的光流算法) + for i in range(1, num_interpolated + 1): + alpha = i / (num_interpolated + 1) + interpolated = cv2.addWeighted( + prev_frame, 1 - alpha, + next_frame, alpha, + 0 + ) + out.write(interpolated) + + out.write(next_frame) + prev_frame = next_frame + + cap.release() + out.release() + print(f"Interpolated video saved to {output_path}") + +# 使用 +interpolate_frames("input.mp4", "output_30fps.mp4", target_fps=30) +``` + +### 視頻穩定化 + +```python +def stabilize_video(input_path, output_path): + """使用OpenCV穩定視頻""" + import cv2 + + cap = cv2.VideoCapture(input_path) + + # 獲取視頻信息 + n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = cap.get(cv2.CAP_PROP_FPS) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 讀取第一幀 + _, prev = cap.read() + prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY) + + transforms = np.zeros((n_frames-1, 3), np.float32) + + # 計算幀間變換 + for i in range(n_frames-2): + ret, curr = cap.read() + if not ret: + break + + curr_gray = 
cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY) + + # 檢測特徵點 + prev_pts = cv2.goodFeaturesToTrack( + prev_gray, + maxCorners=200, + qualityLevel=0.01, + minDistance=30, + blockSize=3 + ) + + # 光流追蹤 + curr_pts, status, err = cv2.calcOpticalFlowPyrLK( + prev_gray, curr_gray, prev_pts, None + ) + + # 過濾有效點 + idx = np.where(status==1)[0] + prev_pts = prev_pts[idx] + curr_pts = curr_pts[idx] + + # 計算變換矩陣 + m, _ = cv2.estimateAffinePartial2D(prev_pts, curr_pts) + + if m is not None: + dx = m[0,2] + dy = m[1,2] + da = np.arctan2(m[1,0], m[0,0]) + else: + dx = dy = da = 0 + + transforms[i] = [dx, dy, da] + + prev_gray = curr_gray + + # 計算平滑軌跡 + trajectory = np.cumsum(transforms, axis=0) + + # 應用移動平均平滑 + smoothed_trajectory = smooth(trajectory) + + # 計算穩定化變換 + difference = smoothed_trajectory - trajectory + transforms_smooth = transforms + difference + + # 應用穩定化 + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_path, fourcc, fps, (w, h)) + + for i in range(n_frames-1): + ret, frame = cap.read() + if not ret: + break + + dx, dy, da = transforms_smooth[i] + + m = np.array([[np.cos(da), -np.sin(da), dx], + [np.sin(da), np.cos(da), dy]]) + + frame_stabilized = cv2.warpAffine(frame, m, (w, h)) + out.write(frame_stabilized) + + cap.release() + out.release() + +def smooth(trajectory, smoothing_radius=50): + """移動平均平滑""" + smoothed_trajectory = np.copy(trajectory) + for i in range(3): + for j in range(smoothing_radius, len(trajectory) - smoothing_radius): + smoothed_trajectory[j, i] = np.mean( + trajectory[j - smoothing_radius:j + smoothing_radius + 1, i] + ) + return smoothed_trajectory + +# 使用 +stabilize_video("shaky_video.mp4", "stabilized_video.mp4") +``` + +### 添加音樂和音效 + +```python +from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip + +def add_audio_to_video( + video_path, + audio_path, + output_path, + audio_start=0, + audio_volume=1.0 +): + """ + 為視頻添加音頻 + + Args: + video_path: 視頻文件路徑 + audio_path: 音頻文件路徑 + output_path: 輸出路徑 + audio_start: 音頻開始時間(秒) + audio_volume: 音頻音量 (0-1) + """ + # 加載視頻 + video = VideoFileClip(video_path) + + # 加載音頻 + audio = AudioFileClip(audio_path) + + # 調整音頻長度匹配視頻 + if audio.duration > video.duration: + audio = audio.subclip(0, video.duration) + elif audio.duration < video.duration: + # 循環音頻 + n_loops = int(video.duration / audio.duration) + 1 + audio = CompositeAudioClip([audio] * n_loops).subclip(0, video.duration) + + # 調整音量 + audio = audio.volumex(audio_volume) + + # 設置音頻起始時間 + audio = audio.set_start(audio_start) + + # 合成 + final_video = video.set_audio(audio) + + # 導出 + final_video.write_videofile( + output_path, + codec='libx264', + audio_codec='aac', + fps=video.fps + ) + + print(f"Video with audio saved to {output_path}") + +# 使用 +add_audio_to_video( + "generated_video.mp4", + "background_music.mp3", + "final_video.mp4", + audio_volume=0.5 +) +``` + +--- + +## 🚀 實戰案例 + +### 案例1:短視頻自動生成系統 + +```python +# short_video_generator.py +from diffusers import StableVideoDiffusionPipeline +from diffusers.utils import export_to_video +from PIL import Image, ImageDraw, ImageFont +import torch + +class ShortVideoGenerator: + """短視頻自動生成系統""" + + def __init__(self): + self.pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16 + ) + self.pipe = self.pipe.to("cuda") + self.pipe.enable_model_cpu_offload() + + def add_text_overlay(self, frame, text, position="bottom"): + """在幀上添加文字""" + draw = ImageDraw.Draw(frame) + + # 嘗試加載字體 + 
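+        # Note: Arial.ttf is usually absent on Linux, and neither Arial nor the PIL
+        # default bitmap font can render CJK characters. For Chinese titles, pass a
+        # CJK-capable font path that exists on your system (e.g. a Noto Sans CJK file
+        # or Windows' msjh.ttc; the exact path depends on the environment).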
try: + font = ImageFont.truetype("Arial.ttf", 40) + except: + font = ImageFont.load_default() + + # 計算文字位置 + bbox = draw.textbbox((0, 0), text, font=font) + text_width = bbox[2] - bbox[0] + text_height = bbox[3] - bbox[1] + + if position == "bottom": + x = (frame.width - text_width) // 2 + y = frame.height - text_height - 50 + elif position == "top": + x = (frame.width - text_width) // 2 + y = 50 + else: # center + x = (frame.width - text_width) // 2 + y = (frame.height - text_height) // 2 + + # 繪製文字陰影 + draw.text((x+2, y+2), text, font=font, fill=(0, 0, 0)) + # 繪製文字 + draw.text((x, y), text, font=font, fill=(255, 255, 255)) + + return frame + + def generate_short_video( + self, + image_path, + title_text, + output_path="short_video.mp4", + duration=3, + fps=15 + ): + """ + 生成帶標題的短視頻 + + Args: + image_path: 輸入圖片路徑 + title_text: 標題文字 + output_path: 輸出路徑 + duration: 視頻時長(秒) + fps: 幀率 + """ + # 加載圖片 + image = Image.open(image_path).convert("RGB") + image = image.resize((1024, 576)) + + # 生成視頻幀 + num_frames = min(25, duration * fps) # SVD限制 + + frames = self.pipe( + image=image, + num_frames=num_frames, + motion_bucket_id=100, + fps=fps + ).frames[0] + + # 添加文字覆蓋 + frames_with_text = [] + for frame in frames: + frame_pil = Image.fromarray(frame) + frame_with_text = self.add_text_overlay( + frame_pil, + title_text, + position="bottom" + ) + frames_with_text.append(frame_with_text) + + # 導出 + export_to_video(frames_with_text, output_path, fps=fps) + print(f"Short video saved to {output_path}") + + return frames_with_text + +# 使用示例 +generator = ShortVideoGenerator() + +generator.generate_short_video( + image_path="product.jpg", + title_text="新品上市!限時優惠", + output_path="product_promo.mp4", + duration=3, + fps=15 +) +``` + +### 案例2:故事視頻生成器 + +```python +# story_video_generator.py +from diffusers import AnimateDiffPipeline, MotionAdapter, DDIMScheduler +from diffusers.utils import export_to_gif, export_to_video +from moviepy.editor import VideoFileClip, concatenate_videoclips +import torch + +class StoryVideoGenerator: + """基於故事腳本的視頻生成器""" + + def __init__(self): + adapter = MotionAdapter.from_pretrained( + "guoyww/animatediff-motion-adapter-v1-5-2", + torch_dtype=torch.float16 + ) + + self.pipe = AnimateDiffPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + motion_adapter=adapter, + torch_dtype=torch.float16 + ) + self.pipe.scheduler = DDIMScheduler.from_config( + self.pipe.scheduler.config + ) + self.pipe = self.pipe.to("cuda") + + def generate_scene(self, prompt, num_frames=16): + """生成單個場景""" + frames = self.pipe( + prompt=prompt, + num_frames=num_frames, + num_inference_steps=25, + guidance_scale=7.5 + ).frames[0] + + return frames + + def generate_story_video( + self, + story_script, + output_path="story_video.mp4", + fps=8 + ): + """ + 根據故事腳本生成視頻 + + Args: + story_script: 故事腳本列表 + [{"prompt": "...", "duration": 2}, ...] 
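+                (duration is in seconds; each scene generates at most 16 frames,
+                 so longer durations are truncated by generate_scene)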
+ output_path: 輸出路徑 + fps: 幀率 + """ + temp_clips = [] + + for i, scene in enumerate(story_script): + print(f"Generating scene {i+1}/{len(story_script)}...") + + prompt = scene["prompt"] + duration = scene.get("duration", 2) + num_frames = int(duration * fps) + + # 生成場景 + frames = self.generate_scene(prompt, num_frames=min(16, num_frames)) + + # 保存為臨時視頻 + temp_path = f"temp_scene_{i}.mp4" + export_to_video(frames, temp_path, fps=fps) + temp_clips.append(temp_path) + + # 合併所有場景 + clips = [VideoFileClip(path) for path in temp_clips] + final_video = concatenate_videoclips(clips) + final_video.write_videofile(output_path, fps=fps) + + # 清理臨時文件 + import os + for path in temp_clips: + os.remove(path) + + print(f"Story video saved to {output_path}") + +# 使用示例 +generator = StoryVideoGenerator() + +# 定義故事腳本 +story = [ + { + "prompt": "a peaceful forest at dawn, birds flying, misty atmosphere", + "duration": 3 + }, + { + "prompt": "a deer walking through the forest, sunlight filtering through trees", + "duration": 3 + }, + { + "prompt": "a crystal clear stream flowing over rocks, surrounded by green moss", + "duration": 3 + }, + { + "prompt": "close-up of a butterfly landing on a flower, beautiful colors", + "duration": 2 + } +] + +generator.generate_story_video( + story_script=story, + output_path="nature_story.mp4", + fps=8 +) +``` + +--- + +## 📚 參考資源 + +### 官方文檔 +- [Stable Video Diffusion](https://stability.ai/news/stable-video-diffusion-open-ai-video-model) +- [AnimateDiff GitHub](https://github.com/guoyww/AnimateDiff) +- [Hugging Face Diffusers](https://huggingface.co/docs/diffusers/api/pipelines/stable_video_diffusion) + +### 模型資源 +- [Hugging Face Video Models](https://huggingface.co/models?pipeline_tag=text-to-video) +- [CivitAI Motion Modules](https://civitai.com/models?tag=motion%20module) + +### 學習資源 +- [Two Minute Papers - Video Generation](https://www.youtube.com/@TwoMinutePapers) +- [Stable Diffusion Art - Video Guide](https://stable-diffusion-art.com/video/) + +--- + +## ✅ 檢查清單 + +完成本章節後,你應該能夠: + +- [ ] 理解視頻生成的核心挑戰 +- [ ] 使用Stable Video Diffusion生成流暢視頻 +- [ ] 調節運動強度和視頻參數 +- [ ] 使用AnimateDiff創建動畫 +- [ ] 組合風格和運動LoRA +- [ ] 實現文本到視頻生成 +- [ ] 進行視頻插幀和穩定化 +- [ ] 添加音頻和文字覆蓋 +- [ ] 構建完整的視頻生成應用 + +--- + +## 下一步 + +完成影片生成後,建議繼續學習: + +1. **音樂生成** - 為你的視頻添加自動生成的背景音樂 +2. 
**實戰項目** - 構建端到端的多模態內容生成系統 + +--- + +最後更新:2024-11-19 +難度級別:🔴 高級 +預計學習時間:12-15小時 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_video_diffusion.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_video_diffusion.py" new file mode 100644 index 0000000..5ac4fea --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/2.\345\275\261\347\211\207\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_stable_video_diffusion.py" @@ -0,0 +1,262 @@ +""" +Stable Video Diffusion 影片生成 +從單張圖片生成短視頻 +""" + +import torch +from diffusers import StableVideoDiffusionPipeline +from diffusers.utils import load_image, export_to_video +from PIL import Image +import os + + +class VideoGenerator: + """影片生成器""" + + def __init__(self, model_id: str = "stabilityai/stable-video-diffusion-img2vid-xt"): + """ + 初始化影片生成器 + + Args: + model_id: 模型 ID + - stabilityai/stable-video-diffusion-img2vid: 14 幀版本 + - stabilityai/stable-video-diffusion-img2vid-xt: 25 幀版本(推薦) + """ + self.device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"使用設備: {self.device}") + + if self.device == "cpu": + print("警告: CPU 生成影片會非常慢,強烈建議使用 GPU") + + # 載入模型 + print(f"正在載入模型: {model_id}") + self.pipe = StableVideoDiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16 if self.device == "cuda" else torch.float32, + variant="fp16" if self.device == "cuda" else None + ) + self.pipe = self.pipe.to(self.device) + + # 記憶體優化 + if self.device == "cuda": + self.pipe.enable_model_cpu_offload() + # self.pipe.enable_attention_slicing() + + def generate_video( + self, + image: Image.Image, + num_frames: int = 25, + fps: int = 7, + motion_bucket_id: int = 127, + noise_aug_strength: float = 0.02, + decode_chunk_size: int = 8, + seed: int = 42, + output_path: str = "output.mp4" + ) -> str: + """ + 從圖片生成影片 + + Args: + image: 輸入圖片 + num_frames: 生成幀數 (14 或 25) + fps: 影片幀率 + motion_bucket_id: 運動強度 (1-255,數值越大運動越明顯) + noise_aug_strength: 噪聲增強強度 (0-1) + decode_chunk_size: 解碼批次大小(影響記憶體使用) + seed: 隨機種子 + output_path: 輸出路徑 + + Returns: + 輸出路徑 + """ + # 確保圖片尺寸是 8 的倍數 + width, height = image.size + width = (width // 8) * 8 + height = (height // 8) * 8 + image = image.resize((width, height)) + + print(f"圖片尺寸: {width}x{height}") + print(f"生成幀數: {num_frames}") + print(f"運動強度: {motion_bucket_id}") + + # 設定種子 + generator = torch.Generator(device=self.device).manual_seed(seed) + + # 生成影片幀 + print("正在生成影片...") + frames = self.pipe( + image=image, + num_frames=num_frames, + decode_chunk_size=decode_chunk_size, + motion_bucket_id=motion_bucket_id, + noise_aug_strength=noise_aug_strength, + generator=generator + ).frames[0] + + # 匯出影片 + export_to_video(frames, output_path, fps=fps) + print(f"影片已儲存至: {output_path}") + + return output_path + + def batch_generate( + self, + images: list, + output_dir: str = "videos", + **kwargs + ): + """ + 批量生成影片 + + Args: + images: 圖片列表(PIL Image 或路徑) + output_dir: 輸出目錄 + **kwargs: 傳遞給 generate_video 的參數 + """ + os.makedirs(output_dir, exist_ok=True) + + for i, img in enumerate(images): + if isinstance(img, str): + img = Image.open(img) + + output_path = 
os.path.join(output_dir, f"video_{i+1:03d}.mp4") + self.generate_video( + image=img, + output_path=output_path, + **kwargs + ) + + +def example_basic_generation(): + """示例 1: 基本影片生成""" + print("=== 示例 1: 基本影片生成 ===") + + # 創建或載入圖片 + # 方法 1: 從本地載入 + # image = Image.open("input_image.jpg") + + # 方法 2: 從 URL 載入 + image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png" + image = load_image(image_url) + + # 初始化生成器 + generator = VideoGenerator() + + # 生成影片 + generator.generate_video( + image=image, + num_frames=25, + fps=7, + motion_bucket_id=127, + output_path="basic_video.mp4" + ) + + +def example_motion_control(): + """示例 2: 控制運動強度""" + print("\n=== 示例 2: 不同運動強度 ===") + + # 載入圖片 + image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png" + image = load_image(image_url) + + generator = VideoGenerator() + + # 測試不同的運動強度 + motion_strengths = [50, 100, 150, 200] + + for motion in motion_strengths: + print(f"\n生成運動強度 {motion} 的影片...") + generator.generate_video( + image=image, + num_frames=25, + fps=7, + motion_bucket_id=motion, + output_path=f"motion_{motion}.mp4" + ) + + +def example_long_video(): + """示例 3: 生成較長的影片(通過連接多個片段)""" + print("\n=== 示例 3: 生成長影片 ===") + + from moviepy.editor import VideoFileClip, concatenate_videoclips + + # 載入圖片 + image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png" + image = load_image(image_url) + + generator = VideoGenerator() + + # 生成多個片段 + num_segments = 3 + segment_paths = [] + + for i in range(num_segments): + print(f"\n生成片段 {i+1}/{num_segments}...") + output_path = f"segment_{i+1}.mp4" + generator.generate_video( + image=image, + num_frames=25, + fps=7, + seed=42 + i, # 每個片段使用不同種子 + output_path=output_path + ) + segment_paths.append(output_path) + + # 連接片段 + print("\n正在連接影片片段...") + clips = [VideoFileClip(path) for path in segment_paths] + final_clip = concatenate_videoclips(clips) + final_clip.write_videofile("long_video.mp4", fps=7) + + # 清理臨時文件 + for path in segment_paths: + os.remove(path) + + print("長影片已生成: long_video.mp4") + + +def example_image_sequence(): + """示例 4: 從圖片序列生成影片序列""" + print("\n=== 示例 4: 圖片序列處理 ===") + + # 假設有多張圖片 + image_urls = [ + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png", + # 添加更多圖片 URL + ] + + generator = VideoGenerator() + + # 批量處理 + images = [load_image(url) for url in image_urls] + generator.batch_generate( + images=images, + output_dir="sequence_videos", + num_frames=25, + fps=7, + motion_bucket_id=127 + ) + + +if __name__ == "__main__": + print("Stable Video Diffusion 影片生成示例") + print("=" * 60) + print("\n注意: 影片生成需要大量 GPU 記憶體 (建議 12GB+)") + print(" 如果記憶體不足,可以減少 num_frames 或使用 CPU offload\n") + + # 運行基本示例 + example_basic_generation() + + # 運行其他示例(取消註釋以執行) + # example_motion_control() + # example_long_video() # 需要安裝 moviepy + # example_image_sequence() + + print("\n所有示例完成!") + print("\n調參建議:") + print("1. motion_bucket_id: 控制運動幅度 (50-200)") + print("2. noise_aug_strength: 控制變化程度 (0.0-0.1)") + print("3. num_frames: 影響影片長度和記憶體使用") + print("4. 
fps: 影響播放速度 (建議 6-12)") diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/README.md" new file mode 100644 index 0000000..bfd73e3 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/README.md" @@ -0,0 +1,1376 @@ +# 音樂生成 (Music & Audio Generation) + +本章節將深入探討AI音樂和音頻生成技術,從基礎的音樂生成到進階的語音合成和音效設計。 + +## 📋 目錄 + +1. [音頻生成基礎](#音頻生成基礎) +2. [MusicGen - 音樂生成](#musicgen---音樂生成) +3. [AudioLDM - 音效生成](#audioldm---音效生成) +4. [Bark - 語音合成](#bark---語音合成) +5. [音頻處理與編輯](#音頻處理與編輯) +6. [實戰案例](#實戰案例) + +--- + +## 🎯 學習目標 + +完成本章節後,你將能夠: + +- ✅ 理解音頻生成的基本原理 +- ✅ 使用MusicGen生成各種風格的音樂 +- ✅ 運用AudioLDM創建音效 +- ✅ 使用Bark進行語音合成 +- ✅ 處理和編輯生成的音頻 +- ✅ 構建音頻生成應用 + +--- + +## 📚 音頻生成基礎 + +### 音頻生成的核心概念 + +``` +音頻信號基礎 +├── 採樣率 (Sample Rate) +│ ├── 8kHz - 語音 +│ ├── 16kHz - 語音高質量 +│ ├── 44.1kHz - CD音質 +│ └── 48kHz - 專業音頻 +│ +├── 位深度 (Bit Depth) +│ ├── 16-bit - 標準 +│ └── 24-bit - 高保真 +│ +└── 聲道 (Channels) + ├── Mono - 單聲道 + └── Stereo - 立體聲 +``` + +### 音頻生成技術分類 + +1. **音樂生成** + - 旋律生成 + - 伴奏生成 + - 完整編曲 + +2. **音效生成** + - 環境音 + - 特效音 + - 擬音效果 + +3. **語音合成** + - 文本轉語音 (TTS) + - 語音克隆 + - 情感語音 + +--- + +## 🎵 MusicGen - 音樂生成 + +### 什麼是MusicGen? + +MusicGen是Meta開發的音樂生成模型,可以根據文本描述生成高質量的音樂片段。 + +### 核心特點 + +- ✅ **高音質輸出** - 支持高達48kHz採樣率 +- ✅ **多樣性** - 支持各種音樂風格 +- ✅ **可控性** - 可控制旋律、節奏、風格 +- ✅ **條件生成** - 支持音頻條件引導 + +### 安裝 + +```bash +pip install transformers torch torchaudio scipy +pip install audiocraft # Meta的音頻生成庫 +``` + +### 基本使用 + +```python +from audiocraft.models import MusicGen +from audiocraft.data.audio import audio_write +import torch + +# 加載模型 +# 可選: small (300M), medium (1.5B), large (3.3B), melody (1.5B) +model = MusicGen.get_pretrained('facebook/musicgen-medium') + +# 設置生成參數 +model.set_generation_params( + duration=10, # 生成時長(秒) + temperature=1.0, # 隨機性 (0.1-2.0) + top_k=250, # 採樣策略 + top_p=0.95, + cfg_coef=3.0 # 提示詞遵循程度 +) + +# 生成音樂 +descriptions = [ + "upbeat electronic dance music with a strong beat", + "relaxing piano melody with ambient background", + "rock music with electric guitar solo", +] + +# 批量生成 +wav = model.generate(descriptions) + +# 保存音頻 +for idx, one_wav in enumerate(wav): + audio_write( + f'generated_music_{idx}', + one_wav.cpu(), + model.sample_rate, + strategy="loudness", # 音量標準化 + loudness_compressor=True + ) + print(f"Saved: generated_music_{idx}.wav") +``` + +### 旋律條件生成 + +```python +from audiocraft.models import MusicGen +from audiocraft.data.audio import audio_write +import torchaudio + +# 加載melody模型(支持旋律條件) +model = MusicGen.get_pretrained('facebook/musicgen-melody') +model.set_generation_params(duration=15) + +# 加載參考旋律 +melody, sr = torchaudio.load("reference_melody.wav") + +# 重採樣到模型需要的採樣率 +if sr != model.sample_rate: + melody = torchaudio.functional.resample(melody, sr, model.sample_rate) + +# 基於旋律生成音樂 +descriptions = [ + "jazz arrangement with saxophone and piano", + "orchestral version with strings", + "electronic remix with synth bass" +] + +# 生成(使用旋律作為條件) +wav = model.generate_with_chroma( + descriptions=descriptions, + melody_wavs=melody[None].expand(len(descriptions), -1, -1), + melody_sample_rate=model.sample_rate, + progress=True +) + +# 保存 +for idx, 
one_wav in enumerate(wav): + audio_write(f'melody_based_{idx}', one_wav.cpu(), model.sample_rate) +``` + +### 音樂風格控制 + +```python +def generate_music_by_style( + style, + duration=30, + num_variations=3, + output_prefix="music" +): + """ + 根據風格生成音樂 + + Args: + style: 音樂風格 + duration: 時長(秒) + num_variations: 變體數量 + output_prefix: 輸出文件前綴 + """ + # 風格模板 + style_templates = { + "lo-fi": "lo-fi hip hop beat, chill, relaxing, soft piano, vinyl crackle", + "epic": "epic orchestral music, cinematic, dramatic, powerful strings and brass", + "jazz": "smooth jazz, saxophone, piano, double bass, brush drums", + "edm": "electronic dance music, energetic, synthesizer, heavy bass, 128 bpm", + "ambient": "ambient atmospheric music, ethereal pads, no drums, peaceful", + "rock": "rock music, electric guitar, drums, bass guitar, energetic", + "classical": "classical piano composition, elegant, expressive, romantic era", + "funk": "funky groove, slap bass, rhythmic guitar, horn section, 110 bpm" + } + + base_description = style_templates.get( + style.lower(), + f"{style} music" + ) + + model = MusicGen.get_pretrained('facebook/musicgen-medium') + model.set_generation_params( + duration=duration, + temperature=1.0, + top_k=250, + cfg_coef=3.0 + ) + + # 生成多個變體 + descriptions = [base_description] * num_variations + + wav = model.generate(descriptions, progress=True) + + # 保存 + for idx, one_wav in enumerate(wav): + filename = f"{output_prefix}_{style}_{idx+1}" + audio_write(filename, one_wav.cpu(), model.sample_rate) + print(f"✓ Generated: {filename}.wav") + +# 使用示例 +generate_music_by_style("lo-fi", duration=30, num_variations=3) +generate_music_by_style("epic", duration=20, num_variations=2) +``` + +### 進階:多段音樂拼接 + +```python +from audiocraft.models import MusicGen +import torch +import torchaudio + +def generate_music_segments( + segments_config, + output_path="complete_music.wav", + transition_duration=2 +): + """ + 生成並拼接多個音樂段落 + + Args: + segments_config: 段落配置列表 + [{"description": "...", "duration": 10}, ...] 
+ output_path: 輸出路徑 + transition_duration: 過渡時長(秒) + """ + model = MusicGen.get_pretrained('facebook/musicgen-medium') + + all_segments = [] + + for i, config in enumerate(segments_config): + print(f"Generating segment {i+1}/{len(segments_config)}...") + + model.set_generation_params(duration=config["duration"]) + + description = config["description"] + + # 生成段落 + wav = model.generate([description], progress=True) + segment = wav[0] + + all_segments.append(segment) + + # 拼接音頻(帶淡入淡出過渡) + final_audio = crossfade_segments( + all_segments, + model.sample_rate, + transition_duration + ) + + # 保存 + torchaudio.save( + output_path, + final_audio.cpu(), + model.sample_rate + ) + + print(f"Complete music saved to {output_path}") + +def crossfade_segments(segments, sample_rate, transition_duration): + """使用淡入淡出拼接音頻段落""" + transition_samples = int(transition_duration * sample_rate) + + result = segments[0] + + for segment in segments[1:]: + # 創建淡出淡入曲線 + fadeout = torch.linspace(1, 0, transition_samples) + fadein = torch.linspace(0, 1, transition_samples) + + # 應用淡出到前一段的末尾 + result[:, -transition_samples:] *= fadeout + + # 應用淡入到當前段的開頭 + segment[:, :transition_samples] *= fadein + + # 重疊部分相加 + result[:, -transition_samples:] += segment[:, :transition_samples] + + # 拼接剩餘部分 + result = torch.cat([result, segment[:, transition_samples:]], dim=1) + + return result + +# 使用示例:創建多段音樂作品 +segments = [ + { + "description": "gentle piano introduction, slow tempo, melancholic", + "duration": 15 + }, + { + "description": "building up with strings, crescendo, emotional", + "duration": 20 + }, + { + "description": "full orchestral climax, powerful, dramatic", + "duration": 25 + }, + { + "description": "peaceful ending, soft piano and strings, fade out", + "duration": 15 + } +] + +generate_music_segments(segments, "orchestral_piece.wav", transition_duration=3) +``` + +--- + +## 🔊 AudioLDM - 音效生成 + +### 什麼是AudioLDM? 
+ +AudioLDM是基於Latent Diffusion的音頻生成模型,專門用於生成音效和環境音。 + +### 安裝 + +```bash +pip install diffusers transformers scipy +``` + +### 基本使用 + +```python +from diffusers import AudioLDMPipeline +import torch +import scipy + +# 加載模型 +pipe = AudioLDMPipeline.from_pretrained( + "cvssp/audioldm-s-full-v2", + torch_dtype=torch.float16 +) +pipe = pipe.to("cuda") + +# 生成音效 +prompt = "dog barking in the distance, outdoor environment, realistic" + +audio = pipe( + prompt, + num_inference_steps=50, + audio_length_in_s=5.0, # 音頻長度(秒) + guidance_scale=2.5 +).audios[0] + +# 保存音頻 +scipy.io.wavfile.write( + "dog_bark.wav", + rate=16000, + data=audio +) +``` + +### 音效類別生成 + +```python +def generate_sound_effects(sound_type, num_variations=3): + """ + 生成特定類型的音效 + + Args: + sound_type: 音效類型 + num_variations: 變體數量 + """ + # 音效模板 + sound_templates = { + "nature": [ + "gentle rain falling on leaves", + "ocean waves crashing on beach", + "birds chirping in forest", + "wind blowing through trees", + "thunder rumbling in distance" + ], + "urban": [ + "city traffic ambience", + "subway train arriving", + "cafe background chatter", + "construction site sounds", + "police siren passing by" + ], + "home": [ + "door creaking open", + "footsteps on wooden floor", + "clock ticking", + "kettle boiling water", + "cat meowing" + ], + "sci-fi": [ + "spaceship engine humming", + "laser gun shooting", + "alien creature sound", + "futuristic computer beep", + "teleportation effect" + ], + "musical": [ + "drum kit being played", + "acoustic guitar strumming", + "piano chord progression", + "violin melody", + "synthesizer arpeggio" + ] + } + + prompts = sound_templates.get(sound_type, [f"{sound_type} sound"]) + + pipe = AudioLDMPipeline.from_pretrained( + "cvssp/audioldm-s-full-v2", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + for idx, prompt in enumerate(prompts[:num_variations]): + print(f"Generating: {prompt}") + + audio = pipe( + prompt, + num_inference_steps=50, + audio_length_in_s=5.0, + guidance_scale=2.5, + num_waveforms_per_prompt=1 + ).audios[0] + + # 保存 + output_file = f"{sound_type}_{idx+1}.wav" + scipy.io.wavfile.write(output_file, rate=16000, data=audio) + print(f"✓ Saved: {output_file}") + +# 生成不同類型的音效 +generate_sound_effects("nature", num_variations=5) +generate_sound_effects("sci-fi", num_variations=3) +``` + +### 長音效生成 + +```python +def generate_long_soundscape( + description, + total_duration=30, + segment_duration=10, + overlap_duration=2 +): + """ + 生成長時間的音景 + + Args: + description: 音效描述 + total_duration: 總時長(秒) + segment_duration: 每段時長(秒) + overlap_duration: 重疊時長(秒) + """ + import numpy as np + + pipe = AudioLDMPipeline.from_pretrained( + "cvssp/audioldm-s-full-v2", + torch_dtype=torch.float16 + ) + pipe = pipe.to("cuda") + + # 計算需要生成的段數 + num_segments = int(np.ceil(total_duration / (segment_duration - overlap_duration))) + + all_audio = [] + sample_rate = 16000 + + for i in range(num_segments): + print(f"Generating segment {i+1}/{num_segments}...") + + audio = pipe( + description, + num_inference_steps=50, + audio_length_in_s=segment_duration, + guidance_scale=2.5 + ).audios[0] + + all_audio.append(audio) + + # 拼接音頻(帶重疊) + overlap_samples = int(overlap_duration * sample_rate) + final_audio = all_audio[0] + + for audio in all_audio[1:]: + # 創建淡入淡出 + fadeout = np.linspace(1, 0, overlap_samples) + fadein = np.linspace(0, 1, overlap_samples) + + # 混合重疊部分 + overlap_mix = ( + final_audio[-overlap_samples:] * fadeout + + audio[:overlap_samples] * fadein + ) + + # 拼接 + final_audio = np.concatenate([ + 
final_audio[:-overlap_samples], + overlap_mix, + audio[overlap_samples:] + ]) + + # 截取到目標時長 + target_samples = int(total_duration * sample_rate) + final_audio = final_audio[:target_samples] + + # 保存 + scipy.io.wavfile.write( + "long_soundscape.wav", + rate=sample_rate, + data=final_audio + ) + + print(f"Generated {total_duration}s soundscape") + +# 生成30秒的雨聲環境音 +generate_long_soundscape( + description="continuous rainfall, thunder occasionally, ambient outdoor", + total_duration=30, + segment_duration=10, + overlap_duration=3 +) +``` + +--- + +## 🎙️ Bark - 語音合成 + +### 什麼是Bark? + +Bark是Suno開發的多語言文本轉語音模型,支持語音生成、音樂生成和非語言聲音。 + +### 核心特點 + +- ✅ **多語言支持** - 支持多種語言包括中文 +- ✅ **情感表達** - 可以表達不同情緒 +- ✅ **非語言聲音** - 支持笑聲、嘆息等 +- ✅ **音樂生成** - 可以哼唱和唱歌 + +### 安裝 + +```bash +pip install git+https://github.com/suno-ai/bark.git +pip install scipy numpy transformers +``` + +### 基本使用 + +```python +from bark import SAMPLE_RATE, generate_audio, preload_models +from scipy.io.wavfile import write as write_wav +import numpy as np + +# 預加載模型 +preload_models() + +# 基本文本轉語音 +text_prompt = """ + Hello, I am Bark, a text-to-speech model created by Suno. + I can speak in many different voices and languages. +""" + +audio_array = generate_audio(text_prompt) + +# 保存音頻 +write_wav("bark_output.wav", SAMPLE_RATE, audio_array) +print("Audio saved!") +``` + +### 語音預設與控制 + +```python +from bark import generate_audio, SAMPLE_RATE +from scipy.io.wavfile import write as write_wav + +# Bark語音預設格式: [language]_[gender]_[id] +# 例如: en_speaker_0, zh_speaker_0, es_speaker_3 + +def generate_with_voice( + text, + voice_preset="v2/en_speaker_6", + output_file="output.wav" +): + """ + 使用特定語音預設生成音頻 + + Args: + text: 要轉換的文本 + voice_preset: 語音預設 + output_file: 輸出文件 + """ + # 生成音頻 + audio_array = generate_audio( + text, + history_prompt=voice_preset + ) + + # 保存 + write_wav(output_file, SAMPLE_RATE, audio_array) + print(f"Generated with voice {voice_preset}: {output_file}") + +# 測試不同語音 +voices = [ + "v2/en_speaker_0", # 男聲 + "v2/en_speaker_1", # 女聲 + "v2/en_speaker_6", # 年輕男聲 + "v2/en_speaker_9", # 成熟女聲 +] + +text = "The quick brown fox jumps over the lazy dog." + +for idx, voice in enumerate(voices): + generate_with_voice( + text, + voice_preset=voice, + output_file=f"voice_{idx}.wav" + ) +``` + +### 情感與非語言聲音 + +```python +from bark import generate_audio, SAMPLE_RATE +from scipy.io.wavfile import write as write_wav + +# Bark支持特殊標記來控制情感和非語言聲音 +# [laughter] - 笑聲 +# [laughs] - 笑 +# [sighs] - 嘆息 +# [music] - 音樂 +# [gasps] - 喘息 +# [clears throat] - 清嗓子 +# CAPITALIZATION - 強調 + +def generate_emotional_speech(output_file="emotional.wav"): + """生成帶情感的語音""" + + text = """ + [clears throat] Ladies and gentlemen, [laughs] + I have some AMAZING news to share with you today! + [gasps] You won't believe what just happened! + [sighs] But first, let me tell you a story... + """ + + audio = generate_audio(text) + write_wav(output_file, SAMPLE_RATE, audio) + print(f"Emotional speech saved to {output_file}") + +# 生成音樂/歌唱 +def generate_singing(output_file="singing.wav"): + """生成歌唱""" + + text = """ + ♪ La la la la la ♪ + ♪ Do re mi fa so la ti do ♪ + [music] [clears throat] That was beautiful! + """ + + audio = generate_audio(text) + write_wav(output_file, SAMPLE_RATE, audio) + print(f"Singing saved to {output_file}") + +generate_emotional_speech() +generate_singing() +``` + +### 多語言語音生成 + +```python +def generate_multilingual_speech(): + """生成多語言語音""" + + languages = { + "english": { + "text": "Hello! 
How are you today?", + "preset": "v2/en_speaker_6" + }, + "chinese": { + "text": "你好!今天過得怎麼樣?", + "preset": "v2/zh_speaker_0" + }, + "spanish": { + "text": "¡Hola! ¿Cómo estás hoy?", + "preset": "v2/es_speaker_0" + }, + "french": { + "text": "Bonjour! Comment allez-vous aujourd'hui?", + "preset": "v2/fr_speaker_0" + }, + "german": { + "text": "Hallo! Wie geht es dir heute?", + "preset": "v2/de_speaker_0" + } + } + + for lang, config in languages.items(): + print(f"Generating {lang}...") + + audio = generate_audio( + config["text"], + history_prompt=config["preset"] + ) + + output_file = f"speech_{lang}.wav" + write_wav(output_file, SAMPLE_RATE, audio) + print(f"✓ Saved: {output_file}") + +generate_multilingual_speech() +``` + +### 長文本語音生成 + +```python +def generate_long_form_speech( + text, + voice_preset="v2/en_speaker_6", + output_file="long_speech.wav", + segment_length=200 # 字符數 +): + """ + 生成長文本語音(分段處理) + + Args: + text: 長文本 + voice_preset: 語音預設 + output_file: 輸出文件 + segment_length: 每段字符數 + """ + import numpy as np + + # 分割文本為段落 + sentences = text.split('. ') + segments = [] + current_segment = "" + + for sentence in sentences: + if len(current_segment) + len(sentence) < segment_length: + current_segment += sentence + ". " + else: + if current_segment: + segments.append(current_segment.strip()) + current_segment = sentence + ". " + + if current_segment: + segments.append(current_segment.strip()) + + # 生成每個段落 + audio_segments = [] + + for i, segment in enumerate(segments): + print(f"Generating segment {i+1}/{len(segments)}...") + + audio = generate_audio( + segment, + history_prompt=voice_preset + ) + + audio_segments.append(audio) + + # 拼接所有段落 + # 添加短暫的靜音間隔 + silence = np.zeros(int(0.5 * SAMPLE_RATE)) # 0.5秒靜音 + + final_audio = audio_segments[0] + for audio in audio_segments[1:]: + final_audio = np.concatenate([final_audio, silence, audio]) + + # 保存 + write_wav(output_file, SAMPLE_RATE, final_audio) + print(f"Long-form speech saved to {output_file}") + +# 使用示例 +long_text = """ +Artificial intelligence is transforming the world as we know it. +From healthcare to finance, from transportation to entertainment, +AI is making a significant impact. Machine learning algorithms +can now recognize patterns, make predictions, and even create +original content. The future of AI is bright and full of possibilities. +As we continue to develop more advanced systems, we must also +consider the ethical implications and ensure that AI benefits +all of humanity. 
+""" + +generate_long_form_speech( + long_text, + voice_preset="v2/en_speaker_6", + output_file="ai_speech.wav" +) +``` + +--- + +## 🎛️ 音頻處理與編輯 + +### 基礎音頻處理 + +```python +import librosa +import soundfile as sf +import numpy as np + +def adjust_audio_properties( + input_file, + output_file, + target_sr=None, + volume_factor=1.0, + trim_silence=True, + normalize=True +): + """ + 調整音頻屬性 + + Args: + input_file: 輸入文件 + output_file: 輸出文件 + target_sr: 目標採樣率 + volume_factor: 音量倍數 + trim_silence: 是否裁剪靜音 + normalize: 是否標準化音量 + """ + # 加載音頻 + audio, sr = librosa.load(input_file, sr=target_sr) + + # 裁剪靜音 + if trim_silence: + audio, _ = librosa.effects.trim( + audio, + top_db=20, # 靜音閾值 + frame_length=2048, + hop_length=512 + ) + + # 調整音量 + audio = audio * volume_factor + + # 標準化 + if normalize: + audio = librosa.util.normalize(audio) + + # 保存 + sf.write(output_file, audio, sr) + print(f"Processed audio saved to {output_file}") + +# 使用 +adjust_audio_properties( + "input.wav", + "output.wav", + target_sr=44100, + volume_factor=1.2, + trim_silence=True, + normalize=True +) +``` + +### 音頻特效 + +```python +def apply_audio_effects( + input_file, + output_file, + effect="reverb" +): + """ + 應用音頻特效 + + Args: + input_file: 輸入文件 + output_file: 輸出文件 + effect: 特效類型 (reverb/echo/pitch_shift/time_stretch) + """ + audio, sr = librosa.load(input_file) + + if effect == "reverb": + # 簡單混響效果 + delay = int(0.1 * sr) # 100ms延遲 + reverb = np.zeros(len(audio) + delay) + reverb[:len(audio)] = audio + reverb[delay:] += audio * 0.3 # 添加延遲信號 + processed = reverb[:len(audio)] + + elif effect == "echo": + # 回聲效果 + delay = int(0.3 * sr) + echo = np.copy(audio) + if len(audio) > delay: + echo[delay:] += audio[:-delay] * 0.5 + processed = echo + + elif effect == "pitch_shift": + # 音高變換(升高2個半音) + processed = librosa.effects.pitch_shift( + audio, + sr=sr, + n_steps=2 + ) + + elif effect == "time_stretch": + # 時間拉伸(加速1.2倍) + processed = librosa.effects.time_stretch(audio, rate=1.2) + + else: + processed = audio + + # 標準化 + processed = librosa.util.normalize(processed) + + # 保存 + sf.write(output_file, processed, sr) + print(f"Applied {effect} effect: {output_file}") + +# 測試不同效果 +effects = ["reverb", "echo", "pitch_shift", "time_stretch"] +for effect in effects: + apply_audio_effects( + "input.wav", + f"output_{effect}.wav", + effect=effect + ) +``` + +### 音頻混合 + +```python +def mix_audio_tracks( + tracks, + volumes, + output_file="mixed.wav", + target_sr=44100 +): + """ + 混合多個音軌 + + Args: + tracks: 音軌文件列表 + volumes: 各音軌音量 (0-1) + output_file: 輸出文件 + target_sr: 目標採樣率 + """ + # 加載所有音軌 + audio_tracks = [] + max_length = 0 + + for track_file in tracks: + audio, sr = librosa.load(track_file, sr=target_sr) + audio_tracks.append(audio) + max_length = max(max_length, len(audio)) + + # 填充到相同長度 + for i in range(len(audio_tracks)): + if len(audio_tracks[i]) < max_length: + padding = max_length - len(audio_tracks[i]) + audio_tracks[i] = np.pad( + audio_tracks[i], + (0, padding), + mode='constant' + ) + + # 混合 + mixed = np.zeros(max_length) + for audio, volume in zip(audio_tracks, volumes): + mixed += audio * volume + + # 標準化避免削波 + mixed = librosa.util.normalize(mixed) + + # 保存 + sf.write(output_file, mixed, target_sr) + print(f"Mixed audio saved to {output_file}") + +# 使用示例:混合音樂、旁白和音效 +mix_audio_tracks( + tracks=["music.wav", "narration.wav", "sfx.wav"], + volumes=[0.3, 0.7, 0.4], + output_file="final_mix.wav" +) +``` + +--- + +## 🚀 實戰案例 + +### 案例1:播客自動生成器 + +```python +# podcast_generator.py +from bark import generate_audio, SAMPLE_RATE +from 
scipy.io.wavfile import write as write_wav +from audiocraft.models import MusicGen +from audiocraft.data.audio import audio_write +import numpy as np +import librosa +import soundfile as sf + +class PodcastGenerator: + """播客自動生成器""" + + def __init__(self): + self.music_model = MusicGen.get_pretrained('facebook/musicgen-small') + + def generate_intro_music(self, duration=10): + """生成開場音樂""" + self.music_model.set_generation_params(duration=duration) + + wav = self.music_model.generate([ + "upbeat podcast intro music, energetic, modern, professional" + ]) + + # 保存臨時文件 + audio_write("temp_intro", wav[0].cpu(), self.music_model.sample_rate) + + # 加載並返回 + audio, sr = librosa.load("temp_intro.wav", sr=SAMPLE_RATE) + return audio + + def generate_outro_music(self, duration=10): + """生成結尾音樂""" + self.music_model.set_generation_params(duration=duration) + + wav = self.music_model.generate([ + "podcast outro music, calm, reflective, fade out" + ]) + + audio_write("temp_outro", wav[0].cpu(), self.music_model.sample_rate) + audio, sr = librosa.load("temp_outro.wav", sr=SAMPLE_RATE) + return audio + + def generate_speech_segment(self, text, voice="v2/en_speaker_6"): + """生成語音段落""" + audio = generate_audio(text, history_prompt=voice) + return audio + + def create_podcast( + self, + title, + host_text, + segments, + output_file="podcast.wav" + ): + """ + 創建完整播客 + + Args: + title: 播客標題 + host_text: 主持人開場白 + segments: 內容段落列表 + output_file: 輸出文件 + """ + print("Generating podcast components...") + + # 生成音樂 + intro_music = self.generate_intro_music(duration=8) + outro_music = self.generate_outro_music(duration=8) + + # 生成開場白 + intro_speech = self.generate_speech_segment( + f"Welcome to {title}. {host_text}", + voice="v2/en_speaker_6" + ) + + # 生成內容段落 + segment_audios = [] + for i, segment in enumerate(segments): + print(f"Generating segment {i+1}/{len(segments)}...") + audio = self.generate_speech_segment( + segment["text"], + voice=segment.get("voice", "v2/en_speaker_6") + ) + segment_audios.append(audio) + + # 段落間添加短暫靜音 + silence = np.zeros(int(1.0 * SAMPLE_RATE)) + segment_audios.append(silence) + + # 生成結束語 + outro_speech = self.generate_speech_segment( + "Thank you for listening! Don't forget to subscribe!", + voice="v2/en_speaker_6" + ) + + # 組合所有元素 + # 1. 開場音樂(淡出) + intro_music = self.apply_fade(intro_music, fade_in=True, fade_out=True) + + # 2. 拼接:intro music + intro speech + podcast = np.concatenate([intro_music, intro_speech]) + + # 3. 添加所有內容段落 + for audio in segment_audios: + podcast = np.concatenate([podcast, audio]) + + # 4. 
添加結束語和結尾音樂 + outro_music = self.apply_fade(outro_music, fade_in=True, fade_out=True) + podcast = np.concatenate([podcast, outro_speech, outro_music]) + + # 標準化 + podcast = librosa.util.normalize(podcast) + + # 保存 + write_wav(output_file, SAMPLE_RATE, podcast) + print(f"Podcast saved to {output_file}") + + def apply_fade(self, audio, fade_in=True, fade_out=True, duration=2.0): + """應用淡入淡出""" + fade_samples = int(duration * SAMPLE_RATE) + + if fade_in: + fade_in_curve = np.linspace(0, 1, fade_samples) + audio[:fade_samples] *= fade_in_curve + + if fade_out: + fade_out_curve = np.linspace(1, 0, fade_samples) + audio[-fade_samples:] *= fade_out_curve + + return audio + +# 使用示例 +generator = PodcastGenerator() + +segments = [ + { + "text": "Today we're discussing artificial intelligence and its impact on society.", + "voice": "v2/en_speaker_6" + }, + { + "text": "AI has the potential to transform every aspect of our lives, from healthcare to education.", + "voice": "v2/en_speaker_6" + }, + { + "text": "[clears throat] But we must also consider the ethical implications.", + "voice": "v2/en_speaker_9" + } +] + +generator.create_podcast( + title="Tech Talk Podcast", + host_text="I'm your host, and today we have a fascinating discussion.", + segments=segments, + output_file="tech_talk_episode_001.wav" +) +``` + +### 案例2:背景音樂生成器 + +```python +# background_music_generator.py +from audiocraft.models import MusicGen +from audiocraft.data.audio import audio_write +import torch + +class BackgroundMusicGenerator: + """為視頻生成背景音樂""" + + def __init__(self): + self.model = MusicGen.get_pretrained('facebook/musicgen-medium') + + def generate_for_video( + self, + video_duration, + mood="upbeat", + genre="electronic", + output_file="bg_music.wav" + ): + """ + 根據視頻時長和情緒生成背景音樂 + + Args: + video_duration: 視頻時長(秒) + mood: 情緒 (upbeat/calm/dramatic/playful/mysterious) + genre: 風格 (electronic/acoustic/orchestral/ambient) + output_file: 輸出文件 + """ + # 構建提示詞 + mood_descriptors = { + "upbeat": "energetic, positive, motivating", + "calm": "peaceful, relaxing, gentle", + "dramatic": "intense, cinematic, powerful", + "playful": "fun, cheerful, lighthearted", + "mysterious": "suspenseful, dark, intriguing" + } + + genre_descriptors = { + "electronic": "synthesizer, modern, digital", + "acoustic": "guitar, piano, organic", + "orchestral": "strings, brass, classical", + "ambient": "atmospheric, ethereal, spacious" + } + + prompt = f""" + {mood_descriptors.get(mood, mood)} {genre} background music, + {genre_descriptors.get(genre, "")}, + seamless loop, no vocals, suitable for video content + """ + + # 設置生成參數 + self.model.set_generation_params( + duration=min(30, video_duration), # 最多30秒 + temperature=1.0, + cfg_coef=3.0 + ) + + # 生成 + wav = self.model.generate([prompt], progress=True) + + # 如果視頻更長,需要循環音樂 + if video_duration > 30: + # 保存基礎循環 + audio_write("temp_loop", wav[0].cpu(), self.model.sample_rate) + + # 循環拼接 + import librosa + import soundfile as sf + + loop_audio, sr = librosa.load("temp_loop.wav") + + # 計算需要多少次循環 + num_loops = int(np.ceil(video_duration / 30)) + + # 拼接 + extended_audio = np.tile(loop_audio, num_loops) + + # 截取到準確時長 + target_samples = int(video_duration * sr) + extended_audio = extended_audio[:target_samples] + + # 應用淡出 + fade_duration = 3 # 3秒淡出 + fade_samples = int(fade_duration * sr) + fade_curve = np.linspace(1, 0, fade_samples) + extended_audio[-fade_samples:] *= fade_curve + + # 保存 + sf.write(output_file, extended_audio, sr) + else: + audio_write(output_file.replace('.wav', ''), wav[0].cpu(), 
self.model.sample_rate) + + print(f"Background music saved to {output_file}") + + def generate_playlist( + self, + num_tracks=5, + duration=30, + theme="work_focus" + ): + """ + 生成音樂播放列表 + + Args: + num_tracks: 曲目數量 + duration: 每首時長 + theme: 主題 + """ + themes = { + "work_focus": [ + "calm electronic music for concentration", + "minimal ambient background music", + "lo-fi beats for studying", + "peaceful piano for productivity", + "soft jazz for focus" + ], + "workout": [ + "high energy electronic music, 130 bpm", + "motivating rock music, intense", + "upbeat hip hop beats", + "powerful drum and bass", + "energetic EDM workout music" + ], + "relaxation": [ + "peaceful ambient music, nature sounds", + "calm meditation music", + "soothing spa music", + "gentle acoustic guitar", + "soft piano lullaby" + ] + } + + prompts = themes.get(theme, themes["work_focus"]) + + self.model.set_generation_params(duration=duration) + + for idx, prompt in enumerate(prompts[:num_tracks]): + print(f"Generating track {idx+1}/{num_tracks}...") + + wav = self.model.generate([prompt], progress=True) + + filename = f"{theme}_track_{idx+1:02d}" + audio_write(filename, wav[0].cpu(), self.model.sample_rate) + + print(f"✓ Saved: {filename}.wav") + +# 使用示例 +generator = BackgroundMusicGenerator() + +# 為視頻生成背景音樂 +generator.generate_for_video( + video_duration=120, # 2分鐘 + mood="upbeat", + genre="electronic", + output_file="video_bg_music.wav" +) + +# 生成工作專注播放列表 +generator.generate_playlist( + num_tracks=5, + duration=180, # 3分鐘每首 + theme="work_focus" +) +``` + +--- + +## 📚 參考資源 + +### 官方文檔 +- [MusicGen GitHub](https://github.com/facebookresearch/audiocraft) +- [AudioLDM Paper](https://arxiv.org/abs/2301.12503) +- [Bark GitHub](https://github.com/suno-ai/bark) +- [Librosa Documentation](https://librosa.org/doc/latest/index.html) + +### 模型資源 +- [Hugging Face Audio Models](https://huggingface.co/models?pipeline_tag=text-to-audio) +- [AudioCraft Models](https://huggingface.co/facebook) + +### 學習資源 +- [Digital Signal Processing Course](https://www.coursera.org/learn/dsp) +- [Music Information Retrieval](https://www.audiolabs-erlangen.de/resources/MIR) + +--- + +## ✅ 檢查清單 + +完成本章節後,你應該能夠: + +- [ ] 理解音頻生成的基本概念 +- [ ] 使用MusicGen生成不同風格的音樂 +- [ ] 控制音樂的情緒和風格 +- [ ] 使用AudioLDM生成各種音效 +- [ ] 創建長時間的音景 +- [ ] 使用Bark進行文本轉語音 +- [ ] 生成多語言和情感豐富的語音 +- [ ] 處理和編輯音頻文件 +- [ ] 混合多個音軌 +- [ ] 構建實用的音頻生成應用 + +--- + +## 下一步 + +完成音樂生成後,建議: + +1. **實戰項目** - 構建完整的多模態內容生成系統 +2. **整合應用** - 將圖片、視頻、音頻生成整合到實際應用中 +3. 
**探索更多** - 研究最新的音頻生成技術和模型 + +--- + +最後更新:2024-11-19 +難度級別:🔴 高級 +預計學習時間:10-12小時 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_musicgen_basic.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_musicgen_basic.py" new file mode 100644 index 0000000..a0e71b6 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/3.\351\237\263\346\250\202\347\224\237\346\210\220/\347\257\204\344\276\213\344\273\243\347\242\274/01_musicgen_basic.py" @@ -0,0 +1,334 @@ +""" +MusicGen 音樂生成 +使用 Meta 的 MusicGen 模型生成音樂 +""" + +import torch +import torchaudio +from transformers import AutoProcessor, MusicgenForConditionalGeneration +import scipy +import numpy as np +from typing import List, Optional + + +class MusicGenerator: + """音樂生成器""" + + def __init__(self, model_size: str = "small"): + """ + 初始化音樂生成器 + + Args: + model_size: 模型大小 + - small: 最快,品質較低 (300M) + - medium: 平衡 (1.5B) + - large: 最佳品質,最慢 (3.3B) + - melody: 支持旋律條件 (1.5B) + """ + self.device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"使用設備: {self.device}") + + # 模型映射 + model_map = { + "small": "facebook/musicgen-small", + "medium": "facebook/musicgen-medium", + "large": "facebook/musicgen-large", + "melody": "facebook/musicgen-melody" + } + + if model_size not in model_map: + raise ValueError(f"不支持的模型大小: {model_size}") + + model_id = model_map[model_size] + + # 載入模型和處理器 + print(f"正在載入模型: {model_id}") + self.processor = AutoProcessor.from_pretrained(model_id) + self.model = MusicgenForConditionalGeneration.from_pretrained(model_id) + self.model = self.model.to(self.device) + + self.model_size = model_size + self.sampling_rate = self.model.config.audio_encoder.sampling_rate + + def generate( + self, + prompt: str, + duration: float = 10.0, + temperature: float = 1.0, + top_k: int = 250, + top_p: float = 0.0, + guidance_scale: float = 3.0, + do_sample: bool = True, + seed: Optional[int] = None + ) -> np.ndarray: + """ + 生成音樂 + + Args: + prompt: 音樂描述文字 + duration: 音樂長度(秒) + temperature: 溫度參數(越高越有創意,但可能不連貫) + top_k: Top-K 採樣 + top_p: Top-P 採樣 + guidance_scale: 引導強度(對提示詞的遵循程度) + do_sample: 是否使用採樣(False 則使用貪婪解碼) + seed: 隨機種子 + + Returns: + 音訊陣列 (sampling_rate, audio_values) + """ + print(f"\n生成音樂:") + print(f" 提示詞: {prompt}") + print(f" 時長: {duration}秒") + + # 處理輸入 + inputs = self.processor( + text=[prompt], + padding=True, + return_tensors="pt" + ).to(self.device) + + # 計算最大長度(tokens) + max_new_tokens = int(duration * self.model.config.audio_encoder.frame_rate) + + # 設定種子 + if seed is not None: + torch.manual_seed(seed) + + # 生成音樂 + with torch.no_grad(): + audio_values = self.model.generate( + **inputs, + max_new_tokens=max_new_tokens, + do_sample=do_sample, + temperature=temperature, + top_k=top_k, + top_p=top_p, + guidance_scale=guidance_scale + ) + + # 轉換為 numpy 陣列 + audio_array = audio_values[0, 0].cpu().numpy() + + return audio_array + + def save_audio( + self, + audio_array: np.ndarray, + output_path: str, + format: str = "wav" + ): + """ + 儲存音訊文件 + + Args: + audio_array: 音訊陣列 + output_path: 輸出路徑 + format: 音訊格式 (wav, mp3) + """ + if format == "wav": + scipy.io.wavfile.write( + output_path, + 
rate=self.sampling_rate, + data=audio_array + ) + elif format == "mp3": + # 需要安裝 pydub 和 ffmpeg + from pydub import AudioSegment + temp_wav = "temp_audio.wav" + scipy.io.wavfile.write(temp_wav, rate=self.sampling_rate, data=audio_array) + audio = AudioSegment.from_wav(temp_wav) + audio.export(output_path, format="mp3") + import os + os.remove(temp_wav) + else: + raise ValueError(f"不支持的格式: {format}") + + print(f"音訊已儲存至: {output_path}") + + def generate_and_save( + self, + prompt: str, + output_path: str, + **kwargs + ): + """生成並儲存音樂(便捷方法)""" + audio = self.generate(prompt, **kwargs) + self.save_audio(audio, output_path) + return audio + + +def example_basic_generation(): + """示例 1: 基本音樂生成""" + print("=== 示例 1: 基本音樂生成 ===") + + generator = MusicGenerator(model_size="small") + + # 生成不同風格的音樂 + prompts = [ + "upbeat pop music with electric guitar", + "calm piano melody for relaxation", + "energetic electronic dance music", + "acoustic folk song with guitar" + ] + + for i, prompt in enumerate(prompts): + output_path = f"music_{i+1}.wav" + generator.generate_and_save( + prompt=prompt, + output_path=output_path, + duration=10.0, + seed=42 + ) + + +def example_different_parameters(): + """示例 2: 不同參數效果""" + print("\n=== 示例 2: 參數調整 ===") + + generator = MusicGenerator(model_size="small") + prompt = "jazz music with saxophone" + + # 測試不同溫度 + temperatures = [0.8, 1.0, 1.2] + for temp in temperatures: + print(f"\n溫度 = {temp}") + generator.generate_and_save( + prompt=prompt, + output_path=f"jazz_temp_{temp}.wav", + duration=8.0, + temperature=temp, + seed=42 + ) + + # 測試不同引導強度 + guidance_scales = [2.0, 3.0, 5.0] + for scale in guidance_scales: + print(f"\n引導強度 = {scale}") + generator.generate_and_save( + prompt=prompt, + output_path=f"jazz_guidance_{scale}.wav", + duration=8.0, + guidance_scale=scale, + seed=42 + ) + + +def example_music_styles(): + """示例 3: 各種音樂風格""" + print("\n=== 示例 3: 音樂風格庫 ===") + + generator = MusicGenerator(model_size="small") + + # 詳細的風格描述模板 + styles = { + "古典": "classical orchestral music with strings and piano, elegant and sophisticated", + "搖滾": "energetic rock music with electric guitar, bass and drums, powerful and intense", + "爵士": "smooth jazz music with saxophone and piano, relaxed and sophisticated", + "電子": "electronic dance music with synthesizers, upbeat and energetic", + "環境音樂": "ambient atmospheric music, calm and meditative", + "流行": "catchy pop music with melody, upbeat and cheerful", + "嘻哈": "hip hop beat with bass and drums, rhythmic and groovy", + "鄉村": "country music with acoustic guitar, warm and storytelling", + "雷鬼": "reggae music with offbeat rhythm, relaxed and tropical", + "金屬": "heavy metal music with distorted guitars, aggressive and powerful" + } + + for style_name, prompt in styles.items(): + print(f"\n生成 {style_name} 風格...") + generator.generate_and_save( + prompt=prompt, + output_path=f"style_{style_name}.wav", + duration=10.0, + seed=42 + ) + + +def example_loop_generation(): + """示例 4: 生成循環音樂""" + print("\n=== 示例 4: 循環音樂生成 ===") + + generator = MusicGenerator(model_size="small") + + # 生成適合循環的音樂 + loop_prompts = [ + "short looping drum beat, 4 bar loop", + "looping bass line, repetitive and groovy", + "ambient pad loop, atmospheric and continuous", + "melodic synth loop, catchy and repetitive" + ] + + for i, prompt in enumerate(loop_prompts): + generator.generate_and_save( + prompt=prompt, + output_path=f"loop_{i+1}.wav", + duration=4.0, # 短循環 + temperature=0.9, + seed=42 + ) + + +def example_batch_generation(): + """示例 5: 批量生成""" + print("\n=== 
示例 5: 批量生成音樂庫 ===") + + generator = MusicGenerator(model_size="small") + + # 為遊戲或影片創建音樂庫 + music_library = { + "background": [ + "calm background music for cafe", + "peaceful background music for study", + "uplifting background music for video" + ], + "action": [ + "intense action music with drums", + "fast-paced chase music", + "epic battle music with orchestra" + ], + "emotional": [ + "sad emotional piano music", + "hopeful uplifting music", + "romantic music with strings" + ] + } + + import os + for category, prompts in music_library.items(): + os.makedirs(f"music_library/{category}", exist_ok=True) + + for i, prompt in enumerate(prompts): + output_path = f"music_library/{category}/track_{i+1}.wav" + print(f"\n生成 {category}/{i+1}...") + generator.generate_and_save( + prompt=prompt, + output_path=output_path, + duration=15.0, + seed=42 + i + ) + + +if __name__ == "__main__": + print("MusicGen 音樂生成示例") + print("=" * 60) + print("\n注意:") + print("1. 首次運行會下載模型(small: ~300MB, medium: ~1.5GB)") + print("2. 建議使用 GPU 加速生成") + print("3. 生成時間取決於音樂長度和模型大小\n") + + # 運行基本示例 + example_basic_generation() + + # 運行其他示例(取消註釋以執行) + # example_different_parameters() + # example_music_styles() + # example_loop_generation() + # example_batch_generation() + + print("\n所有示例完成!") + print("\n提示詞技巧:") + print("1. 描述樂器: 'with piano', 'with guitar'") + print("2. 描述節奏: 'upbeat', 'slow', 'energetic'") + print("3. 描述情緒: 'happy', 'sad', 'peaceful'") + print("4. 描述風格: 'jazz', 'rock', 'classical'") + print("5. 組合使用: 'calm piano jazz music for relaxation'") diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/4.\345\257\246\346\210\260\351\240\205\347\233\256/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/4.\345\257\246\346\210\260\351\240\205\347\233\256/README.md" new file mode 100644 index 0000000..bcbcf45 --- /dev/null +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/10.\345\244\232\346\250\241\346\205\213\347\224\237\346\210\220/4.\345\257\246\346\210\260\351\240\205\347\233\256/README.md" @@ -0,0 +1,1378 @@ +# 實戰項目 (Practical Projects) + +本章節提供完整的端到端多模態生成項目,整合圖片、視頻、音樂生成技術。 + +## 📋 目錄 + +1. [項目1:AI內容創作平台](#項目1ai內容創作平台) +2. [項目2:自動短視頻生成器](#項目2自動短視頻生成器) +3. [項目3:產品營銷素材生成系統](#項目3產品營銷素材生成系統) +4. 
[部署指南](#部署指南) + +--- + +## 🎯 項目概覽 + +| 項目 | 難度 | 技術棧 | 預計時間 | +|------|------|--------|----------| +| AI內容創作平台 | 🔴 高級 | SD, MusicGen, FastAPI, React | 40-50h | +| 自動短視頻生成器 | 🟡 中級 | SVD, AnimateDiff, Bark | 20-30h | +| 產品營銷素材生成 | 🟢 初級 | SD, ControlNet, AudioLDM | 15-20h | + +--- + +## 項目1:AI內容創作平台 + +### 項目描述 + +構建一個全功能的AI內容創作平台,用戶可以通過簡單的文本描述生成圖片、視頻和音樂。 + +### 功能特性 + +- ✅ 文本生成圖片(支持多種風格) +- ✅ 圖片生成視頻 +- ✅ 文本生成音樂 +- ✅ 批量生成和管理 +- ✅ 用戶認證和配額管理 +- ✅ Web界面和API接口 + +### 技術架構 + +``` +┌─────────────────────────────────────────┐ +│ 前端 (React + TypeScript) │ +├─────────────────────────────────────────┤ +│ - 圖片生成界面 │ +│ - 視頻生成界面 │ +│ - 音樂生成界面 │ +│ - 項目管理 │ +└─────────────────┬───────────────────────┘ + │ + │ REST API + ↓ +┌─────────────────────────────────────────┐ +│ 後端 (FastAPI + Python) │ +├─────────────────────────────────────────┤ +│ ┌────────────────────────────────────┐ │ +│ │ API 路由層 │ │ +│ │ - /api/generate/image │ │ +│ │ - /api/generate/video │ │ +│ │ - /api/generate/music │ │ +│ └────────────────────────────────────┘ │ +│ ┌────────────────────────────────────┐ │ +│ │ 業務邏輯層 │ │ +│ │ - 生成管理器 │ │ +│ │ - 任務隊列 │ │ +│ │ - 用戶管理 │ │ +│ └────────────────────────────────────┘ │ +│ ┌────────────────────────────────────┐ │ +│ │ AI模型層 │ │ +│ │ - Stable Diffusion │ │ +│ │ - Stable Video Diffusion │ │ +│ │ - MusicGen │ │ +│ └────────────────────────────────────┘ │ +└─────────────────┬───────────────────────┘ + │ + ↓ +┌─────────────────────────────────────────┐ +│ 數據存儲 │ +│ - PostgreSQL (元數據) │ +│ - Redis (緩存/任務隊列) │ +│ - S3/MinIO (生成內容) │ +└─────────────────────────────────────────┘ +``` + +### 實現代碼 + +#### 後端 API (FastAPI) + +```python +# main.py +from fastapi import FastAPI, BackgroundTasks, HTTPException, Depends +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from typing import Optional, List +import uuid +from datetime import datetime +import redis +import json + +app = FastAPI(title="AI Content Creation Platform") + +# CORS配置 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Redis連接(用於任務隊列) +redis_client = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True) + +# 數據模型 +class ImageGenerationRequest(BaseModel): + prompt: str + negative_prompt: Optional[str] = "" + width: int = 512 + height: int = 512 + num_images: int = 1 + style: Optional[str] = "realistic" + +class VideoGenerationRequest(BaseModel): + image_url: Optional[str] = None + prompt: Optional[str] = None + duration: int = 3 + motion_strength: int = 127 + +class MusicGenerationRequest(BaseModel): + prompt: str + duration: int = 10 + style: Optional[str] = "electronic" + +class GenerationStatus(BaseModel): + task_id: str + status: str # pending/processing/completed/failed + progress: int + result_url: Optional[str] = None + created_at: datetime + completed_at: Optional[datetime] = None + +# 生成管理器 +from generators import ImageGenerator, VideoGenerator, MusicGenerator + +image_gen = ImageGenerator() +video_gen = VideoGenerator() +music_gen = MusicGenerator() + +# API路由 +@app.post("/api/generate/image") +async def generate_image( + request: ImageGenerationRequest, + background_tasks: BackgroundTasks +): + """生成圖片""" + task_id = str(uuid.uuid4()) + + # 創建任務記錄 + task_data = { + "task_id": task_id, + "type": "image", + "status": "pending", + "progress": 0, + "params": request.dict(), + "created_at": datetime.now().isoformat() + } + + redis_client.set(f"task:{task_id}", json.dumps(task_data)) + + # 添加到後台任務 + 
background_tasks.add_task( + process_image_generation, + task_id, + request + ) + + return {"task_id": task_id, "status": "pending"} + +@app.post("/api/generate/video") +async def generate_video( + request: VideoGenerationRequest, + background_tasks: BackgroundTasks +): + """生成視頻""" + task_id = str(uuid.uuid4()) + + task_data = { + "task_id": task_id, + "type": "video", + "status": "pending", + "progress": 0, + "params": request.dict(), + "created_at": datetime.now().isoformat() + } + + redis_client.set(f"task:{task_id}", json.dumps(task_data)) + + background_tasks.add_task( + process_video_generation, + task_id, + request + ) + + return {"task_id": task_id, "status": "pending"} + +@app.post("/api/generate/music") +async def generate_music( + request: MusicGenerationRequest, + background_tasks: BackgroundTasks +): + """生成音樂""" + task_id = str(uuid.uuid4()) + + task_data = { + "task_id": task_id, + "type": "music", + "status": "pending", + "progress": 0, + "params": request.dict(), + "created_at": datetime.now().isoformat() + } + + redis_client.set(f"task:{task_id}", json.dumps(task_data)) + + background_tasks.add_task( + process_music_generation, + task_id, + request + ) + + return {"task_id": task_id, "status": "pending"} + +@app.get("/api/task/{task_id}") +async def get_task_status(task_id: str): + """查詢任務狀態""" + task_data = redis_client.get(f"task:{task_id}") + + if not task_data: + raise HTTPException(status_code=404, detail="Task not found") + + return json.loads(task_data) + +# 後台處理函數 +async def process_image_generation(task_id: str, request: ImageGenerationRequest): + """處理圖片生成任務""" + try: + # 更新狀態為處理中 + update_task_status(task_id, "processing", 10) + + # 生成圖片 + images = image_gen.generate( + prompt=request.prompt, + negative_prompt=request.negative_prompt, + width=request.width, + height=request.height, + num_images=request.num_images, + style=request.style, + progress_callback=lambda p: update_task_status(task_id, "processing", 10 + int(p * 0.8)) + ) + + # 保存圖片到存儲 + image_urls = [] + for idx, image in enumerate(images): + url = save_to_storage(image, f"{task_id}_{idx}.png") + image_urls.append(url) + + # 更新為完成 + update_task_status(task_id, "completed", 100, result_url=image_urls[0] if image_urls else None) + + except Exception as e: + update_task_status(task_id, "failed", 0, error=str(e)) + +async def process_video_generation(task_id: str, request: VideoGenerationRequest): + """處理視頻生成任務""" + try: + update_task_status(task_id, "processing", 10) + + # 生成視頻 + video_path = video_gen.generate( + image_url=request.image_url, + prompt=request.prompt, + duration=request.duration, + motion_strength=request.motion_strength, + progress_callback=lambda p: update_task_status(task_id, "processing", 10 + int(p * 0.8)) + ) + + # 保存到存儲 + video_url = save_to_storage(video_path, f"{task_id}.mp4") + + update_task_status(task_id, "completed", 100, result_url=video_url) + + except Exception as e: + update_task_status(task_id, "failed", 0, error=str(e)) + +async def process_music_generation(task_id: str, request: MusicGenerationRequest): + """處理音樂生成任務""" + try: + update_task_status(task_id, "processing", 10) + + # 生成音樂 + audio_path = music_gen.generate( + prompt=request.prompt, + duration=request.duration, + style=request.style, + progress_callback=lambda p: update_task_status(task_id, "processing", 10 + int(p * 0.8)) + ) + + # 保存到存儲 + audio_url = save_to_storage(audio_path, f"{task_id}.wav") + + update_task_status(task_id, "completed", 100, result_url=audio_url) + + except Exception as e: + 
update_task_status(task_id, "failed", 0, error=str(e)) + +def update_task_status(task_id: str, status: str, progress: int, result_url: str = None, error: str = None): + """更新任務狀態""" + task_data = json.loads(redis_client.get(f"task:{task_id}")) + task_data["status"] = status + task_data["progress"] = progress + + if result_url: + task_data["result_url"] = result_url + + if error: + task_data["error"] = error + + if status == "completed" or status == "failed": + task_data["completed_at"] = datetime.now().isoformat() + + redis_client.set(f"task:{task_id}", json.dumps(task_data)) + +def save_to_storage(file_path, filename): + """保存文件到存儲(S3/MinIO)""" + # 實現文件上傳邏輯 + # 這裡簡化為返回本地路徑 + return f"/storage/{filename}" + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) +``` + +#### 生成器模塊 + +```python +# generators.py +from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline +from audiocraft.models import MusicGen +import torch +from PIL import Image +import os + +class ImageGenerator: + """圖片生成器""" + + def __init__(self): + self.pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + torch_dtype=torch.float16 + ) + self.pipe = self.pipe.to("cuda") + self.pipe.enable_attention_slicing() + + def generate( + self, + prompt, + negative_prompt="", + width=512, + height=512, + num_images=1, + style="realistic", + progress_callback=None + ): + """生成圖片""" + + # 根據風格調整提示詞 + style_prompts = { + "realistic": ", photorealistic, highly detailed, 8k uhd", + "anime": ", anime style, vibrant colors, detailed", + "artistic": ", artistic, painterly, expressive", + "3d": ", 3d render, octane render, highly detailed" + } + + full_prompt = prompt + style_prompts.get(style, "") + + images = [] + for i in range(num_images): + if progress_callback: + progress_callback((i / num_images) * 100) + + image = self.pipe( + prompt=full_prompt, + negative_prompt=negative_prompt, + width=width, + height=height, + num_inference_steps=50, + guidance_scale=7.5 + ).images[0] + + images.append(image) + + # 保存臨時文件 + os.makedirs("temp", exist_ok=True) + image.save(f"temp/image_{i}.png") + + if progress_callback: + progress_callback(100) + + return images + +class VideoGenerator: + """視頻生成器""" + + def __init__(self): + self.pipe = StableVideoDiffusionPipeline.from_pretrained( + "stabilityai/stable-video-diffusion-img2vid-xt", + torch_dtype=torch.float16 + ) + self.pipe = self.pipe.to("cuda") + self.pipe.enable_model_cpu_offload() + + def generate( + self, + image_url=None, + prompt=None, + duration=3, + motion_strength=127, + progress_callback=None + ): + """生成視頻""" + from diffusers.utils import load_image, export_to_video + + # 加載或生成輸入圖片 + if image_url: + image = load_image(image_url) + elif prompt: + # 使用圖片生成器創建初始圖片 + img_gen = ImageGenerator() + images = img_gen.generate(prompt, num_images=1) + image = images[0] + else: + raise ValueError("Must provide either image_url or prompt") + + image = image.resize((1024, 576)) + + if progress_callback: + progress_callback(20) + + # 生成視頻 + frames = self.pipe( + image=image, + num_frames=min(25, duration * 7), + motion_bucket_id=motion_strength, + decode_chunk_size=8 + ).frames[0] + + if progress_callback: + progress_callback(90) + + # 導出視頻 + os.makedirs("temp", exist_ok=True) + output_path = "temp/video_output.mp4" + export_to_video(frames, output_path, fps=7) + + if progress_callback: + progress_callback(100) + + return output_path + +class MusicGenerator: + """音樂生成器""" + + def __init__(self): + self.model = 
MusicGen.get_pretrained('facebook/musicgen-medium') + + def generate( + self, + prompt, + duration=10, + style="electronic", + progress_callback=None + ): + """生成音樂""" + from audiocraft.data.audio import audio_write + + # 風格模板 + style_templates = { + "electronic": "electronic music, synthesizer, modern", + "acoustic": "acoustic music, guitar, organic", + "orchestral": "orchestral music, cinematic, dramatic", + "ambient": "ambient music, atmospheric, peaceful" + } + + full_prompt = f"{prompt}, {style_templates.get(style, '')}" + + self.model.set_generation_params( + duration=duration, + temperature=1.0, + cfg_coef=3.0 + ) + + if progress_callback: + progress_callback(30) + + # 生成 + wav = self.model.generate([full_prompt], progress=True) + + if progress_callback: + progress_callback(90) + + # 保存 + os.makedirs("temp", exist_ok=True) + output_path = "temp/music_output" + audio_write(output_path, wav[0].cpu(), self.model.sample_rate) + + if progress_callback: + progress_callback(100) + + return f"{output_path}.wav" +``` + +#### 前端界面 (React) + +```typescript +// App.tsx +import React, { useState } from 'react'; +import axios from 'axios'; + +const API_BASE_URL = 'http://localhost:8000'; + +interface GenerationTask { + task_id: string; + status: string; + progress: number; + result_url?: string; +} + +function App() { + const [activeTab, setActiveTab] = useState<'image' | 'video' | 'music'>('image'); + const [currentTask, setCurrentTask] = useState(null); + + // 圖片生成 + const [imagePrompt, setImagePrompt] = useState(''); + const [imageStyle, setImageStyle] = useState('realistic'); + + // 視頻生成 + const [videoPrompt, setVideoPrompt] = useState(''); + const [videoDuration, setVideoDuration] = useState(3); + + // 音樂生成 + const [musicPrompt, setMusicPrompt] = useState(''); + const [musicDuration, setMusicDuration] = useState(10); + + const generateImage = async () => { + try { + const response = await axios.post(`${API_BASE_URL}/api/generate/image`, { + prompt: imagePrompt, + style: imageStyle, + num_images: 1 + }); + + setCurrentTask(response.data); + pollTaskStatus(response.data.task_id); + } catch (error) { + console.error('Error generating image:', error); + } + }; + + const generateVideo = async () => { + try { + const response = await axios.post(`${API_BASE_URL}/api/generate/video`, { + prompt: videoPrompt, + duration: videoDuration, + motion_strength: 127 + }); + + setCurrentTask(response.data); + pollTaskStatus(response.data.task_id); + } catch (error) { + console.error('Error generating video:', error); + } + }; + + const generateMusic = async () => { + try { + const response = await axios.post(`${API_BASE_URL}/api/generate/music`, { + prompt: musicPrompt, + duration: musicDuration, + style: 'electronic' + }); + + setCurrentTask(response.data); + pollTaskStatus(response.data.task_id); + } catch (error) { + console.error('Error generating music:', error); + } + }; + + const pollTaskStatus = async (taskId: string) => { + const interval = setInterval(async () => { + try { + const response = await axios.get(`${API_BASE_URL}/api/task/${taskId}`); + setCurrentTask(response.data); + + if (response.data.status === 'completed' || response.data.status === 'failed') { + clearInterval(interval); + } + } catch (error) { + console.error('Error polling task status:', error); + clearInterval(interval); + } + }, 2000); + }; + + return ( +
<div className="App">
+      <h1>AI Content Creation Platform</h1>
+
+      {/* 標籤切換 */}
+      <div>
+        <button onClick={() => setActiveTab('image')}>Image</button>
+        <button onClick={() => setActiveTab('video')}>Video</button>
+        <button onClick={() => setActiveTab('music')}>Music</button>
+      </div>
+
+      {/* 圖片生成 */}
+      {activeTab === 'image' && (
+        <div>
+          <h2>Generate Image</h2>
+