Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 0 additions & 40 deletions .env.example

This file was deleted.

2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ urls.txt
# OS specific
.DS_Store
Thumbs.db
temp_*/
logs/
65 changes: 17 additions & 48 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,17 @@
![Version](https://img.shields.io/badge/version-1.0.0-blue.svg)
![Python](https://img.shields.io/badge/python-3.8+-green.svg)

## 👤 作者信息
## Huggingface space: [rednote-gen](https://huggingface.co/spaces/windane/rednote-gen)
PS:建议在本地运行。在 Space 上直接下载视频的失败率较高,此时可以改用 MD 文件解析。

## 👤 原作者信息

- **作者**:玄清
- **博客**:[天天悦读](https://blog.example.com)
- **Email**:grow8org@gmail.com
- **GitHub**:[whotto/Video_note_generator](https://github.com/whotto/Video_note_generator)


## 🎯 应用场景

- **内容创作者**:快速将视频/直播内容转换为文章
Expand Down Expand Up @@ -96,24 +100,12 @@ graph TD

## 🚀 使用方式

支持三种使用方式:

1. **处理单个视频**:
```bash
python video_note_generator.py https://example.com/video
```

2. **批量处理 URL 文件**:
```bash
# urls.txt 文件,每行一个视频链接
python video_note_generator.py urls.txt
启动 WebUI:
```shell
$ python web.py
```

3. **处理 Markdown 文件**:
```bash
# 支持 Markdown 链接和直接 URL
python video_note_generator.py notes.md
```
打开:http://127.0.0.1:7860

## 🛠️ 使用工具

Expand All @@ -134,40 +126,13 @@ python video_note_generator.py notes.md

# 安装 Python 依赖
pip install -r requirements.txt

# 配置环境变量
cp .env.example .env
```

### 2. 配置 API 密钥

编辑 `.env` 文件,填入必要的 API 密钥:
```ini
# OpenRouter API(必需)
OPENROUTER_API_KEY=your-api-key-here

# Unsplash API(必需)
UNSPLASH_ACCESS_KEY=your-unsplash-access-key-here
UNSPLASH_SECRET_KEY=your-unsplash-secret-key-here
```

### 3. 开始使用

1. 创建 `urls.txt` 文件,每行一个视频链接
2. 运行环境检查:
```bash
python check_environment.py
```
3. 运行生成器:
```bash
python video_note_generator.py test.md
```

## 📄 输出文件

每个视频会生成三个文件:

1. **原始笔记** (`YYYYMMDD_HHMMSS.md`)
1. **原始笔记**:
- 完整的视频转录文本
- 保留所有细节内容

Expand All @@ -185,14 +150,18 @@ python video_note_generator.py test.md

## ⚙️ 配置说明

在 `.env` 文件中可以调整以下参数:
在设置页面填写必要的 API Key:
- OpenRouter API(必需)
- Unsplash API (建议)

还有其他设置:

```ini
# 内容生成配置
```
MAX_TOKENS=2000 # 生成小红书内容的最大长度
CONTENT_CHUNK_SIZE=2000 # 长文本分块大小(字符数)
TEMPERATURE=0.7 # AI 创造性程度 (0.0-1.0)


# 代理设置(可选)
# HTTP_PROXY=http://127.0.0.1:7890
# HTTPS_PROXY=http://127.0.0.1:7890
Expand Down
29 changes: 12 additions & 17 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
openai>=1.0.0
httpx>=0.24.1
yt-dlp>=2023.11.16
openai-whisper>=2023.11.17
python-dotenv>=1.0.0
requests>=2.31.0
beautifulsoup4>=4.12.2
python-unsplash>=1.1.0
Pillow>=10.1.0
urllib3>=2.1.0
certifi>=2023.11.17
ffmpeg-python>=0.2.0

# Optional dependencies for better performance
torch>=2.1.0
pytube>=15.0.0
you-get>=0.4.1650
beautifulsoup4==4.12.3
gradio==5.9.1
gradio_modal==0.0.4
httpx==0.28.1
openai==1.58.1
openai_whisper==20240930
python-dotenv==1.0.1
pytube==15.0.0
setuptools==75.1.0
yt_dlp==2024.12.23
python-unsplash==1.2.5
moviepy~=2.1.1
Empty file added src/__init__.py
Empty file.
Empty file added src/adapter/__init__.py
Empty file.
24 changes: 24 additions & 0 deletions src/adapter/ffmpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import subprocess

from src.logger import app_logger


class FfmpegAdapter:
    """Locate a working ffmpeg executable at construction time.

    Candidates are probed in order by running ``<candidate> -version``.
    The first one that starts *and* exits with status 0 is stored in
    ``self.ffmpeg_path``; if none works, ``self.ffmpeg_path`` is ``None``
    and a warning is logged.
    """

    # (command, log message on success), probed in this order.
    _CANDIDATES = (
        ("/opt/homebrew/bin/ffmpeg",
         "✅ ffmpeg is available at /opt/homebrew/bin/ffmpeg"),
        ("ffmpeg", "✅ ffmpeg is available (from PATH)"),
    )

    def __init__(self):
        self.ffmpeg_path = None
        failure = None
        for command, success_message in self._CANDIDATES:
            failure = self._probe(command)
            if failure is None:
                app_logger.info(success_message)
                self.ffmpeg_path = command
                return
        # Only the error from the last candidate is reported.
        app_logger.warning(f"⚠️ ffmpeg not found: {str(failure)}")

    @staticmethod
    def _probe(command):
        """Run ``command -version`` and report whether it worked.

        Returns:
            ``None`` when the command ran and exited with status 0,
            otherwise the exception describing why it is unusable.
        """
        try:
            # check=True makes a non-zero exit status count as a failure
            # instead of being silently accepted.
            subprocess.run([command, "-version"],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           check=True)
        except (OSError, subprocess.SubprocessError) as e:
            return e
        return None
78 changes: 78 additions & 0 deletions src/adapter/openrouter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import asyncio
from typing import Optional

import openai

from src.environment.env import Environment
from src.logger import app_logger
from src.video.prompt import share_prompt


class OpenRouterAdapter:
    """Chat-completion client for OpenRouter built on the OpenAI SDK.

    Call :meth:`connect` once to probe the API; :meth:`generate` refuses to
    run until that probe has succeeded.

    Throttling, as implemented: at most ``_MAX_CONCURRENT`` ``generate``
    calls are in flight at once, and each call waits ``_DISPATCH_DELAY``
    seconds before sending its request.
    """

    _MAX_CONCURRENT = 5
    _DISPATCH_DELAY = 60 / 5  # seconds each call waits before dispatching

    def __init__(self, env: Environment):
        self.client = openai.OpenAI(
            api_key=env.openrouter_api_key,
            base_url=env.openrouter_api_url or 'https://openrouter.ai/api/v1',
            default_headers={
                "HTTP-Referer": env.openrouter_http_referer,
                "X-Title": env.openrouter_app_name
            }
        )
        self.ai_model = env.openrouter_ai_model
        self.api_available = False
        # One marker per request that has passed the limiter and is pending.
        self.wait_queue = asyncio.Queue()
        # Caps the number of concurrently running generate() calls.
        self.flow_limiter = asyncio.Semaphore(self._MAX_CONCURRENT)

    def connect(self):
        """Probe the API by listing models and record the outcome.

        Sets ``self.api_available`` to True on success and False on failure.
        Does nothing when the client has no API key.
        """
        if not self.client.api_key:
            return
        try:
            app_logger.info("正在测试 OpenRouter API 连接...")
            self.client.models.list()
            app_logger.info("✅ OpenRouter API 连接测试成功")
            self.api_available = True
        except Exception as e:
            app_logger.error(f"❌ OpenRouter API 连接测试失败: {e}")
            app_logger.error("将继续尝试使用API,但可能会遇到问题")
            self.api_available = False

    async def generate(self, system_prompt_type, user_prompt_type, content,
                       temperature=0.7, max_tokens=4000) -> Optional[str]:
        """Request a chat completion and return the generated text.

        Args:
            system_prompt_type: prompt type passed to ``share_prompt`` to
                build the system message.
            user_prompt_type: prompt type passed to ``share_prompt`` to build
                the user message; ``content`` itself is used when that
                returns a falsy value.
            content: source text handed to ``share_prompt``.
            temperature: sampling temperature forwarded to the API.
            max_tokens: completion token limit forwarded to the API.

        Returns:
            The content of the first choice, or ``None`` when the API is
            marked unavailable, the response has no choices, or any error
            occurs (errors are logged, not raised).
        """
        if not self.api_available:
            app_logger.error("OpenRouter API 不可用,无法生成")
            return None

        # `async with` guarantees the permit is returned on every exit path.
        async with self.flow_limiter:
            enqueued = False
            try:
                system_prompt = share_prompt(prompt_type=system_prompt_type, content=content)
                user_prompt = share_prompt(prompt_type=user_prompt_type, content=content) or content
                app_logger.info('OpenRouter API 开始请求')

                await self.wait_queue.put(None)
                enqueued = True

                # Fixed delay before each request is dispatched.
                await asyncio.sleep(self._DISPATCH_DELAY)

                # The SDK client is synchronous: run it in the default
                # executor so the event loop is not blocked while waiting
                # on the HTTP round trip.
                loop = asyncio.get_running_loop()
                response = await loop.run_in_executor(
                    None,
                    lambda: self.client.chat.completions.create(
                        model=self.ai_model,
                        messages=[
                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": user_prompt}
                        ],
                        temperature=temperature,
                        max_tokens=max_tokens
                    )
                )
                if not response.choices:
                    app_logger.error("OpenRouter API 返回结果为空")
                    return None
                return response.choices[0].message.content
            except Exception as e:
                app_logger.error(f"OpenRouter API 请求失败: {e}")
                return None
            finally:
                # Remove only the marker this call added. Awaiting get() on
                # an empty queue (when the put never happened) would hang
                # this coroutine forever.
                if enqueued:
                    try:
                        self.wait_queue.get_nowait()
                    except asyncio.QueueEmpty:
                        # Marker was already drained elsewhere; nothing to undo.
                        pass
99 changes: 99 additions & 0 deletions src/adapter/unsplash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from typing import List

import httpx

from src.environment.env import Environment
from unsplash.api import Api as UnsplashApi
from unsplash.auth import Auth as UnsplashAuth

from src.logger import app_logger


class UnsplashAdapter:
    """Fetch portrait-oriented photo URLs from the Unsplash search API.

    The adapter is usable only when ``env.unsplash_access_key`` is set and
    the ``python-unsplash`` client can be constructed; otherwise
    :meth:`get_images` returns an empty list.
    """

    _SEARCH_URL = 'https://api.unsplash.com/search/photos'
    # Upper bound on follow-up pages requested when topping up results.
    _MAX_EXTRA_PAGES = 5

    def __init__(self, env: Environment):
        self.unsplash_client = None
        self.unsplash_available = False
        self.unsplash_access_key = env.unsplash_access_key

        if env.unsplash_access_key:
            auth = UnsplashAuth(
                client_id=env.unsplash_access_key,
                client_secret=None,
                redirect_uri=None
            )
            try:
                unsplash_client = UnsplashApi(auth)
                self.unsplash_client = unsplash_client
                self.unsplash_available = True
                app_logger.info("✅ Unsplash API 配置成功")
            except Exception as e:
                app_logger.error(f"❌ Failed to initialize Unsplash client: {str(e)}")
                self.unsplash_available = False
        else:
            app_logger.warning("⚠️ 未设置 Unsplash API 密钥")
            self.unsplash_available = False

    def _search(self, keyword, per_page, page=None):
        """Run one photo search and return the image URLs it yields.

        Args:
            keyword: search term.
            per_page: number of results requested.
            page: result page to request; omitted from the request when None.

        Returns:
            A list of URLs, preferring each photo's ``regular`` size and
            falling back to ``small``. Empty on a non-200 response or when
            there are no results.
        """
        params = {
            'query': keyword,
            'per_page': per_page,
            'orientation': 'portrait',  # portrait images suit Xiaohongshu posts
            'content_filter': 'high'
        }
        if page is not None:
            params['page'] = page

        # TLS certificate verification is left at httpx's default (enabled):
        # the access key travels in the Authorization header.
        response = httpx.get(
            self._SEARCH_URL,
            params=params,
            headers={'Authorization': f'Client-ID {self.unsplash_access_key}'}
        )
        if response.status_code != 200:
            return []

        found = []
        for photo in response.json().get('results') or []:
            sizes = photo['urls']
            url = sizes.get('regular') or sizes.get('small')
            if url:
                found.append(url)
        return found

    def get_images(self, query: str, count: int = 3) -> List[str]:
        """Return up to ``count`` distinct image URLs for ``query``.

        Args:
            query: one or more comma-separated keywords; blank keywords are
                ignored.
            count: maximum number of URLs to return.

        Returns:
            A list of at most ``count`` URLs with duplicates removed and
            first-seen order kept. Empty when the adapter is unavailable,
            ``count`` is not positive, or any error occurs (errors are
            logged, not raised).
        """
        if not self.unsplash_available:
            app_logger.warning("⚠️ Unsplash API 不可用")
            return []
        if count <= 0:
            return []

        try:
            keywords = [part.strip() for part in query.split(',') if part.strip()]

            # First page of results for every keyword.
            collected = []
            for keyword in keywords:
                collected.extend(self._search(keyword, count))
            # dict.fromkeys drops duplicates while preserving order.
            collected = list(dict.fromkeys(collected))

            # Still short: walk successive pages of the last keyword. The
            # page size stays equal to the first request so pages line up
            # and results are not re-fetched.
            if keywords:
                page = 2
                while len(collected) < count and page <= 1 + self._MAX_EXTRA_PAGES:
                    extra = self._search(keywords[-1], count, page)
                    if not extra:
                        break
                    collected = list(dict.fromkeys(collected + extra))
                    page += 1

            return collected[:count]

        except Exception as e:
            app_logger.error(f"⚠️ 获取图片失败: {str(e)}")
            return []
Empty file added src/downloader/__init__.py
Empty file.
Loading