God-2077
diff --git a/novel_crawler/README.md b/novel_crawler/README.md
Lines changed: 20 additions & 7 deletions
diff --git a/novel_crawler/config.yml b/novel_crawler/config.yml
Lines changed: 90 additions & 0 deletions
@@ -7,7 +7,7 @@
 爬取小说章节内容页面, 保存到本地
 最后去阅读小说了
 
-最新版：[novel_crawler_v.25.06.05.py][5]
+最新版：[novel_crawler_v.25.07.06.py][6]
 
 ## 说明
 
@@ -77,7 +77,7 @@ rules:
   novel_chapter_content_div: 'div.content'
 
   # 小说单段规则(相对于小说章节内容区域)
-  novel_chapter_content_p: 'p'  # 可以是 p, div, span 等
+  novel_chapter_content_p: 'p'  # 可以是 br, p, div, span 等
 
   # 小说净化内容配置
   purify:
@@ -121,6 +121,12 @@ python ***.py config.yml
 
 ## 日志
 
+- [novel_crawler_v.25.07.06.py][6]
+    - 使用 rich 库增强输出
+    - 增加对 br 标签的特殊处理逻辑
+    - 排除目录 JS 链接
+    - 若干优化
+
 - [novel_crawler_v.25.06.05.py][5]
     - 更新 epub 章节渲染模板
 
@@ -139,8 +145,15 @@ python ***.py config.yml
 - [novel_crawler_v.24.12.01.py][2]
     - 第一个版本
 
-[1]: https://github.com/God-2077/python-code/tree/main/novel_crawler/novel_crawler_v.24.12.01.py
-[2]: https://github.com/God-2077/python-code/tree/main/ftp_server/novel_crawler/novel_crawler_v.24.12.01.py
-[3]: https://github.com/God-2077/python-code/tree/main/ftp_server/novel_crawler/novel_crawler_v.25.02.03.py
-[4]: https://github.com/God-2077/python-code/tree/main/ftp_server/novel_crawler/novel_crawler_v.25.06.02.py
-[5]: https://github.com/God-2077/python-code/tree/main/ftp_server/novel_crawler/novel_crawler_v.25.06.05.py
+<!-- [1]: novel_crawler_v.24.12.01.py
+[2]: novel_crawler_v.24.12.01.py
+[3]: novel_crawler_v.25.02.03.py
+[4]: novel_crawler_v.25.06.02.py
+[5]: novel_crawler_v.25.06.05.py
+[6]: novel_crawler_v.25.07.06.py -->
+[1]: novel_crawler_v.24.12.01.py
+[2]: novel_crawler_v.24.12.01.py
+[3]: novel_crawler_v.25.02.03.py
+[4]: novel_crawler_v.25.06.02.py
+[5]: novel_crawler_v.25.06.05.py
+[6]: novel_crawler_v.25.07.06.py
@@ -0,0 +1,90 @@
+# 小说爬虫配置（示例）
+# 使用方式: python novel_crawler_v.25.07.06.py config.yml
+
+# 基本配置
+basic:
+  # URL of the novel's detail page
+  novel_detail_url: 'https://www.31ec1.lol/read/45159/'
+
+  # List of URLs of the chapter-list pages
+  # If the chapter list lives on the detail page, reuse novel_detail_url here
+  novel_chapter_url:
+    - 'https://www.31ec1.lol/read/45159/'
+
+  # Save path for the novel; defaults to the current directory
+  download_path: './downloads'
+
+  # Encoding of the novel file (TXT), e.g. utf-8, gbk, gb2312
+  novel_file_encoding: 'utf-8'
+
+  # Output format (txt, epub)
+  output_format: 'txt'
+
+  # Indentation string
+  indent_string: '    '
+
+  # Whether to enable debug mode (canonical lowercase YAML boolean)
+  debug: true
+
+# 选择器规则配置
+rules:
+  # CSS selector for the novel title
+  novel_name: 'body > div.book > div.info > h1'
+  
+  # CSS selector for the novel author
+  novel_author: 'body > div.book > div.info > div.small > span:nth-child(1)'
+  
+  # CSS selector for the novel introduction
+  novel_intro: 'body > div.book > div.info > div.intro > dl > dd'
+  
+  # CSS selector for the chapter-list region
+  novel_chapter_div: 'body > div.listmain'
+  
+  # Region of a single chapter entry (relative to the chapter-list region)
+  novel_chapter_div_only: 'dd'
+  
+  # Chapter name (relative to the chapter-list region)
+  novel_chapter_name: 'a'
+  
+  # Chapter URL (relative to the chapter-list region)
+  novel_chapter_url: 'a'
+  
+  # CSS selector for the chapter content region
+  novel_chapter_content_div: '#chaptercontent'
+  
+  # Rule for a single paragraph (relative to the chapter content region)
+  novel_chapter_content_p: 'br'  # may be br, p, div, span, etc.
+  
+  # Content purification settings
+  purify:
+    # List of literal text snippets to purify
+    text:
+      - '广告内容1'
+      - '广告内容2'
+    
+    # List of regular expressions to purify
+    re:
+      - '[\d]{4}-[\d]{2}-[\d]{2}'  # strip date-formatted strings
+      - '本章节.*更新'             # strip update notices
+
+# 网络请求配置
+network:
+  # Request headers
+  headers:
+    User-Agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    Referer: 'https://www.31ec1.lol/'
+    Accept-Language: 'zh-CN,zh;q=0.9'
+  
+  # Cookies
+  # NOTE(review): with both sample entries commented out, this key parses as
+  # null rather than an empty mapping - confirm the crawler accepts that.
+  cookies:
+    # session_id: 'abc123'
+    # token: 'xyz456'
+  
+  # Request timeout (seconds)
+  timeout: 5
+  
+  # Number of retries on failure
+  max_retries: 5
+  
+  # Interval between requests (milliseconds)
+  request_interval_ms: 0