Skip to content

Commit 88d63e0

Browse files
committed
feat(listener): 屏蔽 Discord 自身域名避免用户复制消息链接误入库
用户经常在监听频道右键点击 Discord 消息 → "复制消息链接" 粘贴到频道里, 这时 bot 会把 discord.com/channels/.../.../... 当作一条分享提交进 shared_links,污染 /feed。 新增 _SKIP_HOSTS 静默跳过以下域名: - discord.com / www.discord.com / canary.discord.com / ptb.discord.com - discord.gg(邀请短链) - discordapp.com / cdn.discordapp.com / media.discordapp.net(附件 CDN) 匹配策略: - host 精确匹配(防范 discord.com.evil.com 这类钓鱼子域名) - 大小写不敏感 - 静默忽略(不 reply / 不提交)——像 bot 没看到,不打扰群聊 tests: 新增 test_listener_skip.py × 16 case,覆盖各类 Discord 链接、 钓鱼变体、坏 URL。全仓 58/58 通过。
1 parent 4140f9c commit 88d63e0

2 files changed

Lines changed: 90 additions & 0 deletions

File tree

src/chat_bot/cogs/listener.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import asyncio
1717
import re
18+
from urllib.parse import urlparse
1819

1920
import discord
2021
import structlog
@@ -25,6 +26,32 @@
2526

2627
_URL_RE = re.compile(r"https?://[^\s<>\"'\]\)]+", re.IGNORECASE)
2728

29+
# Skip Discord's own links: users frequently paste them by mistake (right-click
# -> "Copy Message Link" produces discord.com/channels/.../...), and such links
# must not be stored as "shares". They are ignored silently -- no reply and no
# submission -- as if the bot had never seen them.
_SKIP_HOSTS = frozenset({
    # Main site
    "discord.com",
    "www.discord.com",
    "canary.discord.com",
    "ptb.discord.com",
    # Invite short links
    "discord.gg",
    # Attachments / CDN
    "discordapp.com",
    "cdn.discordapp.com",
    "media.discordapp.net",
})


def _should_skip(url: str) -> bool:
    """Return whether *url* points at a host that must be skipped.

    Currently only Discord's own domains (see ``_SKIP_HOSTS``) are blocked.
    The comparison is an exact, case-insensitive match on the host name, so
    look-alike hosts such as ``discord.com.evil.com`` are not skipped.

    Args:
        url: Candidate URL extracted from a message.

    Returns:
        ``True`` if the URL's host is in ``_SKIP_HOSTS``; ``False`` otherwise,
        including when the URL has no host or cannot be parsed.
    """
    try:
        # ``hostname`` (unlike ``netloc``) excludes any ``user:password@``
        # prefix and ``:port`` suffix and is already lower-cased, so links
        # like ``https://user@discord.com/...`` cannot bypass the filter.
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. an unbalanced
        # "["); an unparseable URL is never treated as a skip target.
        return False
    if not host:
        return False
    # A fully-qualified name with a trailing dot ("discord.com.") names the
    # same host, so normalise it before the lookup.
    return host.rstrip(".") in _SKIP_HOSTS
54+
2855
# 轮询最终状态的参数:每 2s 查一次,最多 30s
2956
_POLL_INTERVAL_SEC = 2.0
3057
_POLL_TIMEOUT_SEC = 30.0
@@ -61,6 +88,10 @@ async def on_message(self, message: discord.Message) -> None:
6188

6289
async def _handle_one_url(self, message: discord.Message, url: str) -> None:
6390
"""提交单个 URL,并根据后端响应给用户即时反馈 + 延迟最终状态通知。"""
91+
if _should_skip(url):
92+
# Discord 自身链接静默忽略——不提交、不回复、不打扰群聊
93+
log.debug("share_skip_blocked_host", url=url)
94+
return
6495
try:
6596
result = await submit_internal(
6697
base_url=self.settings.internal_submit_url,

tests/test_listener_skip.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""listener._should_skip 过滤 Discord 自身链接的测试。
2+
3+
不起 Discord.Client,只测这个纯函数就够了——Cog 层的集成测试价值低(见 README)。
4+
"""
5+
6+
from __future__ import annotations
7+
8+
import pytest
9+
10+
from chat_bot.cogs.listener import _should_skip
11+
12+
13+
# URLs on Discord's own hosts; every one of them must be filtered out.
_DISCORD_URLS = (
    # Deep link to a message
    "https://discord.com/channels/1243830688315342860/1243830688998752279/1496945561784549386",
    # Invite
    "https://discord.gg/invitecode",
    # Other main-site hosts
    "https://www.discord.com/",
    "https://canary.discord.com/channels/foo/bar",
    "https://ptb.discord.com/channels/foo/bar",
    # CDN / attachments
    "https://cdn.discordapp.com/attachments/xxx/yyy/file.png",
    "https://media.discordapp.net/attachments/xxx/yyy/image.jpg",
    "https://discordapp.com/something",
)


@pytest.mark.parametrize("url", _DISCORD_URLS)
def test_should_skip_discord_urls(url: str) -> None:
    """Each Discord-hosted URL is reported as skippable."""
    verdict = _should_skip(url)
    assert verdict is True
32+
33+
34+
# URLs that must pass through the filter untouched.
_NON_DISCORD_URLS = (
    "https://arxiv.org/abs/2501.00001",
    "https://mp.weixin.qq.com/s/abc",
    "https://github.com/InvolutionHell/ChatBot",
    "https://scholar.google.com/scholar?q=rag",
    # A host that merely resembles a blocked one, without matching it exactly,
    # must not be skipped (guards against a looser matching policy later on).
    "https://not-discord.com/x",
    "https://discord.com.evil.com/phishing",
)


@pytest.mark.parametrize("url", _NON_DISCORD_URLS)
def test_should_not_skip_other_urls(url: str) -> None:
    """Ordinary and look-alike hosts are not reported as skippable."""
    verdict = _should_skip(url)
    assert verdict is False
48+
49+
50+
def test_should_skip_handles_bad_url_gracefully() -> None:
    """Malformed input yields False instead of raising."""
    # Bad URLs must not raise; urlparse does not raise for most inputs, and
    # the function falls back to returning False.
    for malformed in ("not-a-url", ""):
        assert _should_skip(malformed) is False
54+
55+
56+
def test_should_skip_is_case_insensitive() -> None:
    """Host matching ignores letter case."""
    # Mixed-case hosts must be skipped too (URL hosts are lowercase in
    # practice, but this keeps the filter safe regardless).
    mixed_case_urls = (
        "https://DISCORD.com/channels/x/y/z",
        "https://Cdn.DiscordApp.com/file.png",
    )
    for candidate in mixed_case_urls:
        assert _should_skip(candidate) is True

0 commit comments

Comments
 (0)