From c6652a54b0c5ff7d9718cac83c776c41ff3758ab Mon Sep 17 00:00:00 2001 From: tsinghu Date: Tue, 21 Apr 2026 15:03:26 +0800 Subject: [PATCH 1/2] feat: expose url field for link/appmsg messages Extract <url> from appmsg XML in type-49 messages and append it as a 'url' field in history/search output. The field is omitted when the message has no valid URL (non-link types, empty, non-http). --- src/daemon/query.rs | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 18cf28e..abf91cc 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -520,15 +520,24 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); + let url = if (local_type as u64 & 0xFFFFFFFF) == 49 { + extract_appmsg_url(&content) + } else { + None + }; - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -591,15 +600,24 @@ fn search_in_table( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); + let url = if (local_type as u64 & 0xFFFFFFFF) == 49 { + extract_appmsg_url(&content) + } else { + None + }; - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ 
-806,6 +824,25 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +/// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + // 群消息前缀 "wxid_xxx:\n" 需先剥离 + let xml = if text.contains(":\n") { + text.splitn(2, ":\n").nth(1).unwrap_or(text) + } else { + text + }; + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; From e96d19c42c3e370ed86e64fe3079a2434623905d Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 21 Apr 2026 22:13:38 +0800 Subject: [PATCH 2/2] fix: normalize appmsg urls across query outputs --- src/daemon/query.rs | 116 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 95 insertions(+), 21 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index abf91cc..08b7d82 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -520,11 +520,7 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); - let url = if (local_type as u64 & 0xFFFFFFFF) == 49 { - extract_appmsg_url(&content) - } else { - None - }; + let url = appmsg_url_for_message(local_type, &content); let mut msg = json!({ "timestamp": ts, @@ -600,11 +596,7 @@ fn search_in_table( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); - let url = if (local_type as u64 & 0xFFFFFFFF) == 49 { - extract_appmsg_url(&content) - } else { - None - }; + let url = appmsg_url_for_message(local_type, &content); let mut msg = json!({ "timestamp": ts, @@ -824,20 +816,32 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn 
appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + /// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) fn extract_appmsg_url(text: &str) -> Option { - // 群消息前缀 "wxid_xxx:\n" 需先剥离 - let xml = if text.contains(":\n") { - text.splitn(2, ":\n").nth(1).unwrap_or(text) - } else { - text - }; + let xml = strip_group_prefix(text); if !xml.contains("(result) }).await { @@ -2163,6 +2172,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") }