diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 18cf28e..08b7d82 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -520,15 +520,20 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -591,15 +596,20 @@ fn search_in_table( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -806,6 +816,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + +/// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + let xml = strip_group_prefix(text); + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; @@ -1203,7 +1244,8 @@ pub async fn q_new_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map); let text = fmt_content(local_id, local_type, &content, is_group); - result.push(json!({ + let url = appmsg_url_for_message(local_type, &content); + let mut msg = json!({ "chat": display2, "username": uname2, "is_group": is_group, @@ -1213,7 +1255,11 @@ pub async fn q_new_messages( "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok::<_, anyhow::Error>(result) }).await { @@ -2126,6 +2172,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") }