Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/memory_layer/memcell_extractor/conv_memcell_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,57 @@
logger = get_logger(__name__)


def _normalize_message_content_value(value: Any) -> str:
"""Normalize message content into plain text for downstream memory extraction.

Handles plugin-wrapped/nested content structures (e.g. OpenClaw/Feishu payloads)
by recursively extracting textual fields and flattening them into a readable string.
"""
if value is None:
return ""
if isinstance(value, str):
return value
if isinstance(value, (int, float, bool)):
return str(value)

if isinstance(value, list):
parts = [_normalize_message_content_value(item) for item in value]
return "\n".join(part for part in parts if part)

if isinstance(value, dict):
preferred_keys = [
'text', 'content', 'message', 'body', 'value',
'output_text', 'input_text', 'title', 'description'
]

collected = []
seen = set()
for key in preferred_keys:
if key in value:
normalized = _normalize_message_content_value(value.get(key))
if normalized and normalized not in seen:
collected.append(normalized)
seen.add(normalized)

if collected:
return "\n".join(collected)

skip_keys = {
'type', 'role', 'id', '_id', 'msgType', 'timestamp', 'time',
'speaker_id', 'speaker_name', 'sender', 'sender_name', 'referList',
'metadata', 'extra', 'tool_calls', 'tool_call_id', 'arguments', 'name'
}
for key, nested in value.items():
if key in skip_keys:
continue
normalized = _normalize_message_content_value(nested)
if normalized:
return normalized
return ""

return str(value)


@dataclass
class BoundaryDetectionResult:
"""Boundary detection result."""
Expand Down Expand Up @@ -580,4 +631,16 @@ def _data_process(self, raw_data: RawData) -> Dict[str, Any]:
f"[ConvMemCellExtractor] Message type {msg_type} converted to placeholder: {placeholder}"
)

if isinstance(content, dict) and 'content' in content:
normalized_text = _normalize_message_content_value(content.get('content'))
if normalized_text != content.get('content'):
content = content.copy()
content['content'] = normalized_text

if isinstance(content, dict) and 'content' in content:
normalized_text = _normalize_message_content_value(content.get('content'))
if normalized_text != content.get('content'):
content = content.copy()
content['content'] = normalized_text

return content
49 changes: 49 additions & 0 deletions tests/test_conv_memcell_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,55 @@ def create_raw_data_list(self, messages: List[Dict[str, Any]]) -> List[RawData]:
raw_data_list.append(raw_data)
return raw_data_list

def test_data_process_normalizes_plugin_wrapped_content(self):
    """Nested plugin-wrapped content should come back as newline-joined plain text."""
    extractor = ConvMemCellExtractor(None)

    # Two text fragments buried at different depths of the wrapper payload.
    first_fragment = "今天讨论 EverMemOS 的修复方案"
    second_fragment = "需要过滤插件包裹结构"

    wrapped_content = {
        "type": "message",
        "content": [
            {"type": "text", "text": first_fragment},
            {"type": "tool_result", "content": {"text": second_fragment}},
        ],
        "metadata": {"plugin": "openclaw-feishu"},
    }
    message = {
        "speaker_id": "user_1",
        "speaker_name": "Alice",
        "content": wrapped_content,
        "timestamp": self.base_time.isoformat(),
        "msgType": 1,
    }

    result = extractor._data_process(
        RawData(content=message, data_id="wrapped_1", metadata={})
    )

    assert result is not None
    # Fragments are expected in payload order, separated by a newline.
    assert result["content"] == first_fragment + "\n" + second_fragment

def test_data_process_preserves_plain_text_content(self):
    """A message whose content is already a plain string must pass through untouched."""
    extractor = ConvMemCellExtractor(None)

    plain_text = "普通文本消息"
    message = {
        "speaker_id": "user_1",
        "speaker_name": "Alice",
        "content": plain_text,
        "timestamp": self.base_time.isoformat(),
        "msgType": 1,
    }

    result = extractor._data_process(
        RawData(content=message, data_id="plain_1", metadata={})
    )

    assert result is not None
    assert result["content"] == plain_text

def create_realistic_conversation(self) -> tuple[List[RawData], List[RawData]]:
"""Create realistic conversation scenario"""
# Historical conversation - Project discussion
Expand Down