@@ -1570,12 +1639,134 @@ export default function AgentImportWizard({
{t("market.install.config.description", "Please configure the following required fields for this agent and its sub-agents.")}
- {collapseItems.length > 0 ? (
-
+ {Object.keys(groupedFields).length > 0 ? (
+
+ {Object.entries(groupedFields)
+ .sort(([keyA], [keyB]) => {
+ // Main agent first
+ const mainAgentId = String(initialData?.agent_id);
+ if (keyA === mainAgentId) return -1;
+ if (keyB === mainAgentId) return 1;
+ return 0;
+ })
+ .map(([agentKey, agentGroup]) => (
+
+ {/* Agent Header */}
+
+
+ {agentKey === String(initialData?.agent_id) && (
+
+ {t("market.install.agent.main", "Main")}
+
+ )}
+ {agentGroup.agentDisplayName}
+
+
+
+ {/* Basic Fields */}
+ {agentGroup.basicFields.length > 0 && (
+ <>
+
+
+ {t("market.install.config.basicFields", "Basic Configuration")}
+
+
+
+ {agentGroup.basicFields.map((field) => {
+ const paramLabel = field.fieldLabel.replace(`${agentGroup.agentDisplayName} - `, "");
+ return (
+
+
+
+ {paramLabel}:
+
+ {
+ setConfigValues(prev => ({
+ ...prev,
+ [field.valueKey]: e.target.value,
+ }));
+ }}
+ placeholder={t("market.install.config.placeholderWithParam", { param: paramLabel })}
+ size="middle"
+ style={{ flex: 1 }}
+ className={needsConfig(field.currentValue) ? "bg-gray-50 dark:bg-gray-800" : ""}
+ />
+
+ {/* Show hint with clickable links if available */}
+ {field.promptHint && (
+
+
+ {parseMarkdownLinks(field.promptHint)}
+
+
+ )}
+
+ );
+ })}
+
+ >
+ )}
+
+ {/* Tools */}
+ {Object.entries(agentGroup.tools).map(([toolKey, toolGroup]) => (
+
+ {/* Tool Header */}
+
+
+
+ {toolGroup.toolName}
+
+
+
+ {/* Tool Parameters */}
+
+ {toolGroup.fields.map((field) => {
+ const toolMatch = field.fieldPath.match(/^tools\[\d+\]\.params\.(.+)$/);
+ const paramKey = toolMatch ? toolMatch[1] : field.fieldPath;
+ const paramLabel = paramKey.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
+
+ return (
+
+
+
+ {paramLabel}:
+
+ {
+ setConfigValues(prev => ({
+ ...prev,
+ [field.valueKey]: e.target.value,
+ }));
+ }}
+ placeholder={t("market.install.config.placeholderWithParam", { param: paramLabel })}
+ size="middle"
+ style={{ flex: 1 }}
+ className={needsConfig(field.currentValue) ? "bg-gray-50 dark:bg-gray-800" : ""}
+ />
+
+ {/* Show hint with clickable links if available */}
+ {field.promptHint && (
+
+
+ {parseMarkdownLinks(field.promptHint)}
+
+
+ )}
+
+ );
+ })}
+
+
+ ))}
+
+ ))}
+
) : (
{t("market.install.config.noFields", "No configuration fields required.")}
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index 3085e3aa2..de0a550ad 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -1272,9 +1272,11 @@
"market.install.config.description": "Please configure the following required fields for this agent and its sub-agents.",
"market.install.config.fields": "fields",
"market.install.config.noFields": "No configuration fields required.",
+ "market.install.config.basicFields": "Basic Configuration",
"market.install.agent.defaultName": "Agent",
"market.install.agent.main": "Main",
"market.install.config.placeholder": "Enter configuration value",
+ "market.install.config.placeholderWithParam": "Enter {{param}}",
"market.install.mcp.description": "This agent requires the following MCP servers. Please install or configure them.",
"market.install.mcp.installed": "Installed",
"market.install.mcp.notInstalled": "Not Installed",
@@ -1324,6 +1326,13 @@
"market.install.success.nameRegeneratedAndResolved": "Agent names regenerated successfully and all conflicts resolved",
"market.install.info.notImplemented": "Installation will be implemented in next phase",
"market.install.success": "Agent installed successfully!",
+ "market.install.warning.title": "Agent May Be Unusable",
+ "market.install.warning.description": "The following issues may make the agent unusable:",
+ "market.install.warning.nameConflict": "Unresolved name conflicts exist",
+ "market.install.warning.mcpNotInstalled": "Uninstalled MCP services exist",
+ "market.install.warning.question": "Do you want to continue with the installation anyway?",
+ "market.install.warning.continue": "Continue Anyway",
+ "market.install.warning.goBack": "Go Back to Configure",
"market.error.fetchDetailFailed": "Failed to load agent details",
"market.error.retry": "Retry",
"market.error.timeout.title": "Request Timeout",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 364b13cf8..4d44b4a17 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -1251,9 +1251,11 @@
"market.install.config.description": "请为该智能体及其子智能体配置以下必填字段。",
"market.install.config.fields": "个字段",
"market.install.config.noFields": "无需配置字段。",
+ "market.install.config.basicFields": "基础配置",
"market.install.agent.defaultName": "智能体",
"market.install.agent.main": "主",
"market.install.config.placeholder": "输入配置值",
+ "market.install.config.placeholderWithParam": "输入 {{param}}",
"market.install.mcp.description": "该智能体需要以下 MCP 服务器。请安装或配置它们。",
"market.install.mcp.installed": "已安装",
"market.install.mcp.notInstalled": "未安装",
@@ -1303,6 +1305,13 @@
"market.install.success.nameRegeneratedAndResolved": "智能体名称重新生成成功,且所有冲突已解决",
"market.install.info.notImplemented": "安装功能将在下一阶段实现",
"market.install.success": "智能体安装成功!",
+ "market.install.warning.title": "智能体可能不可用",
+ "market.install.warning.description": "以下问题可能导致智能体不可用:",
+ "market.install.warning.nameConflict": "存在未解决的名称冲突",
+ "market.install.warning.mcpNotInstalled": "存在未安装的MCP服务",
+ "market.install.warning.question": "您确定要继续安装吗?",
+ "market.install.warning.continue": "仍要继续",
+ "market.install.warning.goBack": "返回配置",
"market.error.fetchDetailFailed": "加载智能体详情失败",
"market.error.retry": "重试",
"market.error.timeout.title": "请求超时",
From cbec7a5fbd11c4a7d944db183c6d4552d01d3c9d Mon Sep 17 00:00:00 2001
From: zhizhi <928570418@qq.com>
Date: Thu, 22 Jan 2026 09:49:02 +0800
Subject: [PATCH 29/48] =?UTF-8?q?=E2=9C=A8Develop=20datamate=20core=20part?=
=?UTF-8?q?1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
backend/consts/const.py | 3 +-
backend/consts/model.py | 8 +
.../services/tool_configuration_service.py | 2 +-
backend/services/vectordatabase_service.py | 6 +-
backend/services/voice_service.py | 32 +-
sdk/nexent/__init__.py | 3 +-
sdk/nexent/core/agents/nexent_agent.py | 6 +
.../core/tools/analyze_text_file_tool.py | 11 +-
sdk/nexent/core/tools/datamate_search_tool.py | 194 ++---
sdk/nexent/datamate/__init__.py | 7 +
sdk/nexent/datamate/datamate_client.py | 377 +++++++++
sdk/nexent/vector_database/__init__.py | 5 +
sdk/nexent/vector_database/datamate_core.py | 251 ++++++
.../backend/app/test_knowledge_summary_app.py | 5 +
.../test_conversation_management_service.py | 90 ++-
.../test_tool_configuration_service.py | 13 +
.../services/test_vectordatabase_service.py | 14 +
test/pytest.ini | 2 +-
test/sdk/core/agents/test_nexent_agent.py | 80 ++
test/sdk/core/models/test_openai_llm.py | 61 ++
.../core/tools/test_analyze_text_file_tool.py | 1 -
.../core/tools/test_datamate_search_tool.py | 501 ++++++------
test/sdk/datamate/test_datamate_client.py | 615 +++++++++++++++
test/sdk/vector_database/__init__.py | 0
.../sdk/vector_database/test_datamate_core.py | 157 ++++
.../test_elasticsearch_core.py | 103 ++-
.../test_elasticsearch_core_coverage.py | 731 ------------------
27 files changed, 2066 insertions(+), 1212 deletions(-)
create mode 100644 sdk/nexent/datamate/__init__.py
create mode 100644 sdk/nexent/datamate/datamate_client.py
create mode 100644 sdk/nexent/vector_database/datamate_core.py
create mode 100644 test/sdk/datamate/test_datamate_client.py
create mode 100644 test/sdk/vector_database/__init__.py
create mode 100644 test/sdk/vector_database/test_datamate_core.py
delete mode 100644 test/sdk/vector_database/test_elasticsearch_core_coverage.py
diff --git a/backend/consts/const.py b/backend/consts/const.py
index a76227614..6fdefdaee 100644
--- a/backend/consts/const.py
+++ b/backend/consts/const.py
@@ -14,6 +14,7 @@
# Vector database providers
class VectorDatabaseType(str, Enum):
ELASTICSEARCH = "elasticsearch"
+ DATAMATE = "datamate"
# Elasticsearch Configuration
@@ -23,7 +24,6 @@ class VectorDatabaseType(str, Enum):
ES_USERNAME = "elastic"
ELASTICSEARCH_SERVICE = os.getenv("ELASTICSEARCH_SERVICE")
-
# Data Processing Service Configuration
DATA_PROCESS_SERVICE = os.getenv("DATA_PROCESS_SERVICE")
CLIP_MODEL_PATH = os.getenv("CLIP_MODEL_PATH")
@@ -253,6 +253,7 @@ class VectorDatabaseType(str, Enum):
TENANT_NAME = "TENANT_NAME"
TENANT_ID = "TENANT_ID"
DEFAULT_GROUP_ID = "DEFAULT_GROUP_ID"
+DATAMATE_URL = "DATAMATE_URL"
# Task Status Constants
TASK_STATUS = {
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 633a1fc82..8a0ef3f13 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -460,6 +460,14 @@ class MCPConfigRequest(BaseModel):
..., description="Dictionary of MCP server configurations")
+class UpdateKnowledgeListRequest(BaseModel):
+ """Request model for updating user's selected knowledge base list grouped by source"""
+ nexent: Optional[List[str]] = Field(
+ None, description="List of knowledge base index names from nexent source")
+ datamate: Optional[List[str]] = Field(
+ None, description="List of knowledge base index names from datamate source")
+
+
# Tenant Management Data Models
# ---------------------------------------------------------------------------
class TenantCreateRequest(BaseModel):
diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py
index 24ca69ce5..bd7ab8ffd 100644
--- a/backend/services/tool_configuration_service.py
+++ b/backend/services/tool_configuration_service.py
@@ -502,7 +502,7 @@ def _validate_local_tool(
user_id: User ID for knowledge base tools (optional)
Returns:
- Dict[str, Any]: The actual result returned by the tool's forward method,
+ Dict[str, Any]: The actual result returned by the tool's forward method,
serving as proof that the tool works correctly
Raises:
diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py
index 92d7da368..4dd8070e4 100644
--- a/backend/services/vectordatabase_service.py
+++ b/backend/services/vectordatabase_service.py
@@ -23,8 +23,9 @@
from nexent.core.models.embedding_model import OpenAICompatibleEmbedding, JinaEmbedding, BaseEmbedding
from nexent.vector_database.base import VectorDatabaseCore
from nexent.vector_database.elasticsearch_core import ElasticSearchCore
+from nexent.vector_database.datamate_core import DataMateCore
-from consts.const import ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE
+from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE
from consts.model import ChunkCreateRequest, ChunkUpdateRequest
from database.attachment_db import delete_file
from database.knowledge_db import (
@@ -111,6 +112,9 @@ def get_vector_db_core(
ssl_show_warn=False,
)
+ if db_type == VectorDatabaseType.DATAMATE:
+ return DataMateCore(base_url=DATAMATE_URL)
+
raise ValueError(f"Unsupported vector database type: {db_type}")
diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py
index 0bffec895..05dba6231 100644
--- a/backend/services/voice_service.py
+++ b/backend/services/voice_service.py
@@ -48,10 +48,10 @@ def __init__(self):
async def start_stt_streaming_session(self, websocket) -> None:
"""
Start STT streaming session
-
+
Args:
websocket: WebSocket connection for real-time audio streaming
-
+
Raises:
STTConnectionException: If STT streaming fails
"""
@@ -65,20 +65,20 @@ async def start_stt_streaming_session(self, websocket) -> None:
async def generate_tts_speech(self, text: str, stream: bool = True) -> Any:
"""
Generate TTS speech from text
-
+
Args:
text: Text to convert to speech
stream: Whether to stream the audio or return complete audio
-
+
Returns:
Audio data (streaming or complete)
-
+
Raises:
TTSConnectionException: If TTS generation fails
"""
if not text:
raise VoiceServiceException("No text provided for TTS generation")
-
+
try:
logger.info(f"Generating TTS speech for text: {text[:50]}...")
speech_result = await self.tts_model.generate_speech(text, stream=stream)
@@ -90,11 +90,11 @@ async def generate_tts_speech(self, text: str, stream: bool = True) -> Any:
async def stream_tts_to_websocket(self, websocket, text: str) -> None:
"""
Stream TTS audio to WebSocket with proper error handling and fallback
-
+
Args:
websocket: WebSocket connection to stream to
text: Text to convert to speech
-
+
Raises:
TTSConnectionException: If TTS service connection fails
VoiceServiceException: If TTS streaming fails
@@ -142,10 +142,10 @@ async def stream_tts_to_websocket(self, websocket, text: str) -> None:
async def check_stt_connectivity(self) -> bool:
"""
Check STT service connectivity
-
+
Returns:
bool: True if STT service is connected, False otherwise
-
+
Raises:
STTConnectionException: If connectivity check fails
"""
@@ -165,10 +165,10 @@ async def check_stt_connectivity(self) -> bool:
async def check_tts_connectivity(self) -> bool:
"""
Check TTS service connectivity
-
+
Returns:
bool: True if TTS service is connected, False otherwise
-
+
Raises:
TTSConnectionException: If connectivity check fails
"""
@@ -188,13 +188,13 @@ async def check_tts_connectivity(self) -> bool:
async def check_voice_connectivity(self, model_type: str) -> bool:
"""
Check voice service connectivity based on model type
-
+
Args:
model_type: Type of model to check ('stt' or 'tts')
-
+
Returns:
bool: True if the specified service is connected, False otherwise
-
+
Raises:
VoiceServiceException: If model_type is invalid
STTConnectionException: If STT connectivity check fails
@@ -222,7 +222,7 @@ async def check_voice_connectivity(self, model_type: str) -> bool:
def get_voice_service() -> VoiceService:
"""
Get the global voice service instance
-
+
Returns:
VoiceService: The global voice service instance
"""
diff --git a/sdk/nexent/__init__.py b/sdk/nexent/__init__.py
index a7242e554..425f820fb 100644
--- a/sdk/nexent/__init__.py
+++ b/sdk/nexent/__init__.py
@@ -1,9 +1,10 @@
from .core import *
from .data_process import *
+from .datamate import *
from .memory import *
from .storage import *
from .vector_database import *
from .container import *
-__all__ = ["core", "data_process", "memory", "storage", "vector_database", "container"]
\ No newline at end of file
+__all__ = ["core", "data_process", "memory", "storage", "vector_database", "container", "datamate"]
\ No newline at end of file
diff --git a/sdk/nexent/core/agents/nexent_agent.py b/sdk/nexent/core/agents/nexent_agent.py
index 290dfb45e..12d7737df 100644
--- a/sdk/nexent/core/agents/nexent_agent.py
+++ b/sdk/nexent/core/agents/nexent_agent.py
@@ -89,6 +89,12 @@ def create_local_tool(self, tool_config: ToolConfig):
name_resolver = tool_config.metadata.get(
"name_resolver", None) if tool_config.metadata else None
tools_obj.name_resolver = {} if name_resolver is None else name_resolver
+ elif class_name == "DataMateSearchTool":
+ tools_obj = tool_class(**params)
+ tools_obj.observer = self.observer
+ index_names = tool_config.metadata.get(
+ "index_names", None) if tool_config.metadata else None
+ tools_obj.index_names = [] if index_names is None else index_names
elif class_name == "AnalyzeTextFileTool":
tools_obj = tool_class(observer=self.observer,
llm_model=tool_config.metadata.get("llm_model", []),
diff --git a/sdk/nexent/core/tools/analyze_text_file_tool.py b/sdk/nexent/core/tools/analyze_text_file_tool.py
index 43cecb742..78b78543d 100644
--- a/sdk/nexent/core/tools/analyze_text_file_tool.py
+++ b/sdk/nexent/core/tools/analyze_text_file_tool.py
@@ -26,14 +26,14 @@
class AnalyzeTextFileTool(Tool):
"""Tool for analyzing text file content using a large language model"""
-
+
name = "analyze_text_file"
description = (
"Extract content from text files and analyze them using a large language model based on your query. "
"Supports multiple files from S3 URLs (s3://bucket/key or /bucket/key), HTTP, and HTTPS URLs. "
"The tool will extract the text content from each file and return an analysis based on your question."
)
-
+
inputs = {
"file_url_list": {
"type": "array",
@@ -75,6 +75,7 @@ def __init__(
self.llm_model = llm_model
self.data_process_service_url = data_process_service_url
self.mm = LoadSaveObjectManager(storage_client=self.storage_client)
+ self.time_out = 60 * 5
self.running_prompt_zh = "正在分析文件..."
self.running_prompt_en = "Analyzing file..."
@@ -137,7 +138,7 @@ def _forward_impl(
analysis_results.append(str(analysis_error))
return analysis_results
-
+
except Exception as e:
logger.error(f"Error analyzing text file: {str(e)}", exc_info=True)
error_msg = f"Error analyzing text file: {str(e)}"
@@ -160,9 +161,9 @@ def process_text_file(self, filename: str, file_content: bytes,) -> str:
}
data = {
'chunking_strategy': 'basic',
- 'timeout': 60
+ 'timeout': self.time_out,
}
- with httpx.Client(timeout=60) as client:
+ with httpx.Client(timeout=self.time_out) as client:
response = client.post(api_url, files=files, data=data)
if response.status_code == 200:
diff --git a/sdk/nexent/core/tools/datamate_search_tool.py b/sdk/nexent/core/tools/datamate_search_tool.py
index bf1009269..60eb0415d 100644
--- a/sdk/nexent/core/tools/datamate_search_tool.py
+++ b/sdk/nexent/core/tools/datamate_search_tool.py
@@ -1,19 +1,27 @@
import json
import logging
-from typing import List, Optional
+from typing import Optional, List, Union
-import httpx
from pydantic import Field
from smolagents.tools import Tool
+from ...vector_database import DataMateCore
from ..utils.observer import MessageObserver, ProcessType
from ..utils.tools_common_message import SearchResultTextMessage, ToolCategory, ToolSign
-
# Get logger instance
logger = logging.getLogger("datamate_search_tool")
+def _normalize_index_names(index_names: Optional[Union[str, List[str]]]) -> List[str]:
+ """Normalize index_names to list; accept single string and keep None as empty list."""
+ if index_names is None:
+ return []
+ if isinstance(index_names, str):
+ return [index_names]
+ return list(index_names)
+
+
class DataMateSearchTool(Tool):
"""DataMate knowledge base search tool"""
name = "datamate_search_tool"
@@ -41,6 +49,11 @@ class DataMateSearchTool(Tool):
"default": 0.2,
"nullable": True,
},
+ "index_names": {
+ "type": "array",
+ "description": "The list of knowledge base names to search (supports user-facing knowledge_name or internal index_name). If not provided, will search all available knowledge bases.",
+ "nullable": True,
+ },
"kb_page": {
"type": "integer",
"description": "Page index when listing knowledge bases from DataMate.",
@@ -64,7 +77,10 @@ def __init__(
self,
server_ip: str = Field(description="DataMate server IP or hostname"),
server_port: int = Field(description="DataMate server port"),
- observer: MessageObserver = Field(description="Message observer", default=None, exclude=True),
+ index_names: List[str] = Field(
+ description="The list of index names to search", default=None, exclude=True),
+ observer: MessageObserver = Field(
+ description="Message observer", default=None, exclude=True),
):
"""Initialize the DataMateSearchTool.
@@ -79,14 +95,20 @@ def __init__(
raise ValueError("server_ip is required for DataMateSearchTool")
if not isinstance(server_port, int) or not (1 <= server_port <= 65535):
- raise ValueError("server_port must be an integer between 1 and 65535")
+ raise ValueError(
+ "server_port must be an integer between 1 and 65535")
# Store raw host and port
self.server_ip = server_ip.strip()
self.server_port = server_port
+ self.index_names = [] if index_names is None else index_names
# Build base URL: http://host:port
- self.server_base_url = f"http://{self.server_ip}:{self.server_port}".rstrip("/")
+ self.server_base_url = f"http://{self.server_ip}:{self.server_port}".rstrip(
+ "/")
+
+ # Initialize DataMate vector database core
+ self.datamate_core = DataMateCore(base_url=self.server_base_url)
self.kb_page = 0
self.kb_page_size = 20
@@ -101,6 +123,7 @@ def forward(
query: str,
top_k: int = 10,
threshold: float = 0.2,
+ index_names: Union[str, List[str], None] = None,
kb_page: int = 0,
kb_page_size: int = 20,
) -> str:
@@ -110,6 +133,7 @@ def forward(
query: Search query text.
top_k: Optional override for maximum number of search results.
threshold: Optional override for similarity threshold.
+ index_names: The list of knowledge base names to search (supports user-facing knowledge_name or internal index_name). If not provided, will search all available knowledge bases.
kb_page: Optional override for knowledge base list page index.
kb_page_size: Optional override for knowledge base list page size.
"""
@@ -122,25 +146,36 @@ def forward(
running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
self.observer.add_message("", ProcessType.TOOL, running_prompt)
card_content = [{"icon": "search", "text": query}]
- self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
+ self.observer.add_message("", ProcessType.CARD, json.dumps(
+ card_content, ensure_ascii=False))
logger.info(
f"DataMateSearchTool called with query: '{query}', base_url: '{self.server_base_url}', "
- f"top_k: {top_k}, threshold: {threshold}"
+ f"top_k: {top_k}, threshold: {threshold}, index_names: {index_names}"
)
try:
- # Step 1: Get knowledge base list
- knowledge_base_ids = self._get_knowledge_base_list()
- if not knowledge_base_ids:
- return json.dumps("No knowledge base found. No relevant information found.", ensure_ascii=False)
-
- # Step 2: Retrieve knowledge base content
- kb_search_results = self._retrieve_knowledge_base_content(query, knowledge_base_ids, top_k, threshold
- )
-
- if not kb_search_results:
- raise Exception("No results found! Try a less restrictive/shorter query.")
+ # Step 1: Determine knowledge base IDs to search
+ # Use the index_names argument when it is not None; otherwise fall back to self.index_names
+ knowledge_base_ids = _normalize_index_names(
+ index_names if index_names is not None else self.index_names)
+
+ if len(knowledge_base_ids) == 0:
+ return json.dumps("No knowledge base selected. No relevant information found.", ensure_ascii=False)
+
+ # Step 2: Retrieve knowledge base content using DataMateCore hybrid search
+ kb_search_results = []
+ for knowledge_base_id in knowledge_base_ids:
+ kb_search = self.datamate_core.hybrid_search(
+ query_text=query,
+ index_names=[knowledge_base_id],
+ top_k=top_k,
+ weight_accurate=threshold,
+ )
+ if not kb_search:
+ raise Exception(
+ "No results found! Try a less restrictive/shorter query.")
+ kb_search_results.extend(kb_search)
# Format search results
search_results_json = [] # Organize search results into a unified format
@@ -149,9 +184,11 @@ def forward(
# Extract fields from DataMate API response
entity_data = single_search_result.get("entity", {})
metadata = self._parse_metadata(entity_data.get("metadata"))
- dataset_id = self._extract_dataset_id(metadata.get("absolute_directory_path", ""))
+ dataset_id = self._extract_dataset_id(
+ metadata.get("absolute_directory_path", ""))
file_id = metadata.get("original_file_id")
- download_url = self._build_file_download_url(dataset_id, file_id)
+ download_url = self.datamate_core.client.build_file_download_url(
+ dataset_id, file_id)
score_details = entity_data.get("scoreDetails", {}) or {}
score_details.update({
@@ -176,14 +213,17 @@ def forward(
)
search_results_json.append(search_result_message.to_dict())
- search_results_return.append(search_result_message.to_model_dict())
+ search_results_return.append(
+ search_result_message.to_model_dict())
self.record_ops += len(search_results_return)
# Record the detailed content of this search
if self.observer:
- search_results_data = json.dumps(search_results_json, ensure_ascii=False)
- self.observer.add_message("", ProcessType.SEARCH_CONTENT, search_results_data)
+ search_results_data = json.dumps(
+ search_results_json, ensure_ascii=False)
+ self.observer.add_message(
+ "", ProcessType.SEARCH_CONTENT, search_results_data)
return json.dumps(search_results_return, ensure_ascii=False)
except Exception as e:
@@ -191,100 +231,6 @@ def forward(
logger.error(error_msg)
raise Exception(error_msg)
- def _get_knowledge_base_list(self) -> List[str]:
- """Get knowledge base list from DataMate API.
-
- Returns:
- List[str]: List of knowledge base IDs.
- """
- try:
- url = f"{self.server_base_url}/api/knowledge-base/list"
- payload = {"page": self.kb_page, "size": self.kb_page_size}
-
- with httpx.Client(timeout=30) as client:
- response = client.post(url, json=payload)
-
- if response.status_code != 200:
- error_detail = (
- response.json().get("detail", "unknown error")
- if response.headers.get("content-type", "").startswith("application/json")
- else response.text
- )
- raise Exception(f"Failed to get knowledge base list (status {response.status_code}): {error_detail}")
-
- result = response.json()
- # Extract knowledge base IDs from response
- # Assuming the response structure contains a list of knowledge bases with 'id' field
- data = result.get("data", {})
- knowledge_bases = data.get("content", []) if data else []
-
- knowledge_base_ids = []
- for kb in knowledge_bases:
- kb_id = kb.get("id")
- chunk_count = kb.get("chunkCount")
- if kb_id and chunk_count:
- knowledge_base_ids.append(str(kb_id))
-
- logger.info(f"Retrieved {len(knowledge_base_ids)} knowledge base(s): {knowledge_base_ids}")
- return knowledge_base_ids
-
- except httpx.TimeoutException:
- raise Exception("Timeout while getting knowledge base list from DataMate API")
- except httpx.RequestError as e:
- raise Exception(f"Request error while getting knowledge base list: {str(e)}")
- except Exception as e:
- raise Exception(f"Error getting knowledge base list: {str(e)}")
-
- def _retrieve_knowledge_base_content(
- self, query: str, knowledge_base_ids: List[str], top_k: int, threshold: float
- ) -> List[dict]:
- """Retrieve knowledge base content from DataMate API.
-
- Args:
- query (str): Search query.
- knowledge_base_ids (List[str]): List of knowledge base IDs to search.
- top_k (int): Maximum number of results to return.
- threshold (float): Similarity threshold.
-
- Returns:
- List[dict]: List of search results.
- """
- search_results = []
- for knowledge_base_id in knowledge_base_ids:
- try:
- url = f"{self.server_base_url}/api/knowledge-base/retrieve"
- payload = {
- "query": query,
- "topK": top_k,
- "threshold": threshold,
- "knowledgeBaseIds": [knowledge_base_id],
- }
-
- with httpx.Client(timeout=60) as client:
- response = client.post(url, json=payload)
-
- if response.status_code != 200:
- error_detail = (
- response.json().get("detail", "unknown error")
- if response.headers.get("content-type", "").startswith("application/json")
- else response.text
- )
- raise Exception(
- f"Failed to retrieve knowledge base content (status {response.status_code}): {error_detail}")
-
- result = response.json()
- # Extract search results from response
- for data in result.get("data", {}):
- search_results.append(data)
- except httpx.TimeoutException:
- raise Exception("Timeout while retrieving knowledge base content from DataMate API")
- except httpx.RequestError as e:
- raise Exception(f"Request error while retrieving knowledge base content: {str(e)}")
- except Exception as e:
- raise Exception(f"Error retrieving knowledge base content: {str(e)}")
- logger.info(f"Retrieved {len(search_results)} search result(s)")
- return search_results
-
@staticmethod
def _parse_metadata(metadata_raw: Optional[str]) -> dict:
"""Parse metadata payload safely."""
@@ -295,7 +241,8 @@ def _parse_metadata(metadata_raw: Optional[str]) -> dict:
try:
return json.loads(metadata_raw)
except (json.JSONDecodeError, TypeError):
- logger.warning("Failed to parse metadata payload, falling back to empty metadata.")
+ logger.warning(
+ "Failed to parse metadata payload, falling back to empty metadata.")
return {}
@staticmethod
@@ -303,11 +250,6 @@ def _extract_dataset_id(absolute_path: str) -> str:
"""Extract dataset identifier from an absolute directory path."""
if not absolute_path:
return ""
- segments = [segment for segment in absolute_path.strip("/").split("/") if segment]
+ segments = [segment for segment in absolute_path.strip(
+ "/").split("/") if segment]
return segments[-1] if segments else ""
-
- def _build_file_download_url(self, dataset_id: str, file_id: str) -> str:
- """Build the download URL for a dataset file."""
- if not (self.server_base_url and dataset_id and file_id):
- return ""
- return f"{self.server_base_url}/api/data-management/datasets/{dataset_id}/files/{file_id}/download"
\ No newline at end of file
diff --git a/sdk/nexent/datamate/__init__.py b/sdk/nexent/datamate/__init__.py
new file mode 100644
index 000000000..c5a345632
--- /dev/null
+++ b/sdk/nexent/datamate/__init__.py
@@ -0,0 +1,7 @@
+"""
+DataMate SDK client for interacting with DataMate knowledge base APIs.
+"""
+from .datamate_client import DataMateClient
+
+__all__ = ["DataMateClient"]
+
diff --git a/sdk/nexent/datamate/datamate_client.py b/sdk/nexent/datamate/datamate_client.py
new file mode 100644
index 000000000..ee76625ce
--- /dev/null
+++ b/sdk/nexent/datamate/datamate_client.py
@@ -0,0 +1,377 @@
+"""
+DataMate API client for datamate knowledge base operations.
+
+This SDK provides a unified interface for interacting with DataMate knowledge base APIs,
+including listing knowledge bases, retrieving files, and retrieving content.
+"""
+import logging
+from typing import Dict, List, Optional, Any
+import httpx
+
+logger = logging.getLogger("datamate_client")
+
+
+class DataMateClient:
+ """
+ Client for interacting with DataMate knowledge base APIs.
+
+ This client encapsulates all DataMate API calls and provides a clean interface
+ for datamate knowledge base operations.
+ """
+
+    def __init__(self, base_url: str, timeout: float = 30.0):
+        """
+        Initialize DataMate client.
+
+        Args:
+            base_url: Base URL of DataMate server (e.g., "http://localhost:30000"); trailing "/" is stripped
+            timeout: Default request timeout in seconds (default: 30.0)
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+        logger.info(f"Initialized DataMateClient with base_url: {self.base_url}")
+
+ def _build_url(self, path: str) -> str:
+ """Build full URL from path."""
+ if path.startswith("/"):
+ return f"{self.base_url}{path}"
+ return f"{self.base_url}/{path}"
+
+ def _build_headers(self, authorization: Optional[str] = None) -> Dict[str, str]:
+ """
+ Build request headers with optional authorization.
+
+ Args:
+ authorization: Optional authorization header value
+
+ Returns:
+ Dictionary of headers
+ """
+ headers = {}
+ if authorization:
+ headers["Authorization"] = authorization
+ return headers
+
+    def _handle_error_response(self, response: httpx.Response, error_message: str) -> None:
+        """
+        Raise an exception describing a failed HTTP response.
+
+        Args:
+            response: HTTP response object
+            error_message: Base error message to include in exception (e.g., "Failed to get knowledge base list")
+
+        Raises:
+            Exception: Always; message carries the status code and the best available detail
+        """
+        is_json = response.headers.get("content-type", "").startswith("application/json")
+        try:
+            error_detail = response.json().get("detail", "unknown error") if is_json else response.text
+        except (ValueError, AttributeError):  # malformed JSON, or a JSON body that is not an object
+            error_detail = response.text
+        raise Exception(f"{error_message} (status {response.status_code}): {error_detail}")
+
+ def _make_request(
+ self,
+ method: str,
+ url: str,
+ headers: Dict[str, str],
+ json: Optional[Dict[str, Any]] = None,
+ timeout: Optional[float] = None,
+ error_message: str = "Request failed"
+ ) -> httpx.Response:
+ """
+ Make HTTP request with error handling.
+
+ Args:
+ method: HTTP method ("GET" or "POST")
+ url: Request URL
+ headers: Request headers
+ json: Optional JSON payload for POST requests
+ timeout: Optional timeout override
+ error_message: Error message to use if request fails
+
+ Returns:
+ HTTP response object
+
+ Raises:
+ Exception: If the request fails (with detailed error message)
+ """
+ request_timeout = timeout if timeout is not None else self.timeout
+
+ with httpx.Client(timeout=request_timeout) as client:
+ if method.upper() == "GET":
+ response = client.get(url, headers=headers)
+ elif method.upper() == "POST":
+ response = client.post(url, json=json, headers=headers)
+ else:
+ raise ValueError(f"Unsupported HTTP method: {method}")
+
+ if response.status_code != 200:
+ self._handle_error_response(response, error_message)
+
+ return response
+
+ def list_knowledge_bases(
+ self,
+ page: int = 0,
+ size: int = 20,
+ authorization: Optional[str] = None
+ ) -> List[Dict[str, Any]]:
+ """
+ Get list of knowledge bases from DataMate.
+
+ Args:
+ page: Page index (default: 0)
+ size: Page size (default: 20)
+ authorization: Optional authorization header
+
+ Returns:
+ List of knowledge base dictionaries with their IDs and metadata.
+
+ Raises:
+ RuntimeError: If the API request fails
+ """
+ try:
+ url = self._build_url("/api/knowledge-base/list")
+ payload = {"page": page, "size": size}
+ headers = self._build_headers(authorization)
+
+ logger.info(f"Fetching DataMate knowledge bases from: {url}, page={page}, size={size}")
+
+ response = self._make_request("POST", url, headers, json=payload, error_message="Failed to get knowledge base list")
+ data = response.json()
+
+ # Extract knowledge base list from response
+ knowledge_bases = []
+ if data.get("data"):
+ knowledge_bases = data.get("data").get("content", [])
+
+ logger.info(f"Successfully fetched {len(knowledge_bases)} knowledge bases from DataMate")
+ return knowledge_bases
+
+ except httpx.HTTPError as e:
+ logger.error(f"HTTP error while fetching DataMate knowledge bases: {str(e)}")
+ raise RuntimeError(f"Failed to fetch DataMate knowledge bases: {str(e)}")
+ except Exception as e:
+ logger.error(f"Unexpected error while fetching DataMate knowledge bases: {str(e)}")
+ raise RuntimeError(f"Failed to fetch DataMate knowledge bases: {str(e)}")
+
+ def get_knowledge_base_files(
+ self,
+ knowledge_base_id: str,
+ authorization: Optional[str] = None
+ ) -> List[Dict[str, Any]]:
+ """
+ Get file list for a specific DataMate knowledge base.
+
+ Args:
+ knowledge_base_id: The ID of the knowledge base
+ authorization: Optional authorization header
+
+ Returns:
+ List of file dictionaries with name, status, size, upload_date, etc.
+
+ Raises:
+ RuntimeError: If the API request fails
+ """
+ try:
+ url = self._build_url(f"/api/knowledge-base/{knowledge_base_id}/files")
+ logger.info(f"Fetching files for DataMate knowledge base {knowledge_base_id} from: {url}")
+
+ headers = self._build_headers(authorization)
+ response = self._make_request("GET", url, headers, error_message="Failed to get knowledge base files")
+ data = response.json()
+
+ # Extract file list from response
+ files = []
+ if data.get("data"):
+ files = data.get("data").get("content", [])
+
+ logger.info(f"Successfully fetched {len(files)} files for datamate knowledge base {knowledge_base_id}")
+ return files
+
+ except httpx.HTTPError as e:
+ logger.error(f"HTTP error while fetching files for datamate knowledge base {knowledge_base_id}: {str(e)}")
+ raise RuntimeError(f"Failed to fetch files for datamate knowledge base {knowledge_base_id}: {str(e)}")
+ except Exception as e:
+ logger.error(f"Unexpected error while fetching files for datamate knowledge base {knowledge_base_id}: {str(e)}")
+ raise RuntimeError(f"Failed to fetch files for datamate knowledge base {knowledge_base_id}: {str(e)}")
+
+    def get_knowledge_base_info(
+        self,
+        knowledge_base_id: str,
+        authorization: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Get details for a specific DataMate knowledge base.
+
+        Args:
+            knowledge_base_id: The ID of the knowledge base
+            authorization: Optional authorization header
+
+        Returns:
+            Dictionary containing knowledge base details (empty if the response has no "data").
+
+        Raises:
+            RuntimeError: If the API request fails
+        """
+        try:
+            url = self._build_url(f"/api/knowledge-base/{knowledge_base_id}")
+            logger.info(f"Fetching details for DataMate knowledge base {knowledge_base_id} from: {url}")
+
+            headers = self._build_headers(authorization)
+            response = self._make_request("GET", url, headers, error_message="Failed to get knowledge base details")
+            data = response.json()
+
+            # "data" may be absent or explicitly null; always hand callers a dict
+            knowledge_base = data.get("data") or {}
+
+            logger.info(f"Successfully fetched details for datamate knowledge base {knowledge_base_id}")
+            return knowledge_base
+
+        except httpx.HTTPError as e:
+            logger.error(f"HTTP error while fetching details for datamate knowledge base {knowledge_base_id}: {str(e)}")
+            raise RuntimeError(f"Failed to fetch details for datamate knowledge base {knowledge_base_id}: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"Unexpected error while fetching details for datamate knowledge base {knowledge_base_id}: {str(e)}")
+            raise RuntimeError(f"Failed to fetch details for datamate knowledge base {knowledge_base_id}: {str(e)}") from e
+
+    def retrieve_knowledge_base(
+        self,
+        query: str,
+        knowledge_base_ids: List[str],
+        top_k: int = 10,
+        threshold: float = 0.2,
+        authorization: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Retrieve content in DataMate knowledge bases.
+
+        Args:
+            query: Retrieve query text
+            knowledge_base_ids: List of knowledge base IDs to retrieve
+            top_k: Maximum number of results to return (default: 10)
+            threshold: Similarity threshold (default: 0.2)
+            authorization: Optional authorization header
+
+        Returns:
+            List of retrieve result dictionaries (empty when the response has no "data")
+
+        Raises:
+            RuntimeError: If the API request fails
+        """
+        try:
+            url = self._build_url("/api/knowledge-base/retrieve")
+            payload = {
+                "query": query,
+                "topK": top_k,
+                "threshold": threshold,
+                "knowledgeBaseIds": knowledge_base_ids,
+            }
+
+            headers = self._build_headers(authorization)
+
+            logger.info(
+                f"Retrieving DataMate knowledge bases: query='{query}', "
+                f"knowledge_base_ids={knowledge_base_ids}, top_k={top_k}, threshold={threshold}"
+            )
+
+            # Longer timeout for retrieve operation
+            response = self._make_request(
+                "POST", url, headers, json=payload, timeout=self.timeout * 2,
+                error_message="Failed to retrieve knowledge base content"
+            )
+
+            data = response.json()
+            # "data" can be absent or null; treat both as "no results" instead of failing
+            # on iteration, then copy into a fresh list
+            raw_results = data.get("data") or []
+            search_results = list(raw_results)
+
+            logger.info(f"Successfully retrieved {len(search_results)} retrieve result(s)")
+            return search_results
+
+        except httpx.HTTPError as e:
+            logger.error(f"HTTP error while retrieving DataMate knowledge bases: {str(e)}")
+            raise RuntimeError(f"Failed to retrieve DataMate knowledge bases: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"Unexpected error while retrieving DataMate knowledge bases: {str(e)}")
+            raise RuntimeError(f"Failed to retrieve DataMate knowledge bases: {str(e)}") from e
+
+ def build_file_download_url(self, dataset_id: str, file_id: str) -> str:
+ """
+ Build download URL for a DataMate file.
+
+ Args:
+ dataset_id: Dataset ID
+ file_id: File ID
+
+ Returns:
+ Full download URL for the file
+ """
+ if not (dataset_id and file_id):
+ return ""
+ return f"{self.base_url}/api/data-management/datasets/{dataset_id}/files/{file_id}/download"
+
+ def sync_all_knowledge_bases(
+ self,
+ authorization: Optional[str] = None
+ ) -> Dict[str, Any]:
+ """
+ Sync all DataMate knowledge bases and their files.
+
+ Args:
+ authorization: Optional authorization header
+
+ Returns:
+ Dictionary containing knowledge bases with their file lists.
+ Format: {
+ "success": bool,
+ "knowledge_bases": [
+ {
+ "knowledge_base": {...},
+ "files": [...],
+ "error": str (optional)
+ }
+ ],
+ "total_count": int
+ }
+ """
+ try:
+ # Fetch all knowledge bases
+ knowledge_bases = self.list_knowledge_bases(authorization=authorization)
+
+ # Fetch files for each knowledge base
+ result = []
+ for kb in knowledge_bases:
+ kb_id = kb.get("id")
+
+ try:
+ files = self.get_knowledge_base_files(str(kb_id), authorization=authorization)
+ result.append({
+ "knowledge_base": kb,
+ "files": files,
+ })
+ except Exception as e:
+ logger.error(f"Failed to fetch files for datamate knowledge base {kb_id}: {str(e)}")
+ # Continue with other knowledge bases even if one fails
+ result.append({
+ "knowledge_base": kb,
+ "files": [],
+ "error": str(e),
+ })
+
+ return {
+ "success": True,
+ "knowledge_bases": result,
+ "total_count": len(result),
+ }
+
+ except Exception as e:
+ logger.error(f"Error syncing DataMate knowledge bases: {str(e)}")
+ return {
+ "success": False,
+ "error": str(e),
+ "knowledge_bases": [],
+ "total_count": 0,
+ }
diff --git a/sdk/nexent/vector_database/__init__.py b/sdk/nexent/vector_database/__init__.py
index e69de29bb..9c811f9c6 100644
--- a/sdk/nexent/vector_database/__init__.py
+++ b/sdk/nexent/vector_database/__init__.py
@@ -0,0 +1,5 @@
+"""Vector database SDK public exports."""
+
+from .datamate_core import DataMateCore
+
+__all__ = ["DataMateCore"]
diff --git a/sdk/nexent/vector_database/datamate_core.py b/sdk/nexent/vector_database/datamate_core.py
new file mode 100644
index 000000000..20da8ffb3
--- /dev/null
+++ b/sdk/nexent/vector_database/datamate_core.py
@@ -0,0 +1,251 @@
+"""
+DataMate adapter implementing the VectorDatabaseCore interface.
+
+Not all operations are supported by the DataMate HTTP API. Unsupported methods
+raise NotImplementedError to make limitations explicit.
+"""
+import logging
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Callable, Tuple
+
+from .base import VectorDatabaseCore
+from ..datamate.datamate_client import DataMateClient
+from ..core.models.embedding_model import BaseEmbedding
+
+logger = logging.getLogger("datamate_core")
+
+
+def _parse_timestamp(timestamp: Any, default: int = 0) -> int:
+    """
+    Parse timestamp from various formats to milliseconds since epoch.
+
+    Args:
+        timestamp: Timestamp value (int, float, numeric or ISO-8601 str, or None)
+        default: Default value if parsing fails
+
+    Returns:
+        Timestamp in milliseconds since epoch
+    """
+    def _to_millis(value: float) -> int:
+        # Values below 1e10 are assumed to be seconds, anything larger milliseconds
+        return int(value * 1000) if value < 1e10 else int(value)
+
+    try:
+        if isinstance(timestamp, (int, float)):
+            return _to_millis(timestamp)
+        if isinstance(timestamp, str):
+            text = timestamp.strip()
+            try:
+                # Numeric strings first: newer Python versions also accept
+                # digit-only input in fromisoformat(), which could misread
+                # an epoch value as a calendar date
+                return _to_millis(int(text))
+            except ValueError:
+                # ISO format; "Z" is rewritten for older fromisoformat() versions.
+                # A value without an offset is interpreted in the local timezone.
+                dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
+                return int(dt.timestamp() * 1000)
+    except Exception:
+        # Unparseable text, NaN/inf floats or out-of-range dates
+        return default
+
+    # None and unsupported types
+    return default
+
+
+class DataMateCore(VectorDatabaseCore):
+ """VectorDatabaseCore implementation backed by the DataMate REST API."""
+
+ def __init__(self, base_url: str, timeout: float = 30.0):
+ self.client = DataMateClient(base_url=base_url, timeout=timeout)
+
+ # ---- INDEX MANAGEMENT ----
+ def create_index(self, index_name: str, embedding_dim: Optional[int] = None) -> bool:
+ """DataMate API does not support index creation via SDK."""
+ _ = embedding_dim
+ raise NotImplementedError("DataMate SDK does not support creating indices.")
+
+ def delete_index(self, index_name: str) -> bool:
+ """DataMate API does not support deleting indices via SDK."""
+ raise NotImplementedError("DataMate SDK does not support deleting indices.")
+
+    def get_user_indices(self, index_pattern: str = "*") -> List[str]:
+        """Return DataMate knowledge base IDs (as strings) from the first listing page; index_pattern is ignored."""
+        _ = index_pattern  # unused
+        knowledge_bases = self.client.list_knowledge_bases()  # client defaults: page=0, size=20; later pages are not fetched
+        return [str(kb.get("id")) for kb in knowledge_bases if kb.get("id") is not None]
+
+    def check_index_exists(self, index_name: str) -> bool:
+        """Check existence by knowledge base id among the string IDs returned by get_user_indices()."""
+        return index_name in self.get_user_indices()
+
+ # ---- DOCUMENT OPERATIONS ----
+ def vectorize_documents(
+ self,
+ index_name: str,
+ embedding_model: BaseEmbedding,
+ documents: List[Dict[str, Any]],
+ batch_size: int = 64,
+ content_field: str = "content",
+ embedding_batch_size: int = 10,
+ progress_callback: Optional[Callable[[int, int], None]] = None,
+ ) -> int:
+ _ = (
+ index_name,
+ embedding_model,
+ documents,
+ batch_size,
+ content_field,
+ embedding_batch_size,
+ progress_callback,
+ )
+ raise NotImplementedError("DataMate SDK does not support direct document ingestion.")
+
+ def delete_documents(self, index_name: str, path_or_url: str) -> int:
+ _ = (index_name, path_or_url)
+ raise NotImplementedError("DataMate SDK does not support deleting documents.")
+
+ def get_index_chunks(
+ self,
+ index_name: str,
+ page: Optional[int] = None,
+ page_size: Optional[int] = None,
+ path_or_url: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ _ = (page, page_size, path_or_url)
+ files = self.client.get_knowledge_base_files(index_name)
+ return {
+ "chunks": files,
+ "total": len(files),
+ "page": page,
+ "page_size": page_size,
+ }
+
+ def create_chunk(self, index_name: str, chunk: Dict[str, Any]) -> Dict[str, Any]:
+ _ = (index_name, chunk)
+ raise NotImplementedError("DataMate SDK does not support creating individual chunks.")
+
+ def update_chunk(self, index_name: str, chunk_id: str, chunk_updates: Dict[str, Any]) -> Dict[str, Any]:
+ _ = (index_name, chunk_id, chunk_updates)
+ raise NotImplementedError("DataMate SDK does not support updating chunks.")
+
+ def delete_chunk(self, index_name: str, chunk_id: str) -> bool:
+ _ = (index_name, chunk_id)
+ raise NotImplementedError("DataMate SDK does not support deleting chunks.")
+
+ def count_documents(self, index_name: str) -> int:
+ files = self.client.get_knowledge_base_files(index_name)
+ return len(files)
+
+ # ---- SEARCH OPERATIONS ----
+ def search(self, index_name: str, query: Dict[str, Any]) -> Dict[str, Any]:
+ _ = (index_name, query)
+ raise NotImplementedError("DataMate SDK does not support raw search API.")
+
+ def multi_search(self, body: List[Dict[str, Any]], index_name: str) -> Dict[str, Any]:
+ _ = (body, index_name)
+ raise NotImplementedError("DataMate SDK does not support multi search API.")
+
+ def accurate_search(self, index_names: List[str], query_text: str, top_k: int = 5) -> List[Dict[str, Any]]:
+ _ = (index_names, query_text, top_k)
+ raise NotImplementedError("DataMate SDK does not support accurate search API.")
+
+ def semantic_search(
+ self, index_names: List[str], query_text: str, embedding_model: BaseEmbedding, top_k: int = 5
+ ) -> List[Dict[str, Any]]:
+ _ = (index_names, query_text, embedding_model, top_k)
+ raise NotImplementedError("DataMate SDK does not support semantic search API.")
+
+ # ---- SEARCH OPERATIONS ----
+ def hybrid_search(
+ self,
+ index_names: List[str],
+ query_text: str,
+ embedding_model: Optional[BaseEmbedding] = None,
+ top_k: int = 10,
+ weight_accurate: float = 0.2,
+ ) -> List[Dict[str, Any]]:
+ """
+ Retrieve content in DataMate knowledge bases.
+
+ Args:
+ index_names: List of knowledge base IDs to retrieve
+ query_text: Retrieve query text
+ embedding_model: Optional embedding model
+ top_k: Maximum number of results to return (default: 10)
+ weight_accurate: Similarity threshold (default: 0.2)
+
+ Returns:
+ List of retrieve result dictionaries
+
+ Raises:
+ RuntimeError: If the API request fails
+ """
+ _ = embedding_model # Explicitly ignored
+ retrieve_knowledge = self.client.retrieve_knowledge_base(query_text, index_names, top_k, weight_accurate)
+ return retrieve_knowledge
+
+ # ---- STATISTICS AND MONITORING ----
+ def get_documents_detail(self, index_name: str) -> List[Dict[str, Any]]:
+ files_list = self.client.get_knowledge_base_files(index_name)
+ results = []
+ for info in files_list:
+ file_info = {
+ "path_or_url": info.get("path_or_url", ""),
+ "file": info.get("fileName", ""),
+ "file_size": info.get("fileSize", ""),
+ "create_time": _parse_timestamp(info.get("createdAt", "")),
+ "chunk_count": info.get("chunkCount", ""),
+ "status": "COMPLETED",
+ "latest_task_id": "",
+ "error_reason": info.get("errMsg", ""),
+ "has_error_info": False,
+ "processed_chunk_num": None,
+ "total_chunk_num": None,
+ "chunks": []
+ }
+ results.append(file_info)
+ return results
+
+    def get_indices_detail(self, index_names: List[str], embedding_dim: Optional[int] = None) -> Tuple[Dict[
+            str, Dict[str, Any]], List[str]]:
+        """
+        Collect statistics for each knowledge base id in index_names.
+
+        Args:
+            index_names: Knowledge base IDs to inspect
+            embedding_dim: Reported embedding dimension (1024 when not given)
+
+        Returns:
+            (details keyed by id - an {"error": ...} entry on failure, names of the fetched knowledge bases)
+        """
+        details: Dict[str, Dict[str, Any]] = {}
+        knowledge_base_names = []
+        for kb_id in index_names:
+            try:
+                # Get knowledge base info
+                kb_info = self.client.get_knowledge_base_info(kb_id)
+                knowledge_base_names.append(kb_info.get("name"))
+                # "embedding" may be missing or null; that must not fail the whole entry
+                embedding_info = kb_info.get("embedding") or {}
+
+                # Build base_info dict
+                base_info = {
+                    "doc_count": kb_info.get("fileCount"),  # Number of unique documents (files)
+                    "chunk_count": kb_info.get("chunkCount"),
+                    "store_size": str(kb_info.get("storeSize", "")),
+                    "process_source": str(kb_info.get("processSource", "Unstructured")),
+                    "embedding_model": str(embedding_info.get("modelName")),
+                    "embedding_dim": embedding_dim or 1024,
+                    "creation_date": _parse_timestamp(kb_info.get("createdAt")),
+                    "update_date": _parse_timestamp(kb_info.get("updatedAt")),
+                }
+
+                # Build performance dict (DataMate API may not provide search stats)
+                performance = {"total_search_count": 0, "hit_count": 0}
+
+                details[kb_id] = {"base_info": base_info, "search_performance": performance}
+            except Exception as exc:
+                logger.error(f"Error getting stats for knowledge base {kb_id}: {str(exc)}")
+                details[kb_id] = {"error": str(exc)}
+        return details, knowledge_base_names
diff --git a/test/backend/app/test_knowledge_summary_app.py b/test/backend/app/test_knowledge_summary_app.py
index 80fe99029..722cff1cb 100644
--- a/test/backend/app/test_knowledge_summary_app.py
+++ b/test/backend/app/test_knowledge_summary_app.py
@@ -44,6 +44,11 @@ def __init__(self, *args, **kwargs):
sys.modules['nexent.vector_database'] = vector_db_module
sys.modules['nexent.vector_database.base'] = vector_db_base_module
sys.modules['nexent.vector_database.elasticsearch_core'] = MagicMock()
+# Provide datamate_core module with DataMateCore to satisfy imports like
+# `from nexent.vector_database.datamate_core import DataMateCore`
+datamate_core_module = types.ModuleType("nexent.vector_database.datamate_core")
+datamate_core_module.DataMateCore = MagicMock()
+sys.modules['nexent.vector_database.datamate_core'] = datamate_core_module
# Mock specific classes that are imported
class MockToolConfig:
diff --git a/test/backend/services/test_conversation_management_service.py b/test/backend/services/test_conversation_management_service.py
index 3fdbb6bab..25018c9fd 100644
--- a/test/backend/services/test_conversation_management_service.py
+++ b/test/backend/services/test_conversation_management_service.py
@@ -1,12 +1,22 @@
+from backend.consts.model import MessageRequest, AgentRequest, MessageUnit
+from unittest.mock import patch
+from datetime import datetime
+import asyncio
+import json
+import unittest
import sys
import types
+from unittest.mock import MagicMock
+
def _stub_nexent_openai_model():
# Provide a simple OpenAIModel stub for import-time safety
mod = types.ModuleType("nexent.core.models")
+
class Stub:
def __init__(self, *a, **k):
self.generated = None
+
def generate(self, messages):
# record messages for assertion and return object with content
self.generated = messages
@@ -18,43 +28,51 @@ def generate(self, messages):
# Stub jinja2 to avoid importing the dependency during tests
jinja2_mod = types.ModuleType("jinja2")
+
+
class StrictUndefined:
pass
+
+
class Template:
def __init__(self, text, undefined=None):
self.text = text
+
def render(self, ctx):
# very small render: replace {{content}} occurrence
return self.text.replace("{{content}}", ctx.get("content", ""))
+
+
jinja2_mod.StrictUndefined = StrictUndefined
jinja2_mod.Template = Template
sys.modules["jinja2"] = jinja2_mod
-# Stub nexent.core.agents.agent_model to satisfy imports in consts.model
-agent_model_mod = types.ModuleType("nexent.core.agents.agent_model")
-agent_model_mod.ToolConfig = object
-sys.modules["nexent.core.agents"] = types.ModuleType("nexent.core.agents")
-sys.modules["nexent.core.agents.agent_model"] = agent_model_mod
-# Stub nexent.core.utils.observer ProcessType and MessageObserver used by conversation service
-observer_mod = types.ModuleType("nexent.core.utils.observer")
-observer_mod.MessageObserver = lambda *a, **k: types.SimpleNamespace(add_model_new_token=lambda t: None, add_model_reasoning_content=lambda r: None, flush_remaining_tokens=lambda: None)
-observer_mod.ProcessType = types.SimpleNamespace(MODEL_OUTPUT_CODE=types.SimpleNamespace(value="model_output_code"), MODEL_OUTPUT_THINKING=types.SimpleNamespace(value="model_output_thinking"))
-sys.modules["nexent.core.utils.observer"] = observer_mod
+# Update existing observer mock with ProcessType
+sys.modules["nexent.core.utils.observer"].ProcessType = types.SimpleNamespace(MODEL_OUTPUT_CODE=types.SimpleNamespace(
+ value="model_output_code"), MODEL_OUTPUT_THINKING=types.SimpleNamespace(value="model_output_thinking"))
#
# Stub consts.model to avoid pydantic/email-validator heavy imports during tests.
consts_model_mod = types.ModuleType("consts.model")
+
+
class AgentRequest:
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
+
+
class ConversationResponse:
def __init__(self, code=0, message="", data=None):
self.code = code
self.message = message
self.data = data
+
+
class MessageUnit:
def __init__(self, type="", content=""):
self.type = type
self.content = content
+
+
class MessageRequest:
def __init__(self, conversation_id=None, message_idx=None, role=None, message=None, minio_files=None):
self.conversation_id = conversation_id
@@ -62,6 +80,7 @@ def __init__(self, conversation_id=None, message_idx=None, role=None, message=No
self.role = role
self.message = message
self.minio_files = minio_files
+
def model_dump(self):
return {
"conversation_id": self.conversation_id,
@@ -71,6 +90,7 @@ def model_dump(self):
"minio_files": self.minio_files,
}
+
consts_model_mod.AgentRequest = AgentRequest
consts_model_mod.ConversationResponse = ConversationResponse
consts_model_mod.MessageUnit = MessageUnit
@@ -104,40 +124,36 @@ def __enter__(self):
def __exit__(self, exc_type, exc, tb):
return False
+
db_client_stub.get_db_session = lambda *a, **k: _DummySessionCM()
sys.modules["database.client"] = db_client_stub
# Stub utils.prompt_template_utils to avoid requiring PyYAML
prompt_mod = types.ModuleType("utils.prompt_template_utils")
-prompt_mod.get_generate_title_prompt_template = lambda language="zh": {"USER_PROMPT":"{{content}}", "SYSTEM_PROMPT":"SYS"}
+prompt_mod.get_generate_title_prompt_template = lambda language="zh": {
+ "USER_PROMPT": "{{content}}", "SYSTEM_PROMPT": "SYS"}
sys.modules["utils.prompt_template_utils"] = prompt_mod
-
def test_call_llm_for_title_flattening(monkeypatch):
# Patch tenant_config_manager.get_model_config and prompt template
- monkeypatch.setattr("backend.services.conversation_management_service.tenant_config_manager", types.SimpleNamespace(get_model_config=lambda *a, **k: {"base_url":"u","api_key":"k","model_factory":"modelengine","model_name":"m"}))
- monkeypatch.setattr("backend.services.conversation_management_service.get_generate_title_prompt_template", lambda language="zh": {"USER_PROMPT":"{{content}}", "SYSTEM_PROMPT":"SYS"})
+ monkeypatch.setattr("backend.services.conversation_management_service.tenant_config_manager", types.SimpleNamespace(
+ get_model_config=lambda *a, **k: {"base_url": "u", "api_key": "k", "model_factory": "modelengine", "model_name": "m"}))
+ monkeypatch.setattr("backend.services.conversation_management_service.get_generate_title_prompt_template",
+ lambda language="zh": {"USER_PROMPT": "{{content}}", "SYSTEM_PROMPT": "SYS"})
# Stub get_model_name_from_config to avoid dependency on config utils
- monkeypatch.setattr("backend.services.conversation_management_service.get_model_name_from_config", lambda cfg: cfg.get("model_name", "") if cfg else "")
+ monkeypatch.setattr("backend.services.conversation_management_service.get_model_name_from_config",
+ lambda cfg: cfg.get("model_name", "") if cfg else "")
# Call with some content; expect OpenAIModel.generate to receive flattened messages
- title = call_llm_for_title("some conversation content", tenant_id="t", language="zh")
+ title = call_llm_for_title(
+ "some conversation content", tenant_id="t", language="zh")
assert title == "The Title"
-from backend.consts.model import MessageRequest, AgentRequest, MessageUnit
-import unittest
-import json
-import asyncio
-import os
-from datetime import datetime
-from unittest.mock import patch, MagicMock
# Environment variables are now configured in conftest.py
-
# Mock boto3 and minio client before importing the module under test
-import sys
boto3_mock = MagicMock()
sys.modules['boto3'] = boto3_mock
@@ -145,9 +161,12 @@ def test_call_llm_for_title_flattening(monkeypatch):
# These patches must be started before any imports that use MinioClient
storage_client_mock = MagicMock()
minio_client_mock = MagicMock()
-patch('nexent.storage.storage_client_factory.create_storage_client_from_config', return_value=storage_client_mock).start()
-patch('nexent.storage.minio_config.MinIOStorageConfig.validate', lambda self: None).start()
-patch('backend.database.client.MinioClient', return_value=minio_client_mock).start()
+patch('nexent.storage.storage_client_factory.create_storage_client_from_config',
+ return_value=storage_client_mock).start()
+patch('nexent.storage.minio_config.MinIOStorageConfig.validate',
+ lambda self: None).start()
+patch('backend.database.client.MinioClient',
+ return_value=minio_client_mock).start()
with patch('backend.database.client.MinioClient', return_value=minio_client_mock):
from backend.services.conversation_management_service import (
@@ -188,7 +207,8 @@ def test_save_message_picture_web_invalid_json(self, mock_create_image, mock_cre
conversation_id=456,
message_idx=99,
role="assistant",
- message=[MessageUnit(type="picture_web", content="not a valid json")],
+ message=[MessageUnit(type="picture_web",
+ content="not a valid json")],
minio_files=[]
)
result = save_message(
@@ -200,7 +220,8 @@ def test_get_sources_service_no_id(self):
"""Should return error when both conversation_id and message_id are None."""
result = get_sources_service(None, None, user_id=self.user_id)
self.assertEqual(result['code'], 400)
- self.assertEqual(result['message'], "Must provide conversation_id or message_id parameter")
+ self.assertEqual(
+ result['message'], "Must provide conversation_id or message_id parameter")
@patch('backend.services.conversation_management_service.extract_user_messages')
@patch('backend.services.conversation_management_service.call_llm_for_title')
@@ -209,7 +230,8 @@ def test_get_sources_service_no_id(self):
def test_generate_conversation_title_service_no_title(
self, mock_get_config, mock_update, mock_call_llm, mock_extract
):
- mock_get_config.return_value = {"model_name": "gpt-4", "api_key": "fake"}
+ mock_get_config.return_value = {
+ "model_name": "gpt-4", "api_key": "fake"}
mock_extract.return_value = "content"
mock_call_llm.return_value = None
result = asyncio.run(generate_conversation_title_service(
@@ -431,10 +453,12 @@ def test_save_conversation_assistant(self, mock_save_message):
# Check that consecutive model_output_thinking messages were merged
self.assertEqual(len(request_arg.message), 1)
first_unit = request_arg.message[0]
- unit_type = getattr(first_unit, "type", None) or (first_unit.get("type") if isinstance(first_unit, dict) else None)
+ unit_type = getattr(first_unit, "type", None) or (
+ first_unit.get("type") if isinstance(first_unit, dict) else None)
self.assertEqual(unit_type, "model_output_thinking")
first_unit = request_arg.message[0]
- unit_content = getattr(first_unit, "content", None) or (first_unit.get("content") if isinstance(first_unit, dict) else None)
+ unit_content = getattr(first_unit, "content", None) or (
+ first_unit.get("content") if isinstance(first_unit, dict) else None)
self.assertEqual(unit_content, "Machine learning is a field of AI")
def test_extract_user_messages(self):
diff --git a/test/backend/services/test_tool_configuration_service.py b/test/backend/services/test_tool_configuration_service.py
index 86412ee44..996918352 100644
--- a/test/backend/services/test_tool_configuration_service.py
+++ b/test/backend/services/test_tool_configuration_service.py
@@ -190,6 +190,19 @@ def __init__(self, *args, **kwargs):
pass
+# Provide a mock DataMateCore to satisfy imports in vectordatabase_service
+vector_database_datamate_module = types.ModuleType('nexent.vector_database.datamate_core')
+
+
+class MockDataMateCore(MockVectorDatabaseCore):
+ def __init__(self, *args, **kwargs):
+ pass
+
+vector_database_datamate_module.DataMateCore = MockDataMateCore
+sys.modules['nexent.vector_database.datamate_core'] = vector_database_datamate_module
+setattr(sys.modules['nexent.vector_database'], 'datamate_core', vector_database_datamate_module)
+setattr(sys.modules['nexent.vector_database'], 'DataMateCore', MockDataMateCore)
+
vector_database_base_module.VectorDatabaseCore = MockVectorDatabaseCore
vector_database_elasticsearch_module.ElasticSearchCore = MockElasticSearchCore
sys.modules['nexent.vector_database.base'] = vector_database_base_module
diff --git a/test/backend/services/test_vectordatabase_service.py b/test/backend/services/test_vectordatabase_service.py
index 58706a34c..26e713dbe 100644
--- a/test/backend/services/test_vectordatabase_service.py
+++ b/test/backend/services/test_vectordatabase_service.py
@@ -57,6 +57,7 @@ class _VectorDatabaseCore:
vector_db_base_module.VectorDatabaseCore = _VectorDatabaseCore
sys.modules['nexent.vector_database.base'] = vector_db_base_module
sys.modules['nexent.vector_database.elasticsearch_core'] = MagicMock()
+sys.modules['nexent.vector_database.datamate_core'] = MagicMock()
# Mock nexent.storage module and its submodules before any imports
sys.modules['nexent.storage'] = _create_package_mock('nexent.storage')
storage_factory_module = MagicMock()
@@ -2806,6 +2807,19 @@ def test_get_vector_db_core_unsupported_type(self):
self.assertIn("Unsupported vector database type", str(exc.exception))
+ def test_get_vector_db_core_datamate_type(self):
+ """get_vector_db_core returns DataMateCore for DATAMATE type."""
+ from backend.services.vectordatabase_service import get_vector_db_core
+ from consts.const import VectorDatabaseType, DATAMATE_URL
+
+ with patch('backend.services.vectordatabase_service.DataMateCore') as mock_datamate_core:
+ mock_datamate_core.return_value = MagicMock()
+
+ result = get_vector_db_core(db_type=VectorDatabaseType.DATAMATE)
+
+ mock_datamate_core.assert_called_once_with(base_url=DATAMATE_URL)
+ self.assertEqual(result, mock_datamate_core.return_value)
+
def test_rethrow_or_plain_parses_error_code(self):
"""_rethrow_or_plain rethrows JSON error_code payloads unchanged."""
from backend.services.vectordatabase_service import _rethrow_or_plain
diff --git a/test/pytest.ini b/test/pytest.ini
index c3170b6ad..21e178bdd 100644
--- a/test/pytest.ini
+++ b/test/pytest.ini
@@ -7,4 +7,4 @@ asyncio_default_fixture_loop_scope = function
# Configure warning filters to ignore all warnings
filterwarnings =
# Disable all warnings
- ignore
\ No newline at end of file
+ ignore
diff --git a/test/sdk/core/agents/test_nexent_agent.py b/test/sdk/core/agents/test_nexent_agent.py
index 2c05a19ff..4c1c34b79 100644
--- a/test/sdk/core/agents/test_nexent_agent.py
+++ b/test/sdk/core/agents/test_nexent_agent.py
@@ -1329,6 +1329,86 @@ def test_agent_run_with_observer_with_reset_false(nexent_agent_instance, mock_co
mock_core_agent.run.assert_called_once_with(
"test query", stream=True, reset=False)
+def test_create_local_tool_datamate_search_tool_success(nexent_agent_instance):
+ """Test successful creation of DataMateSearchTool with metadata."""
+ mock_datamate_tool_class = MagicMock()
+ mock_datamate_tool_instance = MagicMock()
+ mock_datamate_tool_class.return_value = mock_datamate_tool_instance
+
+ tool_config = ToolConfig(
+ class_name="DataMateSearchTool",
+ name="datamate_search",
+ description="desc",
+ inputs="{}",
+ output_type="string",
+ params={"top_k": 10, "server_ip": "127.0.0.1", "server_port": 8080},
+ source="local",
+ metadata={
+ "index_names": ["datamate_index1", "datamate_index2"],
+ },
+ )
+
+ original_value = nexent_agent.__dict__.get("DataMateSearchTool")
+ nexent_agent.__dict__["DataMateSearchTool"] = mock_datamate_tool_class
+
+ try:
+ result = nexent_agent_instance.create_local_tool(tool_config)
+ finally:
+ # Restore original value
+ if original_value is not None:
+ nexent_agent.__dict__["DataMateSearchTool"] = original_value
+ elif "DataMateSearchTool" in nexent_agent.__dict__:
+ del nexent_agent.__dict__["DataMateSearchTool"]
+
+ # Verify tool was created with all params
+ mock_datamate_tool_class.assert_called_once_with(
+ top_k=10, server_ip="127.0.0.1", server_port=8080
+ )
+ # Verify excluded parameters were set directly as attributes after instantiation
+ assert result == mock_datamate_tool_instance
+ assert mock_datamate_tool_instance.observer == nexent_agent_instance.observer
+ assert mock_datamate_tool_instance.index_names == ["datamate_index1", "datamate_index2"]
+
+
+
+def test_create_local_tool_datamate_search_tool_with_none_defaults(nexent_agent_instance):
+ """Test DataMateSearchTool creation with None defaults when metadata is missing."""
+ mock_datamate_tool_class = MagicMock()
+ mock_datamate_tool_instance = MagicMock()
+ mock_datamate_tool_class.return_value = mock_datamate_tool_instance
+
+ tool_config = ToolConfig(
+ class_name="DataMateSearchTool",
+ name="datamate_search",
+ description="desc",
+ inputs="{}",
+ output_type="string",
+ params={"top_k": 5, "server_ip": "127.0.0.1", "server_port": 8080},
+ source="local",
+ metadata={}, # No metadata provided
+ )
+
+ original_value = nexent_agent.__dict__.get("DataMateSearchTool")
+ nexent_agent.__dict__["DataMateSearchTool"] = mock_datamate_tool_class
+
+ try:
+ result = nexent_agent_instance.create_local_tool(tool_config)
+ finally:
+ # Restore original value
+ if original_value is not None:
+ nexent_agent.__dict__["DataMateSearchTool"] = original_value
+ elif "DataMateSearchTool" in nexent_agent.__dict__:
+ del nexent_agent.__dict__["DataMateSearchTool"]
+
+ # Verify tool was created with all params
+ mock_datamate_tool_class.assert_called_once_with(
+ top_k=5, server_ip="127.0.0.1", server_port=8080
+ )
+ # Verify excluded parameters were set directly as attributes, with index_names defaulting to an empty list when metadata is empty
+ assert result == mock_datamate_tool_instance
+ assert mock_datamate_tool_instance.observer == nexent_agent_instance.observer
+ assert mock_datamate_tool_instance.index_names == [] # Empty list when None
+
if __name__ == "__main__":
pytest.main([__file__])
diff --git a/test/sdk/core/models/test_openai_llm.py b/test/sdk/core/models/test_openai_llm.py
index 6dbc6bc25..1533f5098 100644
--- a/test/sdk/core/models/test_openai_llm.py
+++ b/test/sdk/core/models/test_openai_llm.py
@@ -5,6 +5,58 @@
# Ensure SDK package is importable by adding sdk/ to sys.path (do not fallback to stubs)
sys.path.insert(0, str(Path(__file__).resolve().parents[4] / "sdk"))
+# Ensure minimal `nexent` package structure exists in sys.modules so string-based
+# patch targets like "nexent.core.models.openai_llm.asyncio.to_thread" can be
+# resolved by unittest.mock during tests that run outside the temporary patch
+# contexts used below.
+_sdk_root = Path(__file__).resolve().parents[4] / "sdk" / "nexent"
+if "nexent" not in sys.modules:
+ _top_pkg = types.ModuleType("nexent")
+ _top_pkg.__path__ = [str(_sdk_root)]
+ sys.modules["nexent"] = _top_pkg
+if "nexent.core" not in sys.modules:
+ _core_pkg = types.ModuleType("nexent.core")
+ _core_pkg.__path__ = [str(_sdk_root / "core")]
+ sys.modules["nexent.core"] = _core_pkg
+if "nexent.core.models" not in sys.modules:
+ _models_pkg = types.ModuleType("nexent.core.models")
+ _models_pkg.__path__ = [str(_sdk_root / "core" / "models")]
+ sys.modules["nexent.core.models"] = _models_pkg
+
+# Ensure the package attributes exist on the top-level `nexent` module so that
+# string-based patch targets (e.g. "nexent.core.models.openai_llm.asyncio.to_thread")
+# resolve via getattr during unittest.mock's import lookup.
+try:
+ top_mod = sys.modules.get("nexent")
+ core_mod = sys.modules.get("nexent.core")
+ models_mod = sys.modules.get("nexent.core.models")
+ if top_mod and core_mod and not hasattr(top_mod, "core"):
+ setattr(top_mod, "core", core_mod)
+ if core_mod and models_mod and not hasattr(core_mod, "models"):
+ setattr(core_mod, "models", models_mod)
+except Exception:
+ # If anything goes wrong, do not fail test import phase; the test will create
+ # the necessary entries later within its patch context.
+ pass
+
+# Ensure the concrete openai_llm submodule is available in sys.modules so that
+# string-based patch targets resolve outside of temporary patch contexts.
+try:
+ _openai_name = "nexent.core.models.openai_llm"
+ _openai_path = Path(__file__).resolve().parents[4] / "sdk" / "nexent" / "core" / "models" / "openai_llm.py"
+ if _openai_path.exists() and _openai_name not in sys.modules:
+ _spec = importlib.util.spec_from_file_location(_openai_name, _openai_path)
+ _mod = importlib.util.module_from_spec(_spec)
+ sys.modules[_openai_name] = _mod
+ assert _spec and _spec.loader
+ _spec.loader.exec_module(_mod)
+ pkg = sys.modules.get("nexent.core.models")
+ if pkg is not None and not hasattr(pkg, "openai_llm"):
+ setattr(pkg, "openai_llm", _mod)
+except Exception:
+ # Best-effort only; if this fails, the module is still loaded dynamically further below.
+ pass
+
# Dynamically load the openai_llm module to avoid importing full sdk package
MODULE_NAME = "nexent.core.models.openai_llm"
MODULE_PATH = (
@@ -275,6 +327,15 @@ class MockProcessType:
sys.modules[MODULE_NAME] = openai_llm_module
assert spec and spec.loader
spec.loader.exec_module(openai_llm_module)
+ # Expose the loaded submodule as an attribute on the package object so that
+ # string-based patch targets like "nexent.core.models.openai_llm.asyncio.to_thread"
+ # resolve via getattr during unittest.mock's import lookup.
+ try:
+ models_pkg = sys.modules.get("nexent.core.models")
+ if models_pkg is not None:
+ setattr(models_pkg, "openai_llm", openai_llm_module)
+ except Exception:
+ pass
ImportedOpenAIModel = openai_llm_module.OpenAIModel
# -----------------------------------------------------------------------
diff --git a/test/sdk/core/tools/test_analyze_text_file_tool.py b/test/sdk/core/tools/test_analyze_text_file_tool.py
index 7eab52d89..c0a91e355 100644
--- a/test/sdk/core/tools/test_analyze_text_file_tool.py
+++ b/test/sdk/core/tools/test_analyze_text_file_tool.py
@@ -1,4 +1,3 @@
-import json
from unittest.mock import MagicMock, patch
import pytest
diff --git a/test/sdk/core/tools/test_datamate_search_tool.py b/test/sdk/core/tools/test_datamate_search_tool.py
index ebfdb3bba..a0be7ff78 100644
--- a/test/sdk/core/tools/test_datamate_search_tool.py
+++ b/test/sdk/core/tools/test_datamate_search_tool.py
@@ -2,12 +2,12 @@
from typing import List
from unittest.mock import ANY, MagicMock
-import httpx
import pytest
from pytest_mock import MockFixture
-from sdk.nexent.core.tools.datamate_search_tool import DataMateSearchTool
+from sdk.nexent.core.tools.datamate_search_tool import DataMateSearchTool, _normalize_index_names
from sdk.nexent.core.utils.observer import MessageObserver, ProcessType
+from sdk.nexent.datamate.datamate_client import DataMateClient
@pytest.fixture
@@ -17,47 +17,42 @@ def mock_observer() -> MessageObserver:
return observer
+
+
@pytest.fixture
def datamate_tool(mock_observer: MessageObserver) -> DataMateSearchTool:
- return DataMateSearchTool(
+ tool = DataMateSearchTool(
server_ip="127.0.0.1",
server_port=8080,
observer=mock_observer,
)
-
-
-def _build_kb_list_response(ids: List[str]):
- return {
- "data": {
- "content": [
- {"id": kb_id, "chunkCount": 1}
- for kb_id in ids
- ]
- }
- }
-
-
-def _build_search_response(kb_id: str, count: int = 2):
- return {
- "data": [
- {
- "entity": {
- "id": f"file-{i}",
- "text": f"content-{i}",
- "createTime": "2024-01-01T00:00:00Z",
- "score": 0.9 - i * 0.1,
- "metadata": json.dumps(
- {
- "file_name": f"file-{i}.txt",
- "absolute_directory_path": f"/data/{kb_id}",
- }
- ),
- "scoreDetails": {"raw": 0.8},
- }
+ return tool
+
+
+def _build_kb_list(ids: List[str]):
+ return [{"id": kb_id, "chunkCount": 1} for kb_id in ids]
+
+
+def _build_search_results(kb_id: str, count: int = 2):
+ return [
+ {
+ "entity": {
+ "id": f"file-{i}",
+ "text": f"content-{i}",
+ "createTime": "2024-01-01T00:00:00Z",
+ "score": 0.9 - i * 0.1,
+ "metadata": json.dumps(
+ {
+ "file_name": f"file-{i}.txt",
+ "absolute_directory_path": f"/data/{kb_id}",
+ "original_file_id": f"orig-{i}",
+ }
+ ),
+ "scoreDetails": {"raw": 0.8},
}
- for i in range(count)
- ]
- }
+ }
+ for i in range(count)
+ ]
class TestDataMateSearchToolInit:
@@ -74,6 +69,21 @@ def test_init_success(self, mock_observer: MessageObserver):
assert tool.kb_page == 0
assert tool.kb_page_size == 20
assert tool.observer is mock_observer
+ # index_names is excluded from the model, so we can't directly test it
+ # The tool exposes the DataMate client via datamate_core.client
+ assert isinstance(tool.datamate_core.client, DataMateClient)
+
+ def test_init_with_index_names(self, mock_observer: MessageObserver):
+ """Test initialization with custom index_names."""
+ custom_index_names = ["kb1", "kb2"]
+ tool = DataMateSearchTool(
+ server_ip="127.0.0.1",
+ server_port=8080,
+ index_names=custom_index_names,
+ observer=mock_observer,
+ )
+
+ assert tool.index_names == custom_index_names
@pytest.mark.parametrize("server_ip", ["", None])
def test_init_invalid_server_ip(self, server_ip):
@@ -109,267 +119,272 @@ def test_parse_metadata(self, datamate_tool: DataMateSearchTool, metadata_raw, e
("/single", "single"),
("/a/b/c", "c"),
("////", ""),
+ ("/a/b/c/d/", "d"),
+ ("no-leading-slash", "no-leading-slash"),
+ ("///multiple///slashes///", "slashes"), # After filtering empty segments, last is "slashes"
],
)
def test_extract_dataset_id(self, datamate_tool: DataMateSearchTool, path, expected):
assert datamate_tool._extract_dataset_id(path) == expected
+
+class TestNormalizeIndexNames:
@pytest.mark.parametrize(
- "dataset_id, file_id, expected",
+ "input_names, expected",
[
- ("ds1", "f1", "http://127.0.0.1:8080/api/data-management/datasets/ds1/files/f1/download"),
- ("", "f1", ""),
- ("ds1", "", ""),
+ (None, []),
+ ("single_kb", ["single_kb"]),
+ (["kb1", "kb2"], ["kb1", "kb2"]),
+ ([], []),
+ ("", [""]), # Edge case: empty string becomes list with empty string
],
)
- def test_build_file_download_url(self, datamate_tool: DataMateSearchTool, dataset_id, file_id, expected):
- assert datamate_tool._build_file_download_url(dataset_id, file_id) == expected
+ def test_normalize_index_names(self, input_names, expected):
+ result = _normalize_index_names(input_names)
+ assert result == expected
-class TestKnowledgeBaseList:
- def test_get_knowledge_base_list_success(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
+class TestForward:
+ def test_forward_success_with_observer_en(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ # Mock the hybrid_search method to return search results
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = _build_search_results("kb1", count=2)
- response = MagicMock()
- response.status_code = 200
- response.json.return_value = _build_kb_list_response(["kb1", "kb2"])
- client.post.return_value = response
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.side_effect = lambda ds, fid: f"http://dl/{ds}/{fid}"
- kb_ids = datamate_tool._get_knowledge_base_list()
+ result_json = datamate_tool.forward("test query", index_names=["kb1"], top_k=2, threshold=0.5)
+ results = json.loads(result_json)
- assert kb_ids == ["kb1", "kb2"]
- client.post.assert_called_once_with(
- f"{datamate_tool.server_base_url}/api/knowledge-base/list",
- json={"page": datamate_tool.kb_page, "size": datamate_tool.kb_page_size},
+ assert len(results) == 2
+ datamate_tool.observer.add_message.assert_any_call("", ProcessType.TOOL, datamate_tool.running_prompt_en)
+ datamate_tool.observer.add_message.assert_any_call(
+ "", ProcessType.CARD, json.dumps([{"icon": "search", "text": "test query"}], ensure_ascii=False)
)
+ datamate_tool.observer.add_message.assert_any_call("", ProcessType.SEARCH_CONTENT, ANY)
+ assert datamate_tool.record_ops == 1 + len(results)
- def test_get_knowledge_base_list_http_error_json_detail(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
-
- response = MagicMock()
- response.status_code = 500
- response.headers = {"content-type": "application/json"}
- response.json.return_value = {"detail": "server error"}
- client.post.return_value = response
-
- with pytest.raises(Exception) as excinfo:
- datamate_tool._get_knowledge_base_list()
-
- assert "Failed to get knowledge base list" in str(excinfo.value)
-
- def test_get_knowledge_base_list_http_error_text_detail(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
-
- response = MagicMock()
- response.status_code = 400
- response.headers = {"content-type": "text/plain"}
- response.text = "bad request"
- client.post.return_value = response
-
- with pytest.raises(Exception) as excinfo:
- datamate_tool._get_knowledge_base_list()
-
- assert "bad request" in str(excinfo.value)
-
- def test_get_knowledge_base_list_timeout(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
- client.post.side_effect = httpx.TimeoutException("timeout")
-
- with pytest.raises(Exception) as excinfo:
- datamate_tool._get_knowledge_base_list()
-
- assert "Timeout while getting knowledge base list" in str(excinfo.value)
-
- def test_get_knowledge_base_list_request_error(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
- client.post.side_effect = httpx.RequestError("network", request=MagicMock())
+ # Verify hybrid_search was called correctly
+ mock_hybrid_search.assert_called_once_with(
+ query_text="test query",
+ index_names=["kb1"],
+ top_k=2,
+ weight_accurate=0.5
+ )
+ mock_build_url.assert_any_call("kb1", "orig-0")
- with pytest.raises(Exception) as excinfo:
- datamate_tool._get_knowledge_base_list()
+ def test_forward_success_with_observer_zh(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ datamate_tool.observer.lang = "zh"
- assert "Request error while getting knowledge base list" in str(excinfo.value)
+ # Mock the hybrid_search method to return search results
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = _build_search_results("kb1", count=1)
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.return_value = "http://dl/kb1/file-1"
-class TestRetrieveKnowledgeBaseContent:
- def test_retrieve_content_success(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
+ datamate_tool.forward("测试查询", index_names=["kb1"])
- response = MagicMock()
- response.status_code = 200
- response.json.return_value = _build_search_response("kb1", count=2)
- client.post.return_value = response
+ datamate_tool.observer.add_message.assert_any_call("", ProcessType.TOOL, datamate_tool.running_prompt_zh)
- results = datamate_tool._retrieve_knowledge_base_content(
- "query",
- ["kb1"],
- top_k=3,
- threshold=0.2,
- )
+ def test_forward_no_observer(self, mocker: MockFixture):
+ tool = DataMateSearchTool(server_ip="127.0.0.1", server_port=8080, observer=None)
- assert len(results) == 2
- client.post.assert_called_once()
+ # Mock the hybrid_search method to return search results
+ mock_hybrid_search = mocker.patch.object(tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = _build_search_results("kb1", count=1)
- def test_retrieve_content_http_error(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.return_value = "http://dl/kb1/file-1"
- response = MagicMock()
- response.status_code = 500
- response.headers = {"content-type": "application/json"}
- response.json.return_value = {"detail": "server error"}
- client.post.return_value = response
+ result_json = tool.forward("query", index_names=["kb1"])
+ assert len(json.loads(result_json)) == 1
- with pytest.raises(Exception) as excinfo:
- datamate_tool._retrieve_knowledge_base_content(
- "query",
- ["kb1"],
- top_k=3,
- threshold=0.2,
- )
+ def test_forward_no_knowledge_bases(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ # Mock the hybrid_search method
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
- assert "Failed to retrieve knowledge base content" in str(excinfo.value)
+ result = datamate_tool.forward("query", index_names=[])
+ assert result == json.dumps("No knowledge base selected. No relevant information found.", ensure_ascii=False)
+ mock_hybrid_search.assert_not_called()
- def test_retrieve_content_timeout(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
- client.post.side_effect = httpx.TimeoutException("timeout")
+ def test_forward_no_results(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ # Mock the hybrid_search method to return empty results
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = []
with pytest.raises(Exception) as excinfo:
- datamate_tool._retrieve_knowledge_base_content(
- "query",
- ["kb1"],
- top_k=3,
- threshold=0.2,
- )
+ datamate_tool.forward("query", index_names=["kb1"])
- assert "Timeout while retrieving knowledge base content" in str(excinfo.value)
+ assert "No results found! Try a less restrictive/shorter query." in str(excinfo.value)
- def test_retrieve_content_request_error(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
- client.post.side_effect = httpx.RequestError("network", request=MagicMock())
+ def test_forward_wrapped_error(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ # Mock the hybrid_search method to raise an error
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.side_effect = RuntimeError("low level error")
with pytest.raises(Exception) as excinfo:
- datamate_tool._retrieve_knowledge_base_content(
- "query",
- ["kb1"],
- top_k=3,
- threshold=0.2,
- )
+ datamate_tool.forward("query", index_names=["kb1"])
- assert "Request error while retrieving knowledge base content" in str(excinfo.value)
-
-
-class TestForward:
- def _setup_success_flow(self, mocker: MockFixture, tool: DataMateSearchTool):
- # Mock knowledge base list
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
-
- kb_response = MagicMock()
- kb_response.status_code = 200
- kb_response.json.return_value = _build_kb_list_response(["kb1"])
+ msg = str(excinfo.value)
+ assert "Error during DataMate knowledge base search" in msg
+ assert "low level error" in msg
- search_response = MagicMock()
- search_response.status_code = 200
- search_response.json.return_value = _build_search_response("kb1", count=2)
+ def test_forward_with_default_index_names(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ """Test forward method using default index_names from constructor."""
+ # Set default index_names in the tool
+ datamate_tool.index_names = ["default_kb1", "default_kb2"]
- # First call for list, second for retrieve
- client.post.side_effect = [kb_response, search_response]
- return client
+ # Mock the hybrid_search method to return results for each knowledge base
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.side_effect = [
+ _build_search_results("default_kb1", count=1), # First call returns results for kb1
+ _build_search_results("default_kb2", count=1), # Second call returns results for kb2
+ ]
- def test_forward_success_with_observer_en(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client = self._setup_success_flow(mocker, datamate_tool)
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.return_value = "http://dl/default_kb/file-1"
- result_json = datamate_tool.forward("test query", top_k=2, threshold=0.5)
+ result_json = datamate_tool.forward("query")
results = json.loads(result_json)
- assert len(results) == 2
- # Check that observer received running prompt and card
- datamate_tool.observer.add_message.assert_any_call(
- "", ProcessType.TOOL, datamate_tool.running_prompt_en
+ assert len(results) == 2 # One result from each knowledge base
+ assert mock_hybrid_search.call_count == 2
+ mock_hybrid_search.assert_any_call(
+ query_text="query",
+ index_names=["default_kb1"],
+ top_k=10,
+ weight_accurate=0.2
)
- datamate_tool.observer.add_message.assert_any_call(
- "", ProcessType.CARD, json.dumps([{"icon": "search", "text": "test query"}], ensure_ascii=False)
- )
- # Check that search content message is added (payload content is not strictly validated here)
- datamate_tool.observer.add_message.assert_any_call(
- "", ProcessType.SEARCH_CONTENT, ANY
- )
- assert datamate_tool.record_ops == 1 + len(results)
- assert all(isinstance(item["index"], str) for item in results)
-
- # Ensure both list and retrieve endpoints were called
- assert client.post.call_count == 2
-
- def test_forward_success_with_observer_zh(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- datamate_tool.observer.lang = "zh"
- self._setup_success_flow(mocker, datamate_tool)
-
- datamate_tool.forward("测试查询")
-
- datamate_tool.observer.add_message.assert_any_call(
- "", ProcessType.TOOL, datamate_tool.running_prompt_zh
+ mock_hybrid_search.assert_any_call(
+ query_text="query",
+ index_names=["default_kb2"],
+ top_k=10,
+ weight_accurate=0.2
)
- def test_forward_no_observer(self, mocker: MockFixture):
- tool = DataMateSearchTool(server_ip="127.0.0.1", server_port=8080, observer=None)
- self._setup_success_flow(mocker, tool)
-
- # Should not raise and should not call observer
- result_json = tool.forward("query")
- assert len(json.loads(result_json)) == 2
-
- def test_forward_no_knowledge_bases(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
+ def test_forward_multiple_knowledge_bases(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ """Test forward method with multiple knowledge bases."""
+ # Mock the hybrid_search method to return results from multiple KBs
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.side_effect = [
+ _build_search_results("kb1", count=1), # First call returns results from kb1
+ _build_search_results("kb2", count=2), # Second call returns results from kb2
+ ]
- kb_response = MagicMock()
- kb_response.status_code = 200
- kb_response.json.return_value = _build_kb_list_response([])
- client.post.return_value = kb_response
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.side_effect = lambda ds, fid: f"http://dl/{ds}/{fid}"
- result = datamate_tool.forward("query")
- assert result == json.dumps("No knowledge base found. No relevant information found.", ensure_ascii=False)
+ result_json = datamate_tool.forward("query", index_names=["kb1", "kb2"])
+ results = json.loads(result_json)
- def test_forward_no_results(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.datamate_search_tool.httpx.Client")
- client = client_cls.return_value.__enter__.return_value
+ assert len(results) == 3 # 1 from kb1 + 2 from kb2
- kb_response = MagicMock()
- kb_response.status_code = 200
- kb_response.json.return_value = _build_kb_list_response(["kb1"])
+ # Verify hybrid_search was called for each knowledge base
+ assert mock_hybrid_search.call_count == 2
+ mock_hybrid_search.assert_any_call(
+ query_text="query",
+ index_names=["kb1"],
+ top_k=10,
+ weight_accurate=0.2
+ )
+ mock_hybrid_search.assert_any_call(
+ query_text="query",
+ index_names=["kb2"],
+ top_k=10,
+ weight_accurate=0.2
+ )
- search_response = MagicMock()
- search_response.status_code = 200
- search_response.json.return_value = {"data": []}
+ def test_forward_with_custom_parameters(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ """Test forward method with custom parameters."""
+ # Mock the hybrid_search method
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = _build_search_results("kb1", count=1)
+
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.return_value = "http://dl/kb1/file-1"
+
+ result_json = datamate_tool.forward(
+ query="custom query",
+ index_names=["kb1"],
+ top_k=5,
+ threshold=0.8,
+ kb_page=2,
+ kb_page_size=50
+ )
+ results = json.loads(result_json)
- client.post.side_effect = [kb_response, search_response]
+ assert len(results) == 1
+ assert datamate_tool.kb_page == 2
+ assert datamate_tool.kb_page_size == 50
- with pytest.raises(Exception) as excinfo:
- datamate_tool.forward("query")
+ mock_hybrid_search.assert_called_once_with(
+ query_text="custom query",
+ index_names=["kb1"],
+ top_k=5,
+ weight_accurate=0.8
+ )
- assert "No results found!" in str(excinfo.value)
+ def test_forward_metadata_parsing_edge_cases(self, datamate_tool: DataMateSearchTool, mocker: MockFixture):
+ """Test forward method with various metadata parsing edge cases."""
+ # Create search results with different metadata formats
+ search_results = [
+ {
+ "entity": {
+ "id": "file-1",
+ "text": "content-1",
+ "createTime": "2024-01-01T00:00:00Z",
+ "score": 0.9,
+ "metadata": json.dumps({
+ "file_name": "file-1.txt",
+ "absolute_directory_path": "/data/kb1",
+ "original_file_id": "orig-1",
+ }),
+ "scoreDetails": {"raw": 0.8},
+ }
+ },
+ {
+ "entity": {
+ "id": "file-2",
+ "text": "content-2",
+ "createTime": "2024-01-01T00:00:00Z",
+ "score": 0.8,
+ "metadata": {}, # Empty dict metadata
+ "scoreDetails": {"raw": 0.7},
+ }
+ },
+ {
+ "entity": {
+ "id": "file-3",
+ "text": "content-3",
+ "createTime": "2024-01-01T00:00:00Z",
+ "score": 0.7,
+ "metadata": "invalid-json", # Invalid JSON metadata
+ "scoreDetails": {"raw": 0.6},
+ }
+ },
+ ]
- def test_forward_wrapped_error(self, mocker: MockFixture, datamate_tool: DataMateSearchTool):
- # Simulate error in underlying method to verify top-level error wrapping
- mocker.patch.object(
- datamate_tool,
- "_get_knowledge_base_list",
- side_effect=Exception("low level error"),
- )
+ # Mock the hybrid_search method
+ mock_hybrid_search = mocker.patch.object(datamate_tool.datamate_core, 'hybrid_search')
+ mock_hybrid_search.return_value = search_results
- with pytest.raises(Exception) as excinfo:
- datamate_tool.forward("query")
+ # Mock the build_file_download_url method
+ mock_build_url = mocker.patch.object(datamate_tool.datamate_core.client, 'build_file_download_url')
+ mock_build_url.return_value = "http://dl/kb1/file"
- msg = str(excinfo.value)
- assert "Error during DataMate knowledge base search" in msg
- assert "low level error" in msg
+ result_json = datamate_tool.forward("query", index_names=["kb1"])
+ results = json.loads(result_json)
+ assert len(results) == 3
+ # Verify that missing metadata fields are handled gracefully
+ assert results[0]["title"] == "file-1.txt"
+ assert results[1]["title"] == "" # Empty metadata dict
+ assert results[2]["title"] == "" # Invalid JSON metadata
diff --git a/test/sdk/datamate/test_datamate_client.py b/test/sdk/datamate/test_datamate_client.py
new file mode 100644
index 000000000..78972bf7e
--- /dev/null
+++ b/test/sdk/datamate/test_datamate_client.py
@@ -0,0 +1,615 @@
+import pytest
+from unittest.mock import MagicMock
+
+import httpx
+from pytest_mock import MockFixture
+
+from sdk.nexent.datamate.datamate_client import DataMateClient
+
+
+@pytest.fixture
+def client() -> DataMateClient:
+ return DataMateClient(base_url="http://datamate.local:30000", timeout=1.0)
+
+
+def _mock_response(mocker: MockFixture, status: int, json_data=None, text: str = ""):
+    """Build a MagicMock standing in for an HTTP response; `mocker` is accepted but not used."""
+    content_type = "text/plain" if json_data is None else "application/json"
+    fake = MagicMock(status_code=status, text=text)
+    fake.headers = {"content-type": content_type}
+    fake.json.return_value = json_data
+    return fake
+
+
+class TestListKnowledgeBases:
+    """list_knowledge_bases: the success path plus both failure modes."""
+
+    def test_success(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        payload = {"data": {"content": [{"id": "kb1"}, {"id": "kb2"}]}}
+        session.post.return_value = _mock_response(mocker, 200, payload)
+
+        found = client.list_knowledge_bases(page=1, size=10, authorization="token")
+
+        assert len(found) == 2
+        # The list endpoint must receive the paging body and the auth header.
+        session.post.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/list",
+            json={"page": 1, "size": 10},
+            headers={"Authorization": "token"},
+        )
+
+    def test_non_200_json_error(self, mocker: MockFixture, client: DataMateClient):
+        """A 500 reply with a JSON body must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(
+            mocker, status=500, json_data={"detail": "boom"}
+        )
+
+        with pytest.raises(RuntimeError) as caught:
+            client.list_knowledge_bases()
+        assert "Failed to fetch DataMate knowledge bases" in str(caught.value)
+
+    def test_http_error(self, mocker: MockFixture, client: DataMateClient):
+        """An httpx.HTTPError raised by the session must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.side_effect = httpx.HTTPError("network")
+
+        with pytest.raises(RuntimeError):
+            client.list_knowledge_bases()
+
+
+class TestGetKnowledgeBaseFiles:
+    """get_knowledge_base_files: the success path plus both failure modes."""
+
+    def test_success(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        payload = {"data": {"content": [{"id": "f1"}, {"id": "f2"}]}}
+        session.get.return_value = _mock_response(mocker, 200, payload)
+
+        listed = client.get_knowledge_base_files("kb1")
+
+        assert len(listed) == 2
+        # No authorization was supplied, so the header mapping must be empty.
+        session.get.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/kb1/files",
+            headers={},
+        )
+
+    def test_non_200(self, mocker: MockFixture, client: DataMateClient):
+        """A 404 reply must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(
+            mocker, status=404, json_data={"detail": "not found"}
+        )
+
+        with pytest.raises(RuntimeError):
+            client.get_knowledge_base_files("kb1")
+
+    def test_http_error(self, mocker: MockFixture, client: DataMateClient):
+        """An httpx.HTTPError raised by the session must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.side_effect = httpx.HTTPError("network")
+
+        with pytest.raises(RuntimeError):
+            client.get_knowledge_base_files("kb1")
+
+
+class TestRetrieveKnowledgeBase:
+    """retrieve_knowledge_base: the success path plus both failure modes."""
+
+    def test_success(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        payload = {"data": [{"entity": {"id": "1"}}, {"entity": {"id": "2"}}]}
+        session.post.return_value = _mock_response(mocker, 200, payload)
+
+        hits = client.retrieve_knowledge_base("q", ["kb1"], top_k=5, threshold=0.1, authorization="auth")
+
+        assert len(hits) == 2
+        # The call's arguments must show up under the camelCase keys of the request body.
+        session.post.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/retrieve",
+            json={
+                "query": "q",
+                "topK": 5,
+                "threshold": 0.1,
+                "knowledgeBaseIds": ["kb1"],
+            },
+            headers={"Authorization": "auth"},
+        )
+
+    def test_non_200(self, mocker: MockFixture, client: DataMateClient):
+        """A 500 reply must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(
+            mocker, status=500, json_data={"detail": "error"}
+        )
+
+        with pytest.raises(RuntimeError):
+            client.retrieve_knowledge_base("q", ["kb1"])
+
+    def test_http_error(self, mocker: MockFixture, client: DataMateClient):
+        """An httpx.HTTPError raised by the session must surface as RuntimeError."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.side_effect = httpx.HTTPError("network")
+
+        with pytest.raises(RuntimeError):
+            client.retrieve_knowledge_base("q", ["kb1"])
+
+
+class TestBuildFileDownloadUrl:
+    def test_build_url(self, client: DataMateClient):
+        url = client.build_file_download_url("ds1", "f1")
+        assert url == "http://datamate.local:30000/api/data-management/datasets/ds1/files/f1/download"
+
+    def test_missing_parts(self, client: DataMateClient):
+        for dataset_id, file_id in (("", "f1"), ("ds1", "")):
+            assert client.build_file_download_url(dataset_id, file_id) == ""
+
+
+class TestSyncAllKnowledgeBases:
+    def test_success_and_partial_error(self, mocker: MockFixture, client: DataMateClient):
+        mocker.patch.object(client, "list_knowledge_bases", return_value=[{"id": "kb1"}, {"id": "kb2"}])
+        mocker.patch.object(client, "get_knowledge_base_files", side_effect=[["f1"], RuntimeError("oops")])
+
+        outcome = client.sync_all_knowledge_bases()
+        assert outcome["success"] is True
+        assert outcome["total_count"] == 2
+
+        first_kb, second_kb = outcome["knowledge_bases"][0], outcome["knowledge_bases"][1]
+        assert first_kb["files"] == ["f1"]
+        assert second_kb["files"] == []
+        assert "oops" in second_kb["error"]
+
+    def test_sync_failure(self, mocker: MockFixture, client: DataMateClient):
+        mocker.patch.object(client, "list_knowledge_bases", side_effect=RuntimeError("boom"))
+        outcome = client.sync_all_knowledge_bases()
+
+        assert outcome["success"] is False
+        assert outcome["total_count"] == 0
+        assert "boom" in outcome["error"]
+
+
+class TestGetKnowledgeBaseInfo:
+    """get_knowledge_base_info: success variants and every error path."""
+
+    def test_success(self, mocker: MockFixture, client: DataMateClient):
+        """Without authorization the detail endpoint is called with empty headers."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(
+            mocker, status=200, json_data={"data": {"id": "kb1", "name": "KB1"}}
+        )
+
+        info = client.get_knowledge_base_info("kb1")
+
+        assert isinstance(info, dict)
+        assert info["id"] == "kb1"
+        session.get.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/kb1",
+            headers={},
+        )
+
+    def test_success_with_authorization(self, mocker: MockFixture, client: DataMateClient):
+        """A supplied authorization value is sent as the Authorization header."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(
+            mocker, status=200, json_data={"data": {"id": "kb1", "name": "KB1"}}
+        )
+
+        info = client.get_knowledge_base_info("kb1", authorization="Bearer token123")
+
+        assert isinstance(info, dict)
+        assert info["id"] == "kb1"
+        session.get.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/kb1",
+            headers={"Authorization": "Bearer token123"},
+        )
+
+    def test_empty_data(self, mocker: MockFixture, client: DataMateClient):
+        """An empty "data" object is returned as an empty dict."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(
+            mocker, status=200, json_data={"data": {}}
+        )
+
+        info = client.get_knowledge_base_info("kb1")
+        assert info == {}
+
+    def test_non_200_json_error(self, mocker: MockFixture, client: DataMateClient):
+        """A 500 reply with a JSON body raises RuntimeError carrying both message parts."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(
+            mocker, status=500, json_data={"detail": "boom"}, text=""
+        )
+
+        with pytest.raises(RuntimeError) as caught:
+            client.get_knowledge_base_info("kb1")
+
+        message = str(caught.value)
+        assert "Failed to fetch details for datamate knowledge base kb1" in message
+        assert "Failed to get knowledge base details" in message
+
+    def test_non_200_text_error(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        # Plain-text error reply: no JSON payload, only a text body and a text/plain header.
+        plain_reply = _mock_response(mocker, 404, None, text="not found")
+        plain_reply.headers = {"content-type": "text/plain"}
+        session.get.return_value = plain_reply
+
+        with pytest.raises(RuntimeError) as caught:
+            client.get_knowledge_base_info("kb1")
+
+        message = str(caught.value)
+        assert "Failed to fetch details for datamate knowledge base kb1" in message
+        assert "not found" in message
+
+    def test_http_error_raised(self, mocker: MockFixture, client: DataMateClient):
+        """An httpx.HTTPError is re-raised as RuntimeError that keeps the original text."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.side_effect = httpx.HTTPError("network")
+
+        with pytest.raises(RuntimeError) as caught:
+            client.get_knowledge_base_info("kb1")
+
+        message = str(caught.value)
+        assert "Failed to fetch details for datamate knowledge base kb1" in message
+        assert "network" in message
+
+
+class TestBuildHeaders:
+    """Exercises the private _build_headers helper."""
+
+    def test_with_authorization(self, client: DataMateClient):
+        # The supplied value is expected verbatim under the Authorization key.
+        assert client._build_headers("Bearer token123") == {"Authorization": "Bearer token123"}
+
+    def test_without_authorization(self, client: DataMateClient):
+        # Called with no argument, the helper is expected to return an empty mapping.
+        assert client._build_headers() == {}
+
+    def test_with_none_authorization(self, client: DataMateClient):
+        # An explicit None is expected to give the same empty mapping.
+        assert client._build_headers(None) == {}
+
+
+class TestBuildUrl:
+    """Exercises the private _build_url helper."""
+
+    def test_path_with_leading_slash(self, client: DataMateClient):
+        # A path that already starts with "/" is appended to the base URL as is.
+        assert client._build_url("/api/test") == "http://datamate.local:30000/api/test"
+
+    def test_path_without_leading_slash(self, client: DataMateClient):
+        # A path lacking the leading "/" must still be joined with a single slash.
+        assert client._build_url("api/test") == "http://datamate.local:30000/api/test"
+
+    def test_base_url_without_trailing_slash(self, client: DataMateClient):
+        # base_url is stored without a trailing slash (stripped in __init__).
+        built = client._build_url("/api/test")
+        assert built == "http://datamate.local:30000/api/test"
+
+
+class TestMakeRequest:
+    """Exercises the private _make_request helper."""
+
+    def test_get_request_success(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {"result": "ok"})
+
+        reply = client._make_request("GET", "http://test.com/api", {"X-Header": "value"})
+
+        assert reply.status_code == 200
+        session.get.assert_called_once_with("http://test.com/api", headers={"X-Header": "value"})
+
+    def test_post_request_success(self, mocker: MockFixture, client: DataMateClient):
+        """POST forwards the JSON body together with the headers."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"result": "ok"})
+
+        sent_headers = {"X-Header": "value"}
+        reply = client._make_request("POST", "http://test.com/api", sent_headers, json={"key": "value"})
+
+        assert reply.status_code == 200
+        session.post.assert_called_once_with(
+            "http://test.com/api", json={"key": "value"}, headers={"X-Header": "value"}
+        )
+
+    def test_custom_timeout(self, mocker: MockFixture, client: DataMateClient):
+        """An explicit timeout argument is handed to the httpx.Client constructor."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {"result": "ok"})
+
+        client._make_request("GET", "http://test.com/api", {}, timeout=5.0)
+
+        # call_args[1] holds the keyword arguments of the single constructor call.
+        patched_cls.assert_called_once()
+        assert patched_cls.call_args[1]["timeout"] == 5.0
+
+    def test_default_timeout(self, mocker: MockFixture, client: DataMateClient):
+        """Without a timeout argument the constructor receives the fixture's 1.0."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {"result": "ok"})
+
+        client._make_request("GET", "http://test.com/api", {})
+
+        # Verify default timeout (1.0) was used
+        patched_cls.assert_called_once()
+        assert patched_cls.call_args[1]["timeout"] == 1.0
+
+    def test_non_200_status_code(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 404, {"detail": "not found"})
+
+        with pytest.raises(Exception) as caught:
+            client._make_request("GET", "http://test.com/api", {}, error_message="Custom error")
+        message = str(caught.value)
+        assert "Custom error" in message
+        assert "404" in message
+
+    def test_unsupported_method(self, client: DataMateClient):
+        with pytest.raises(ValueError) as caught:
+            client._make_request("PUT", "http://test.com/api", {})
+
+        assert "Unsupported HTTP method: PUT" in str(caught.value)
+
+
+class TestHandleErrorResponse:
+    """Exercises the private _handle_error_response helper."""
+
+    def test_json_error_response(self, client: DataMateClient):
+        """The JSON "detail" value appears in the raised message."""
+        fake = MagicMock(status_code=500, headers={"content-type": "application/json"})
+        fake.json.return_value = {"detail": "Internal server error"}
+
+        with pytest.raises(Exception) as caught:
+            client._handle_error_response(fake, "Test error")
+
+        message = str(caught.value)
+        assert "Test error" in message
+        assert "500" in message
+        assert "Internal server error" in message
+
+    def test_text_error_response(self, client: DataMateClient):
+        """For a text/plain reply the response text appears in the raised message."""
+        fake = MagicMock(status_code=404, headers={"content-type": "text/plain"})
+        fake.text = "Resource not found"
+
+        with pytest.raises(Exception) as caught:
+            client._handle_error_response(fake, "Test error")
+
+        message = str(caught.value)
+        assert "Test error" in message
+        assert "404" in message
+        assert "Resource not found" in message
+
+    def test_json_error_without_detail(self, client: DataMateClient):
+        """A JSON body lacking "detail" produces the "unknown error" wording."""
+        fake = MagicMock(status_code=500, headers={"content-type": "application/json"})
+        fake.json.return_value = {}
+
+        with pytest.raises(Exception) as caught:
+            client._handle_error_response(fake, "Test error")
+
+        message = str(caught.value)
+        assert "Test error" in message
+        assert "unknown error" in message
+
+
+class TestListKnowledgeBasesEdgeCases:
+    """Edge cases for list_knowledge_bases."""
+
+    def test_empty_list(self, mocker: MockFixture, client: DataMateClient):
+        # An empty "content" list must come back as an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": {"content": []}})
+
+        assert client.list_knowledge_bases() == []
+
+    def test_no_data_field(self, mocker: MockFixture, client: DataMateClient):
+        # A body without a "data" key must also yield an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {})
+
+        assert client.list_knowledge_bases() == []
+
+    def test_default_parameters(self, mocker: MockFixture, client: DataMateClient):
+        """Calling without arguments sends the default paging values."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": {"content": [{"id": "kb1"}]}})
+
+        client.list_knowledge_bases()
+
+        # Expected defaults in the request: page 0, size 20, no headers.
+        session.post.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/list",
+            json={"page": 0, "size": 20},
+            headers={},
+        )
+
+
+class TestGetKnowledgeBaseFilesEdgeCases:
+    """Edge cases for get_knowledge_base_files."""
+
+    def test_empty_file_list(self, mocker: MockFixture, client: DataMateClient):
+        # An empty "content" list must come back as an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {"data": {"content": []}})
+
+        assert client.get_knowledge_base_files("kb1") == []
+
+    def test_no_data_field(self, mocker: MockFixture, client: DataMateClient):
+        # A body without a "data" key must also yield an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {})
+
+        assert client.get_knowledge_base_files("kb1") == []
+
+    def test_with_authorization(self, mocker: MockFixture, client: DataMateClient):
+        """A supplied authorization value is sent as the Authorization header."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.get.return_value = _mock_response(mocker, 200, {"data": {"content": [{"id": "f1"}]}})
+
+        client.get_knowledge_base_files("kb1", authorization="Bearer token")
+
+        # Only the request itself is checked; the returned files are ignored.
+        session.get.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/kb1/files",
+            headers={"Authorization": "Bearer token"},
+        )
+
+
+class TestRetrieveKnowledgeBaseEdgeCases:
+    """Edge cases for retrieve_knowledge_base."""
+
+    def test_empty_results(self, mocker: MockFixture, client: DataMateClient):
+        # An empty "data" list must come back as an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": []})
+
+        assert client.retrieve_knowledge_base("query", ["kb1"]) == []
+
+    def test_no_data_field(self, mocker: MockFixture, client: DataMateClient):
+        # A body without a "data" key must also yield an empty list.
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {})
+
+        assert client.retrieve_knowledge_base("query", ["kb1"]) == []
+
+    def test_default_parameters(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": []})
+        expected_body = {
+            "query": "query",
+            "topK": 10,
+            "threshold": 0.2,
+            "knowledgeBaseIds": ["kb1"],
+        }
+
+        client.retrieve_knowledge_base("query", ["kb1"])
+        session.post.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/retrieve",
+            json=expected_body,
+            headers={},
+        )
+
+    def test_custom_timeout(self, mocker: MockFixture, client: DataMateClient):
+        """The httpx.Client built for a retrieve call gets twice the client's timeout."""
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": []})
+
+        client.retrieve_knowledge_base("query", ["kb1"])
+
+        # Verify timeout is doubled for retrieve (1.0 * 2 = 2.0)
+        patched_cls.assert_called_once()
+        assert patched_cls.call_args[1]["timeout"] == 2.0
+
+    def test_multiple_knowledge_base_ids(self, mocker: MockFixture, client: DataMateClient):
+        patched_cls = mocker.patch("sdk.nexent.datamate.datamate_client.httpx.Client")
+        session = patched_cls.return_value.__enter__.return_value
+        session.post.return_value = _mock_response(mocker, 200, {"data": []})
+        expected_body = {
+            "query": "query",
+            "topK": 5,
+            "threshold": 0.3,
+            "knowledgeBaseIds": ["kb1", "kb2", "kb3"],
+        }
+
+        client.retrieve_knowledge_base("query", ["kb1", "kb2", "kb3"], top_k=5, threshold=0.3)
+        session.post.assert_called_once_with(
+            "http://datamate.local:30000/api/knowledge-base/retrieve",
+            json=expected_body,
+            headers={},
+        )
+
+
+class TestSyncAllKnowledgeBasesEdgeCases:
+    """Edge cases for sync_all_knowledge_bases."""
+
+    def test_empty_knowledge_bases_list(self, mocker: MockFixture, client: DataMateClient):
+        mocker.patch.object(client, "list_knowledge_bases", return_value=[])
+
+        outcome = client.sync_all_knowledge_bases()
+
+        assert outcome["success"] is True
+        assert outcome["total_count"] == 0
+        assert outcome["knowledge_bases"] == []
+
+    def test_all_success(self, mocker: MockFixture, client: DataMateClient):
+        """When every file lookup succeeds, no entry carries an "error" key."""
+        kb_list = [{"id": "kb1"}, {"id": "kb2"}]
+        files_per_kb = [[{"id": "f1"}], [{"id": "f2"}]]
+        mocker.patch.object(client, "list_knowledge_bases", return_value=kb_list)
+        mocker.patch.object(client, "get_knowledge_base_files", side_effect=files_per_kb)
+
+        outcome = client.sync_all_knowledge_bases()
+
+        assert outcome["success"] is True
+        assert outcome["total_count"] == 2
+        # File counts are checked for both entries first, then the absence of errors.
+        for index in (0, 1):
+            assert len(outcome["knowledge_bases"][index]["files"]) == 1
+        for index in (0, 1):
+            assert "error" not in outcome["knowledge_bases"][index]
+
+    def test_with_authorization(self, mocker: MockFixture, client: DataMateClient):
+        list_spy = mocker.patch.object(
+            client, "list_knowledge_bases", return_value=[{"id": "kb1"}]
+        )
+        files_spy = mocker.patch.object(
+            client, "get_knowledge_base_files", return_value=[{"id": "f1"}]
+        )
+
+        client.sync_all_knowledge_bases(authorization="Bearer token")
+
+        list_spy.assert_called_once_with(authorization="Bearer token")
+        files_spy.assert_called_once_with("kb1", authorization="Bearer token")
+
+
+class TestClientInitialization:
+    """Constructor behaviour of DataMateClient."""
+
+    def test_default_timeout(self):
+        # Omitting timeout is expected to leave the client at 30.0.
+        assert DataMateClient(base_url="http://test.com").timeout == 30.0
+
+    def test_custom_timeout(self):
+        # An explicit timeout is expected to be stored unchanged.
+        assert DataMateClient(base_url="http://test.com", timeout=5.0).timeout == 5.0
+
+    def test_base_url_stripping(self):
+        instance = DataMateClient(base_url="http://test.com/", timeout=1.0)
+        assert instance.base_url == "http://test.com"
+        # The stripped base must also give a single slash when a path is joined.
+        assert instance._build_url("/api/test") == "http://test.com/api/test"
+
+
diff --git a/test/sdk/vector_database/__init__.py b/test/sdk/vector_database/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/sdk/vector_database/test_datamate_core.py b/test/sdk/vector_database/test_datamate_core.py
new file mode 100644
index 000000000..70c79dc73
--- /dev/null
+++ b/test/sdk/vector_database/test_datamate_core.py
@@ -0,0 +1,157 @@
+import pytest
+from unittest.mock import MagicMock, patch
+from datetime import datetime
+
+from sdk.nexent.vector_database import datamate_core
+
+
+def test_parse_timestamp_variants():
+    """Run _parse_timestamp over each input form and check the millisecond result."""
+    parse_ts = datamate_core._parse_timestamp
+    # A missing value falls back to the supplied default.
+    assert parse_ts(None, default=7) == 7
+
+    # An integer that is already in milliseconds is returned unchanged.
+    millis = 1600000000000
+    assert parse_ts(millis) == millis
+
+    # An integer in seconds (less than 1e10) is scaled to milliseconds.
+    secs = 1600000000
+    assert parse_ts(secs) == secs * 1000
+
+    # An ISO8601 string with a trailing Z.
+    iso_text = "2020-09-13T12:00:00Z"
+    utc_moment = datetime.fromisoformat(iso_text.replace("Z", "+00:00"))
+    assert parse_ts(iso_text) == int(utc_moment.timestamp() * 1000)
+
+    # A numeric string is treated as seconds; an unparsable one yields the default.
+    assert parse_ts("123456") == 123456 * 1000
+    assert parse_ts("not-a-ts", default=11) == 11
+
+
+@patch("sdk.nexent.vector_database.datamate_core.DataMateClient")
+def test_user_indices_and_count(mock_client_cls):
+    """Index listing, existence checks and document counts against a stubbed client."""
+    stub = mock_client_cls.return_value
+    stub.list_knowledge_bases.return_value = [{"id": 1}, {"no_id": True}, {"id": "2"}]
+    stub.get_knowledge_base_files.return_value = [{"fileName": "a"}, {"fileName": "b"}]
+
+    core = datamate_core.DataMateCore(base_url="http://example")
+
+    # Entries without an id are dropped and the remaining ids come back as strings.
+    assert core.get_user_indices() == ["1", "2"]
+
+    # Only the listed ids are reported as existing.
+    assert core.check_index_exists("1") is True
+    assert core.check_index_exists("missing") is False
+
+    # The stub returns two files, so both the chunk total and the document count are 2.
+    chunks = core.get_index_chunks("1")
+    assert isinstance(chunks, dict)
+    assert chunks["total"] == 2
+    assert core.count_documents("1") == 2
+
+
+@patch("sdk.nexent.vector_database.datamate_core.DataMateClient")
+def test_hybrid_search_and_retrieve(mock_client_cls):
+    """hybrid_search returns the client's retrieve result and forwards its arguments."""
+    stub = mock_client_cls.return_value
+    stub.retrieve_knowledge_base.return_value = [{"id": "res1"}]
+
+    core = datamate_core.DataMateCore(base_url="http://example")
+    hits = core.hybrid_search(["kb1"], "query", embedding_model=None, top_k=2, weight_accurate=0.1)
+    assert hits == [{"id": "res1"}]
+    stub.retrieve_knowledge_base.assert_called_once_with("query", ["kb1"], 2, 0.1)
+
+
+@patch("sdk.nexent.vector_database.datamate_core.DataMateClient")
+def test_get_documents_detail_parsing(mock_client_cls):
+    """get_documents_detail maps one raw file record onto the expected detail keys."""
+    file_record = {
+        "path_or_url": "s3://bucket/file.txt",
+        "fileName": "file.txt",
+        "fileSize": 12345,
+        "createdAt": "2021-01-01T00:00:00Z",
+        "chunkCount": 3,
+        "errMsg": "no error",
+    }
+    stub = mock_client_cls.return_value
+    stub.get_knowledge_base_files.return_value = [file_record]
+
+    core = datamate_core.DataMateCore(base_url="http://example")
+    details = core.get_documents_detail("kb1")
+    # Exactly one record went in, so exactly one detail entry is expected back.
+    assert isinstance(details, list) and len(details) == 1
+    entry = details[0]
+    assert entry["file"] == "file.txt"
+    assert entry["file_size"] == 12345
+    assert entry["chunk_count"] == 3
+    assert isinstance(entry["create_time"], int) and entry["create_time"] > 0
+    assert entry["error_reason"] == "no error"
+
+
+@patch("sdk.nexent.vector_database.datamate_core.DataMateClient")
+def test_get_indices_detail_success_and_error(mock_client_cls):
+    """get_indices_detail reports details for a good id and an error entry for a failing one."""
+    good_info = {
+        "fileCount": 10,
+        "name": "KnowledgeBaseName",
+        "chunkCount": 20,
+        "storeSize": 999,
+        "processSource": "Unstructured",
+        "embedding": {"modelName": "embed-v1"},
+        "createdAt": "2022-01-01T00:00:00Z",
+        "updatedAt": "2022-02-01T00:00:00Z",
+    }
+
+    def fake_get_info(kb_id):
+        if kb_id == "bad":
+            raise RuntimeError("boom")
+        return dict(good_info)
+
+    stub = mock_client_cls.return_value
+    stub.get_knowledge_base_info.side_effect = fake_get_info
+    core = datamate_core.DataMateCore(base_url="http://example")
+    details, names = core.get_indices_detail(["good", "bad"], embedding_dim=512)
+
+    # success case
+    assert "good" in details
+    assert details["good"]["base_info"]["embedding_model"] == "embed-v1"
+    assert details["good"]["base_info"]["embedding_dim"] == 512
+    assert "KnowledgeBaseName" in names
+
+    # error case
+    assert "bad" in details
+    assert "error" in details["bad"]
+
+
+@patch("sdk.nexent.vector_database.datamate_core.DataMateClient")
+def test_not_implemented_methods_raise(mock_client_cls):
+    """Each listed DataMateCore operation must raise NotImplementedError.
+
+    The calls run in the listed order and the test stops at the first one that does not raise.
+    """
+    mock_client_cls.return_value = MagicMock()
+    core = datamate_core.DataMateCore(base_url="http://example")
+
+    # (method name, positional arguments) for every operation expected to be unsupported.
+    unsupported_calls = [
+        ("create_index", ("i",)),
+        ("delete_index", ("i",)),
+        ("vectorize_documents", ("i", None, [])),
+        ("delete_documents", ("i", "path")),
+        ("create_chunk", ("i", {})),
+        ("update_chunk", ("i", "cid", {})),
+        ("delete_chunk", ("i", "cid")),
+        ("search", ("i", {})),
+        ("multi_search", ([], "i")),
+        ("accurate_search", (["i"], "q")),
+        ("semantic_search", (["i"], "q", None)),
+    ]
+
+    for method_name, call_args in unsupported_calls:
+        with pytest.raises(NotImplementedError):
+            # The lookup stays inside the block, as the attribute access did originally.
+            getattr(core, method_name)(*call_args)
+
+
diff --git a/test/sdk/vector_database/test_elasticsearch_core.py b/test/sdk/vector_database/test_elasticsearch_core.py
index f9f878852..40b29853a 100644
--- a/test/sdk/vector_database/test_elasticsearch_core.py
+++ b/test/sdk/vector_database/test_elasticsearch_core.py
@@ -7,7 +7,6 @@
# Import the class under test
from sdk.nexent.vector_database.elasticsearch_core import ElasticSearchCore
-
# ----------------------------------------------------------------------------
# Fixtures
# ----------------------------------------------------------------------------
@@ -56,12 +55,12 @@ def test_preprocess_documents_with_complete_document(elasticsearch_core_instance
# Use the second document which has all fields
complete_doc = [sample_documents[1]]
content_field = "content"
-
+
result = elasticsearch_core_instance._preprocess_documents(complete_doc, content_field)
-
+
assert len(result) == 1
doc = result[0]
-
+
# Should preserve existing values
assert doc["content"] == "This is test content 2"
assert doc["title"] == "Test Document 2"
@@ -79,33 +78,33 @@ def test_preprocess_documents_with_incomplete_document(elasticsearch_core_instan
# Use the first document which is missing several fields
incomplete_doc = [sample_documents[0]]
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
# Mock time functions
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(incomplete_doc, content_field)
-
+
assert len(result) == 1
doc = result[0]
-
+
# Should preserve existing values
assert doc["content"] == "This is test content 1"
assert doc["title"] == "Test Document 1"
assert doc["filename"] == "test1.pdf"
assert doc["path_or_url"] == "/path/to/test1.pdf"
-
+
# Should add missing fields with default values
assert doc["create_time"] == "2025-01-15T10:30:00"
assert doc["date"] == "2025-01-15"
assert doc["file_size"] == 0
assert doc["process_source"] == "Unstructured"
-
+
# Should generate an ID
assert "id" in doc
assert doc["id"].startswith("1642234567_")
@@ -115,20 +114,20 @@ def test_preprocess_documents_with_incomplete_document(elasticsearch_core_instan
def test_preprocess_documents_with_multiple_documents(elasticsearch_core_instance, sample_documents):
"""Test preprocessing multiple documents."""
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
# Mock time functions
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(sample_documents, content_field)
-
+
assert len(result) == 2
-
+
# First document should have defaults added
doc1 = result[0]
assert doc1["create_time"] == "2025-01-15T10:30:00"
@@ -136,7 +135,7 @@ def test_preprocess_documents_with_multiple_documents(elasticsearch_core_instanc
assert doc1["file_size"] == 0
assert doc1["process_source"] == "Unstructured"
assert "id" in doc1
-
+
# Second document should preserve existing values
doc2 = result[1]
assert doc2["create_time"] == "2025-01-15T10:30:00"
@@ -155,20 +154,20 @@ def test_preprocess_documents_preserves_original_data(elasticsearch_core_instanc
}
]
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(original_docs, content_field)
-
+
# Original document should remain unchanged
assert original_docs[0] == {"content": "Original content", "title": "Original title"}
-
+
# Result should be a new document with added fields
assert result[0]["content"] == "Original content"
assert result[0]["title"] == "Original title"
@@ -182,9 +181,9 @@ def test_preprocess_documents_preserves_original_data(elasticsearch_core_instanc
def test_preprocess_documents_with_empty_list(elasticsearch_core_instance):
"""Test preprocessing an empty list of documents."""
content_field = "content"
-
+
result = elasticsearch_core_instance._preprocess_documents([], content_field)
-
+
assert result == []
@@ -196,27 +195,27 @@ def test_preprocess_documents_id_generation(elasticsearch_core_instance):
{"content": "Content 1"} # Same content as first
]
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(docs, content_field)
-
+
assert len(result) == 3
-
+
# All documents should have IDs
assert "id" in result[0]
assert "id" in result[1]
assert "id" in result[2]
-
+
# IDs should be different for different content
assert result[0]["id"] != result[1]["id"]
-
+
# Same content should generate same hash part (but might be different due to time)
id1_parts = result[0]["id"].split("_")
id3_parts = result[2]["id"].split("_")
@@ -237,19 +236,19 @@ def test_preprocess_documents_with_none_values(elasticsearch_core_instance):
}
]
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(docs, content_field)
-
+
doc = result[0]
-
+
# None values should be replaced with defaults
assert doc["file_size"] == 0
assert doc["create_time"] == "2025-01-15T10:30:00"
@@ -270,19 +269,19 @@ def test_preprocess_documents_with_zero_values(elasticsearch_core_instance):
}
]
content_field = "content"
-
+
with patch('time.strftime') as mock_strftime, \
patch('time.time') as mock_time, \
patch('time.gmtime') as mock_gmtime:
-
+
mock_strftime.side_effect = lambda fmt, t: "2025-01-15T10:30:00" if "T" in fmt else "2025-01-15"
mock_time.return_value = 1642234567
mock_gmtime.return_value = None
-
+
result = elasticsearch_core_instance._preprocess_documents(docs, content_field)
-
+
doc = result[0]
-
+
# Zero values should be preserved
assert doc["file_size"] == 0
assert doc["create_time"] == "2025-01-15T10:30:00"
@@ -760,12 +759,12 @@ def test_create_chunk_exception(elasticsearch_core_instance):
"""Test create_chunk raises exception when client.index fails."""
elasticsearch_core_instance.client = MagicMock()
elasticsearch_core_instance.client.index.side_effect = Exception("Index operation failed")
-
+
payload = {"id": "chunk-1", "content": "A"}
-
+
with pytest.raises(Exception) as exc_info:
elasticsearch_core_instance.create_chunk("kb-index", payload)
-
+
assert "Index operation failed" in str(exc_info.value)
elasticsearch_core_instance.client.index.assert_called_once()
@@ -779,10 +778,10 @@ def test_update_chunk_exception_from_resolve(elasticsearch_core_instance):
side_effect=Exception("Resolve failed"),
):
updates = {"content": "updated"}
-
+
with pytest.raises(Exception) as exc_info:
elasticsearch_core_instance.update_chunk("kb-index", "chunk-1", updates)
-
+
assert "Resolve failed" in str(exc_info.value)
elasticsearch_core_instance.client.update.assert_not_called()
@@ -796,12 +795,12 @@ def test_update_chunk_exception_from_update(elasticsearch_core_instance):
return_value="es-id-1",
):
elasticsearch_core_instance.client.update.side_effect = Exception("Update operation failed")
-
+
updates = {"content": "updated"}
-
+
with pytest.raises(Exception) as exc_info:
elasticsearch_core_instance.update_chunk("kb-index", "chunk-1", updates)
-
+
assert "Update operation failed" in str(exc_info.value)
elasticsearch_core_instance.client.update.assert_called_once()
@@ -816,7 +815,7 @@ def test_delete_chunk_exception_from_resolve(elasticsearch_core_instance):
):
with pytest.raises(Exception) as exc_info:
elasticsearch_core_instance.delete_chunk("kb-index", "chunk-1")
-
+
assert "Resolve failed" in str(exc_info.value)
elasticsearch_core_instance.client.delete.assert_not_called()
@@ -830,10 +829,10 @@ def test_delete_chunk_exception_from_delete(elasticsearch_core_instance):
return_value="es-id-1",
):
elasticsearch_core_instance.client.delete.side_effect = Exception("Delete operation failed")
-
+
with pytest.raises(Exception) as exc_info:
elasticsearch_core_instance.delete_chunk("kb-index", "chunk-1")
-
+
assert "Delete operation failed" in str(exc_info.value)
elasticsearch_core_instance.client.delete.assert_called_once()
diff --git a/test/sdk/vector_database/test_elasticsearch_core_coverage.py b/test/sdk/vector_database/test_elasticsearch_core_coverage.py
deleted file mode 100644
index 757bbc566..000000000
--- a/test/sdk/vector_database/test_elasticsearch_core_coverage.py
+++ /dev/null
@@ -1,731 +0,0 @@
-"""
-Supplementary test module for elasticsearch_core to improve code coverage
-
-Tests for functions not fully covered in the main test file.
-"""
-import pytest
-from unittest.mock import MagicMock, patch, mock_open
-import time
-import os
-import sys
-from typing import List, Dict, Any
-from datetime import datetime, timedelta
-
-# Add the project root to the path
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.abspath(os.path.join(current_dir, "../../.."))
-sys.path.insert(0, project_root)
-
-# Import the class under test
-from sdk.nexent.vector_database.elasticsearch_core import ElasticSearchCore, BulkOperation
-from elasticsearch import exceptions
-
-
-class TestElasticSearchCoreCoverage:
- """Test class for improving elasticsearch_core coverage"""
-
- @pytest.fixture
- def vdb_core(self):
- """Create an ElasticSearchCore instance for testing."""
- return ElasticSearchCore(
- host="http://localhost:9200",
- api_key="test_api_key",
- verify_certs=False,
- ssl_show_warn=False
- )
-
- def test_force_refresh_with_retry_success(self, vdb_core):
- """Test _force_refresh_with_retry successful refresh"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.refresh.return_value = {"_shards": {"total": 1, "successful": 1}}
-
- result = vdb_core._force_refresh_with_retry("test_index")
- assert result is True
- vdb_core.client.indices.refresh.assert_called_once_with(index="test_index")
-
- def test_force_refresh_with_retry_failure_retry(self, vdb_core):
- """Test _force_refresh_with_retry with retries"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.refresh.side_effect = [
- Exception("Connection error"),
- Exception("Still failing"),
- {"_shards": {"total": 1, "successful": 1}}
- ]
-
- with patch('time.sleep'): # Mock sleep to speed up test
- result = vdb_core._force_refresh_with_retry("test_index", max_retries=3)
- assert result is True
- assert vdb_core.client.indices.refresh.call_count == 3
-
- def test_force_refresh_with_retry_max_retries_exceeded(self, vdb_core):
- """Test _force_refresh_with_retry when max retries exceeded"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.refresh.side_effect = Exception("Persistent error")
-
- with patch('time.sleep'): # Mock sleep to speed up test
- result = vdb_core._force_refresh_with_retry("test_index", max_retries=2)
- assert result is False
- assert vdb_core.client.indices.refresh.call_count == 2
-
- def test_ensure_index_ready_success(self, vdb_core):
- """Test _ensure_index_ready successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.cluster.health.return_value = {"status": "green"}
- vdb_core.client.search.return_value = {"hits": {"total": {"value": 0}}}
-
- result = vdb_core._ensure_index_ready("test_index")
- assert result is True
-
- def test_ensure_index_ready_yellow_status(self, vdb_core):
- """Test _ensure_index_ready with yellow status"""
- vdb_core.client = MagicMock()
- vdb_core.client.cluster.health.return_value = {"status": "yellow"}
- vdb_core.client.search.return_value = {"hits": {"total": {"value": 0}}}
-
- result = vdb_core._ensure_index_ready("test_index")
- assert result is True
-
- def test_ensure_index_ready_timeout(self, vdb_core):
- """Test _ensure_index_ready timeout scenario"""
- vdb_core.client = MagicMock()
- vdb_core.client.cluster.health.return_value = {"status": "red"}
-
- with patch('time.sleep'): # Mock sleep to speed up test
- result = vdb_core._ensure_index_ready("test_index", timeout=1)
- assert result is False
-
- def test_ensure_index_ready_exception(self, vdb_core):
- """Test _ensure_index_ready with exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.cluster.health.side_effect = Exception("Connection error")
-
- with patch('time.sleep'): # Mock sleep to speed up test
- result = vdb_core._ensure_index_ready("test_index", timeout=1)
- assert result is False
-
- def test_apply_bulk_settings_success(self, vdb_core):
- """Test _apply_bulk_settings successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.put_settings.return_value = {"acknowledged": True}
-
- vdb_core._apply_bulk_settings("test_index")
- vdb_core.client.indices.put_settings.assert_called_once()
-
- def test_apply_bulk_settings_failure(self, vdb_core):
- """Test _apply_bulk_settings with exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.put_settings.side_effect = Exception("Settings error")
-
- # Should not raise exception, just log warning
- vdb_core._apply_bulk_settings("test_index")
- vdb_core.client.indices.put_settings.assert_called_once()
-
- def test_restore_normal_settings_success(self, vdb_core):
- """Test _restore_normal_settings successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.put_settings.return_value = {"acknowledged": True}
- vdb_core._force_refresh_with_retry = MagicMock(return_value=True)
-
- vdb_core._restore_normal_settings("test_index")
- vdb_core.client.indices.put_settings.assert_called_once()
- vdb_core._force_refresh_with_retry.assert_called_once_with("test_index")
-
- def test_restore_normal_settings_failure(self, vdb_core):
- """Test _restore_normal_settings with exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.put_settings.side_effect = Exception("Settings error")
-
- # Should not raise exception, just log warning
- vdb_core._restore_normal_settings("test_index")
- vdb_core.client.indices.put_settings.assert_called_once()
-
- def test_delete_index_success(self, vdb_core):
- """Test delete_index successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.delete.return_value = {"acknowledged": True}
-
- result = vdb_core.delete_index("test_index")
- assert result is True
- vdb_core.client.indices.delete.assert_called_once_with(index="test_index")
-
- def test_delete_index_not_found(self, vdb_core):
- """Test delete_index when index not found"""
- vdb_core.client = MagicMock()
- # Create a proper NotFoundError with required parameters
- not_found_error = exceptions.NotFoundError(404, "Index not found", {"error": {"type": "index_not_found_exception"}})
- vdb_core.client.indices.delete.side_effect = not_found_error
-
- result = vdb_core.delete_index("test_index")
- assert result is False
- vdb_core.client.indices.delete.assert_called_once_with(index="test_index")
-
- def test_delete_index_general_exception(self, vdb_core):
- """Test delete_index with general exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.delete.side_effect = Exception("General error")
-
- result = vdb_core.delete_index("test_index")
- assert result is False
- vdb_core.client.indices.delete.assert_called_once_with(index="test_index")
-
- def test_handle_bulk_errors_no_errors(self, vdb_core):
- """Test _handle_bulk_errors when no errors in response"""
- response = {"errors": False, "items": []}
- vdb_core._handle_bulk_errors(response)
- # Should not raise any exceptions
-
- def test_handle_bulk_errors_with_version_conflict(self, vdb_core):
- """Test _handle_bulk_errors with version conflict (should be ignored)"""
- response = {
- "errors": True,
- "items": [
- {
- "index": {
- "error": {
- "type": "version_conflict_engine_exception",
- "reason": "Document already exists",
- "caused_by": {
- "type": "version_conflict",
- "reason": "Document version conflict"
- }
- }
- }
- }
- ]
- }
- vdb_core._handle_bulk_errors(response)
- # Should not raise any exceptions for version conflicts
-
- def test_handle_bulk_errors_with_fatal_error(self, vdb_core):
- """Test _handle_bulk_errors with fatal error"""
- response = {
- "errors": True,
- "items": [
- {
- "index": {
- "error": {
- "type": "mapper_parsing_exception",
- "reason": "Failed to parse field",
- "caused_by": {
- "type": "json_parse_exception",
- "reason": "Unexpected character"
- }
- }
- }
- }
- ]
- }
- with pytest.raises(Exception) as exc_info:
- vdb_core._handle_bulk_errors(response)
- assert "Bulk indexing failed" in str(exc_info.value)
-
- def test_handle_bulk_errors_with_caused_by(self, vdb_core):
- """Test _handle_bulk_errors with caused_by information"""
- response = {
- "errors": True,
- "items": [
- {
- "index": {
- "error": {
- "type": "illegal_argument_exception",
- "reason": "Invalid argument",
- "caused_by": {
- "type": "json_parse_exception",
- "reason": "JSON parsing failed"
- }
- }
- }
- }
- ]
- }
- with pytest.raises(Exception) as exc_info:
- vdb_core._handle_bulk_errors(response)
- assert "Invalid argument" in str(exc_info.value)
- assert "JSON parsing failed" in str(exc_info.value)
-
- def test_delete_documents_success(self, vdb_core):
- """Test delete_documents successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.delete_by_query.return_value = {"deleted": 5}
-
- result = vdb_core.delete_documents("test_index", "/path/to/file.pdf")
- assert result == 5
- vdb_core.client.delete_by_query.assert_called_once()
-
- def test_delete_documents_exception(self, vdb_core):
- """Test delete_documents with exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.delete_by_query.side_effect = Exception("Delete error")
-
- result = vdb_core.delete_documents("test_index", "/path/to/file.pdf")
- assert result == 0
- vdb_core.client.delete_by_query.assert_called_once()
-
- def test_get_index_chunks_not_found(self, vdb_core):
- """Ensure get_index_chunks handles missing index gracefully."""
- vdb_core.client = MagicMock()
- vdb_core.client.count.side_effect = exceptions.NotFoundError(
- 404, "missing", {})
-
- result = vdb_core.get_index_chunks("missing-index")
-
- assert result == {"chunks": [], "total": 0,
- "page": None, "page_size": None}
- vdb_core.client.clear_scroll.assert_not_called()
-
- def test_get_index_chunks_cleanup_warning(self, vdb_core):
- """Ensure clear_scroll errors are swallowed."""
- vdb_core.client = MagicMock()
- vdb_core.client.count.return_value = {"count": 1}
- vdb_core.client.search.return_value = {
- "_scroll_id": "scroll123",
- "hits": {"hits": [{"_id": "doc-1", "_source": {"content": "A"}}]}
- }
- vdb_core.client.scroll.return_value = {
- "_scroll_id": "scroll123",
- "hits": {"hits": []}
- }
- vdb_core.client.clear_scroll.side_effect = Exception("cleanup-failed")
-
- result = vdb_core.get_index_chunks("kb-index")
-
- assert len(result["chunks"]) == 1
- assert result["chunks"][0]["id"] == "doc-1"
- vdb_core.client.clear_scroll.assert_called_once_with(
- scroll_id="scroll123")
-
- def test_create_index_request_error_existing(self, vdb_core):
- """Ensure RequestError with resource already exists still succeeds."""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.exists.return_value = False
- meta = MagicMock(status=400)
- vdb_core.client.indices.create.side_effect = exceptions.RequestError(
- "resource_already_exists_exception", meta, {"error": {"reason": "exists"}}
- )
- vdb_core._ensure_index_ready = MagicMock(return_value=True)
-
- assert vdb_core.create_index("test_index") is True
- vdb_core._ensure_index_ready.assert_called_once_with("test_index")
-
- def test_create_index_request_error_failure(self, vdb_core):
- """Ensure create_index returns False for non recoverable RequestError."""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.exists.return_value = False
- meta = MagicMock(status=400)
- vdb_core.client.indices.create.side_effect = exceptions.RequestError(
- "validation_exception", meta, {"error": {"reason": "bad"}}
- )
-
- assert vdb_core.create_index("test_index") is False
-
- def test_create_index_general_exception(self, vdb_core):
- """Ensure unexpected exception from create_index returns False."""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.exists.return_value = False
- vdb_core.client.indices.create.side_effect = Exception("boom")
-
- assert vdb_core.create_index("test_index") is False
-
- def test_force_refresh_with_retry_zero_attempts(self, vdb_core):
- """Ensure guard clause without attempts returns False."""
- vdb_core.client = MagicMock()
- result = vdb_core._force_refresh_with_retry("idx", max_retries=0)
- assert result is False
-
- def test_bulk_operation_context_preexisting_operation(self, vdb_core):
- """Ensure context skips apply/restore when operations remain."""
- existing = BulkOperation(
- index_name="test_index",
- operation_id="existing",
- start_time=datetime.utcnow(),
- expected_duration=timedelta(seconds=30),
- )
- vdb_core._bulk_operations = {"test_index": [existing]}
-
- with patch.object(vdb_core, "_apply_bulk_settings") as mock_apply, \
- patch.object(vdb_core, "_restore_normal_settings") as mock_restore:
-
- with vdb_core.bulk_operation_context("test_index") as op_id:
- assert op_id != existing.operation_id
-
- mock_apply.assert_not_called()
- mock_restore.assert_not_called()
- assert vdb_core._bulk_operations["test_index"] == [existing]
-
- def test_get_user_indices_exception(self, vdb_core):
- """Ensure get_user_indices returns empty list on failure."""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.get_alias.side_effect = Exception("failure")
-
- assert vdb_core.get_user_indices() == []
-
- def test_check_index_exists(self, vdb_core):
- """Ensure check_index_exists delegates to client."""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.exists.return_value = True
-
- assert vdb_core.check_index_exists("idx") is True
- vdb_core.client.indices.exists.assert_called_once_with(index="idx")
-
- def test_small_batch_insert_sets_embedding_model_name(self, vdb_core):
- """_small_batch_insert should attach embedding model name."""
- vdb_core.client = MagicMock()
- vdb_core.client.bulk.return_value = {"errors": False, "items": []}
- vdb_core._preprocess_documents = MagicMock(return_value=[{"content": "body"}])
- vdb_core._handle_bulk_errors = MagicMock()
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.return_value = [[0.1, 0.2]]
- mock_embedding_model.embedding_model_name = "demo-model"
-
- vdb_core._small_batch_insert("idx", [{"content": "body"}], "content", mock_embedding_model)
- operations = vdb_core.client.bulk.call_args.kwargs["operations"]
- inserted_doc = operations[1]
- assert inserted_doc["embedding_model_name"] == "demo-model"
-
- def test_large_batch_insert_sets_default_embedding_model_name(self, vdb_core):
- """_large_batch_insert should fall back to 'unknown' when attr missing."""
- vdb_core.client = MagicMock()
- vdb_core.client.bulk.return_value = {"errors": False, "items": []}
- vdb_core._preprocess_documents = MagicMock(return_value=[{"content": "body"}])
- vdb_core._handle_bulk_errors = MagicMock()
-
- class SimpleEmbedding:
- def get_embeddings(self, texts):
- return [[0.1 for _ in texts]]
-
- embedding_model = SimpleEmbedding()
-
- vdb_core._large_batch_insert("idx", [{"content": "body"}], 10, "content", embedding_model)
- operations = vdb_core.client.bulk.call_args.kwargs["operations"]
- inserted_doc = operations[1]
- assert inserted_doc["embedding_model_name"] == "unknown"
-
- def test_large_batch_insert_bulk_exception(self, vdb_core):
- """Ensure bulk exceptions are handled and indexing continues."""
- vdb_core.client = MagicMock()
- vdb_core.client.bulk.side_effect = Exception("bulk error")
- vdb_core._preprocess_documents = MagicMock(return_value=[{"content": "body"}])
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.return_value = [[0.1]]
-
- with pytest.raises(Exception) as exc_info:
- vdb_core._large_batch_insert("idx", [{"content": "body"}], 1, "content", mock_embedding_model)
- assert "bulk error" in str(exc_info.value)
-
- def test_large_batch_insert_preprocess_exception(self, vdb_core):
- """Ensure outer exception handler returns zero on preprocess failure."""
- vdb_core._preprocess_documents = MagicMock(side_effect=Exception("fail"))
-
- mock_embedding_model = MagicMock()
- with pytest.raises(Exception) as exc_info:
- vdb_core._large_batch_insert("idx", [{"content": "body"}], 10, "content", mock_embedding_model)
- assert "fail" in str(exc_info.value)
-
- def test_count_documents_success(self, vdb_core):
- """Ensure count_documents returns ES count."""
- vdb_core.client = MagicMock()
- vdb_core.client.count.return_value = {"count": 42}
-
- assert vdb_core.count_documents("idx") == 42
-
- def test_count_documents_exception(self, vdb_core):
- """Ensure count_documents returns zero on error."""
- vdb_core.client = MagicMock()
- vdb_core.client.count.side_effect = Exception("fail")
-
- assert vdb_core.count_documents("idx") == 0
-
- def test_search_and_multi_search_passthrough(self, vdb_core):
- """Ensure search helpers delegate to the client."""
- vdb_core.client = MagicMock()
- vdb_core.client.search.return_value = {"hits": {}}
- vdb_core.client.msearch.return_value = {"responses": []}
-
- assert vdb_core.search("idx", {"query": {"match_all": {}}}) == {"hits": {}}
- assert vdb_core.multi_search([{"query": {"match_all": {}}}], "idx") == {"responses": []}
-
- def test_exec_query_formats_results(self, vdb_core):
- """Ensure exec_query strips metadata and exposes scores."""
- vdb_core.client = MagicMock()
- vdb_core.client.search.return_value = {
- "hits": {
- "hits": [
- {
- "_score": 1.23,
- "_index": "idx",
- "_source": {"id": "doc1", "content": "body"},
- }
- ]
- }
- }
-
- results = vdb_core.exec_query("idx", {"query": {}})
- assert results == [
- {"score": 1.23, "document": {"id": "doc1", "content": "body"}, "index": "idx"}
- ]
-
- def test_hybrid_search_missing_fields_logged_for_accurate(self, vdb_core):
- """Ensure hybrid_search tolerates missing accurate fields."""
- mock_embedding_model = MagicMock()
- with patch.object(vdb_core, "accurate_search", return_value=[{"score": 1.0}]), \
- patch.object(vdb_core, "semantic_search", return_value=[]):
- assert vdb_core.hybrid_search(["idx"], "query", mock_embedding_model) == []
-
- def test_hybrid_search_missing_fields_logged_for_semantic(self, vdb_core):
- """Ensure hybrid_search tolerates missing semantic fields."""
- mock_embedding_model = MagicMock()
- with patch.object(vdb_core, "accurate_search", return_value=[]), \
- patch.object(vdb_core, "semantic_search", return_value=[{"score": 0.5}]):
- assert vdb_core.hybrid_search(["idx"], "query", mock_embedding_model) == []
-
- def test_hybrid_search_faulty_combined_results(self, vdb_core):
- """Inject faulty combined result to hit KeyError handling in final loop."""
- mock_embedding_model = MagicMock()
- accurate_payload = [
- {"score": 1.0, "document": {"id": "doc1"}, "index": "idx"}
- ]
-
- with patch.object(vdb_core, "accurate_search", return_value=accurate_payload), \
- patch.object(vdb_core, "semantic_search", return_value=[]):
-
- injected = {"done": False}
-
- def tracer(frame, event, arg):
- if (
- frame.f_code.co_name == "hybrid_search"
- and event == "line"
- and frame.f_lineno == 788
- and not injected["done"]
- ):
- frame.f_locals["combined_results"]["faulty"] = {
- "accurate_score": 0,
- "semantic_score": 0,
- }
- injected["done"] = True
- return tracer
-
- sys.settrace(tracer)
- try:
- results = vdb_core.hybrid_search(["idx"], "query", mock_embedding_model)
- finally:
- sys.settrace(None)
-
- assert len(results) == 1
-
- def test_get_documents_detail_exception(self, vdb_core):
- """Ensure get_documents_detail returns empty list on failure."""
- vdb_core.client = MagicMock()
- vdb_core.client.search.side_effect = Exception("fail")
-
- assert vdb_core.get_documents_detail("idx") == []
-
- def test_get_indices_detail_success(self, vdb_core):
- """Test get_indices_detail successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.stats.return_value = {
- "indices": {
- "test_index": {
- "primaries": {
- "docs": {"count": 100},
- "store": {"size_in_bytes": 1024},
- "search": {"query_total": 50},
- "request_cache": {"hit_count": 25}
- }
- }
- }
- }
- vdb_core.client.indices.get_settings.return_value = {
- "test_index": {
- "settings": {
- "index": {
- "number_of_shards": "1",
- "number_of_replicas": "0",
- "creation_date": "1640995200000"
- }
- }
- }
- }
- vdb_core.client.search.return_value = {
- "aggregations": {
- "unique_path_or_url_count": {"value": 10},
- "process_sources": {"buckets": [{"key": "test_source"}]},
- "embedding_models": {"buckets": [{"key": "test_model"}]}
- }
- }
-
- result = vdb_core.get_indices_detail(["test_index"])
- assert "test_index" in result
- assert "base_info" in result["test_index"]
- assert "search_performance" in result["test_index"]
-
- def test_get_indices_detail_exception(self, vdb_core):
- """Test get_indices_detail with exception"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.stats.side_effect = Exception("Stats error")
-
- result = vdb_core.get_indices_detail(["test_index"])
- # The function returns error info for failed indices, not empty dict
- assert "test_index" in result
- assert "error" in result["test_index"]
-
- def test_get_indices_detail_with_embedding_dim(self, vdb_core):
- """Test get_indices_detail with embedding dimension"""
- vdb_core.client = MagicMock()
- vdb_core.client.indices.stats.return_value = {
- "indices": {
- "test_index": {
- "primaries": {
- "docs": {"count": 100},
- "store": {"size_in_bytes": 1024},
- "search": {"query_total": 50},
- "request_cache": {"hit_count": 25}
- }
- }
- }
- }
- vdb_core.client.indices.get_settings.return_value = {
- "test_index": {
- "settings": {
- "index": {
- "number_of_shards": "1",
- "number_of_replicas": "0",
- "creation_date": "1640995200000"
- }
- }
- }
- }
- vdb_core.client.search.return_value = {
- "aggregations": {
- "unique_path_or_url_count": {"value": 10},
- "process_sources": {"buckets": [{"key": "test_source"}]},
- "embedding_models": {"buckets": [{"key": "test_model"}]}
- }
- }
-
- result = vdb_core.get_indices_detail(["test_index"], embedding_dim=512)
- assert "test_index" in result
- assert "base_info" in result["test_index"]
- assert "search_performance" in result["test_index"]
- assert result["test_index"]["base_info"]["embedding_dim"] == 512
-
- def test_bulk_operation_context_success(self, vdb_core):
- """Test bulk_operation_context successful case"""
- vdb_core._bulk_operations = {}
- vdb_core._operation_counter = 0
- vdb_core._settings_lock = MagicMock()
- vdb_core._apply_bulk_settings = MagicMock()
- vdb_core._restore_normal_settings = MagicMock()
-
- with vdb_core.bulk_operation_context("test_index") as operation_id:
- assert operation_id is not None
- assert "test_index" in vdb_core._bulk_operations
- vdb_core._apply_bulk_settings.assert_called_once_with("test_index")
-
- # After context exit, should restore settings
- vdb_core._restore_normal_settings.assert_called_once_with("test_index")
-
- def test_bulk_operation_context_multiple_operations(self, vdb_core):
- """Test bulk_operation_context with multiple operations"""
- vdb_core._bulk_operations = {}
- vdb_core._operation_counter = 0
- vdb_core._settings_lock = MagicMock()
- vdb_core._apply_bulk_settings = MagicMock()
- vdb_core._restore_normal_settings = MagicMock()
-
- # First operation
- with vdb_core.bulk_operation_context("test_index") as op1:
- assert op1 is not None
- vdb_core._apply_bulk_settings.assert_called_once()
-
- # After first operation exits, settings should be restored
- vdb_core._restore_normal_settings.assert_called_once_with("test_index")
-
- # Second operation - will apply settings again since first operation is done
- with vdb_core.bulk_operation_context("test_index") as op2:
- assert op2 is not None
- # Should call apply_bulk_settings again since first operation is done
- assert vdb_core._apply_bulk_settings.call_count == 2
-
- # After second operation exits, should restore settings again
- assert vdb_core._restore_normal_settings.call_count == 2
-
- def test_small_batch_insert_success(self, vdb_core):
- """Test _small_batch_insert successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.bulk.return_value = {"items": [], "errors": False}
- vdb_core._preprocess_documents = MagicMock(return_value=[
- {"content": "test content", "title": "test"}
- ])
- vdb_core._handle_bulk_errors = MagicMock()
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.return_value = [[0.1, 0.2, 0.3]]
- mock_embedding_model.embedding_model_name = "test_model"
-
- documents = [{"content": "test content", "title": "test"}]
-
- result = vdb_core._small_batch_insert("test_index", documents, "content", mock_embedding_model)
- assert result == 1
- vdb_core.client.bulk.assert_called_once()
-
- def test_small_batch_insert_exception(self, vdb_core):
- """Test _small_batch_insert with exception"""
- vdb_core._preprocess_documents = MagicMock(side_effect=Exception("Preprocess error"))
-
- mock_embedding_model = MagicMock()
- documents = [{"content": "test content", "title": "test"}]
-
- with pytest.raises(Exception) as exc_info:
- vdb_core._small_batch_insert("test_index", documents, "content", mock_embedding_model)
- assert "Preprocess error" in str(exc_info.value)
-
- def test_large_batch_insert_success(self, vdb_core):
- """Test _large_batch_insert successful case"""
- vdb_core.client = MagicMock()
- vdb_core.client.bulk.return_value = {"items": [], "errors": False}
- vdb_core._preprocess_documents = MagicMock(return_value=[
- {"content": "test content", "title": "test"}
- ])
- vdb_core._handle_bulk_errors = MagicMock()
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.return_value = [[0.1, 0.2, 0.3]]
- mock_embedding_model.embedding_model_name = "test_model"
-
- documents = [{"content": "test content", "title": "test"}]
-
- result = vdb_core._large_batch_insert("test_index", documents, 10, "content", mock_embedding_model)
- assert result == 1
- vdb_core.client.bulk.assert_called_once()
-
- def test_large_batch_insert_embedding_error(self, vdb_core):
- """Test _large_batch_insert with embedding API error"""
- vdb_core.client = MagicMock()
- vdb_core._preprocess_documents = MagicMock(return_value=[
- {"content": "test content", "title": "test"}
- ])
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.side_effect = Exception("Embedding API error")
-
- documents = [{"content": "test content", "title": "test"}]
-
- result = vdb_core._large_batch_insert("test_index", documents, 10, "content", mock_embedding_model)
- assert result == 0 # No documents indexed due to embedding error
-
- def test_large_batch_insert_no_embeddings(self, vdb_core):
- """Test _large_batch_insert with no successful embeddings"""
- vdb_core.client = MagicMock()
- vdb_core._preprocess_documents = MagicMock(return_value=[
- {"content": "test content", "title": "test"}
- ])
-
- mock_embedding_model = MagicMock()
- mock_embedding_model.get_embeddings.side_effect = Exception("Embedding API error")
-
- documents = [{"content": "test content", "title": "test"}]
-
- result = vdb_core._large_batch_insert("test_index", documents, 10, "content", mock_embedding_model)
- assert result == 0 # No documents indexed
From 9cd002ba22186df4b925755c4e4789ba0aa3d3ee Mon Sep 17 00:00:00 2001
From: biansimeng
Date: Thu, 22 Jan 2026 10:16:45 +0800
Subject: [PATCH 30/48] Unify datamate&dify search tool name
---
sdk/nexent/core/tools/__init__.py | 4 +-
sdk/nexent/core/tools/datamate_search_tool.py | 4 +-
...ase_search_tool.py => dify_search_tool.py} | 27 +++---
sdk/nexent/core/utils/tools_common_message.py | 8 +-
...earch_tool.py => test_dify_search_tool.py} | 89 +++++++++----------
5 files changed, 62 insertions(+), 70 deletions(-)
rename sdk/nexent/core/tools/{dify_knowledge_base_search_tool.py => dify_search_tool.py} (94%)
rename test/sdk/core/tools/{test_dify_knowledge_base_search_tool.py => test_dify_search_tool.py} (85%)
diff --git a/sdk/nexent/core/tools/__init__.py b/sdk/nexent/core/tools/__init__.py
index 88c3e0866..cdd61af14 100644
--- a/sdk/nexent/core/tools/__init__.py
+++ b/sdk/nexent/core/tools/__init__.py
@@ -1,7 +1,7 @@
from .exa_search_tool import ExaSearchTool
from .get_email_tool import GetEmailTool
from .knowledge_base_search_tool import KnowledgeBaseSearchTool
-from .dify_knowledge_base_search_tool import DifyKnowledgeBaseSearchTool
+from .dify_search_tool import DifySearchTool
from .datamate_search_tool import DataMateSearchTool
from .send_email_tool import SendEmailTool
from .tavily_search_tool import TavilySearchTool
@@ -20,7 +20,7 @@
__all__ = [
"ExaSearchTool",
"KnowledgeBaseSearchTool",
- "DifyKnowledgeBaseSearchTool",
+ "DifySearchTool",
"DataMateSearchTool",
"SendEmailTool",
"GetEmailTool",
diff --git a/sdk/nexent/core/tools/datamate_search_tool.py b/sdk/nexent/core/tools/datamate_search_tool.py
index bf1009269..d217b2430 100644
--- a/sdk/nexent/core/tools/datamate_search_tool.py
+++ b/sdk/nexent/core/tools/datamate_search_tool.py
@@ -16,7 +16,7 @@
class DataMateSearchTool(Tool):
"""DataMate knowledge base search tool"""
- name = "datamate_search_tool"
+ name = "datamate_search"
description = (
"Performs a DataMate knowledge base search based on your query then returns the top search results. "
"A tool for retrieving domain-specific knowledge, documents, and information stored in the DataMate knowledge base. "
@@ -58,7 +58,7 @@ class DataMateSearchTool(Tool):
category = ToolCategory.SEARCH.value
# Used to distinguish different index sources for summaries
- tool_sign = ToolSign.DATAMATE_KNOWLEDGE_BASE.value
+ tool_sign = ToolSign.DATAMATE_SEARCH.value
def __init__(
self,
diff --git a/sdk/nexent/core/tools/dify_knowledge_base_search_tool.py b/sdk/nexent/core/tools/dify_search_tool.py
similarity index 94%
rename from sdk/nexent/core/tools/dify_knowledge_base_search_tool.py
rename to sdk/nexent/core/tools/dify_search_tool.py
index 5655be808..b744ae55f 100644
--- a/sdk/nexent/core/tools/dify_knowledge_base_search_tool.py
+++ b/sdk/nexent/core/tools/dify_search_tool.py
@@ -11,13 +11,13 @@
# Get logger instance
-logger = logging.getLogger("dify_knowledge_base_search_tool")
-
+logger = logging.getLogger("dify_search_tool")
-class DifyKnowledgeBaseSearchTool(Tool):
+
+class DifySearchTool(Tool):
"""Dify knowledge base search tool"""
- name = "dify_knowledge_base_search"
+ name = "dify_search"
description = (
"Performs a search on a Dify knowledge base based on your query then returns the top search results. "
"A tool for retrieving domain-specific knowledge, documents, and information stored in Dify knowledge bases. "
@@ -27,12 +27,6 @@ class DifyKnowledgeBaseSearchTool(Tool):
)
inputs = {
"query": {"type": "string", "description": "The search query to perform."},
- "top_k": {
- "type": "integer",
- "description": "Maximum number of search results to return per dataset .",
- "default": 3,
- "nullable": True,
- },
"search_method": {
"type": "string",
"description": "The search method to use. Options: keyword_search, semantic_search, full_text_search, hybrid_search",
@@ -42,7 +36,7 @@ class DifyKnowledgeBaseSearchTool(Tool):
}
output_type = "string"
category = ToolCategory.SEARCH.value
- tool_sign = ToolSign.DIFY_KNOWLEDGE_BASE.value
+ tool_sign = ToolSign.DIFY_SEARCH.value
def __init__(
self,
@@ -52,7 +46,7 @@ def __init__(
top_k: int = Field(description="Maximum number of search results per dataset", default=3),
observer: MessageObserver = Field(description="Message observer", default=None, exclude=True),
):
- """Initialize the DifyKnowledgeBaseSearchTool.
+ """Initialize the DifySearchTool.
Args:
dify_api_base (str): Dify API base URL
@@ -94,7 +88,6 @@ def __init__(
def forward(
self,
query: str,
- top_k: Optional[int] = None,
search_method: str = "semantic_search"
) -> str:
# Send tool run message
@@ -104,12 +97,12 @@ def forward(
card_content = [{"icon": "search", "text": query}]
self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
- # Use provided parameters or defaults
- search_top_k = top_k if top_k is not None else self.top_k
+ # Use instance default top_k
+ search_top_k = self.top_k
# Log the search parameters
logger.info(
- f"DifyKnowledgeBaseSearchTool called with query: '{query}', top_k: {search_top_k}, search_method: '{search_method}'"
+ f"DifySearchTool called with query: '{query}', top_k: {search_top_k}, search_method: '{search_method}'"
)
# Perform searches across all datasets
@@ -189,7 +182,7 @@ def forward(
logger.error(error_msg)
raise Exception(error_msg)
-
+
def _get_document_download_url(self, document_id: str, dataset_id: str = None) -> str:
"""Get download URL for a document from Dify API.
diff --git a/sdk/nexent/core/utils/tools_common_message.py b/sdk/nexent/core/utils/tools_common_message.py
index df1c23541..7c73f827b 100644
--- a/sdk/nexent/core/utils/tools_common_message.py
+++ b/sdk/nexent/core/utils/tools_common_message.py
@@ -9,8 +9,8 @@ class ToolSign(Enum):
EXA_SEARCH = "b" # Exa search tool identifier
LINKUP_SEARCH = "c" # Linkup search tool identifier
TAVILY_SEARCH = "d" # Tavily search tool identifier
- DATAMATE_KNOWLEDGE_BASE = "e" # DataMate knowledge base search tool identifier
- DIFY_KNOWLEDGE_BASE = "g" # Dify knowledge base search tool identifier
+ DATAMATE_SEARCH = "e" # DataMate search tool identifier
+ DIFY_SEARCH = "g" # Dify search tool identifier
FILE_OPERATION = "f" # File operation tool identifier
TERMINAL_OPERATION = "t" # Terminal operation tool identifier
MULTIMODAL_OPERATION = "m" # Multimodal operation tool identifier
@@ -22,8 +22,8 @@ class ToolSign(Enum):
"tavily_search": ToolSign.TAVILY_SEARCH.value,
"linkup_search": ToolSign.LINKUP_SEARCH.value,
"exa_search": ToolSign.EXA_SEARCH.value,
- "datamate_knowledge_base_search": ToolSign.DATAMATE_KNOWLEDGE_BASE.value,
- "dify_knowledge_base_search": ToolSign.DIFY_KNOWLEDGE_BASE.value,
+ "datamate_search": ToolSign.DATAMATE_SEARCH.value,
+ "dify_search": ToolSign.DIFY_SEARCH.value,
"file_operation": ToolSign.FILE_OPERATION.value,
"terminal_operation": ToolSign.TERMINAL_OPERATION.value,
"multimodal_operation": ToolSign.MULTIMODAL_OPERATION.value,
diff --git a/test/sdk/core/tools/test_dify_knowledge_base_search_tool.py b/test/sdk/core/tools/test_dify_search_tool.py
similarity index 85%
rename from test/sdk/core/tools/test_dify_knowledge_base_search_tool.py
rename to test/sdk/core/tools/test_dify_search_tool.py
index fbef0d684..a2522114f 100644
--- a/test/sdk/core/tools/test_dify_knowledge_base_search_tool.py
+++ b/test/sdk/core/tools/test_dify_search_tool.py
@@ -6,7 +6,7 @@
import pytest
from pytest_mock import MockFixture
-from sdk.nexent.core.tools.dify_knowledge_base_search_tool import DifyKnowledgeBaseSearchTool
+from sdk.nexent.core.tools.dify_search_tool import DifySearchTool
from sdk.nexent.core.utils.observer import MessageObserver, ProcessType
@@ -18,8 +18,8 @@ def mock_observer() -> MessageObserver:
@pytest.fixture
-def dify_tool(mock_observer: MessageObserver) -> DifyKnowledgeBaseSearchTool:
- return DifyKnowledgeBaseSearchTool(
+def dify_tool(mock_observer: MessageObserver) -> DifySearchTool:
+ return DifySearchTool(
dify_api_base="https://api.dify.ai/v1",
api_key="test_api_key",
dataset_ids='["dataset1", "dataset2"]',
@@ -59,9 +59,9 @@ def _build_download_url_response(download_url: str = "https://download.example.c
return {"download_url": download_url}
-class TestDifyKnowledgeBaseSearchToolInit:
+class TestDifySearchToolInit:
def test_init_success(self, mock_observer: MessageObserver):
- tool = DifyKnowledgeBaseSearchTool(
+ tool = DifySearchTool(
dify_api_base="https://api.dify.ai/v1",
api_key="test_key",
dataset_ids='["ds1", "ds2"]',
@@ -79,7 +79,7 @@ def test_init_success(self, mock_observer: MessageObserver):
assert tool.running_prompt_en == "Searching Dify knowledge base..."
def test_init_singledataset_id(self, mock_observer: MessageObserver):
- tool = DifyKnowledgeBaseSearchTool(
+ tool = DifySearchTool(
dify_api_base="https://api.dify.ai/v1/",
api_key="test_key",
dataset_ids='["single_dataset"]',
@@ -90,7 +90,7 @@ def test_init_singledataset_id(self, mock_observer: MessageObserver):
assert tool.dataset_ids == ["single_dataset"]
def test_init_json_string_array_dataset_ids(self, mock_observer: MessageObserver):
- tool = DifyKnowledgeBaseSearchTool(
+ tool = DifySearchTool(
dify_api_base="https://api.dify.ai/v1/",
api_key="test_key",
dataset_ids='["0ab7096c-dfa5-4e0e-9dad-9265781447a3"]',
@@ -101,7 +101,7 @@ def test_init_json_string_array_dataset_ids(self, mock_observer: MessageObserver
assert tool.dataset_ids == ["0ab7096c-dfa5-4e0e-9dad-9265781447a3"]
def test_init_json_string_array_multiple_dataset_ids(self, mock_observer: MessageObserver):
- tool = DifyKnowledgeBaseSearchTool(
+ tool = DifySearchTool(
dify_api_base="https://api.dify.ai/v1/",
api_key="test_key",
dataset_ids='["ds1", "ds2", "ds3"]',
@@ -117,7 +117,7 @@ def test_init_json_string_array_multiple_dataset_ids(self, mock_observer: Messag
])
def test_init_invalid_api_base(self, dify_api_base, expected_error):
with pytest.raises(ValueError) as excinfo:
- DifyKnowledgeBaseSearchTool(
+ DifySearchTool(
dify_api_base=dify_api_base,
api_key="test_key",
dataset_ids='["ds1"]',
@@ -130,7 +130,7 @@ def test_init_invalid_api_base(self, dify_api_base, expected_error):
])
def test_init_invalid_api_key(self, api_key, expected_error):
with pytest.raises(ValueError) as excinfo:
- DifyKnowledgeBaseSearchTool(
+ DifySearchTool(
dify_api_base="https://api.dify.ai/v1",
api_key=api_key,
dataset_ids='["ds1"]',
@@ -144,7 +144,7 @@ def test_init_invalid_api_key(self, api_key, expected_error):
])
def test_init_invaliddataset_ids(self, dataset_ids, expected_error):
with pytest.raises(ValueError) as excinfo:
- DifyKnowledgeBaseSearchTool(
+ DifySearchTool(
dify_api_base="https://api.dify.ai/v1",
api_key="test_key",
dataset_ids=dataset_ids,
@@ -153,8 +153,8 @@ def test_init_invaliddataset_ids(self, dataset_ids, expected_error):
class TestGetDocumentDownloadUrl:
- def test_get_document_download_url_success(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_get_document_download_url_success(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -173,12 +173,12 @@ def test_get_document_download_url_success(self, mocker: MockFixture, dify_tool:
}
)
- def test_get_document_download_url_empty_document_id(self, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_get_document_download_url_empty_document_id(self, dify_tool: DifySearchTool):
url = dify_tool._get_document_download_url("", "dataset1")
assert url == ""
- def test_get_document_download_url_nodataset_id(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_get_document_download_url_nodataset_id(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -198,8 +198,8 @@ def test_get_document_download_url_nodataset_id(self, mocker: MockFixture, dify_
}
)
- def test_get_document_download_url_request_error(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_get_document_download_url_request_error(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
client.get.side_effect = httpx.RequestError("Connection error", request=MagicMock())
@@ -207,8 +207,8 @@ def test_get_document_download_url_request_error(self, mocker: MockFixture, dify
assert url == ""
- def test_get_document_download_url_json_decode_error(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_get_document_download_url_json_decode_error(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -220,8 +220,8 @@ def test_get_document_download_url_json_decode_error(self, mocker: MockFixture,
assert url == ""
- def test_get_document_download_url_missing_key(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_get_document_download_url_missing_key(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -235,8 +235,8 @@ def test_get_document_download_url_missing_key(self, mocker: MockFixture, dify_t
class TestSearchDifyKnowledgeBase:
- def test_search_dify_knowledge_base_success(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_search_dify_knowledge_base_success(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -279,8 +279,8 @@ def test_search_dify_knowledge_base_success(self, mocker: MockFixture, dify_tool
}
)
- def test_search_dify_knowledge_base_no_records(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_search_dify_knowledge_base_no_records(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -292,8 +292,8 @@ def test_search_dify_knowledge_base_no_records(self, mocker: MockFixture, dify_t
assert result == {"query": "test query", "records": []}
- def test_search_dify_knowledge_base_request_error(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_search_dify_knowledge_base_request_error(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
client.post.side_effect = httpx.RequestError("API error", request=MagicMock())
@@ -302,8 +302,8 @@ def test_search_dify_knowledge_base_request_error(self, mocker: MockFixture, dif
assert "Dify API request failed" in str(excinfo.value)
- def test_search_dify_knowledge_base_json_decode_error(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_search_dify_knowledge_base_json_decode_error(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -316,8 +316,8 @@ def test_search_dify_knowledge_base_json_decode_error(self, mocker: MockFixture,
assert "Failed to parse Dify API response" in str(excinfo.value)
- def test_search_dify_knowledge_base_missing_key(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ def test_search_dify_knowledge_base_missing_key(self, mocker: MockFixture, dify_tool: DifySearchTool):
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
response = MagicMock()
@@ -332,9 +332,9 @@ def test_search_dify_knowledge_base_missing_key(self, mocker: MockFixture, dify_
class TestForward:
- def _setup_success_flow(self, mocker: MockFixture, tool: DifyKnowledgeBaseSearchTool):
+ def _setup_success_flow(self, mocker: MockFixture, tool: DifySearchTool):
# Mock httpx.Client for both search and download operations
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
# Mock search method to return records
@@ -380,10 +380,10 @@ def mock_request(method, url, **kwargs):
return client
- def test_forward_success_with_observer_en(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_forward_success_with_observer_en(self, mocker: MockFixture, dify_tool: DifySearchTool):
client = self._setup_success_flow(mocker, dify_tool)
- result_json = dify_tool.forward("test query", top_k=2, search_method="keyword_search")
+ result_json = dify_tool.forward("test query", search_method="keyword_search")
results = json.loads(result_json)
assert len(results) == 2 # 2 datasets * 1 record each
@@ -408,7 +408,7 @@ def test_forward_success_with_observer_en(self, mocker: MockFixture, dify_tool:
# Verify API calls were made for both datasets
assert client.post.call_count == 2 # Called once per dataset
- def test_forward_success_with_observer_zh(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_forward_success_with_observer_zh(self, mocker: MockFixture, dify_tool: DifySearchTool):
dify_tool.observer.lang = "zh"
self._setup_success_flow(mocker, dify_tool)
@@ -419,7 +419,7 @@ def test_forward_success_with_observer_zh(self, mocker: MockFixture, dify_tool:
)
def test_forward_no_observer(self, mocker: MockFixture):
- tool = DifyKnowledgeBaseSearchTool(
+ tool = DifySearchTool(
dify_api_base="https://api.dify.ai/v1",
api_key="test_api_key",
dataset_ids='["dataset1"]',
@@ -431,7 +431,7 @@ def test_forward_no_observer(self, mocker: MockFixture):
result_json = tool.forward("query")
assert len(json.loads(result_json)) == 1
- def test_forward_no_results(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_forward_no_results(self, mocker: MockFixture, dify_tool: DifySearchTool):
# Mock empty search results
search_response = {"query": "test query", "records": []}
@@ -440,7 +440,7 @@ def test_forward_no_results(self, mocker: MockFixture, dify_tool: DifyKnowledgeB
mock_response.json.return_value = search_response
# Mock httpx.Client instead of requests
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
client.post.return_value = mock_response
@@ -451,9 +451,9 @@ def test_forward_no_results(self, mocker: MockFixture, dify_tool: DifyKnowledgeB
assert "No results found!" in str(excinfo.value)
assert "Error searching Dify knowledge base" in str(excinfo.value)
- def test_forward_search_api_error(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_forward_search_api_error(self, mocker: MockFixture, dify_tool: DifySearchTool):
# Mock API error during search
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
client.post.side_effect = httpx.RequestError("API error", request=MagicMock())
@@ -463,9 +463,9 @@ def test_forward_search_api_error(self, mocker: MockFixture, dify_tool: DifyKnow
assert "Error searching Dify knowledge base" in str(excinfo.value)
assert "Dify API request failed" in str(excinfo.value)
- def test_forward_download_url_error_still_works(self, mocker: MockFixture, dify_tool: DifyKnowledgeBaseSearchTool):
+ def test_forward_download_url_error_still_works(self, mocker: MockFixture, dify_tool: DifySearchTool):
# Mock httpx.Client
- client_cls = mocker.patch("sdk.nexent.core.tools.dify_knowledge_base_search_tool.httpx.Client")
+ client_cls = mocker.patch("sdk.nexent.core.tools.dify_search_tool.httpx.Client")
client = client_cls.return_value.__enter__.return_value
# Mock successful search but failed download URL
@@ -500,4 +500,3 @@ def test_forward_download_url_error_still_works(self, mocker: MockFixture, dify_
assert len(results) == 2 # Still processes results even with download URL failure
assert results[0]["title"] == "document1.txt"
# URL should be empty string due to download failure
-
From 9159e87a2f3f2a89f57eb676cb998af292f94b3b Mon Sep 17 00:00:00 2001
From: zhizhi <928570418@qq.com>
Date: Thu, 22 Jan 2026 10:40:33 +0800
Subject: [PATCH 31/48] =?UTF-8?q?=E2=9C=A8=20Update=20DocumentList=20compo?=
=?UTF-8?q?nent=20and=20localization=20for=20DataMate=20restrictions:=20En?=
=?UTF-8?q?hanced=20the=20DocumentList=20to=20conditionally=20render=20an?=
=?UTF-8?q?=20upload=20area=20or=20a=20message=20indicating=20editing=20re?=
=?UTF-8?q?strictions=20for=20DataMate=20knowledge=20bases.=20Added=20corr?=
=?UTF-8?q?esponding=20localization=20strings=20in=20English=20and=20Chine?=
=?UTF-8?q?se=20for=20user=20guidance=20on=20upload=20limitations.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../components/document/DocumentList.tsx | 57 +++++++++++--------
frontend/public/locales/en/common.json | 4 ++
frontend/public/locales/zh/common.json | 4 ++
3 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx b/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx
index 02a3297be..d8b09f9e7 100644
--- a/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx
+++ b/frontend/app/[locale]/knowledges/components/document/DocumentList.tsx
@@ -432,6 +432,7 @@ const DocumentListContainer = forwardRef(
{/* Document list */}
+