diff --git a/chat_rag_explorer/rag_config_service.py b/chat_rag_explorer/rag_config_service.py
index 1a2a756..1d2be0c 100644
--- a/chat_rag_explorer/rag_config_service.py
+++ b/chat_rag_explorer/rag_config_service.py
@@ -178,6 +178,97 @@ def validate_local_path(self, path, request_id=None):
'details': {'exists': True, 'is_directory': True, 'has_database': True}
}
+    def discover_databases(self, request_id=None):
+        """
+        Discover ChromaDB databases in the ./data/ directory.
+
+        Scans the immediate subdirectories of ``<project root>/data`` for a
+        ``chroma.sqlite3`` file and returns metadata about each match.
+
+        Args:
+            request_id: Optional identifier used to prefix log messages.
+
+        Returns:
+            dict with:
+                - success: bool indicating if discovery succeeded
+                - databases: list of discovered databases with metadata,
+                  sorted by name
+                - search_path: where we searched
+                - current_path: currently configured database path (if any)
+                - error: error message (only present when success is False)
+        """
+        log_prefix = f"[{request_id}] " if request_id else ""
+
+        databases = []
+        # Bound before the try block so the error response can still report
+        # the configured path when the failure happens after it was read.
+        current_path = ''
+
+        try:
+            current_config = self.get_config(request_id)
+            current_path = current_config.get('local_path') or ''
+
+            # Get the data directory (relative to project root)
+            project_root = Path(__file__).parent.parent
+            data_dir = project_root / "data"
+            # is_dir() also covers the case where "data" exists but is a
+            # regular file, which would otherwise make iterdir() raise.
+            if not data_dir.is_dir():
+                logger.info(f"{log_prefix}Data directory does not exist: {data_dir}")
+                return {
+                    'success': True,
+                    'databases': [],
+                    'search_path': './data/',
+                    'current_path': current_path
+                }
+
+            # Resolve the configured path once so that relative paths, trailing
+            # separators and symlinks still match a discovered directory.
+            current_resolved = None
+            if current_path:
+                try:
+                    current_resolved = Path(current_path).expanduser().resolve()
+                except (OSError, RuntimeError, ValueError) as e:
+                    logger.debug(f"{log_prefix}Could not resolve configured path {current_path}: {e}")
+
+            # Search for chroma.sqlite3 files in subdirectories
+            for subdir in data_dir.iterdir():
+                if not subdir.is_dir():
+                    continue
+
+                chroma_db_file = subdir / 'chroma.sqlite3'
+                if not chroma_db_file.is_file():
+                    continue
+
+                try:
+                    # Get database metadata
+                    stat = chroma_db_file.stat()
+
+                    # Best-effort collection count. This opens the database
+                    # synchronously; any failure leaves the count as None
+                    # instead of aborting discovery.
+                    # NOTE(review): opening a PersistentClient may have side
+                    # effects on the database directory - confirm this is
+                    # acceptable for a read-only listing.
+                    collection_count = None
+                    try:
+                        client = chromadb.PersistentClient(path=str(subdir))
+                        collection_count = len(client.list_collections())
+                    except Exception as e:
+                        logger.debug(f"{log_prefix}Could not read collections from {subdir}: {e}")
+
+                    # Exact string matches are kept for backward compatibility;
+                    # the resolved comparison catches equivalent spellings.
+                    is_current = bool(current_path) and (
+                        str(subdir) == current_path
+                        or str(subdir.absolute()) == current_path
+                        or (current_resolved is not None
+                            and subdir.resolve() == current_resolved)
+                    )
+
+                    database_info = {
+                        'name': subdir.name,
+                        'path': str(subdir),
+                        'relative_path': f"./data/{subdir.name}",
+                        'size_bytes': stat.st_size,
+                        'size_mb': round(stat.st_size / (1024 * 1024), 2),
+                        'last_modified': stat.st_mtime,
+                        'collection_count': collection_count,
+                        'is_current': is_current
+                    }
+                    databases.append(database_info)
+                    logger.debug(f"{log_prefix}Found database: {subdir.name}")
+
+                except Exception as e:
+                    # One unreadable database must not hide the others.
+                    logger.warning(f"{log_prefix}Error reading database info from {subdir}: {e}")
+
+            # Sort by name for consistent ordering
+            databases.sort(key=lambda x: x['name'])
+
+            logger.info(f"{log_prefix}Discovered {len(databases)} database(s) in ./data/")
+            return {
+                'success': True,
+                'databases': databases,
+                'search_path': './data/',
+                'current_path': current_path
+            }
+
+        except Exception as e:
+            logger.error(f"{log_prefix}Database discovery failed: {e}", exc_info=True)
+            return {
+                'success': False,
+                'databases': [],
+                'search_path': './data/',
+                'current_path': current_path,
+                'error': str(e)
+            }
+
def test_connection(self, config_data, request_id=None):
"""Test ChromaDB connection with given configuration."""
log_prefix = f"[{request_id}] " if request_id else ""
diff --git a/chat_rag_explorer/routes.py b/chat_rag_explorer/routes.py
index 1211ea9..5e768b6 100644
--- a/chat_rag_explorer/routes.py
+++ b/chat_rag_explorer/routes.py
@@ -569,7 +569,7 @@ def save_rag_config():
return jsonify(result), 400
logger.info(f"[{request_id}] POST /api/rag/config - Saved ({elapsed:.3f}s)")
- return jsonify({"data": result['config']})
+ return jsonify(result)
except Exception as e:
elapsed = time.time() - start_time
logger.error(f"[{request_id}] POST /api/rag/config - Failed after {elapsed:.3f}s: {str(e)}", exc_info=True)
@@ -632,6 +632,24 @@ def get_rag_api_key_status():
return jsonify({"configured": False, "masked": None}), 500
+@main_bp.route("/api/rag/discover-databases")
+def discover_rag_databases():
+    """GET - Discover ChromaDB databases in ./data/ directory.
+
+    Returns the service result as JSON. Responds with HTTP 200 when the
+    service reports success, and HTTP 500 when the service reports a failure
+    or raises.
+    """
+    request_id = generate_request_id()
+    start_time = time.time()
+    logger.info(f"[{request_id}] GET /api/rag/discover-databases - Discovering databases")
+
+    try:
+        result = rag_config_service.discover_databases(request_id)
+        elapsed = time.time() - start_time
+        # The service catches its own exceptions and signals them through
+        # 'success'; surface that as an error status instead of logging
+        # "Found 0 database(s)" and answering 200.
+        if not result.get('success'):
+            logger.error(f"[{request_id}] GET /api/rag/discover-databases - Failed after {elapsed:.3f}s: {result.get('error')}")
+            return jsonify(result), 500
+        logger.info(f"[{request_id}] GET /api/rag/discover-databases - Found {len(result.get('databases', []))} database(s) ({elapsed:.3f}s)")
+        return jsonify(result)
+    except Exception as e:
+        elapsed = time.time() - start_time
+        logger.error(f"[{request_id}] GET /api/rag/discover-databases - Failed after {elapsed:.3f}s: {str(e)}", exc_info=True)
+        return jsonify({"success": False, "databases": [], "error": str(e)}), 500
+
+
@main_bp.route("/api/rag/sample", methods=["POST"])
def get_rag_sample():
"""POST - Fetch sample records from a ChromaDB collection."""
diff --git a/chat_rag_explorer/static/settings.js b/chat_rag_explorer/static/settings.js
index a37cf27..2ab51c6 100644
--- a/chat_rag_explorer/static/settings.js
+++ b/chat_rag_explorer/static/settings.js
@@ -623,6 +623,7 @@ document.addEventListener('DOMContentLoaded', () => {
// ===== RAG Settings Functions =====
const RAG_CONFIG_KEY = 'chat-rag-rag-config';
+ const RAG_PATH_MODE_KEY = 'chat-rag-path-mode'; // Track user's preference for select vs manual mode
// DOM Elements
const ragModeRadios = document.querySelectorAll('input[name="rag-mode"]');
@@ -631,6 +632,15 @@ document.addEventListener('DOMContentLoaded', () => {
const ragCloudSettings = document.getElementById('rag-cloud-settings');
const ragLocalPath = document.getElementById('rag-local-path');
const ragPathStatus = document.getElementById('rag-path-status');
+
+ // New elements for database discovery
+ const ragPathModeToggle = document.getElementById('rag-path-mode-toggle');
+ const ragPathSelectMode = document.getElementById('rag-path-select-mode');
+ const ragPathManualMode = document.getElementById('rag-path-manual-mode');
+ const ragLocalSelect = document.getElementById('rag-local-select');
+ const ragSelectLoading = document.getElementById('rag-select-loading');
+ const ragSelectHint = document.getElementById('rag-select-hint');
+ const ragSelectHelp = document.getElementById('rag-select-help');
const ragServerHost = document.getElementById('rag-server-host');
const ragServerPort = document.getElementById('rag-server-port');
const ragTenantId = document.getElementById('rag-tenant-id');
@@ -663,6 +673,8 @@ document.addEventListener('DOMContentLoaded', () => {
let originalRagConfig = null;
let pathValidateTimeout = null;
let availableCollections = [];
+ let isManualPathMode = false; // Track whether we're in manual path mode
+ let discoveredDatabases = []; // Store discovered databases
function getSelectedRagMode() {
const selected = document.querySelector('input[name="rag-mode"]:checked');
@@ -689,10 +701,142 @@ document.addEventListener('DOMContentLoaded', () => {
loadApiKeyStatus();
}
+ // Discover databases when switching to local mode
+ if (mode === 'local' && !isManualPathMode) {
+ discoverDatabases();
+ }
+
updateRagSaveButtonState();
SettingsLogger.debug('RAG mode toggled', { mode });
}
+    /**
+     * Fetch the list of ChromaDB databases from /api/rag/discover-databases
+     * and rebuild the database <select> from the result.
+     *
+     * Updates the module-level `discoveredDatabases` array. Never rejects:
+     * failures are logged and shown in the (disabled) select instead.
+     */
+    async function discoverDatabases() {
+        SettingsLogger.info('Discovering ChromaDB databases');
+
+        // Show loading state
+        if (ragSelectLoading) ragSelectLoading.style.display = 'inline-block';
+        if (ragLocalSelect) {
+            ragLocalSelect.innerHTML = '';
+            ragLocalSelect.disabled = true;
+        }
+
+        try {
+            const response = await fetch('/api/rag/discover-databases');
+            const data = await response.json();
+
+            if (!data.success) {
+                throw new Error(data.error || 'Failed to discover databases');
+            }
+
+            discoveredDatabases = data.databases || [];
+            SettingsLogger.info(`Discovered ${discoveredDatabases.length} database(s)`, { databases: discoveredDatabases });
+
+            // Update the select element
+            if (ragLocalSelect) {
+                // Clear again: an overlapping call may have filled the select
+                // while this request was in flight.
+                ragLocalSelect.innerHTML = '';
+
+                if (discoveredDatabases.length === 0) {
+                    // No databases found
+                    ragLocalSelect.disabled = true;
+
+                    // Show help message
+                    if (ragSelectHelp) ragSelectHelp.style.display = 'block';
+                    if (ragSelectHint) ragSelectHint.style.display = 'none';
+                } else {
+                    // Add placeholder option
+                    const placeholderOption = document.createElement('option');
+                    placeholderOption.value = '';
+                    placeholderOption.textContent = 'Select a database...';
+                    ragLocalSelect.appendChild(placeholderOption);
+
+                    // Add discovered databases
+                    discoveredDatabases.forEach(db => {
+                        const option = document.createElement('option');
+                        option.value = db.path;
+
+                        // Create descriptive text with folder icon
+                        let text = '📁 ' + db.name;
+                        if (db.collection_count !== null) {
+                            text += ` (${db.collection_count} collection${db.collection_count !== 1 ? 's' : ''})`;
+                        }
+                        if (db.is_current) {
+                            text += ' [current]';
+                        }
+
+                        option.textContent = text;
+                        ragLocalSelect.appendChild(option);
+                    });
+
+                    ragLocalSelect.disabled = false;
+
+                    // Hide help message, show hint
+                    if (ragSelectHelp) ragSelectHelp.style.display = 'none';
+                    if (ragSelectHint) ragSelectHint.style.display = 'block';
+
+                    // If there's a current path that matches one of the discovered databases, select it
+                    const currentPath = originalRagConfig?.local_path;
+                    if (currentPath) {
+                        const normalizedPath = currentPath.replace(/\\/g, '/');
+                        const matchingDb = discoveredDatabases.find(db =>
+                            db.path === currentPath || db.path === normalizedPath
+                        );
+                        if (matchingDb) {
+                            ragLocalSelect.value = matchingDb.path;
+                        }
+                    }
+                }
+            }
+        } catch (error) {
+            SettingsLogger.error('Failed to discover databases', { error: error.message });
+
+            // Drop any list left over from an earlier successful call so that
+            // callers do not match paths against stale entries.
+            discoveredDatabases = [];
+
+            if (ragLocalSelect) {
+                // Tell the user why the selector is empty instead of leaving
+                // a blank, disabled dropdown.
+                ragLocalSelect.innerHTML = '';
+                const errorOption = document.createElement('option');
+                errorOption.value = '';
+                errorOption.textContent = 'Could not load databases';
+                ragLocalSelect.appendChild(errorOption);
+                ragLocalSelect.disabled = true;
+            }
+
+            // The help block is for the "no databases" case; hide it here so
+            // a failed request is not presented as an empty result.
+            if (ragSelectHelp) ragSelectHelp.style.display = 'none';
+        } finally {
+            if (ragSelectLoading) ragSelectLoading.style.display = 'none';
+        }
+    }
+
+    /**
+     * Switch the local-path UI between the database selector and the manual
+     * text input, persisting the choice in localStorage.
+     */
+    function togglePathMode() {
+        isManualPathMode = !isManualPathMode;
+
+        // Save the user's preference
+        localStorage.setItem(RAG_PATH_MODE_KEY, isManualPathMode ? 'manual' : 'select');
+
+        if (isManualPathMode) {
+            // Switch to manual mode
+            if (ragPathSelectMode) ragPathSelectMode.style.display = 'none';
+            if (ragPathManualMode) ragPathManualMode.style.display = 'block';
+            if (ragPathModeToggle) ragPathModeToggle.textContent = 'Switch to database selector';
+
+            // If a database was selected, populate the manual input
+            if (ragLocalSelect && ragLocalSelect.value && ragLocalPath) {
+                ragLocalPath.value = ragLocalSelect.value;
+            }
+        } else {
+            // Switch to select mode
+            if (ragPathSelectMode) ragPathSelectMode.style.display = 'block';
+            if (ragPathManualMode) ragPathManualMode.style.display = 'none';
+            if (ragPathModeToggle) ragPathModeToggle.textContent = 'Switch to manual entry';
+
+            // Capture the manual path now; it is matched after the refresh.
+            const manualPath = ragLocalPath ? ragLocalPath.value.trim() : '';
+
+            // discoverDatabases() is async and rebuilds the <select> when it
+            // finishes, so the match must run after it completes. Doing it
+            // synchronously would use the stale list and then be wiped out.
+            discoverDatabases().then(() => {
+                // The user may have toggled back to manual mode meanwhile.
+                if (isManualPathMode) return;
+
+                // If the manual path matches a discovered database, select it
+                if (manualPath && ragLocalSelect && discoveredDatabases.length > 0) {
+                    const normalizedPath = manualPath.replace(/\\/g, '/');
+                    const matchingDb = discoveredDatabases.find(db =>
+                        db.path === manualPath || db.path === normalizedPath
+                    );
+                    if (matchingDb) {
+                        ragLocalSelect.value = matchingDb.path;
+                    }
+                }
+
+                // The selection may have changed, so re-evaluate button state.
+                updateRagSaveButtonState();
+            });
+        }
+
+        updateRagSaveButtonState();
+        SettingsLogger.debug('Path mode toggled', { isManual: isManualPathMode });
+    }
+
async function loadRagConfig() {
SettingsLogger.info('Loading RAG configuration');
try {
@@ -720,8 +864,41 @@ document.addEventListener('DOMContentLoaded', () => {
ragDistanceSlider.value = threshold;
ragDistanceValue.textContent = threshold === 0 ? 'Off' : threshold.toFixed(1);
+ // Load saved mode preference from localStorage
+ const savedMode = localStorage.getItem(RAG_PATH_MODE_KEY);
+ if (savedMode === 'manual') {
+ isManualPathMode = true;
+ // Apply manual mode UI state
+ if (ragPathSelectMode) ragPathSelectMode.style.display = 'none';
+ if (ragPathManualMode) ragPathManualMode.style.display = 'block';
+ if (ragPathModeToggle) ragPathModeToggle.textContent = 'Switch to database selector';
+ } else {
+ // Default to select mode (including when no preference is saved)
+ isManualPathMode = false;
+ if (ragPathSelectMode) ragPathSelectMode.style.display = 'block';
+ if (ragPathManualMode) ragPathManualMode.style.display = 'none';
+ if (ragPathModeToggle) ragPathModeToggle.textContent = 'Switch to manual entry';
+ }
+
toggleRagMode();
+ // If in local mode and select mode, discover databases and try to select the current one
+ if (originalRagConfig.mode === 'local' && !isManualPathMode) {
+ await discoverDatabases();
+ // Try to select the current database if it was discovered
+ if (originalRagConfig.local_path && ragLocalSelect) {
+ const matchingDb = discoveredDatabases.find(db =>
+ db.path === originalRagConfig.local_path ||
+ db.path === originalRagConfig.local_path.replace(/\\/g, '/')
+ );
+ if (matchingDb) {
+ ragLocalSelect.value = matchingDb.path;
+ }
+ // Note: We no longer auto-switch to manual mode if database not found
+ // User can manually switch if needed
+ }
+ }
+
// If a collection was previously saved, restore the collection selector state
if (originalRagConfig.collection) {
// Add the saved collection as an option and select it
@@ -814,7 +991,16 @@ document.addEventListener('DOMContentLoaded', () => {
let isValid = true;
if (mode === 'local') {
- isValid = validateRequiredField(ragLocalPath, 'ChromaDB path is required');
+ // Check the appropriate field based on select vs manual mode
+ if (isManualPathMode) {
+ isValid = validateRequiredField(ragLocalPath, 'ChromaDB path is required');
+ } else {
+ // In select mode, check if a database is selected
+ if (!ragLocalSelect || !ragLocalSelect.value.trim()) {
+ ragTestResult.innerHTML = '
Please select a database
';
+ return;
+ }
+ }
} else if (mode === 'server') {
isValid = validateRequiredField(ragServerHost, 'Host is required') && isValid;
isValid = validateRequiredField(ragServerPort, 'Port is required') && isValid;
@@ -884,7 +1070,9 @@ document.addEventListener('DOMContentLoaded', () => {
`;
ragCollectionSection.style.display = 'none';
} finally {
- ragTestBtn.disabled = false;
+ // Restore button state based on form validity
+ const isValid = validateRagForm();
+ ragTestBtn.disabled = !isValid;
ragTestBtn.innerHTML = '→ Test Connection';
updateWizardFromState();
}
@@ -916,9 +1104,19 @@ document.addEventListener('DOMContentLoaded', () => {
const distanceVal = parseFloat(ragDistanceSlider.value);
const distanceThreshold = distanceVal === 0 ? null : distanceVal;
+ // Get local path from select or manual input depending on mode
+ let localPath = '';
+ if (getSelectedRagMode() === 'local') {
+ if (isManualPathMode) {
+ localPath = ragLocalPath.value.trim();
+ } else {
+ localPath = ragLocalSelect ? ragLocalSelect.value : '';
+ }
+ }
+
return {
mode: getSelectedRagMode(),
- local_path: ragLocalPath.value.trim(),
+ local_path: localPath,
server_host: ragServerHost.value.trim(),
server_port: parseInt(ragServerPort.value) || 8000,
cloud_tenant: ragTenantId.value.trim(),
@@ -946,7 +1144,13 @@ document.addEventListener('DOMContentLoaded', () => {
function validateRagForm() {
const mode = getSelectedRagMode();
if (mode === 'local') {
- return ragLocalPath.value.trim().length > 0;
+ // Check the appropriate field based on select vs manual mode
+ if (isManualPathMode) {
+ return ragLocalPath.value.trim().length > 0;
+ } else {
+ // In select mode, check if a database is selected
+ return ragLocalSelect && ragLocalSelect.value.trim().length > 0;
+ }
} else if (mode === 'server') {
return ragServerHost.value.trim().length > 0 && ragServerPort.value;
} else if (mode === 'cloud') {
@@ -1053,6 +1257,9 @@ document.addEventListener('DOMContentLoaded', () => {
const isValid = validateRagForm();
ragSaveBtn.disabled = !hasChanges || !isValid;
+ // Also update the test button state based on form validity
+ ragTestBtn.disabled = !isValid;
+
// Update badge visibility and button state for unsaved changes
if (ragSaveBadge) {
ragSaveBadge.style.display = hasChanges ? 'inline-block' : 'none';
@@ -1224,6 +1431,18 @@ document.addEventListener('DOMContentLoaded', () => {
radio.addEventListener('change', toggleRagMode);
});
+ // New event listeners for database discovery
+ if (ragPathModeToggle) {
+ ragPathModeToggle.addEventListener('click', togglePathMode);
+ }
+
+ if (ragLocalSelect) {
+ ragLocalSelect.addEventListener('change', () => {
+ onConnectionParamChange();
+ updateRagSaveButtonState();
+ });
+ }
+
function onConnectionParamChange() {
// Hide collection section when connection parameters change
ragCollectionSection.style.display = 'none';
diff --git a/chat_rag_explorer/static/style.css b/chat_rag_explorer/static/style.css
index 65fd6dd..6c3c54e 100644
--- a/chat_rag_explorer/static/style.css
+++ b/chat_rag_explorer/static/style.css
@@ -2408,3 +2408,57 @@ button:disabled {
max-height: 150px;
overflow-y: auto;
}
+
+/* ===== ChromaDB Path Selector UI ===== */
+
+.path-mode-toggle {
+ margin-left: 1rem;
+ padding: 0.25rem 0.75rem;
+ background: transparent;
+ border: 1px solid #d0d7de;
+ border-radius: 4px;
+ color: #0969da;
+ font-size: 0.85rem;
+ cursor: pointer;
+ transition: all 0.2s;
+}
+
+.path-mode-toggle:hover {
+ background: #f3f7fb;
+ border-color: #0969da;
+}
+
+.path-mode-section {
+ margin-top: 0.5rem;
+}
+
+.help-message {
+ margin-top: 1rem;
+ padding: 1rem;
+ background: #fff8dc;
+ border: 1px solid #ffd700;
+ border-radius: 6px;
+ font-size: 0.9rem;
+ line-height: 1.6;
+}
+
+.help-message strong {
+ color: #996600;
+}
+
+.help-message code {
+ background: #f6f8fa;
+ padding: 2px 6px;
+ border-radius: 3px;
+ font-family: 'Monaco', 'Consolas', monospace;
+ font-size: 0.85rem;
+}
+
+.help-message ol {
+ margin: 0.5rem 0 0.5rem 1.5rem;
+ padding-left: 0;
+}
+
+.help-message li {
+ margin: 0.25rem 0;
+}
diff --git a/chat_rag_explorer/templates/settings.html b/chat_rag_explorer/templates/settings.html
index fc07ff6..3cd42fd 100644
--- a/chat_rag_explorer/templates/settings.html
+++ b/chat_rag_explorer/templates/settings.html
@@ -152,10 +152,40 @@ Settings
-
-
-
+
+
+
+
+
+
+
+
+
+ Looking in: ./data/* subdirectories
+
+
+ No ChromaDB databases found in ./data/
+
+ See utils/README.md for instructions on creating a database.
+
+ Quick start: uv run utils/ingest.py
+
+
+
+
+
+
+
Enter the absolute path to a ChromaDB database directory
+
+
diff --git a/data/chroma_db_sample/chroma.sqlite3 b/data/chroma_db_sample/chroma.sqlite3
index faf3ea7..7c4c51f 100644
Binary files a/data/chroma_db_sample/chroma.sqlite3 and b/data/chroma_db_sample/chroma.sqlite3 differ
diff --git a/data/corpus/paul-graham-essays/gba.md b/data/corpus/paul-graham-essays/gba.md
index 9922da8..6924540 100644
--- a/data/corpus/paul-graham-essays/gba.md
+++ b/data/corpus/paul-graham-essays/gba.md
@@ -1,5 +1,5 @@
---
-title: "The Word "Hacker""
+title: "The Word 'Hacker'"
author: "Paul Graham"
date: "April 2004"
tags: ["programming"]
diff --git a/tests/unit/test_rag_config_service.py b/tests/unit/test_rag_config_service.py
index 0c52db4..21238ce 100644
--- a/tests/unit/test_rag_config_service.py
+++ b/tests/unit/test_rag_config_service.py
@@ -6,6 +6,7 @@
"""
import json
import pytest
+from pathlib import Path
from unittest.mock import patch, MagicMock
from chat_rag_explorer.rag_config_service import RagConfigService, DEFAULT_RAG_CONFIG
@@ -382,6 +383,223 @@ def test_unknown_mode(self):
assert "unknown" in result["message"].lower()
+class TestDiscoverDatabases:
+ """Tests for discover_databases() method."""
+
+ def test_no_data_directory(self, tmp_path, monkeypatch):
+ """Returns empty list when data directory doesn't exist."""
+ service = RagConfigService()
+ # Make data directory not exist
+ fake_project = tmp_path / "fake_project"
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path",
+ lambda x: fake_project / Path(x).name)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert result["databases"] == []
+ assert result["search_path"] == "./data/"
+
+ def test_empty_data_directory(self, tmp_path, monkeypatch):
+ """Returns empty list when data directory is empty."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ # Mock the path resolution to use our temp directory
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert result["databases"] == []
+
+ def test_discovers_single_database(self, tmp_path, monkeypatch):
+ """Discovers a single ChromaDB database."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ # Create a fake ChromaDB database
+ db_dir = data_dir / "test_db"
+ db_dir.mkdir()
+ (db_dir / "chroma.sqlite3").write_text("fake db content")
+
+ # Mock path resolution
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert len(result["databases"]) == 1
+ db = result["databases"][0]
+ assert db["name"] == "test_db"
+ assert "test_db" in db["path"]
+ assert db["relative_path"] == "./data/test_db"
+ assert db["size_bytes"] > 0
+
+ def test_discovers_multiple_databases(self, tmp_path, monkeypatch):
+ """Discovers multiple ChromaDB databases."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ # Create multiple fake ChromaDB databases
+ for name in ["db1", "db2", "db3"]:
+ db_dir = data_dir / name
+ db_dir.mkdir()
+ (db_dir / "chroma.sqlite3").write_text(f"fake {name}")
+
+ # Create a non-database directory (should be ignored)
+ (data_dir / "not_a_db").mkdir()
+
+ # Mock path resolution
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert len(result["databases"]) == 3
+ names = [db["name"] for db in result["databases"]]
+ assert "db1" in names
+ assert "db2" in names
+ assert "db3" in names
+ assert "not_a_db" not in names
+
+ def test_ignores_files_in_data_dir(self, tmp_path, monkeypatch):
+ """Ignores files (non-directories) in data directory."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ # Create a file (should be ignored)
+ (data_dir / "readme.txt").write_text("not a directory")
+
+ # Create a valid database
+ db_dir = data_dir / "valid_db"
+ db_dir.mkdir()
+ (db_dir / "chroma.sqlite3").write_text("fake db")
+
+ # Mock path resolution
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert len(result["databases"]) == 1
+ assert result["databases"][0]["name"] == "valid_db"
+
+ @patch("chat_rag_explorer.rag_config_service.chromadb.PersistentClient")
+ def test_includes_collection_count(self, mock_client_class, tmp_path, monkeypatch):
+ """Includes collection count when accessible."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ db_dir = data_dir / "test_db"
+ db_dir.mkdir()
+ (db_dir / "chroma.sqlite3").write_text("fake db")
+
+ # Mock ChromaDB client
+ mock_client = MagicMock()
+ mock_collection1 = MagicMock()
+ mock_collection1.name = "collection1"
+ mock_collection2 = MagicMock()
+ mock_collection2.name = "collection2"
+ mock_client.list_collections.return_value = [mock_collection1, mock_collection2]
+ mock_client_class.return_value = mock_client
+
+ # Mock path resolution
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ assert len(result["databases"]) == 1
+ assert result["databases"][0]["collection_count"] == 2
+
+ def test_marks_current_database(self, tmp_path, monkeypatch):
+ """Marks the currently configured database."""
+ service = RagConfigService()
+ data_dir = tmp_path / "data"
+ data_dir.mkdir()
+
+ # Create databases
+ current_db = data_dir / "current_db"
+ current_db.mkdir()
+ (current_db / "chroma.sqlite3").write_text("current")
+
+ other_db = data_dir / "other_db"
+ other_db.mkdir()
+ (other_db / "chroma.sqlite3").write_text("other")
+
+ # Mock path resolution
+ def mock_path(path_str):
+ p = Path(path_str)
+ if p.name == "rag_config_service.py":
+ return tmp_path / "chat_rag_explorer" / "rag_config_service.py"
+ return p
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ # Mock get_config to return current_db as configured
+ monkeypatch.setattr(service, "get_config",
+ lambda request_id=None: {"local_path": str(current_db)})
+
+ result = service.discover_databases()
+
+ assert result["success"] is True
+ current_marked = [db for db in result["databases"] if db["is_current"]]
+ assert len(current_marked) == 1
+ assert current_marked[0]["name"] == "current_db"
+
+ def test_handles_discovery_error(self, tmp_path, monkeypatch):
+ """Handles errors during discovery gracefully."""
+ service = RagConfigService()
+
+ # Mock to raise an exception
+ def mock_path(path_str):
+ raise PermissionError("Access denied")
+
+ monkeypatch.setattr("chat_rag_explorer.rag_config_service.Path", mock_path)
+
+ result = service.discover_databases()
+
+ assert result["success"] is False
+ assert "error" in result
+ assert result["databases"] == []
+
+
class TestQueryCollection:
"""Tests for query_collection() method."""
diff --git a/tests/unit/test_rag_routes.py b/tests/unit/test_rag_routes.py
new file mode 100644
index 0000000..a6ed578
--- /dev/null
+++ b/tests/unit/test_rag_routes.py
@@ -0,0 +1,293 @@
+"""
+Unit tests for RAG-related routes.
+
+Tests the RAG configuration, database discovery, and connection endpoints.
+"""
+import json
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+class TestRagConfigRoute:
+ """Tests for /api/rag/config endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_get_config_success(self, mock_service, client):
+ """GET /api/rag/config returns configuration."""
+ mock_service.get_config.return_value = {
+ "mode": "local",
+ "local_path": "/path/to/db",
+ "collection": "test_collection"
+ }
+
+ response = client.get("/api/rag/config")
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["data"]["mode"] == "local"
+ assert data["data"]["local_path"] == "/path/to/db"
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_get_config_error(self, mock_service, client):
+ """GET /api/rag/config handles errors."""
+ mock_service.get_config.side_effect = Exception("Config error")
+
+ response = client.get("/api/rag/config")
+
+ assert response.status_code == 500
+ data = json.loads(response.data)
+ assert "error" in data
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_save_config_success(self, mock_service, client):
+ """POST /api/rag/config saves configuration."""
+ mock_service.save_config.return_value = {
+ "success": True,
+ "config": {"mode": "local", "local_path": "/new/path"}
+ }
+
+ response = client.post(
+ "/api/rag/config",
+ json={"mode": "local", "local_path": "/new/path"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is True
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_save_config_validation_error(self, mock_service, client):
+ """POST /api/rag/config returns 400 on validation error."""
+ mock_service.save_config.return_value = {
+ "error": "Local path is required"
+ }
+
+ response = client.post(
+ "/api/rag/config",
+ json={"mode": "local"}
+ )
+
+ assert response.status_code == 400
+ data = json.loads(response.data)
+ assert "error" in data
+
+
+class TestDiscoverDatabasesRoute:
+ """Tests for /api/rag/discover-databases endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_discover_databases_success(self, mock_service, client):
+ """GET /api/rag/discover-databases returns discovered databases."""
+ mock_service.discover_databases.return_value = {
+ "success": True,
+ "databases": [
+ {
+ "name": "test_db",
+ "path": "/data/test_db",
+ "relative_path": "./data/test_db",
+ "size_mb": 1.5,
+ "collection_count": 2,
+ "is_current": True
+ }
+ ],
+ "search_path": "./data/",
+ "current_path": "/data/test_db"
+ }
+
+ response = client.get("/api/rag/discover-databases")
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is True
+ assert len(data["databases"]) == 1
+ assert data["databases"][0]["name"] == "test_db"
+ assert data["databases"][0]["collection_count"] == 2
+ assert data["databases"][0]["is_current"] is True
+ assert data["search_path"] == "./data/"
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_discover_databases_empty(self, mock_service, client):
+ """GET /api/rag/discover-databases handles no databases found."""
+ mock_service.discover_databases.return_value = {
+ "success": True,
+ "databases": [],
+ "search_path": "./data/",
+ "current_path": ""
+ }
+
+ response = client.get("/api/rag/discover-databases")
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is True
+ assert data["databases"] == []
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_discover_databases_error(self, mock_service, client):
+ """GET /api/rag/discover-databases handles errors."""
+ mock_service.discover_databases.side_effect = Exception("Discovery failed")
+
+ response = client.get("/api/rag/discover-databases")
+
+ assert response.status_code == 500
+ data = json.loads(response.data)
+ assert data["success"] is False
+ assert "error" in data
+
+
+class TestValidatePathRoute:
+ """Tests for /api/rag/validate-path endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_validate_path_valid(self, mock_service, client):
+ """POST /api/rag/validate-path validates valid path."""
+ mock_service.validate_local_path.return_value = {
+ "valid": True,
+ "message": "Valid ChromaDB database"
+ }
+
+ response = client.post(
+ "/api/rag/validate-path",
+ json={"path": "/valid/path"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["valid"] is True
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_validate_path_invalid(self, mock_service, client):
+ """POST /api/rag/validate-path handles invalid path."""
+ mock_service.validate_local_path.return_value = {
+ "valid": False,
+ "message": "Path does not exist"
+ }
+
+ response = client.post(
+ "/api/rag/validate-path",
+ json={"path": "/invalid/path"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["valid"] is False
+
+
+class TestTestConnectionRoute:
+ """Tests for /api/rag/test-connection endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_test_connection_success(self, mock_service, client):
+ """POST /api/rag/test-connection tests connection successfully."""
+ mock_service.test_connection.return_value = {
+ "success": True,
+ "message": "Connected to local ChromaDB",
+ "collections": ["collection1", "collection2"]
+ }
+
+ response = client.post(
+ "/api/rag/test-connection",
+ json={"mode": "local", "local_path": "/test/path"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is True
+ assert "collections" in data
+ assert len(data["collections"]) == 2
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_test_connection_failure(self, mock_service, client):
+ """POST /api/rag/test-connection handles connection failure."""
+ mock_service.test_connection.return_value = {
+ "success": False,
+ "message": "Connection failed"
+ }
+
+ response = client.post(
+ "/api/rag/test-connection",
+ json={"mode": "local", "local_path": "/bad/path"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is False
+ assert "message" in data
+
+
+class TestApiKeyStatusRoute:
+ """Tests for /api/rag/api-key-status endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_api_key_configured(self, mock_service, client):
+ """GET /api/rag/api-key-status returns configured status."""
+ mock_service.get_api_key_status.return_value = {
+ "configured": True,
+ "masked": "abcd...efgh"
+ }
+
+ response = client.get("/api/rag/api-key-status")
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["configured"] is True
+ assert data["masked"] == "abcd...efgh"
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_api_key_not_configured(self, mock_service, client):
+ """GET /api/rag/api-key-status returns not configured."""
+ mock_service.get_api_key_status.return_value = {
+ "configured": False,
+ "masked": None
+ }
+
+ response = client.get("/api/rag/api-key-status")
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["configured"] is False
+
+
+class TestSampleRoute:
+ """Tests for /api/rag/sample endpoint."""
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_get_sample_success(self, mock_service, client):
+ """POST /api/rag/sample returns sample records."""
+ mock_service.get_sample_records.return_value = {
+ "success": True,
+ "collection": "test_collection",
+ "count": 2,
+ "records": [
+ {"id": "1", "document": "Sample 1"},
+ {"id": "2", "document": "Sample 2"}
+ ]
+ }
+
+ response = client.post(
+ "/api/rag/sample",
+ json={"collection": "test_collection"}
+ )
+
+ assert response.status_code == 200
+ data = json.loads(response.data)
+ assert data["success"] is True
+ assert data["count"] == 2
+ assert len(data["records"]) == 2
+
+ @patch("chat_rag_explorer.routes.rag_config_service")
+ def test_get_sample_failure(self, mock_service, client):
+ """POST /api/rag/sample handles failure."""
+ mock_service.get_sample_records.return_value = {
+ "success": False,
+ "message": "Collection not found"
+ }
+
+ response = client.post(
+ "/api/rag/sample",
+ json={"collection": "missing"}
+ )
+
+ assert response.status_code == 400
+ data = json.loads(response.data)
+ assert data["success"] is False
\ No newline at end of file
diff --git a/utils/ingest.py b/utils/ingest.py
index a5fb19d..eb46d79 100644
--- a/utils/ingest.py
+++ b/utils/ingest.py
@@ -870,7 +870,7 @@ def select_directory() -> Path:
print(f" [{len(corpus_dirs) + 1}] Enter a custom path")
print()
- choice = input("Select directory: ").strip()
+ choice = input("Enter a directory number: ").strip()
if not choice:
print(" Error: Selection is required. Try again.\n")