Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ agent → adapter → daemon spawn chain. All are optional.
| `CONTINUUM_PRELOAD_MODEL` | unset | Set to `1`, `true`, `yes`, or `on` to load semantic search at daemon startup instead of lazily. |
| `CONTINUUM_IDLE_MINUTES` | `30` | Idle minutes before the daemon exits (`0` = never). |
| `CONTINUUM_MAX_FILE_KIB` | `2048` | Largest file size indexed, in KiB. |
| `CONTINUUM_MAX_FILES` | `50000` | Maximum number of files indexed per pass (`0` = unlimited). Caps memory use on huge trees. |
| `CONTINUUM_ALLOW_LARGE_ROOT` | unset | Set to `1`, `true`, `yes`, or `on` to auto-index even when the workspace root is a drive root or your home directory. |
| `CONTINUUM_DEBOUNCE_MS` | `300` | Filesystem-watch debounce window. |

## MCP tools
Expand Down
84 changes: 64 additions & 20 deletions crates/continuum-daemon/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod lifecycle;
mod mcp;
mod tools;

use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
Expand Down Expand Up @@ -92,22 +92,35 @@ async fn main() -> Result<()> {
// blocks startup on a model download.
let semantic = Arc::new(continuum_search::SemanticEngine::new());

// Index in the background so the daemon serves immediately; navigation
// tools return progressively richer results as the scan completes.
{
let graph = graph.clone();
let semantic = semantic.clone();
let root = ws.root_path();
let ws_snapshot = ws.clone();
tokio::spawn(async move {
let n = continuum_indexer::index_workspace(&root, graph.clone(), semantic).await;
tracing::info!("initial index complete: {n} files");
ws_snapshot.write_snapshot(&graph.read().await.snapshot());
});
}
let _watcher =
continuum_indexer::start_watcher(ws.root_path(), graph.clone(), semantic.clone())
.map_err(|e| anyhow::anyhow!("start file watcher: {e}"))?;
// A workspace rooted at a drive/filesystem root or the user's home directory
// would walk an enormous tree and exhaust memory. Refuse to auto-index (and
// to recursively watch) such a root; the daemon still serves memory and
// on-demand text search. CONTINUUM_ALLOW_LARGE_ROOT=1 overrides.
let _watcher = if let Some(reason) = unsafe_index_root(&ws.root_path()) {
tracing::warn!(
"skipping automatic indexing: {reason}. Open a project subdirectory, \
or set CONTINUUM_ALLOW_LARGE_ROOT=1 to override."
);
None
} else {
// Index in the background so the daemon serves immediately; navigation
// tools return progressively richer results as the scan completes.
{
let graph = graph.clone();
let semantic = semantic.clone();
let root = ws.root_path();
let ws_snapshot = ws.clone();
tokio::spawn(async move {
let n = continuum_indexer::index_workspace(&root, graph.clone(), semantic).await;
tracing::info!("initial index complete: {n} files");
ws_snapshot.write_snapshot(&graph.read().await.snapshot());
});
}
Some(
continuum_indexer::start_watcher(ws.root_path(), graph.clone(), semantic.clone())
.map_err(|e| anyhow::anyhow!("start file watcher: {e}"))?,
)
};

let listener = TcpListener::bind("127.0.0.1:0")
.await
Expand Down Expand Up @@ -306,10 +319,41 @@ pub(crate) fn maybe_start_semantic_load(daemon: &Arc<Daemon>) {
);
}

fn semantic_preload_enabled() -> bool {
std::env::var("CONTINUUM_PRELOAD_MODEL")
/// Decides whether `root` is too broad a tree to index automatically.
///
/// Returns `Some(reason)` — a message suitable for logging — when `root` has
/// no parent (a filesystem or drive root) or equals the user's home directory.
/// Returns `None` when neither holds, and always when the
/// `CONTINUUM_ALLOW_LARGE_ROOT` escape hatch is truthy.
///
/// The home-directory check is an exact path comparison against the
/// canonicalized home directory, so `root` is expected to be canonical too
/// (presumably guaranteed by `Workspace::resolve` — confirm at the call site).
fn unsafe_index_root(root: &Path) -> Option<String> {
    // The override short-circuits every check below.
    if env_flag("CONTINUUM_ALLOW_LARGE_ROOT") {
        return None;
    }

    let shown = root.display();
    if root.parent().is_none() {
        return Some(format!("{shown} is a filesystem root"));
    }

    match home_dir() {
        Some(home) if home == root => Some(format!("{shown} is your home directory")),
        _ => None,
    }
}

/// The user's home directory, canonicalized to match a resolved workspace root.
///
/// Candidates are tried in order: `USERPROFILE` (Windows), then `HOME`. The
/// first one that is set *and* canonicalizes successfully wins, so an empty or
/// stale `USERPROFILE` no longer hides a perfectly good `HOME`.
///
/// Returns `None` when no candidate is set or none resolves to an existing
/// path.
fn home_dir() -> Option<PathBuf> {
    ["USERPROFILE", "HOME"]
        .iter()
        .filter_map(|name| std::env::var_os(name))
        // `canonicalize` fails for empty or nonexistent paths; skip those and
        // keep looking rather than giving up on the first variable that is set.
        .find_map(|raw| PathBuf::from(raw).canonicalize().ok())
}

/// Whether an environment variable is set to a truthy value.
///
/// Truthy means `1`, `true`, `yes`, or `on`, compared ASCII-case-insensitively
/// after trimming surrounding whitespace, so `TRUE` and `" on "` count too.
/// An unset variable, a value that is not valid Unicode, or any other value
/// yields `false`.
fn env_flag(name: &str) -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    std::env::var(name).ok().is_some_and(|raw| {
        let value = raw.trim();
        TRUTHY
            .iter()
            .any(|candidate| value.eq_ignore_ascii_case(candidate))
    })
}

/// Whether `CONTINUUM_PRELOAD_MODEL` is set to a truthy value, as judged by
/// `env_flag`.
fn semantic_preload_enabled() -> bool {
    const PRELOAD_VAR: &str = "CONTINUUM_PRELOAD_MODEL";
    env_flag(PRELOAD_VAR)
}

/// Validate the Continuum handshake, then serve MCP for the connection's life.
Expand Down
78 changes: 73 additions & 5 deletions crates/continuum-indexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,49 @@ pub use textsearch::search_text;
pub use watcher::start_watcher;

/// Directory names never descended into during indexing.
///
/// Covers version-control metadata, dependency stores, build output, language
/// caches, and the bulky home-directory trees (`AppData`, `Library`) that a
/// misaimed workspace root would otherwise drag in. These are the safety net
/// over `.gitignore`/`.ignore`, which many of these directories lack.
///
/// Entries are matched against whole directory names, and each name appears
/// exactly once.
const SKIP_DIRS: &[&str] = &[
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Continuum's own state
    ".continuum",
    // Dependency stores
    "node_modules",
    "vendor",
    "Pods",
    // Build output
    "target",
    "dist",
    "build",
    "out",
    // Python environments and caches
    ".venv",
    "venv",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    // JS/TS framework output
    ".next",
    ".nuxt",
    "coverage",
    // Language / tool caches
    ".cache",
    ".cargo",
    ".rustup",
    ".npm",
    ".gradle",
    ".m2",
    // Editor / OS home-directory bloat
    ".idea",
    "AppData",
    "Library",
];

/// Full one-shot index of a workspace. Returns the number of files indexed.
Expand Down Expand Up @@ -186,15 +220,41 @@ pub(crate) fn is_skipped_path(root: &Path, path: &Path) -> bool {
.any(|name| SKIP_DIRS.contains(&name))
}

/// Upper bound on the number of files gathered in a single index pass.
///
/// Read once, on first use, from `CONTINUUM_MAX_FILES`:
/// - unset or not parseable as an unsigned integer → `50_000`;
/// - `0` → `usize::MAX`, i.e. effectively no cap;
/// - any other number → that number.
///
/// The cap exists so a workspace rooted at a huge tree (a home directory, a
/// drive root) cannot exhaust memory: every indexed file's symbols, BM25
/// tokens, and embeddings live in RAM.
static MAX_FILES: std::sync::LazyLock<usize> = std::sync::LazyLock::new(|| {
    const DEFAULT_CAP: usize = 50_000;

    let configured = std::env::var("CONTINUUM_MAX_FILES")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok());

    // Zero is the documented "unlimited" sentinel rather than a literal cap.
    configured.map_or(DEFAULT_CAP, |limit| if limit == 0 { usize::MAX } else { limit })
});

/// Walks `root` and returns the paths of the regular files to index, stopping
/// once `MAX_FILES` paths have been gathered.
///
/// The walk is built with `require_git(false)` and prunes every entry that
/// `is_skipped_dir` rejects. Entries the walker fails to read are dropped
/// silently, and anything that is not a regular file is ignored.
///
/// If another file turns up after the cap is reached, a warning is logged and
/// the walk ends early; the returned list is then a truncated view of the tree.
fn collect_source_files(root: &Path) -> Vec<PathBuf> {
    let cap = *MAX_FILES;
    let mut files = Vec::new();

    let walker = ignore::WalkBuilder::new(root)
        .require_git(false)
        .filter_entry(|entry| !is_skipped_dir(entry))
        .build();
    let regular_files = walker
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.file_type().is_some_and(|t| t.is_file()));

    for entry in regular_files {
        // Checked before pushing, so the warning fires only when at least one
        // file beyond the cap actually exists.
        if files.len() >= cap {
            tracing::warn!(
                "file cap ({cap}) reached; indexing truncated — narrow the workspace \
                 or raise CONTINUUM_MAX_FILES (0 disables the cap)"
            );
            break;
        }
        files.push(entry.path().to_path_buf());
    }
    files
}

#[cfg(test)]
Expand All @@ -213,6 +273,14 @@ mod tests {
root,
Path::new("/workspace/pkg/node_modules/a.js")
));
assert!(is_skipped_path(
root,
Path::new("/workspace/.venv/lib/site-packages/a.py")
));
assert!(is_skipped_path(
root,
Path::new("/workspace/AppData/Local/cache/a.ts")
));
assert!(!is_skipped_path(root, Path::new("/workspace/src/lib.rs")));
}
}
Loading