diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 152990aa479..e43e3899ae2 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1170,6 +1170,7 @@ name = "codex-cli" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "clap", "clap_complete", "codex-arg0", @@ -1179,10 +1180,14 @@ dependencies = [ "codex-exec", "codex-login", "codex-mcp-server", + "codex-memory", "codex-protocol", "codex-protocol-ts", "codex-tui", + "indicatif", + "predicates", "serde_json", + "tempfile", "tokio", "tracing", "tracing-subscriber", @@ -1643,6 +1648,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", + "unicode-width 0.2.1", "windows-sys 0.59.0", ] @@ -2173,7 +2179,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "web-time", + "web-time 0.2.4", "wgpu", "winapi", "winit", @@ -2208,7 +2214,7 @@ dependencies = [ "log", "thiserror 1.0.69", "type-map", - "web-time", + "web-time 0.2.4", "wgpu", "winit", ] @@ -2226,7 +2232,7 @@ dependencies = [ "log", "raw-window-handle 0.6.2", "smithay-clipboard", - "web-time", + "web-time 0.2.4", "webbrowser", "winit", ] @@ -3547,6 +3553,19 @@ dependencies = [ "serde", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width 0.2.1", + "web-time 1.1.0", +] + [[package]] name = "indoc" version = "2.0.6" @@ -4472,6 +4491,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "objc" version = "0.2.7" @@ -7905,6 +7930,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webbrowser" version = "1.0.5" @@ -8632,7 +8667,7 @@ dependencies = [ "wayland-protocols 0.31.2", "wayland-protocols-plasma", "web-sys", - "web-time", + "web-time 0.2.4", "windows-sys 0.48.0", "x11-dl", "x11rb", diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml index f7af3349e0a..1279790843d 100644 --- a/codex-rs/cli/Cargo.toml +++ b/codex-rs/cli/Cargo.toml @@ -23,6 +23,7 @@ codex-chatgpt = { path = "../chatgpt" } codex-common = { path = "../common", features = ["cli"] } codex-core = { path = "../core" } codex-exec = { path = "../exec" } +codex-memory = { path = "../memory", features = ["sqlite"] } codex-login = { path = "../login" } codex-mcp-server = { path = "../mcp-server" } codex-protocol = { path = "../protocol" } @@ -38,3 +39,9 @@ tokio = { version = "1", features = [ tracing = "0.1.41" tracing-subscriber = "0.3.19" codex-protocol-ts = { path = "../protocol-ts" } +indicatif = "0.17" + +[dev-dependencies] +assert_cmd = "2" +predicates = "3" +tempfile = "3" diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index 2acc3d84c50..e8a073f9417 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -56,6 +56,9 @@ enum Subcommand { /// Remove stored authentication credentials. Logout(LogoutCommand), + /// Memory utilities. + Memory(MemoryCommand), + /// Experimental: run Codex as an MCP server. Mcp, @@ -118,6 +121,43 @@ enum LoginSubcommand { Status, } +#[derive(Debug, Parser)] +struct MemoryCommand { + #[command(subcommand)] + action: MemorySubcommand, +} + +#[derive(Debug, clap::Subcommand)] +enum MemorySubcommand { + /// Migrate a JSONL memory file to SQLite. + Migrate(MemoryMigrateArgs), + + /// Compact a JSONL memory file by removing duplicate ids. + Compact(MemoryCompactArgs), +} + +#[derive(Debug, Parser)] +struct MemoryMigrateArgs { + /// Source JSONL file + #[arg(long, value_name = "JSONL")] + jsonl: std::path::PathBuf, + + /// Destination SQLite database file + #[arg(long, value_name = "SQLITE")] + sqlite: std::path::PathBuf, +} + +#[derive(Debug, Parser)] +struct MemoryCompactArgs { + /// Input JSONL file to compact + #[arg(long, value_name = "INPUT")] + input: std::path::PathBuf, + + /// Output JSONL file (defaults to in-place) + #[arg(long, value_name = "OUTPUT")] + output: Option, +} + #[derive(Debug, Parser)] struct LogoutCommand { #[clap(skip)] @@ -184,6 +224,9 @@ async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<() prepend_config_flags(&mut proto_cli.config_overrides, cli.config_overrides); proto::run_main(proto_cli).await?; } + Some(Subcommand::Memory(memory_cli)) => { + run_memory_command(memory_cli).await?; + } Some(Subcommand::Completion(completion_cli)) => { print_completion(completion_cli); } @@ -217,6 +260,40 @@ async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<() Ok(()) } +async fn run_memory_command(cmd: MemoryCommand) -> anyhow::Result<()> { + match cmd.action { + MemorySubcommand::Migrate(args) => { + eprintln!( + "Migrating {} -> {}...", + args.jsonl.display(), + args.sqlite.display() + ); + let pb = indicatif::ProgressBar::new_spinner(); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); + let count = codex_memory::migrate::migrate_jsonl_to_sqlite(&args.jsonl, &args.sqlite)?; + pb.finish_and_clear(); + println!("Migrated {count} entries"); + } + MemorySubcommand::Compact(args) => { + let out = args.output.unwrap_or_else(|| args.input.clone()); + eprintln!( + "Compacting {} -> {}...", + args.input.display(), + out.display() + ); + let pb = indicatif::ProgressBar::new_spinner(); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); + let (read, written) = codex_memory::migrate::compact_jsonl(&args.input, &out)?; + pb.finish_and_clear(); + println!( + "Read {read} entries, wrote {written} entries (removed {})", + read - written + ); + } + } + Ok(()) +} + /// Prepend root-level overrides so they have lower precedence than /// CLI-specific ones specified after the subcommand (if any). fn prepend_config_flags( diff --git a/codex-rs/cli/tests/memory.rs b/codex-rs/cli/tests/memory.rs new file mode 100644 index 00000000000..3b152e96c1c --- /dev/null +++ b/codex-rs/cli/tests/memory.rs @@ -0,0 +1,66 @@ +use assert_cmd::Command; +use predicates::str::contains; +use std::fs; +use tempfile::tempdir; + +fn sample_line(id: &str, content: &str) -> String { + format!( + r#"{{"id":"{id}","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z","schema_version":1,"source":"test","scope":"Repo","status":"Active","kind":"Note","content":"{content}","tags":[],"relevance_hints":{{"files":[],"crates":[],"languages":[],"commands":[]}},"counters":{{"seen_count":0,"used_count":0,"last_used_at":null}},"expiry":null}}"# + ) +} + +#[test] +fn memory_compact_removes_duplicates() -> Result<(), Box> { + let dir = tempdir()?; + let input = dir.path().join("mem.jsonl"); + let output = dir.path().join("out.jsonl"); + let data = [ + sample_line("1", "one"), + sample_line("2", "two"), + sample_line("1", "one"), + ] + .join("\n"); + fs::write(&input, data + "\n")?; + + Command::cargo_bin("codex")? + .args([ + "memory", + "compact", + "--input", + input.to_str().unwrap(), + "--output", + output.to_str().unwrap(), + ]) + .assert() + .success() + .stdout(contains("Read 3 entries, wrote 2 entries")); + + let out_data = fs::read_to_string(&output)?; + assert_eq!(out_data.lines().count(), 2); + Ok(()) +} + +#[test] +fn memory_migrate_imports_entries() -> Result<(), Box> { + let dir = tempdir()?; + let jsonl = dir.path().join("mem.jsonl"); + let sqlite = dir.path().join("mem.sqlite"); + let data = [sample_line("1", "one"), sample_line("2", "two")].join("\n"); + fs::write(&jsonl, data + "\n")?; + + Command::cargo_bin("codex")? + .args([ + "memory", + "migrate", + "--jsonl", + jsonl.to_str().unwrap(), + "--sqlite", + sqlite.to_str().unwrap(), + ]) + .assert() + .success() + .stdout(contains("Migrated 2 entries")); + + assert!(sqlite.exists()); + Ok(()) +} diff --git a/codex-rs/memory/src/migrate.rs b/codex-rs/memory/src/migrate.rs index 7c715428994..6e3f4d08d12 100644 --- a/codex-rs/memory/src/migrate.rs +++ b/codex-rs/memory/src/migrate.rs @@ -9,8 +9,8 @@ pub fn migrate_jsonl_to_sqlite( jsonl_path: &std::path::Path, sqlite_path: &std::path::Path, ) -> anyhow::Result { - use crate::store::sqlite::SqliteMemoryStore; use crate::store::MemoryStore; + use crate::store::sqlite::SqliteMemoryStore; use std::io::Read as _; let mut data = String::new(); @@ -28,3 +28,61 @@ pub fn migrate_jsonl_to_sqlite( ) -> anyhow::Result { anyhow::bail!("sqlite backend not compiled; enable with `--features codex-memory/sqlite`"); } + +/// Compact a JSONL file by removing duplicate entries based on the `id` field. +/// +/// - `input_path`: source JSONL file +/// - `output_path`: destination JSONL file (may be the same as `input_path`) +/// +/// Returns a tuple of `(read_count, written_count)`. +pub fn compact_jsonl( + input_path: &std::path::Path, + output_path: &std::path::Path, +) -> anyhow::Result<(usize, usize)> { + use crate::types::MemoryItem; + use std::collections::HashSet; + use std::io::BufRead as _; + use std::io::BufReader; + use std::io::BufWriter; + use std::io::Write as _; + + let infile = std::fs::File::open(input_path)?; + let reader = BufReader::new(infile); + + let tmp_path = if output_path == input_path { + let mut p = output_path.to_path_buf(); + p.set_extension("jsonl.tmp"); + p + } else { + output_path.to_path_buf() + }; + if let Some(parent) = tmp_path.parent() { + std::fs::create_dir_all(parent)?; + } + let outfile = std::fs::File::create(&tmp_path)?; + let mut writer = BufWriter::new(outfile); + + let mut seen = HashSet::new(); + let mut read = 0usize; + let mut written = 0usize; + + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + read += 1; + if let Ok(item) = serde_json::from_str::(trimmed) + && seen.insert(item.id) { + writer.write_all(trimmed.as_bytes())?; + writer.write_all(b"\n")?; + written += 1; + } + } + writer.flush()?; + if output_path == input_path { + std::fs::rename(tmp_path, output_path)?; + } + Ok((read, written)) +}