From 3d857afb97b1626cc0ecb94244d95b0c328648c1 Mon Sep 17 00:00:00 2001 From: "cyrus@tinyhumans.ai" Date: Thu, 28 May 2026 07:54:32 +0530 Subject: [PATCH] feat(artifacts): define artifact storage types, config, and persistence layer Add new `artifacts` domain with ArtifactMeta struct, ArtifactKind/ArtifactStatus enums, filesystem-backed persistence under /artifacts//, and three RPC endpoints (ai_list_artifacts, ai_get_artifact, ai_delete_artifact). Includes 38 unit tests covering CRUD operations, pagination, path-sandboxing enforcement, and corrupt metadata handling. Closes #2776 --- .claude/memory.md | 7 + src/core/all.rs | 4 + src/openhuman/artifacts/mod.rs | 10 + src/openhuman/artifacts/ops.rs | 114 ++++++++ src/openhuman/artifacts/ops_tests.rs | 46 ++++ src/openhuman/artifacts/schemas.rs | 362 +++++++++++++++++++++++++ src/openhuman/artifacts/store.rs | 235 ++++++++++++++++ src/openhuman/artifacts/store_tests.rs | 184 +++++++++++++ src/openhuman/artifacts/types.rs | 251 +++++++++++++++++ src/openhuman/mod.rs | 1 + 10 files changed, 1214 insertions(+) create mode 100644 src/openhuman/artifacts/mod.rs create mode 100644 src/openhuman/artifacts/ops.rs create mode 100644 src/openhuman/artifacts/ops_tests.rs create mode 100644 src/openhuman/artifacts/schemas.rs create mode 100644 src/openhuman/artifacts/store.rs create mode 100644 src/openhuman/artifacts/store_tests.rs create mode 100644 src/openhuman/artifacts/types.rs diff --git a/.claude/memory.md b/.claude/memory.md index 6ad9c79084..3731748cb8 100644 --- a/.claude/memory.md +++ b/.claude/memory.md @@ -210,6 +210,13 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **Kill stuck processes** — `lsof -i :7788` then `kill `. Useful when `dev:app` reports a stale listener and you want to force a fresh boot rather than relying on the handle's auto-recovery. - **Skills runtime removed** — the QuickJS / `rquickjs` runtime is gone; `src/openhuman/skills/` is metadata-only ("Legacy skill metadata helpers retained after QuickJS runtime removal"). Skill execution surfaces are being rebuilt; don't assume a `.skill` can run end-to-end without checking the current code. +## Artifacts Domain (Issue #2776) + +- **Filesystem-backed persistence, no SQLite** — `src/openhuman/artifacts/` stores JSON metadata (`meta.json`) + binary blobs under `/artifacts//`. Pattern mirrors `memory/ops/files.rs` but simpler. +- **`"ai"` namespace in controller registry** — RPC methods are `openhuman.ai_list_artifacts`, `openhuman.ai_get_artifact`, `openhuman.ai_delete_artifact`. Future `ai_*` methods should use this same namespace. +- **Two-layer path validation required** — (1) `validate_artifact_id` rejects empty strings, `/`, `\`, `..`, absolute Unix paths, Windows `C:` and UNC `\\` paths; (2) `assert_within_root` canonicalizes and checks containment. Replicate this pattern for any new filesystem-backed domain. +- **`cargo test --lib` required for lib crate tests** — `cargo test -p openhuman -- "artifacts"` lists tests but filters to 0. Must use `cargo test -p openhuman --lib -- "artifacts"` because tests are in the lib crate, not integration test binaries. + ## Rust Testing Patterns - **Memory tree tests filter** — `cargo test -p openhuman -- "memory::tree"` runs the memory tree unit tests (602 tests); full module paths are `openhuman::memory::tree::ingest::tests::*` and `openhuman::memory::tree::canonicalize::email_clean::tests::*`. diff --git a/src/core/all.rs b/src/core/all.rs index c43faa68e7..a621d2e6fc 100644 --- a/src/core/all.rs +++ b/src/core/all.rs @@ -134,6 +134,8 @@ fn build_registered_controllers() -> Vec { controllers.extend(crate::openhuman::security::all_security_registered_controllers()); // Interactive approval workflow (#1339 — gate external-effect tool calls) controllers.extend(crate::openhuman::approval::all_approval_registered_controllers()); + // Agent-generated artifact storage, retrieval, and lifecycle management + controllers.extend(crate::openhuman::artifacts::all_artifacts_registered_controllers()); // Background heartbeat loop controls controllers.extend(crate::openhuman::heartbeat::all_heartbeat_registered_controllers()); // Ad-hoc static directory HTTP hosting for local file sharing / previews @@ -294,6 +296,7 @@ fn build_declared_controller_schemas() -> Vec { schemas.extend(crate::openhuman::encryption::all_encryption_controller_schemas()); schemas.extend(crate::openhuman::security::all_security_controller_schemas()); schemas.extend(crate::openhuman::approval::all_approval_controller_schemas()); + schemas.extend(crate::openhuman::artifacts::all_artifacts_controller_schemas()); schemas.extend(crate::openhuman::heartbeat::all_heartbeat_controller_schemas()); schemas.extend(crate::openhuman::http_host::all_http_host_controller_schemas()); schemas.extend(crate::openhuman::cost::all_cost_controller_schemas()); @@ -389,6 +392,7 @@ pub fn rpc_method_name(schema: &ControllerSchema) -> String { pub fn namespace_description(namespace: &str) -> Option<&'static str> { match namespace { "about_app" => Some("Catalog the app's user-facing capabilities and where to find them."), + "ai" => Some("Agent-generated artifact storage, retrieval, and lifecycle management."), "app_state" => Some("Expose core-owned app shell state for frontend polling."), "auth" => Some("Manage app session and provider credentials."), "agent_experience" => Some("Local procedural experience capture and retrieval for agents."), diff --git a/src/openhuman/artifacts/mod.rs b/src/openhuman/artifacts/mod.rs new file mode 100644 index 0000000000..bb9ab4f88b --- /dev/null +++ b/src/openhuman/artifacts/mod.rs @@ -0,0 +1,10 @@ +pub mod ops; +pub mod schemas; +pub mod store; +pub mod types; + +pub use schemas::{ + all_controller_schemas as all_artifacts_controller_schemas, + all_registered_controllers as all_artifacts_registered_controllers, +}; +pub use types::{ArtifactKind, ArtifactMeta, ArtifactStatus}; diff --git a/src/openhuman/artifacts/ops.rs b/src/openhuman/artifacts/ops.rs new file mode 100644 index 0000000000..bc6685cf6d --- /dev/null +++ b/src/openhuman/artifacts/ops.rs @@ -0,0 +1,114 @@ +use serde_json::{json, Value}; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::store; + +/// Default page size for `ai_list_artifacts`. +const DEFAULT_LIMIT: usize = 50; +/// Maximum page size cap for `ai_list_artifacts`. +const MAX_LIMIT: usize = 200; + +/// List artifacts in the workspace with pagination. +/// +/// Returns `{ "artifacts": [...], "total": N, "offset": M, "limit": L }`. +pub async fn ai_list_artifacts( + config: &Config, + offset: Option, + limit: Option, +) -> Result, String> { + let offset = offset.unwrap_or(0); + let limit = limit.unwrap_or(DEFAULT_LIMIT).min(MAX_LIMIT); + log::debug!( + "[artifacts] ai_list_artifacts: workspace={:?} offset={offset} limit={limit}", + config.workspace_dir + ); + + let (artifacts, total) = store::list_artifacts(&config.workspace_dir, offset, limit).await?; + + log::debug!( + "[artifacts] ai_list_artifacts: returning {} of {total} total", + artifacts.len() + ); + + let value = json!({ + "artifacts": artifacts, + "total": total, + "offset": offset, + "limit": limit, + }); + Ok(RpcOutcome::new(value, vec![])) +} + +/// Retrieve a single artifact by ID. +/// +/// Returns the serialized `ArtifactMeta` plus an `absolute_path` field +/// pointing to the full on-disk location of the artifact files. +pub async fn ai_get_artifact( + config: &Config, + artifact_id: &str, +) -> Result, String> { + log::debug!( + "[artifacts] ai_get_artifact: id={artifact_id} workspace={:?}", + config.workspace_dir + ); + + if artifact_id.is_empty() { + return Err("[artifacts] artifact_id must not be empty".to_string()); + } + + let meta = store::get_artifact(&config.workspace_dir, artifact_id).await?; + + // Compute absolute path for the caller's convenience. + let absolute_path = config + .workspace_dir + .join("artifacts") + .join(&meta.path) + .to_string_lossy() + .into_owned(); + + let mut value = + serde_json::to_value(&meta).map_err(|e| format!("[artifacts] serialization error: {e}"))?; + if let Some(obj) = value.as_object_mut() { + obj.insert( + "absolute_path".to_string(), + Value::String(absolute_path.clone()), + ); + } + + log::debug!( + "[artifacts] ai_get_artifact: found id={artifact_id} absolute_path={absolute_path}" + ); + Ok(RpcOutcome::new(value, vec![])) +} + +/// Delete an artifact and all associated files. +/// +/// Returns `{ "artifact_id": "...", "deleted": true }`. +pub async fn ai_delete_artifact( + config: &Config, + artifact_id: &str, +) -> Result, String> { + log::debug!( + "[artifacts] ai_delete_artifact: id={artifact_id} workspace={:?}", + config.workspace_dir + ); + + if artifact_id.is_empty() { + return Err("[artifacts] artifact_id must not be empty".to_string()); + } + + store::delete_artifact(&config.workspace_dir, artifact_id).await?; + + log::debug!("[artifacts] ai_delete_artifact: deleted id={artifact_id}"); + let value = json!({ + "artifact_id": artifact_id, + "deleted": true, + }); + Ok(RpcOutcome::new(value, vec![])) +} + +#[cfg(test)] +#[path = "ops_tests.rs"] +mod tests; diff --git a/src/openhuman/artifacts/ops_tests.rs b/src/openhuman/artifacts/ops_tests.rs new file mode 100644 index 0000000000..1d1bf07f4c --- /dev/null +++ b/src/openhuman/artifacts/ops_tests.rs @@ -0,0 +1,46 @@ +use tempfile::TempDir; + +use super::*; +use crate::openhuman::config::Config; + +fn test_config(tmp: &TempDir) -> Config { + Config { + workspace_dir: tmp.path().to_path_buf(), + config_path: tmp.path().join("config.toml"), + ..Config::default() + } +} + +// ── ai_list_artifacts ────────────────────────────────────────────────────── + +#[tokio::test] +async fn list_empty() { + let tmp = TempDir::new().unwrap(); + let config = test_config(&tmp); + let outcome = ai_list_artifacts(&config, None, None).await.unwrap(); + let value = outcome.into_cli_compatible_json().unwrap(); + assert_eq!(value["total"], 0); + assert_eq!(value["artifacts"].as_array().unwrap().len(), 0); + assert_eq!(value["offset"], 0); + assert_eq!(value["limit"], DEFAULT_LIMIT as u64); +} + +// ── ai_get_artifact ──────────────────────────────────────────────────────── + +#[tokio::test] +async fn get_missing_id_error() { + let tmp = TempDir::new().unwrap(); + let config = test_config(&tmp); + let err = ai_get_artifact(&config, "").await.unwrap_err(); + assert!(err.contains("must not be empty"), "unexpected error: {err}"); +} + +// ── ai_delete_artifact ───────────────────────────────────────────────────── + +#[tokio::test] +async fn delete_missing_id_error() { + let tmp = TempDir::new().unwrap(); + let config = test_config(&tmp); + let err = ai_delete_artifact(&config, "").await.unwrap_err(); + assert!(err.contains("must not be empty"), "unexpected error: {err}"); +} diff --git a/src/openhuman/artifacts/schemas.rs b/src/openhuman/artifacts/schemas.rs new file mode 100644 index 0000000000..d76f2139ff --- /dev/null +++ b/src/openhuman/artifacts/schemas.rs @@ -0,0 +1,362 @@ +use serde::de::DeserializeOwned; +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; +use crate::openhuman::config::rpc as config_rpc; +use crate::rpc::RpcOutcome; + +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("list_artifacts"), + schemas("get_artifact"), + schemas("delete_artifact"), + ] +} + +pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("list_artifacts"), + handler: handle_list_artifacts, + }, + RegisteredController { + schema: schemas("get_artifact"), + handler: handle_get_artifact, + }, + RegisteredController { + schema: schemas("delete_artifact"), + handler: handle_delete_artifact, + }, + ] +} + +pub fn schemas(function: &str) -> ControllerSchema { + match function { + "list_artifacts" => ControllerSchema { + namespace: "ai", + function: "list_artifacts", + description: "List agent-generated artifacts in the workspace with pagination.", + inputs: vec![ + FieldSchema { + name: "offset", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Zero-based index of the first artifact to return; defaults to 0.", + required: false, + }, + FieldSchema { + name: "limit", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: + "Maximum number of artifacts to return; defaults to 50, capped at 200.", + required: false, + }, + ], + outputs: vec![ + FieldSchema { + name: "artifacts", + ty: TypeSchema::Array(Box::new(TypeSchema::Ref("ArtifactMeta"))), + comment: "Artifact metadata records sorted by created_at descending.", + required: true, + }, + FieldSchema { + name: "total", + ty: TypeSchema::U64, + comment: "Total number of artifacts in the workspace before pagination.", + required: true, + }, + FieldSchema { + name: "offset", + ty: TypeSchema::U64, + comment: "Offset used for this page.", + required: true, + }, + FieldSchema { + name: "limit", + ty: TypeSchema::U64, + comment: "Limit used for this page.", + required: true, + }, + ], + }, + "get_artifact" => ControllerSchema { + namespace: "ai", + function: "get_artifact", + description: "Retrieve metadata for a single artifact by ID.", + inputs: vec![artifact_id_input("Identifier of the artifact to retrieve.")], + outputs: vec![FieldSchema { + name: "artifact", + ty: TypeSchema::Ref("ArtifactMeta"), + comment: "Artifact metadata plus absolute_path field.", + required: true, + }], + }, + "delete_artifact" => ControllerSchema { + namespace: "ai", + function: "delete_artifact", + description: "Delete an artifact and all its associated files from the workspace.", + inputs: vec![artifact_id_input("Identifier of the artifact to delete.")], + outputs: vec![FieldSchema { + name: "result", + ty: TypeSchema::Object { + fields: vec![ + FieldSchema { + name: "artifact_id", + ty: TypeSchema::String, + comment: "Identifier that was requested for deletion.", + required: true, + }, + FieldSchema { + name: "deleted", + ty: TypeSchema::Bool, + comment: "True when the artifact was successfully deleted.", + required: true, + }, + ], + }, + comment: "Deletion result payload.", + required: true, + }], + }, + _other => ControllerSchema { + namespace: "ai", + function: "unknown", + description: "Unknown artifacts controller function.", + inputs: vec![FieldSchema { + name: "function", + ty: TypeSchema::String, + comment: "Unknown function requested for schema lookup.", + required: true, + }], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} + +fn artifact_id_input(comment: &'static str) -> FieldSchema { + FieldSchema { + name: "artifact_id", + ty: TypeSchema::String, + comment, + required: true, + } +} + +fn handle_list_artifacts(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let offset = read_optional_u64(¶ms, "offset")? + .map(|raw| { + usize::try_from(raw).map_err(|_| "offset is too large for usize".to_string()) + }) + .transpose()?; + let limit = read_optional_u64(¶ms, "limit")? + .map(|raw| usize::try_from(raw).map_err(|_| "limit is too large for usize".to_string())) + .transpose()?; + to_json(crate::openhuman::artifacts::ops::ai_list_artifacts(&config, offset, limit).await?) + }) +} + +fn handle_get_artifact(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let artifact_id = read_required::(¶ms, "artifact_id")?; + to_json( + crate::openhuman::artifacts::ops::ai_get_artifact(&config, artifact_id.trim()).await?, + ) + }) +} + +fn handle_delete_artifact(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let artifact_id = read_required::(¶ms, "artifact_id")?; + to_json( + crate::openhuman::artifacts::ops::ai_delete_artifact(&config, artifact_id.trim()) + .await?, + ) + }) +} + +fn read_required(params: &Map, key: &str) -> Result { + let value = params + .get(key) + .cloned() + .ok_or_else(|| format!("missing required param '{key}'"))?; + serde_json::from_value(value).map_err(|e| format!("invalid '{key}': {e}")) +} + +fn read_optional_u64(params: &Map, key: &str) -> Result, String> { + match params.get(key) { + None => Ok(None), + Some(Value::Null) => Ok(None), + Some(Value::Number(n)) => n + .as_u64() + .map(Some) + .ok_or_else(|| format!("invalid '{key}': expected unsigned integer")), + Some(other) => Err(format!( + "invalid '{key}': expected unsigned integer, got {}", + type_name(other) + )), + } +} + +fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} + +fn type_name(value: &Value) -> &'static str { + match value { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Array(_) => "array", + Value::Object(_) => "object", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + // ── schemas() branch coverage ─────────────────────────────────── + + #[test] + fn schemas_list_artifacts_has_pagination_inputs_and_correct_outputs() { + let s = schemas("list_artifacts"); + assert_eq!(s.namespace, "ai"); + assert_eq!(s.function, "list_artifacts"); + let input_names: Vec<_> = s.inputs.iter().map(|f| f.name).collect(); + assert!(input_names.contains(&"offset")); + assert!(input_names.contains(&"limit")); + assert!(s.inputs.iter().all(|f| !f.required)); + let output_names: Vec<_> = s.outputs.iter().map(|f| f.name).collect(); + assert!(output_names.contains(&"artifacts")); + assert!(output_names.contains(&"total")); + assert!(output_names.contains(&"offset")); + assert!(output_names.contains(&"limit")); + } + + #[test] + fn schemas_get_artifact_requires_artifact_id() { + let s = schemas("get_artifact"); + assert_eq!(s.namespace, "ai"); + assert_eq!(s.function, "get_artifact"); + assert_eq!(s.inputs.len(), 1); + assert_eq!(s.inputs[0].name, "artifact_id"); + assert!(s.inputs[0].required); + } + + #[test] + fn schemas_delete_artifact_has_artifact_id_input_and_result_output() { + let s = schemas("delete_artifact"); + assert_eq!(s.inputs.len(), 1); + assert_eq!(s.inputs[0].name, "artifact_id"); + assert!(s.inputs[0].required); + assert_eq!(s.outputs[0].name, "result"); + if let TypeSchema::Object { fields } = &s.outputs[0].ty { + let names: Vec<_> = fields.iter().map(|f| f.name).collect(); + assert!(names.contains(&"artifact_id")); + assert!(names.contains(&"deleted")); + } else { + panic!("expected object output type"); + } + } + + #[test] + fn schemas_unknown_function_returns_placeholder_with_error_output() { + let s = schemas("does-not-exist"); + assert_eq!(s.function, "unknown"); + assert_eq!(s.outputs[0].name, "error"); + } + + // ── registry helpers ──────────────────────────────────────────── + + #[test] + fn all_controller_schemas_covers_every_supported_function() { + let names: Vec<_> = all_controller_schemas() + .into_iter() + .map(|s| s.function) + .collect(); + assert_eq!( + names, + vec!["list_artifacts", "get_artifact", "delete_artifact"] + ); + } + + #[test] + fn all_registered_controllers_has_handler_per_schema() { + let controllers = all_registered_controllers(); + assert_eq!(controllers.len(), 3); + let names: Vec<_> = controllers.iter().map(|c| c.schema.function).collect(); + assert_eq!( + names, + vec!["list_artifacts", "get_artifact", "delete_artifact"] + ); + } + + // ── read_required ─────────────────────────────────────────────── + + #[test] + fn read_required_returns_value_for_present_key() { + let mut params = Map::new(); + params.insert("artifact_id".into(), json!("abc")); + let got: String = read_required(¶ms, "artifact_id").unwrap(); + assert_eq!(got, "abc"); + } + + #[test] + fn read_required_errors_when_key_missing() { + let params = Map::new(); + let err = read_required::(¶ms, "artifact_id").unwrap_err(); + assert!(err.contains("missing required param 'artifact_id'")); + } + + // ── read_optional_u64 ─────────────────────────────────────────── + + #[test] + fn read_optional_u64_absent_key_is_none() { + assert_eq!(read_optional_u64(&Map::new(), "limit").unwrap(), None); + } + + #[test] + fn read_optional_u64_explicit_null_is_none() { + let mut params = Map::new(); + params.insert("limit".into(), Value::Null); + assert_eq!(read_optional_u64(¶ms, "limit").unwrap(), None); + } + + #[test] + fn read_optional_u64_accepts_unsigned_integer() { + let mut params = Map::new(); + params.insert("limit".into(), json!(50)); + assert_eq!(read_optional_u64(¶ms, "limit").unwrap(), Some(50)); + } + + #[test] + fn read_optional_u64_rejects_negative_number() { + let mut params = Map::new(); + params.insert("limit".into(), json!(-1)); + let err = read_optional_u64(¶ms, "limit").unwrap_err(); + assert!(err.contains("expected unsigned integer")); + } + + // ── type_name ─────────────────────────────────────────────────── + + #[test] + fn type_name_reports_each_json_variant() { + assert_eq!(type_name(&Value::Null), "null"); + assert_eq!(type_name(&json!(true)), "bool"); + assert_eq!(type_name(&json!(1)), "number"); + assert_eq!(type_name(&json!("s")), "string"); + assert_eq!(type_name(&json!([])), "array"); + assert_eq!(type_name(&json!({})), "object"); + } +} diff --git a/src/openhuman/artifacts/store.rs b/src/openhuman/artifacts/store.rs new file mode 100644 index 0000000000..b816b9a5d8 --- /dev/null +++ b/src/openhuman/artifacts/store.rs @@ -0,0 +1,235 @@ +use std::path::{Path, PathBuf}; + +use super::types::{ArtifactMeta, ArtifactStatus}; + +const ARTIFACTS_SUBDIR: &str = "artifacts"; +const META_FILENAME: &str = "meta.json"; + +/// Returns the artifacts root directory, creating it if it doesn't exist. +/// +/// The root lives at `/artifacts/`. +pub(crate) async fn artifacts_root(workspace_dir: &Path) -> Result { + let root = workspace_dir.join(ARTIFACTS_SUBDIR); + log::debug!("[artifacts] artifacts_root: {:?}", root); + tokio::fs::create_dir_all(&root).await.map_err(|e| { + format!( + "[artifacts] failed to create artifacts root {:?}: {e}", + root + ) + })?; + Ok(root) +} + +/// Validate that an artifact ID is safe to use as a filesystem path component. +/// +/// Rejects empty strings, absolute paths, and path traversal patterns. +fn validate_artifact_id(id: &str) -> Result<(), String> { + if id.is_empty() { + return Err("[artifacts] artifact_id must not be empty".to_string()); + } + if id.contains('/') { + return Err(format!( + "[artifacts] artifact_id must not contain '/': {id:?}" + )); + } + if id.contains('\\') { + return Err(format!( + "[artifacts] artifact_id must not contain '\\': {id:?}" + )); + } + if id == ".." || id.starts_with("../") || id.starts_with("..\\") { + return Err(format!( + "[artifacts] artifact_id must not be a path traversal: {id:?}" + )); + } + // Reject absolute paths (Unix /foo or Windows C:\foo / \\server\share) + if id.starts_with('/') || id.starts_with('\\') { + return Err(format!( + "[artifacts] artifact_id must not be an absolute path: {id:?}" + )); + } + // Reject Windows drive-letter paths like C: + if id.len() >= 2 && id.as_bytes()[1] == b':' { + return Err(format!( + "[artifacts] artifact_id must not be an absolute path: {id:?}" + )); + } + Ok(()) +} + +/// Confirm that `resolved` is under `root`, preventing path traversal escapes. +fn assert_within_root(root: &Path, resolved: &Path) -> Result<(), String> { + if !resolved.starts_with(root) { + return Err(format!( + "[artifacts] path {:?} escapes artifacts root {:?}", + resolved, root + )); + } + Ok(()) +} + +/// Persist artifact metadata to `/artifacts//meta.json`. +pub(crate) async fn save_artifact_meta( + workspace_dir: &Path, + meta: &ArtifactMeta, +) -> Result<(), String> { + log::debug!("[artifacts] save_artifact_meta: id={}", meta.id); + validate_artifact_id(&meta.id)?; + let root = artifacts_root(workspace_dir).await?; + let artifact_dir = root.join(&meta.id); + // Verify sandboxing before writing + assert_within_root(&root, &artifact_dir)?; + tokio::fs::create_dir_all(&artifact_dir) + .await + .map_err(|e| { + format!( + "[artifacts] failed to create artifact dir {:?}: {e}", + artifact_dir + ) + })?; + let meta_path = artifact_dir.join(META_FILENAME); + let json = serde_json::to_string_pretty(meta).map_err(|e| { + format!( + "[artifacts] failed to serialize meta for id={}: {e}", + meta.id + ) + })?; + tokio::fs::write(&meta_path, json).await.map_err(|e| { + format!( + "[artifacts] failed to write meta.json for id={}: {e}", + meta.id + ) + })?; + log::debug!("[artifacts] saved meta.json for id={}", meta.id); + Ok(()) +} + +/// List artifacts in the workspace, sorted by `created_at` descending. +/// +/// Corrupt or unreadable `meta.json` files are skipped with a `warn!` log. +/// Returns `(page, total)` where `page` is the requested slice and `total` is +/// the count before pagination. +pub(crate) async fn list_artifacts( + workspace_dir: &Path, + offset: usize, + limit: usize, +) -> Result<(Vec, usize), String> { + log::debug!( + "[artifacts] list_artifacts: offset={offset} limit={limit} workspace={:?}", + workspace_dir + ); + let root = artifacts_root(workspace_dir).await?; + + let mut read_dir = match tokio::fs::read_dir(&root).await { + Ok(rd) => rd, + Err(e) => { + return Err(format!( + "[artifacts] failed to read artifacts dir {:?}: {e}", + root + )) + } + }; + + let mut all: Vec = Vec::new(); + + loop { + let entry = match read_dir.next_entry().await { + Ok(Some(e)) => e, + Ok(None) => break, + Err(e) => { + log::warn!("[artifacts] error reading directory entry: {e}"); + continue; + } + }; + + let entry_path = entry.path(); + // Only process directories + match entry.file_type().await { + Ok(ft) if ft.is_dir() => {} + Ok(_) => continue, + Err(e) => { + log::warn!( + "[artifacts] failed to get file type for {:?}: {e}", + entry_path + ); + continue; + } + } + + let meta_path = entry_path.join(META_FILENAME); + let contents = match tokio::fs::read_to_string(&meta_path).await { + Ok(c) => c, + Err(e) => { + log::warn!( + "[artifacts] skipping {:?}: failed to read meta.json: {e}", + entry_path + ); + continue; + } + }; + + match serde_json::from_str::(&contents) { + Ok(meta) => all.push(meta), + Err(e) => { + log::warn!( + "[artifacts] skipping {:?}: corrupt meta.json: {e}", + entry_path + ); + } + } + } + + // Sort descending by created_at (newest first) + all.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + + let total = all.len(); + let page = all.into_iter().skip(offset).take(limit).collect::>(); + + log::debug!( + "[artifacts] list_artifacts: total={total} returning {} items", + page.len() + ); + Ok((page, total)) +} + +/// Retrieve a single artifact by ID. +pub(crate) async fn get_artifact( + workspace_dir: &Path, + artifact_id: &str, +) -> Result { + log::debug!("[artifacts] get_artifact: id={artifact_id}"); + validate_artifact_id(artifact_id)?; + let root = artifacts_root(workspace_dir).await?; + let artifact_dir = root.join(artifact_id); + assert_within_root(&root, &artifact_dir)?; + let meta_path = artifact_dir.join(META_FILENAME); + let contents = tokio::fs::read_to_string(&meta_path).await.map_err(|e| { + format!("[artifacts] artifact not found or unreadable id={artifact_id}: {e}") + })?; + let meta: ArtifactMeta = serde_json::from_str(&contents) + .map_err(|e| format!("[artifacts] corrupt meta.json for id={artifact_id}: {e}"))?; + log::debug!("[artifacts] get_artifact: found id={artifact_id}"); + Ok(meta) +} + +/// Delete an artifact directory and all its contents. +pub(crate) async fn delete_artifact(workspace_dir: &Path, artifact_id: &str) -> Result<(), String> { + log::debug!("[artifacts] delete_artifact: id={artifact_id}"); + validate_artifact_id(artifact_id)?; + let root = artifacts_root(workspace_dir).await?; + let artifact_dir = root.join(artifact_id); + assert_within_root(&root, &artifact_dir)?; + tokio::fs::remove_dir_all(&artifact_dir) + .await + .map_err(|e| format!("[artifacts] failed to delete artifact id={artifact_id}: {e}"))?; + log::debug!("[artifacts] delete_artifact: deleted id={artifact_id}"); + Ok(()) +} + +// Mark a status as unused — referenced only in tests via the store +#[allow(dead_code)] +fn _assert_status_used(_: ArtifactStatus) {} + +#[cfg(test)] +#[path = "store_tests.rs"] +mod tests; diff --git a/src/openhuman/artifacts/store_tests.rs b/src/openhuman/artifacts/store_tests.rs new file mode 100644 index 0000000000..8b7882e690 --- /dev/null +++ b/src/openhuman/artifacts/store_tests.rs @@ -0,0 +1,184 @@ +use chrono::{TimeZone, Utc}; +use tempfile::TempDir; + +use super::*; +use crate::openhuman::artifacts::types::{ArtifactKind, ArtifactMeta, ArtifactStatus}; + +fn make_meta(id: &str, title: &str, created_at: chrono::DateTime) -> ArtifactMeta { + ArtifactMeta { + id: id.to_string(), + kind: ArtifactKind::Document, + title: title.to_string(), + path: format!("{id}/file.txt"), + size_bytes: 100, + status: ArtifactStatus::Ready, + created_at, + } +} + +#[tokio::test] +async fn save_and_get_roundtrip() { + let tmp = TempDir::new().unwrap(); + let meta = make_meta( + "test-id-1", + "My Document", + Utc.with_ymd_and_hms(2025, 6, 1, 12, 0, 0).unwrap(), + ); + save_artifact_meta(tmp.path(), &meta).await.unwrap(); + let got = get_artifact(tmp.path(), "test-id-1").await.unwrap(); + assert_eq!(got.id, meta.id); + assert_eq!(got.title, meta.title); + assert_eq!(got.kind, meta.kind); + assert_eq!(got.status, meta.status); + assert_eq!(got.size_bytes, meta.size_bytes); + assert_eq!(got.created_at, meta.created_at); +} + +#[tokio::test] +async fn list_returns_saved_items_sorted_by_created_at() { + let tmp = TempDir::new().unwrap(); + + let t1 = Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(); + let t2 = Utc.with_ymd_and_hms(2025, 6, 1, 0, 0, 0).unwrap(); + let t3 = Utc.with_ymd_and_hms(2025, 12, 1, 0, 0, 0).unwrap(); + + save_artifact_meta(tmp.path(), &make_meta("a", "A", t1)) + .await + .unwrap(); + save_artifact_meta(tmp.path(), &make_meta("b", "B", t3)) + .await + .unwrap(); + save_artifact_meta(tmp.path(), &make_meta("c", "C", t2)) + .await + .unwrap(); + + let (items, total) = list_artifacts(tmp.path(), 0, 100).await.unwrap(); + assert_eq!(total, 3); + assert_eq!(items.len(), 3); + // Newest first + assert_eq!(items[0].id, "b"); + assert_eq!(items[1].id, "c"); + assert_eq!(items[2].id, "a"); +} + +#[tokio::test] +async fn list_empty_workspace() { + let tmp = TempDir::new().unwrap(); + let (items, total) = list_artifacts(tmp.path(), 0, 50).await.unwrap(); + assert_eq!(total, 0); + assert!(items.is_empty()); +} + +#[tokio::test] +async fn list_pagination() { + let tmp = TempDir::new().unwrap(); + + for i in 0..5_u32 { + let ts = Utc + .with_ymd_and_hms(2025, 1, i as u32 + 1, 0, 0, 0) + .unwrap(); + save_artifact_meta(tmp.path(), &make_meta(&format!("id-{i}"), "x", ts)) + .await + .unwrap(); + } + + let (items, total) = list_artifacts(tmp.path(), 1, 2).await.unwrap(); + assert_eq!(total, 5); + assert_eq!(items.len(), 2); +} + +#[tokio::test] +async fn delete_removes_directory_and_meta() { + let tmp = TempDir::new().unwrap(); + let meta = make_meta( + "del-id", + "Delete Me", + Utc.with_ymd_and_hms(2025, 3, 1, 0, 0, 0).unwrap(), + ); + save_artifact_meta(tmp.path(), &meta).await.unwrap(); + + // Confirm it exists + get_artifact(tmp.path(), "del-id").await.unwrap(); + + delete_artifact(tmp.path(), "del-id").await.unwrap(); + + // Should now be gone + let err = get_artifact(tmp.path(), "del-id").await.unwrap_err(); + assert!( + err.contains("not found") || err.contains("No such file"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn delete_nonexistent_returns_error() { + let tmp = TempDir::new().unwrap(); + let err = delete_artifact(tmp.path(), "nonexistent-id") + .await + .unwrap_err(); + assert!( + err.contains("failed to delete") || err.contains("No such file"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn get_rejects_path_traversal() { + let tmp = TempDir::new().unwrap(); + for bad_id in ["../secrets", "foo/../bar"] { + let err = get_artifact(tmp.path(), bad_id).await.unwrap_err(); + assert!( + err.contains("must not contain") + || err.contains("traversal") + || err.contains("escapes"), + "id={bad_id:?} error was: {err}" + ); + } +} + +#[tokio::test] +async fn get_rejects_absolute_paths() { + let tmp = TempDir::new().unwrap(); + let err = get_artifact(tmp.path(), "/tmp/evil").await.unwrap_err(); + assert!( + err.contains("must not contain") || err.contains("absolute") || err.contains("escapes"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn list_skips_corrupt_meta() { + let tmp = TempDir::new().unwrap(); + + // Write a valid artifact + let ts = Utc.with_ymd_and_hms(2025, 5, 1, 0, 0, 0).unwrap(); + save_artifact_meta(tmp.path(), &make_meta("good-id", "Good", ts)) + .await + .unwrap(); + + // Create a subdirectory with invalid JSON as meta.json + let corrupt_dir = tmp.path().join("artifacts").join("corrupt-id"); + std::fs::create_dir_all(&corrupt_dir).unwrap(); + std::fs::write(corrupt_dir.join("meta.json"), b"this is not json").unwrap(); + + let (items, total) = list_artifacts(tmp.path(), 0, 100).await.unwrap(); + // Only the valid one should be returned + assert_eq!(total, 1); + assert_eq!(items[0].id, "good-id"); +} + +#[tokio::test] +async fn validate_artifact_id_rejects_slashes() { + let tmp = TempDir::new().unwrap(); + let err = get_artifact(tmp.path(), "a/b").await.unwrap_err(); + assert!( + err.contains("must not contain '/'"), + "unexpected error: {err}" + ); + + let err = get_artifact(tmp.path(), "a\\b").await.unwrap_err(); + assert!( + err.contains("must not contain '\\'"), + "unexpected error: {err}" + ); +} diff --git a/src/openhuman/artifacts/types.rs b/src/openhuman/artifacts/types.rs new file mode 100644 index 0000000000..33d2c69726 --- /dev/null +++ b/src/openhuman/artifacts/types.rs @@ -0,0 +1,251 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// The category of an artifact produced by the agent. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ArtifactKind { + Presentation, + Document, + Image, + Other, +} + +impl Default for ArtifactKind { + fn default() -> Self { + Self::Other + } +} + +impl ArtifactKind { + pub fn as_str(&self) -> &'static str { + match self { + Self::Presentation => "presentation", + Self::Document => "document", + Self::Image => "image", + Self::Other => "other", + } + } + + /// Parse a raw string into an `ArtifactKind`. Case-insensitive; unknown + /// values fall back to `Other`. + pub fn parse(raw: &str) -> Self { + match raw.to_ascii_lowercase().as_str() { + "presentation" => Self::Presentation, + "document" => Self::Document, + "image" => Self::Image, + _ => Self::Other, + } + } +} + +/// Lifecycle status of an artifact. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ArtifactStatus { + Pending, + Ready, + Failed, +} + +impl Default for ArtifactStatus { + fn default() -> Self { + Self::Pending + } +} + +impl ArtifactStatus { + pub fn as_str(&self) -> &'static str { + match self { + Self::Pending => "pending", + Self::Ready => "ready", + Self::Failed => "failed", + } + } + + /// Parse a raw string into an `ArtifactStatus`. Case-insensitive; unknown + /// values fall back to `Pending`. + pub fn parse(raw: &str) -> Self { + match raw.to_ascii_lowercase().as_str() { + "ready" => Self::Ready, + "failed" => Self::Failed, + _ => Self::Pending, + } + } +} + +/// Metadata record for a single agent-generated artifact. +/// +/// Persisted as `/artifacts//meta.json`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArtifactMeta { + /// Unique artifact identifier (UUID string). + pub id: String, + /// Category of the artifact. + pub kind: ArtifactKind, + /// Human-readable title. + pub title: String, + /// Relative path from the artifacts root, e.g. `"/deck.pptx"`. + pub path: String, + /// Artifact file size in bytes. + pub size_bytes: u64, + /// Current lifecycle status. + pub status: ArtifactStatus, + /// UTC timestamp when this artifact was created. + pub created_at: DateTime, +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::TimeZone; + use serde_json::json; + + // ── ArtifactKind ─────────────────────────────────────────────────────────── + + #[test] + fn artifact_kind_default_is_other() { + assert_eq!(ArtifactKind::default(), ArtifactKind::Other); + } + + #[test] + fn artifact_kind_as_str_roundtrip() { + assert_eq!(ArtifactKind::Presentation.as_str(), "presentation"); + assert_eq!(ArtifactKind::Document.as_str(), "document"); + assert_eq!(ArtifactKind::Image.as_str(), "image"); + assert_eq!(ArtifactKind::Other.as_str(), "other"); + } + + #[test] + fn artifact_kind_parse_case_insensitive() { + assert_eq!( + ArtifactKind::parse("presentation"), + ArtifactKind::Presentation + ); + assert_eq!( + ArtifactKind::parse("PRESENTATION"), + ArtifactKind::Presentation + ); + assert_eq!(ArtifactKind::parse("Document"), ArtifactKind::Document); + assert_eq!(ArtifactKind::parse("IMAGE"), ArtifactKind::Image); + assert_eq!(ArtifactKind::parse("other"), ArtifactKind::Other); + assert_eq!(ArtifactKind::parse("unknown"), ArtifactKind::Other); + assert_eq!(ArtifactKind::parse(""), ArtifactKind::Other); + } + + #[test] + fn artifact_kind_serde_roundtrip() { + for kind in [ + ArtifactKind::Presentation, + ArtifactKind::Document, + ArtifactKind::Image, + ArtifactKind::Other, + ] { + let json = serde_json::to_value(&kind).unwrap(); + let back: ArtifactKind = serde_json::from_value(json).unwrap(); + assert_eq!(back, kind); + } + } + + #[test] + fn artifact_kind_serializes_lowercase() { + assert_eq!( + serde_json::to_string(&ArtifactKind::Presentation).unwrap(), + "\"presentation\"" + ); + assert_eq!( + serde_json::to_string(&ArtifactKind::Document).unwrap(), + "\"document\"" + ); + } + + // ── ArtifactStatus ───────────────────────────────────────────────────────── + + #[test] + fn artifact_status_default_is_pending() { + assert_eq!(ArtifactStatus::default(), ArtifactStatus::Pending); + } + + #[test] + fn artifact_status_as_str_roundtrip() { + assert_eq!(ArtifactStatus::Pending.as_str(), "pending"); + assert_eq!(ArtifactStatus::Ready.as_str(), "ready"); + assert_eq!(ArtifactStatus::Failed.as_str(), "failed"); + } + + #[test] + fn artifact_status_parse_case_insensitive() { + assert_eq!(ArtifactStatus::parse("pending"), ArtifactStatus::Pending); + assert_eq!(ArtifactStatus::parse("READY"), ArtifactStatus::Ready); + assert_eq!(ArtifactStatus::parse("Failed"), ArtifactStatus::Failed); + assert_eq!(ArtifactStatus::parse("unknown"), ArtifactStatus::Pending); + assert_eq!(ArtifactStatus::parse(""), ArtifactStatus::Pending); + } + + #[test] + fn artifact_status_serde_roundtrip() { + for status in [ + ArtifactStatus::Pending, + ArtifactStatus::Ready, + ArtifactStatus::Failed, + ] { + let json = serde_json::to_value(&status).unwrap(); + let back: ArtifactStatus = serde_json::from_value(json).unwrap(); + assert_eq!(back, status); + } + } + + // ── ArtifactMeta ─────────────────────────────────────────────────────────── + + #[test] + fn artifact_meta_serde_roundtrip() { + let meta = ArtifactMeta { + id: "abc-123".to_string(), + kind: ArtifactKind::Presentation, + title: "Q3 Deck".to_string(), + path: "abc-123/deck.pptx".to_string(), + size_bytes: 204800, + status: ArtifactStatus::Ready, + created_at: Utc.with_ymd_and_hms(2025, 6, 1, 12, 0, 0).unwrap(), + }; + let json = serde_json::to_value(&meta).unwrap(); + assert_eq!(json["id"], "abc-123"); + assert_eq!(json["kind"], "presentation"); + assert_eq!(json["status"], "ready"); + let back: ArtifactMeta = serde_json::from_value(json).unwrap(); + assert_eq!(back.id, meta.id); + assert_eq!(back.kind, meta.kind); + assert_eq!(back.status, meta.status); + assert_eq!(back.size_bytes, meta.size_bytes); + } + + #[test] + fn artifact_meta_json_shape() { + let meta = ArtifactMeta { + id: "x".to_string(), + kind: ArtifactKind::Other, + title: "test".to_string(), + path: "x/file.txt".to_string(), + size_bytes: 0, + status: ArtifactStatus::Pending, + created_at: Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(), + }; + let v = serde_json::to_value(&meta).unwrap(); + // Verify all expected fields are present + assert!(v.get("id").is_some()); + assert!(v.get("kind").is_some()); + assert!(v.get("title").is_some()); + assert!(v.get("path").is_some()); + assert!(v.get("size_bytes").is_some()); + assert!(v.get("status").is_some()); + assert!(v.get("created_at").is_some()); + } + + #[test] + fn artifact_meta_missing_field_deserializes_error() { + // Ensure missing required fields cause a deserialization error + let incomplete = json!({ "id": "x", "kind": "other" }); + let result: Result = serde_json::from_value(incomplete); + assert!(result.is_err()); + } +} diff --git a/src/openhuman/mod.rs b/src/openhuman/mod.rs index 8a4f8d658e..3a54b0fc1d 100644 --- a/src/openhuman/mod.rs +++ b/src/openhuman/mod.rs @@ -21,6 +21,7 @@ pub mod agent_experience; pub mod agent_tool_policy; pub mod app_state; pub mod approval; +pub mod artifacts; pub mod audio_toolkit; pub mod autocomplete; pub mod billing;