diff --git a/crates/amalthea/src/comm/data_explorer_comm.rs b/crates/amalthea/src/comm/data_explorer_comm.rs index 56dd29927..f15666728 100644 --- a/crates/amalthea/src/comm/data_explorer_comm.rs +++ b/crates/amalthea/src/comm/data_explorer_comm.rs @@ -1,7 +1,7 @@ // @generated /*--------------------------------------------------------------------------------------------- - * Copyright (C) 2024-2025 Posit Software, PBC. All rights reserved. + * Copyright (C) 2024-2026 Posit Software, PBC. All rights reserved. *--------------------------------------------------------------------------------------------*/ // @@ -60,6 +60,13 @@ pub struct FilterResult { pub had_errors: Option } +/// Result of setting import options +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SetDatasetImportOptionsResult { + /// An error message if setting the options failed + pub error_message: Option +} + /// The current backend state for the data explorer #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct BackendState { @@ -703,6 +710,15 @@ pub struct ColumnSelection { pub spec: ArraySelection } +/// Import options for file-based data sources. Currently supports options +/// for delimited text files (CSV, TSV). +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct DatasetImportOptions { + /// Whether the first row contains column headers (for delimited text + /// files) + pub has_header_row: Option +} + /// Possible values for SortOrder in SearchSchema #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display, strum_macros::EnumString)] pub enum SearchSchemaSortOrder { @@ -1193,6 +1209,13 @@ pub struct GetColumnProfilesParams { pub format_options: FormatOptions, } +/// Parameters for the SetDatasetImportOptions method. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SetDatasetImportOptionsParams { + /// Import options to apply + pub options: DatasetImportOptions, +} + /// Parameters for the ReturnColumnProfiles method. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct ReturnColumnProfilesParams { @@ -1289,6 +1312,23 @@ pub enum DataExplorerBackendRequest { #[serde(rename = "get_column_profiles")] GetColumnProfiles(GetColumnProfilesParams), + /// Set import options for file-based data sources + /// + /// Set import options for file-based data sources (like CSV files) and + /// reimport the data. This method is primarily used by file-based + /// backends like DuckDB. + #[serde(rename = "set_dataset_import_options")] + SetDatasetImportOptions(SetDatasetImportOptionsParams), + + /// Open a full data explorer for the same data + /// + /// Creates a new, independent data explorer comm for the same underlying + /// data. The new comm has its own state (filters, sorts). Used when + /// promoting an inline notebook data explorer to a full data explorer + /// panel. + #[serde(rename = "open_data_explorer")] + OpenDataExplorer, + /// Get the state /// /// Request the current backend state (table metadata, explorer state, and @@ -1337,6 +1377,12 @@ pub enum DataExplorerBackendReply { /// Reply for the get_column_profiles method (no result) GetColumnProfilesReply(), + /// Result of setting import options + SetDatasetImportOptionsReply(SetDatasetImportOptionsResult), + + /// Reply for the open_data_explorer method (no result) + OpenDataExplorerReply(), + /// The current backend state for the data explorer GetStateReply(BackendState), diff --git a/crates/ark/src/console.rs b/crates/ark/src/console.rs index 85064352f..421671e2d 100644 --- a/crates/ark/src/console.rs +++ b/crates/ark/src/console.rs @@ -82,7 +82,6 @@ use harp::utils::r_poke_option; use harp::utils::r_typeof; use harp::CONSOLE_THREAD_ID; use libr::R_BaseNamespace; -use libr::R_GlobalEnv; use libr::R_ProcessEvents; use libr::R_RunPendingFinalizers; use libr::Rf_ScalarInteger; diff --git a/crates/ark/src/console/console_integration.rs b/crates/ark/src/console/console_integration.rs index 22779f400..fee036d9f 100644 --- a/crates/ark/src/console/console_integration.rs +++ b/crates/ark/src/console/console_integration.rs @@ -8,6 +8,11 @@ //! Help, LSP, UI comm, and frontend method integration for the R console. use super::*; +use crate::data_explorer::r_data_explorer::DataExplorerMode; +use crate::data_explorer::r_data_explorer::InlineDataExplorerData; +use crate::data_explorer::r_data_explorer::InlineDataExplorerShape; +use crate::data_explorer::r_data_explorer::RDataExplorer; +use crate::data_explorer::r_data_explorer::DATA_EXPLORER_COMM_NAME; /// UI comm integration. impl Console { @@ -164,6 +169,55 @@ impl Console { } } +/// Inline data explorer integration. +impl Console { + /// Open an inline data explorer for a data frame value and return the MIME + /// type payload to include in the execute result. + pub(super) fn open_inline_data_explorer( + &mut self, + value: SEXP, + ) -> anyhow::Result { + let data = RObject::new(value); + + // `source` is the R class family (e.g. "tbl_df", "data.table", + // "data.frame"), following the Python kernel convention where `source` + // is the library name ("pandas", "polars"). + let source = data + .class() + .ok() + .flatten() + .and_then(|classes| classes.into_iter().next()) + .unwrap_or_else(|| String::from("data.frame")); + + // `title` is the variable name when available, falling back to + // `source`. For inline explorers we don't have a variable binding, so + // we always use `source` as the title. + let title = source.clone(); + + let explorer = RDataExplorer::new(title.clone(), data, None, DataExplorerMode::Inline)?; + let shape = &explorer.shape(); + let inline_data = InlineDataExplorerData { + version: 1, + comm_id: String::new(), // placeholder, filled after comm_open + shape: InlineDataExplorerShape { + rows: shape.num_rows, + columns: shape.columns.len(), + }, + title, + source, + }; + + let comm_id = self.comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?; + + let inline_data = InlineDataExplorerData { + comm_id, + ..inline_data + }; + + Ok(serde_json::to_value(inline_data)?) + } +} + /// Reference to the UI comm. Returned by `Console::ui_comm()`. /// /// Existence of this value guarantees the comm is connected. diff --git a/crates/ark/src/console/console_repl.rs b/crates/ark/src/console/console_repl.rs index bc1dd20da..62c8241c4 100644 --- a/crates/ark/src/console/console_repl.rs +++ b/crates/ark/src/console/console_repl.rs @@ -11,6 +11,7 @@ //! ReadConsole, WriteConsole, and R frontend callbacks. use super::*; +use crate::data_explorer::r_data_explorer::POSITRON_DATA_EXPLORER_MIME; use crate::r_task::QueuedRTask; use crate::r_task::RTask; @@ -1131,18 +1132,46 @@ impl Console { data.insert("text/plain".to_string(), json!(autoprint)); } - // Include HTML representation of data.frame - unsafe { - let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); - if r_is_data_frame(value) { - match to_html(value) { - Ok(html) => { - data.insert("text/html".to_string(), json!(html)); + // Include HTML representation of data.frame and optionally open an + // inline data explorer in Positron notebook mode. Only do this when + // there is visible output (autoprint produced text/plain). + let Ok(value) = harp::environment::last_value() else { + return data; + }; + + // If there is no data, return early + if data.is_empty() { + return data; + } + + // If this is a data frame, add HTML representation and open inline explorer + // (only in Positron notebook mode) + if r_is_data_frame(value.sexp) { + let value = value.sexp; + match to_html(value) { + Ok(html) => { + data.insert("text/html".to_string(), json!(html)); + }, + Err(err) => { + log::error!("{err:?}"); + }, + }; + + // The inline data explorer is a Positron-specific feature that + // requires comm support. Other Jupyter frontends don't understand + // this MIME type, so we gate on the POSITRON env var to avoid + // sending it to vanilla Jupyter notebooks. + if self.session_mode == SessionMode::Notebook && + std::env::var("POSITRON").as_deref() == Ok("1") + { + match self.open_inline_data_explorer(value) { + Ok(mime_data) => { + data.insert(POSITRON_DATA_EXPLORER_MIME.to_string(), mime_data); }, Err(err) => { - log::error!("{:?}", err); + log::error!("Failed to open inline data explorer: {err:?}"); }, - }; + } } } diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index 3d06e9b87..43b208cdf 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -100,7 +100,35 @@ use crate::r_task; use crate::r_task::RTask; use crate::variables::variable::WorkspaceVariableDisplayType; +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum DataExplorerMode { + Inline, + Full, +} + pub const DATA_EXPLORER_COMM_NAME: &str = "positron.dataExplorer"; +pub const POSITRON_DATA_EXPLORER_MIME: &str = "application/vnd.positron.dataExplorer+json"; + +/// Payload for the `application/vnd.positron.dataExplorer+json` MIME type +/// included in notebook execute results for data frames. This tells Positron's +/// notebook renderer to display an inline data explorer widget. +/// +/// Must stay in sync with `ParsedDataExplorerOutput` in Positron's +/// `IPositronNotebookCell.ts`. +#[derive(Clone, Debug, serde::Serialize)] +pub struct InlineDataExplorerData { + pub version: u32, + pub comm_id: String, + pub shape: InlineDataExplorerShape, + pub title: String, + pub source: String, +} + +#[derive(Clone, Debug, serde::Serialize)] +pub struct InlineDataExplorerShape { + pub rows: i32, + pub columns: usize, +} /// A name/value binding pair in an environment. /// @@ -157,6 +185,10 @@ pub struct RDataExplorer { /// row indices. This is the set of row indices that are displayed in the /// data viewer. view_indices: Option>, + + /// The display mode for this explorer. `Inline` renders a compact grid + /// in a notebook cell output; `Full` opens the full Data Explorer panel. + explorer_mode: DataExplorerMode, } impl std::fmt::Debug for RDataExplorer { @@ -173,6 +205,7 @@ impl RDataExplorer { title: String, data: RObject, binding: Option, + explorer_mode: DataExplorerMode, ) -> anyhow::Result { let table = Table::new(data); let shape = Self::get_shape(table.get().clone())?; @@ -187,9 +220,14 @@ impl RDataExplorer { sort_keys: vec![], row_filters: vec![], col_filters: vec![], + explorer_mode, }) } + pub(crate) fn shape(&self) -> &DataObjectShape { + &self.shape + } + /// Check the environment bindings for updates to the underlying value /// /// Returns true if the update was processed; false if the binding has been @@ -416,13 +454,31 @@ impl RDataExplorer { DataExplorerBackendRequest::SuggestCodeSyntax => Ok( DataExplorerBackendReply::SuggestCodeSyntaxReply(self.suggest_code_syntax()), ), + + DataExplorerBackendRequest::SetDatasetImportOptions(_) => { + Err(anyhow!("Data Explorer: Not yet supported")) + }, + + // Promotes an inline data explorer to a full data explorer. + DataExplorerBackendRequest::OpenDataExplorer => { + let explorer = RDataExplorer::new( + self.title.clone(), + self.table.get().clone(), + None, + DataExplorerMode::Full, + )?; + Console::get_mut() + .comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?; + Ok(DataExplorerBackendReply::OpenDataExplorerReply()) + }, } } } impl CommHandler for RDataExplorer { fn open_metadata(&self) -> serde_json::Value { - serde_json::json!({ "title": self.title }) + let inline_only = self.explorer_mode == DataExplorerMode::Inline; + serde_json::json!({ "title": self.title, "inline_only": inline_only }) } fn handle_msg(&mut self, msg: CommMsg, ctx: &CommHandlerContext) { @@ -1210,7 +1266,7 @@ pub unsafe extern "C-unwind" fn ps_view_data_frame( None }; - let explorer = RDataExplorer::new(title, x, env_info)?; + let explorer = RDataExplorer::new(title, x, env_info, DataExplorerMode::Full)?; Console::get_mut().comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?; Ok(R_NilValue) diff --git a/crates/ark/src/variables/r_variables.rs b/crates/ark/src/variables/r_variables.rs index f49c2d6de..e49421db9 100644 --- a/crates/ark/src/variables/r_variables.rs +++ b/crates/ark/src/variables/r_variables.rs @@ -42,6 +42,7 @@ use stdext::spawn; use crate::console; use crate::console::Console; +use crate::data_explorer::r_data_explorer::DataExplorerMode; use crate::data_explorer::r_data_explorer::DataObjectEnvInfo; use crate::data_explorer::r_data_explorer::RDataExplorer; use crate::data_explorer::r_data_explorer::DATA_EXPLORER_COMM_NAME; @@ -357,8 +358,9 @@ impl RVariables { env, }; - let explorer = RDataExplorer::new(name.clone(), obj, Some(binding)) - .map_err(harp::Error::Anyhow)?; + let explorer = + RDataExplorer::new(name.clone(), obj, Some(binding), DataExplorerMode::Full) + .map_err(harp::Error::Anyhow)?; let viewer_id = Console::get_mut() .comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer)) .map_err(harp::Error::Anyhow)?; diff --git a/crates/ark/tests/data_explorer.rs b/crates/ark/tests/data_explorer.rs index c0ea656ec..69e761975 100644 --- a/crates/ark/tests/data_explorer.rs +++ b/crates/ark/tests/data_explorer.rs @@ -72,6 +72,7 @@ use ark::comm_handler::CommHandlerContext; use ark::comm_handler::EnvironmentChanged; use ark::data_explorer::format::format_column; use ark::data_explorer::format::format_string; +use ark::data_explorer::r_data_explorer::DataExplorerMode; use ark::data_explorer::r_data_explorer::DataObjectEnvInfo; use ark::data_explorer::r_data_explorer::RDataExplorer; use ark::r_task::r_task; @@ -106,7 +107,7 @@ fn open_data_explorer(dataset: String) -> TestSetup { let inner = r_task(|| unsafe { let data = RObject::new(Rf_eval(r_symbol!(&dataset), R_GlobalEnv)); - let handler = RDataExplorer::new(dataset, data, None).unwrap(); + let handler = RDataExplorer::new(dataset, data, None, DataExplorerMode::Full).unwrap(); TestInner(handler, ctx) }); @@ -131,7 +132,8 @@ fn open_data_explorer_from_expression(expr: &str, bind: Option<&str>) -> anyhow: name: name.to_string(), env: RObject::view(R_ENVS.global), }); - let handler = RDataExplorer::new(String::from("obj"), object, binding)?; + let handler = + RDataExplorer::new(String::from("obj"), object, binding, DataExplorerMode::Full)?; Ok(TestInner(handler, ctx)) })?; diff --git a/crates/ark/tests/kernel-notebook-data-explorer.rs b/crates/ark/tests/kernel-notebook-data-explorer.rs new file mode 100644 index 000000000..d38b7cf22 --- /dev/null +++ b/crates/ark/tests/kernel-notebook-data-explorer.rs @@ -0,0 +1,100 @@ +// +// kernel-notebook-data-explorer.rs +// +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. +// +// + +use amalthea::fixtures::dummy_frontend::ExecuteRequestOptions; +use ark_test::DummyArkPositronNotebook; + +/// Drain the UI comm messages that arrive during execution (busy=true, +/// busy=false, prompt_state). These are CommMsg messages on the UI comm's +/// channel that interleave with the execute result on IOPub. +fn drain_ui_comm_msgs(frontend: &DummyArkPositronNotebook, ui_comm_id: &str) { + // busy=true + let msg = frontend.recv_iopub_comm_msg(); + assert_eq!(msg.comm_id, ui_comm_id); + assert_eq!(msg.data["method"], "busy"); + assert_eq!(msg.data["params"]["busy"], true); + + // busy=false + let msg = frontend.recv_iopub_comm_msg(); + assert_eq!(msg.comm_id, ui_comm_id); + assert_eq!(msg.data["method"], "busy"); + assert_eq!(msg.data["params"]["busy"], false); +} + +fn drain_ui_comm_prompt_state(frontend: &DummyArkPositronNotebook, ui_comm_id: &str) { + let msg = frontend.recv_iopub_comm_msg(); + assert_eq!(msg.comm_id, ui_comm_id); + assert_eq!(msg.data["method"], "prompt_state"); +} + +#[test] +fn test_notebook_inline_data_explorer() { + let frontend = DummyArkPositronNotebook::lock(); + let ui_comm_id = frontend.open_ui_comm(); + + frontend.send_execute_request( + "data.frame(x = 1:3, y = 4:6)", + ExecuteRequestOptions::default(), + ); + frontend.recv_iopub_busy(); + frontend.recv_iopub_execute_input(); + + // Drain UI comm busy events + drain_ui_comm_msgs(&frontend, &ui_comm_id); + + let result_data = frontend.recv_iopub_execute_result_data(); + + // Should have text/plain (autoprint output) + assert!(result_data.contains_key("text/plain")); + + // Should have the inline data explorer MIME type + let mime_key = "application/vnd.positron.dataExplorer+json"; + assert!(result_data.contains_key(mime_key)); + + let de_data = result_data.get(mime_key).unwrap(); + assert_eq!(de_data["version"], 1); + assert_eq!(de_data["shape"]["rows"], 3); + assert_eq!(de_data["shape"]["columns"], 2); + assert!(de_data["comm_id"].as_str().is_some()); + assert!(de_data["title"].as_str().is_some()); + + // prompt_state arrives after execute_result + drain_ui_comm_prompt_state(&frontend, &ui_comm_id); + + frontend.recv_iopub_idle(); + frontend.recv_shell_execute_reply(); + + // The comm_open for the inline data explorer arrives after Idle + // (it goes through Shell's comm event channel) + let comm_open = frontend.recv_iopub_comm_open(); + assert_eq!(comm_open.target_name, "positron.dataExplorer"); + assert_eq!(comm_open.data["inline_only"], true); + assert_eq!(comm_open.comm_id, de_data["comm_id"].as_str().unwrap()); +} + +#[test] +fn test_notebook_no_inline_data_explorer_for_non_data_frame() { + let frontend = DummyArkPositronNotebook::lock(); + let ui_comm_id = frontend.open_ui_comm(); + + frontend.send_execute_request("1:10", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + frontend.recv_iopub_execute_input(); + + drain_ui_comm_msgs(&frontend, &ui_comm_id); + + let result_data = frontend.recv_iopub_execute_result_data(); + + // Should have text/plain but NOT the data explorer MIME type + assert!(result_data.contains_key("text/plain")); + assert!(!result_data.contains_key("application/vnd.positron.dataExplorer+json")); + + drain_ui_comm_prompt_state(&frontend, &ui_comm_id); + + frontend.recv_iopub_idle(); + frontend.recv_shell_execute_reply(); +} diff --git a/crates/ark_test/src/dummy_frontend.rs b/crates/ark_test/src/dummy_frontend.rs index 9cb8db2ef..6aa166901 100644 --- a/crates/ark_test/src/dummy_frontend.rs +++ b/crates/ark_test/src/dummy_frontend.rs @@ -602,6 +602,21 @@ impl DummyArkFrontend { } } + /// Receive from IOPub and assert ExecuteResult message. + /// Returns the full data map. + /// Automatically skips any Stream messages. + #[track_caller] + pub fn recv_iopub_execute_result_data(&self) -> serde_json::Map { + let msg = self.recv_iopub_next(); + match msg { + Message::ExecuteResult(data) => match data.content.data { + serde_json::Value::Object(map) => map, + other => panic!("Expected ExecuteResult data to be Object, got {:?}", other), + }, + other => panic!("Expected ExecuteResult, got {:?}", other), + } + } + /// Receive from IOPub and assert ExecuteError message. /// Automatically skips any Stream messages. /// Returns the `evalue` field. @@ -1801,6 +1816,52 @@ impl DerefMut for DummyArkFrontendNotebook { } } +/// Wrapper around `DummyArkFrontend` that uses `SessionMode::Notebook` and +/// sets the `POSITRON` env var to simulate running inside Positron. +pub struct DummyArkPositronNotebook { + inner: DummyArkFrontend, +} + +impl DummyArkPositronNotebook { + /// Lock a Positron notebook frontend. + /// + /// NOTE: Only one `DummyArkFrontend` variant should call `lock()` within + /// a given process. + pub fn lock() -> Self { + Self::init(); + + Self { + inner: DummyArkFrontend::lock(), + } + } + + /// Initialize with Notebook session mode and `POSITRON=1` + fn init() { + unsafe { std::env::set_var("POSITRON", "1") }; + + let options = DummyArkFrontendOptions { + session_mode: SessionMode::Notebook, + ..Default::default() + }; + FRONTEND.get_or_init(|| Arc::new(Mutex::new(DummyArkFrontend::init(options)))); + } +} + +// Allow method calls to be forwarded to inner type +impl Deref for DummyArkPositronNotebook { + type Target = DummyArkFrontend; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for DummyArkPositronNotebook { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + impl DummyArkFrontendDefaultRepos { /// Lock a frontend with a default repos setting. ///