From d3da6dbdc5bf6e570448ee3e40ede14052025942 Mon Sep 17 00:00:00 2001 From: Linwei Zhang Date: Mon, 22 Jun 2026 11:28:37 +0800 Subject: [PATCH 1/2] refactor(mdb): use SQLExecDirect instead of prepare+execute for infer_schema mdbtools does not support SQL_ATTR_PARAMSET_SIZE so odbc-api's safe conn.execute() fails with HY092. Use SQLExecDirect directly (one ODBC call instead of two: prepare + execute). Metadata is collected from the cursor after exec_direct. --- remote-table/src/connection/mdb/mod.rs | 30 ++++++++++++++--------- remote-table/src/connection/mdb/schema.rs | 10 +++----- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/remote-table/src/connection/mdb/mod.rs b/remote-table/src/connection/mdb/mod.rs index 3a4eeb1..29dce40 100644 --- a/remote-table/src/connection/mdb/mod.rs +++ b/remote-table/src/connection/mdb/mod.rs @@ -190,21 +190,27 @@ impl Connection for MdbConnection { let sql = RemoteDbType::Mdb.limit_1_query_if_possible(source)?; debug!("[remote-table] inferring mdb schema with: {sql}"); let conn = self.conn.lock().await; - let mut prepared = conn.prepare(&sql).map_err(|e| { - DataFusionError::Plan(format!( - "Failed to prepare query for schema inference on mdb: {e:?}, sql: {sql}" - )) - })?; - // mdbtools does not populate result-set metadata (SQLNumResultCols / - // SQLDescribeCol) until after SQLExecute. Prepared::execute(()) is the - // safe wrapper around SQLExecute; we discard the returned cursor since - // column metadata is then available on `prepared` itself. - prepared.execute(()).map_err(|e| { + // mdbtools 1.0.0 does not support SQL_ATTR_PARAMSET_SIZE, so + // odbc-api's safe `conn.execute()` fails with HY092. Use + // SQLExecDirect directly — one call, no prepare+execute needed. + let pre = conn.preallocate().map_err(|e| { DataFusionError::Plan(format!( - "Failed to execute query for schema inference on mdb: {e:?}, sql: {sql}" + "Failed to preallocate statement for schema inference on mdb: {e:?}, sql: {sql}" )) })?; - let remote_schema = Arc::new(build_remote_schema(&mut prepared)?); + let mut stmt = pre.into_handle(); + let sql_text = SqlText::new(&sql); + // SAFETY: `sql` is owned by this stack frame and outlives `stmt`. + // `exec_direct` is unsafe (may dereference bound parameters; we have none). + if unsafe { stmt.exec_direct(&sql_text) }.is_err() { + return Err(DataFusionError::Plan(format!( + "Failed to exec_direct for schema inference on mdb: {sql}" + ))); + } + // SAFETY: `stmt` is in cursor state after a successful exec_direct + // that produced a result set. + let cursor = unsafe { CursorImpl::new(stmt) }; + let remote_schema = Arc::new(build_remote_schema(cursor)?); Ok(remote_schema) } diff --git a/remote-table/src/connection/mdb/schema.rs b/remote-table/src/connection/mdb/schema.rs index 1fd3882..8b6b502 100644 --- a/remote-table/src/connection/mdb/schema.rs +++ b/remote-table/src/connection/mdb/schema.rs @@ -4,7 +4,7 @@ use crate::RemoteField; use crate::RemoteSchema; use crate::RemoteType; use datafusion_common::DataFusionError; -use odbc_api::Prepared; +use odbc_api::CursorImpl; use odbc_api::ResultSetMetadata; use odbc_api::handles::{AsStatementRef, ColumnDescription, Statement, StatementImpl}; @@ -26,10 +26,8 @@ fn sql_chars_to_string_lossy(name: &[u16]) -> String { String::from_utf16_lossy(name) } -pub(super) fn build_remote_schema( - prepared: &mut Prepared>, -) -> DFResult { - let col_count = prepared +pub(super) fn build_remote_schema(mut cursor: CursorImpl) -> DFResult { + let col_count = cursor .num_result_cols() .map_err(|e| DataFusionError::External(Box::new(e)))? as u16; let mut remote_fields = vec![]; @@ -39,7 +37,7 @@ pub(super) fn build_remote_schema( // col_nullability return NoDiagnostics), so we go through the // low-level SQLDescribeCol path. let mut col_desc = ColumnDescription::default(); - let describe_result = prepared.as_stmt_ref().describe_col(i, &mut col_desc); + let describe_result = cursor.as_stmt_ref().describe_col(i, &mut col_desc); if describe_result.is_err() { return Err(DataFusionError::Plan(format!( "describe_col failed for column {i} on mdb" From 060f5d869a94d3033a75a245583865771b5d0e24 Mon Sep 17 00:00:00 2001 From: Linwei Zhang Date: Mon, 22 Jun 2026 11:30:42 +0800 Subject: [PATCH 2/2] refactor(mdb): use safe conn.execute() for infer_schema, matching DM pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the manual preallocate + unsafe exec_direct + unsafe CursorImpl with odbc-api's safe conn.execute(&sql, (), None). This is the same pattern used by the DM connection — zero unsafe blocks. --- remote-table/src/connection/mdb/mod.rs | 28 +++++++++----------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/remote-table/src/connection/mdb/mod.rs b/remote-table/src/connection/mdb/mod.rs index 29dce40..9c6011a 100644 --- a/remote-table/src/connection/mdb/mod.rs +++ b/remote-table/src/connection/mdb/mod.rs @@ -190,28 +190,20 @@ impl Connection for MdbConnection { let sql = RemoteDbType::Mdb.limit_1_query_if_possible(source)?; debug!("[remote-table] inferring mdb schema with: {sql}"); let conn = self.conn.lock().await; - // mdbtools 1.0.0 does not support SQL_ATTR_PARAMSET_SIZE, so - // odbc-api's safe `conn.execute()` fails with HY092. Use - // SQLExecDirect directly — one call, no prepare+execute needed. - let pre = conn.preallocate().map_err(|e| { + let cursor_opt = conn.execute(&sql, (), None).map_err(|e| { DataFusionError::Plan(format!( - "Failed to preallocate statement for schema inference on mdb: {e:?}, sql: {sql}" + "Failed to execute query for schema inference on mdb: {e:?}, sql: {sql}" )) })?; - let mut stmt = pre.into_handle(); - let sql_text = SqlText::new(&sql); - // SAFETY: `sql` is owned by this stack frame and outlives `stmt`. - // `exec_direct` is unsafe (may dereference bound parameters; we have none). - if unsafe { stmt.exec_direct(&sql_text) }.is_err() { - return Err(DataFusionError::Plan(format!( - "Failed to exec_direct for schema inference on mdb: {sql}" - ))); + match cursor_opt { + None => Err(DataFusionError::Plan( + "No rows returned to infer schema".to_string(), + )), + Some(cursor) => { + let remote_schema = Arc::new(build_remote_schema(cursor)?); + Ok(remote_schema) + } } - // SAFETY: `stmt` is in cursor state after a successful exec_direct - // that produced a result set. - let cursor = unsafe { CursorImpl::new(stmt) }; - let remote_schema = Arc::new(build_remote_schema(cursor)?); - Ok(remote_schema) } async fn query(