From 62b1cab560564859f5514fadbc22f58bf3d15ee0 Mon Sep 17 00:00:00 2001 From: Linwei Zhang Date: Mon, 22 Jun 2026 14:07:14 +0800 Subject: [PATCH 1/3] refactor(mdb): remove dummy bind_col workaround from query path The dummy column bind was a workaround for mdbtools 1.0.x which required SQLBindCol before SQLFetch. With odbc-api's safe conn.execute() wrapper handling the statement lifecycle, try removing this workaround to see if mdbtools no longer hangs on SQLFetch without prior SQLBindCol. --- remote-table/src/connection/mdb/mod.rs | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/remote-table/src/connection/mdb/mod.rs b/remote-table/src/connection/mdb/mod.rs index 52f7ee7..6a74566 100644 --- a/remote-table/src/connection/mdb/mod.rs +++ b/remote-table/src/connection/mdb/mod.rs @@ -19,7 +19,7 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use futures::lock::Mutex; use log::debug; use odbc_api::Environment; -use odbc_api::handles::{AsStatementRef, SqlResult, SqlText, Statement, StatementImpl}; +use odbc_api::handles::{SqlResult, SqlText, Statement, StatementImpl}; use odbc_api::{Cursor, CursorImpl}; use std::collections::HashMap; use std::path::PathBuf; @@ -249,28 +249,6 @@ impl Connection for MdbConnection { )) })?; - // mdbtools 1.0.x requires SQLBindCol before SQLFetch — without it, - // SQLFetch hangs indefinitely. odbc-api's Cursor::next_row() calls - // SQLFetch without binding columns. - // - // Workaround: bind a single dummy column (column 1) to allow - // SQLFetch to proceed, then use the row-by-row Cursor::next_row() - // path which calls SQLGetData for each cell. SQLGetData after - // SQLBindCol works because the dummy buffer is ignored. - let mut dummy = odbc_api::Nullable::::null(); - match unsafe { cursor.as_stmt_ref().bind_col(1, &mut dummy) } { - SqlResult::Success(()) | SqlResult::SuccessWithInfo(()) => {} - SqlResult::Error { function } => { - return Err(DataFusionError::Execution(format!( - "{function} failed binding dummy column for mdb" - ))); - } - other => { - return Err(DataFusionError::Execution(format!( - "Unexpected result binding dummy column: {other:?}" - ))); - } - } let mut exhausted = false; loop { From bd819b5880d7af0579fda1a05a030871123042c0 Mon Sep 17 00:00:00 2001 From: Linwei Zhang Date: Mon, 22 Jun 2026 14:09:55 +0800 Subject: [PATCH 2/3] chore: trigger CI From 465fc71aac0c21e38084990f5dbbc5fd8c148d17 Mon Sep 17 00:00:00 2001 From: Linwei Zhang Date: Mon, 22 Jun 2026 14:26:59 +0800 Subject: [PATCH 3/3] refactor(mdb): use safe conn.execute() for fetch_table_row_count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the manual preallocate + unsafe exec_direct + unsafe bind_col + unsafe fetch pattern with odbc-api's safe conn.execute() + cursor.next_row(). Counts rows by iterating the cursor — zero unsafe blocks. --- remote-table/src/connection/mdb/mod.rs | 59 +++++++++----------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/remote-table/src/connection/mdb/mod.rs b/remote-table/src/connection/mdb/mod.rs index 6a74566..454aa85 100644 --- a/remote-table/src/connection/mdb/mod.rs +++ b/remote-table/src/connection/mdb/mod.rs @@ -357,54 +357,33 @@ impl MdbConnection { tokio::task::spawn_blocking(move || { let handle = Handle::current(); let conn = handle.block_on(async { conn.lock().await }); - let pre = conn.preallocate().map_err(|e| { - DataFusionError::Execution(format!( - "Failed to preallocate statement for MDB row count: {e:?}" - )) - })?; - let mut stmt = pre.into_handle(); - let sql_text = SqlText::new(&count_query); - // SAFETY: `count_query` is owned by this closure and outlives `stmt`. - if unsafe { stmt.exec_direct(&sql_text) }.is_err() { - return Err(DataFusionError::Execution(format!( - "Failed to execute MDB row count query: {count_query}" - ))); - } // mdbtools ODBC returns 0 for aggregate COUNT(*) even though - // mdb-sql returns the correct value. Count table rows by fetching - // the first column instead; this keeps COUNT pushdown table-only - // without materializing Arrow batches. - let mut dummy = odbc_api::Nullable::::null(); - match unsafe { stmt.bind_col(1, &mut dummy) } { - SqlResult::Success(()) | SqlResult::SuccessWithInfo(()) => {} - SqlResult::Error { function } => { - return Err(DataFusionError::Execution(format!( - "{function} failed binding MDB row-count column" - ))); - } - other => { - return Err(DataFusionError::Execution(format!( - "Unexpected result binding MDB row-count column: {other:?}" - ))); - } - } + // mdb-sql returns the correct value. Count table rows by iterating + // the cursor instead. + let mut cursor = conn + .execute(&count_query, (), None) + .map_err(|e| { + DataFusionError::Execution(format!( + "Failed to execute MDB row count query: {e:?}, sql: {count_query}" + )) + })? + .ok_or_else(|| { + DataFusionError::Execution(format!( + "No result set for MDB row count query: {count_query}" + )) + })?; let mut row_count = 0usize; loop { - match unsafe { stmt.fetch() } { - SqlResult::Success(()) | SqlResult::SuccessWithInfo(()) => { + match cursor.next_row() { + Ok(Some(_row)) => { row_count += 1; } - SqlResult::NoData => break, - SqlResult::Error { function } => { - return Err(DataFusionError::Execution(format!( - "{function} failed fetching MDB row count" - ))); - } - other => { + Ok(None) => break, + Err(e) => { return Err(DataFusionError::Execution(format!( - "Unexpected result fetching MDB row count: {other:?}" + "Failed fetching MDB row count: {e}" ))); } }