From 6e42bf6205ae6ae2d8154c139ada8eaf1e359d35 Mon Sep 17 00:00:00 2001
From: yangjie01 <yangjie01@baidu.com>
Date: Fri, 22 May 2026 14:39:52 +0800
Subject: [PATCH] feat: add lance_dataset_drop_columns for metadata-only column
 removal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First of three PRs covering the schema-evolution roadmap entry. Exposes
upstream's `drop_columns` — a metadata-only manifest commit that removes
the named columns from the schema without rewriting data files.
---
 include/lance/lance.h      |  31 +++++
 include/lance/lance.hpp    |  26 ++++
 src/drop_columns.rs        | 102 ++++++++++++++
 src/lib.rs                 |   2 +
 tests/c_api_test.rs        | 268 +++++++++++++++++++++++++++++++++++++
 tests/cpp/test_c_api.c     |  78 +++++++++++
 tests/cpp/test_cpp_api.cpp |  58 ++++++++
 7 files changed, 565 insertions(+)
 create mode 100644 src/drop_columns.rs
diff --git a/include/lance/lance.h b/include/lance/lance.h
index e02c964..db6d557 100644
--- a/include/lance/lance.h
+++ b/include/lance/lance.h
@@ -453,6 +453,37 @@ int32_t lance_dataset_compact_files(
     LanceCompactionMetrics* out_metrics
 );
 
+/* ─── lance_dataset_drop_columns ──────────────────────────────────────────── */
+
+/**
+ * Drop one or more columns from the dataset's schema, committing a new
+ * manifest. This is a metadata-only operation: the data files on storage
+ * are not rewritten until a later `lance_dataset_compact_files` call
+ * materializes the projection (after which the previous version's files
+ * can be removed by a future cleanup operation).
+ *
+ * Mutates `dataset` in place — the same handle remains valid afterward
+ * and sees the new version. Scanners already in flight against this
+ * dataset keep their pre-drop schema view.
+ *
+ * @param dataset      Open dataset (not consumed). Mutated in place to
+ *                     see the new version. Must not be NULL.
+ * @param columns      Array of NUL-terminated UTF-8 column names to drop.
+ *                     Must not be NULL; entries must be non-NULL and
+ *                     non-empty.
+ * @param num_columns  Length of `columns`. Must be > 0.
+ * @return 0 on success, -1 on error. Error codes:
+ *         LANCE_ERR_INVALID_ARGUMENT for NULL/empty inputs, NULL or empty
+ *         entries, non-UTF-8 column names, unknown columns, or an attempt
+ *         to drop every column;
+ *         LANCE_ERR_COMMIT_CONFLICT for a concurrent writer.
+ */
+int32_t lance_dataset_drop_columns(
+    LanceDataset* dataset,
+    const char* const* columns,
+    size_t num_columns
+);
+
 /**
  * Export the dataset schema via Arrow C Data Interface.
  * @param out  Pointer to caller-allocated ArrowSchema struct
diff --git a/include/lance/lance.hpp b/include/lance/lance.hpp
index 6a3cef0..eeb55d6 100644
--- a/include/lance/lance.hpp
+++ b/include/lance/lance.hpp
@@ -437,6 +437,32 @@ class Dataset {
         return metrics;
     }
 
+    /// Drop columns from the dataset's schema and commit a new manifest.
+    /// Metadata-only — data files remain until a later `compact_files()`
+    /// call rewrites them. Mutates this dataset in place; the handle
+    /// continues to point at the new version.
+    ///
+    /// `columns` must be non-empty. Throws lance::Error on failure (empty
+    /// list, unknown column, attempt to drop every column, commit
+    /// conflict, ...).
+    void drop_columns(const std::vector<std::string>& columns) {
+        std::vector<const char*> col_ptrs;
+        col_ptrs.reserve(columns.size());
+        for (const auto& c : columns) {
+            col_ptrs.push_back(c.c_str());
+        }
+        // Pass `col_ptrs.data()` unconditionally — matches the `update`
+        // and `merge_insert` siblings whose inputs are also required to
+        // be non-empty. The Rust layer rejects `num_columns == 0` before
+        // dereferencing the pointer, so an empty vector still surfaces
+        // INVALID_ARGUMENT with the precise "num_columns must be > 0"
+        // message rather than the misleading "columns must not be NULL".
+        if (lance_dataset_drop_columns(
+                handle_.get(), col_ptrs.data(), columns.size()) != 0) {
+            check_error();
+        }
+    }
+
     /// Export the schema as an Arrow C Data Interface struct.
     void schema(ArrowSchema* out) const {
         if (lance_dataset_schema(handle_.get(), out) != 0) {
diff --git a/src/drop_columns.rs b/src/drop_columns.rs
new file mode 100644
index 0000000..77a908d
--- /dev/null
+++ b/src/drop_columns.rs
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Drop columns C API: remove columns from the dataset's schema, committing a
+//! new manifest. Metadata-only — data files are not rewritten until a later
+//! `lance_dataset_compact_files` call materializes the projection.
+//!
+//! Mutates the dataset in place under an exclusive write lock; existing
+//! scanners that already cloned the inner Arc keep their pre-drop schema view.
+
+use std::ffi::c_char;
+
+use lance_core::Result;
+use snafu::location;
+
+use crate::dataset::LanceDataset;
+use crate::error::ffi_try;
+use crate::helpers;
+use crate::runtime::block_on;
+
+/// Drop one or more columns from the dataset's schema and commit a new
+/// manifest. This is a metadata-only operation: data files remain on storage
+/// until they are rewritten by `lance_dataset_compact_files` (and then
+/// cleaned up by version cleanup).
+///
+/// - `dataset`: Open dataset (mutated; same handle remains valid afterward).
+///   Must not be NULL.
+/// - `columns`: Pointer to an array of NUL-terminated C strings naming the
+///   columns to drop. Must not be NULL. Entries must be non-NULL and
+///   non-empty UTF-8.
+/// - `num_columns`: Length of the `columns` array. Must be non-zero.
+///
+/// Returns 0 on success, -1 on error. Error codes:
+/// `LANCE_ERR_INVALID_ARGUMENT` for NULL/empty args, NULL or empty entries,
+/// non-UTF-8 names, unknown columns, or an attempt to drop every column
+/// (upstream rejects that since a Lance dataset must retain at least one
+/// field). `LANCE_ERR_COMMIT_CONFLICT` for a concurrent writer.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn lance_dataset_drop_columns(
+    dataset: *mut LanceDataset,
+    columns: *const *const c_char,
+    num_columns: usize,
+) -> i32 {
+    ffi_try!(
+        unsafe { drop_columns_inner(dataset, columns, num_columns) },
+        neg
+    )
+}
+
+unsafe fn drop_columns_inner(
+    dataset: *mut LanceDataset,
+    columns: *const *const c_char,
+    num_columns: usize,
+) -> Result<i32> {
+    if dataset.is_null() {
+        return Err(lance_core::Error::InvalidInput {
+            source: "dataset must not be NULL".into(),
+            location: location!(),
+        });
+    }
+    if columns.is_null() {
+        return Err(lance_core::Error::InvalidInput {
+            source: "columns must not be NULL".into(),
+            location: location!(),
+        });
+    }
+    if num_columns == 0 {
+        return Err(lance_core::Error::InvalidInput {
+            source: "num_columns must be > 0".into(),
+            location: location!(),
+        });
+    }
+
+    // Materialize the column names up front so any per-index validation
+    // error fires before the dataset's write lock is taken — matches the
+    // pre-lock validation pattern used by `update.rs`.
+    let mut names: Vec<String> = Vec::with_capacity(num_columns);
+    for i in 0..num_columns {
+        // SAFETY: `columns` is non-NULL (checked above) and the caller
+        // guarantees the array has at least `num_columns` entries.
+        let entry = unsafe { *columns.add(i) };
+        // SAFETY: each entry is either NULL (rejected below) or a
+        // NUL-terminated C string the caller keeps alive for this call.
+        let name = unsafe { helpers::parse_c_string(entry)? }
+            .filter(|s| !s.is_empty())
+            .ok_or_else(|| lance_core::Error::InvalidInput {
+                source: format!("columns[{i}] must not be NULL or empty").into(),
+                location: location!(),
+            })?;
+        names.push(name.to_string());
+    }
+
+    // SAFETY: `dataset` is non-NULL (checked above) and the caller guarantees
+    // it points to a live `LanceDataset`. `with_mut` takes an exclusive
+    // write lock on the inner `Arc<Dataset>` before yielding `&mut Dataset`,
+    // so a shared `&*dataset` borrow here is sound — interior mutability
+    // is the synchronization point.
+    let ds = unsafe { &*dataset };
+    let names_refs: Vec<&str> = names.iter().map(String::as_str).collect();
+    ds.with_mut(|d| block_on(d.drop_columns(&names_refs)))?;
+    Ok(0)
+}
diff --git a/src/lib.rs b/src/lib.rs
index 666ec05..5c34477 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,6 +20,7 @@ mod batch;
 mod compact;
 mod dataset;
 mod delete;
+mod drop_columns;
 mod error;
 mod fragment_writer;
 mod helpers;
@@ -37,6 +38,7 @@ pub use batch::*;
 pub use compact::*;
 pub use dataset::*;
 pub use delete::*;
+pub use drop_columns::*;
 pub use error::{
     LanceErrorCode, lance_free_string, lance_last_error_code, lance_last_error_message,
 };
diff --git a/tests/c_api_test.rs b/tests/c_api_test.rs
index d6c7b86..ebc8260 100644
--- a/tests/c_api_test.rs
+++ b/tests/c_api_test.rs
@@ -6268,3 +6268,271 @@ fn test_scanner_set_index_segments_null_safety() {
     unsafe { lance_scanner_close(scanner) };
     unsafe { lance_dataset_close(ds) };
 }
+
+// ===========================================================================
+// lance_dataset_drop_columns
+// ===========================================================================
+
+/// Return the dataset's column names by exporting its schema through the
+/// Arrow C Data Interface.
+fn schema_field_names(ds: *const LanceDataset) -> Vec<String> {
+    debug_assert!(!ds.is_null(), "schema_field_names called with NULL dataset");
+    let mut ffi_schema = FFI_ArrowSchema::empty();
+    let rc = unsafe { lance_dataset_schema(ds, &mut ffi_schema) };
+    assert_eq!(rc, 0, "lance_dataset_schema failed");
+    let arrow_schema =
+        arrow_schema::Schema::try_from(&ffi_schema).expect("FFI_ArrowSchema -> Schema");
+    arrow_schema
+        .fields()
+        .iter()
+        .map(|f| f.name().to_string())
+        .collect()
+}
+
+#[test]
+fn test_drop_columns_single() {
+    let (_tmp, uri) = create_large_dataset(10);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let col = c_str("value");
+    let cols = [col.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, 0);
+
+    // Row count is unchanged — drop is metadata-only.
+    assert_eq!(unsafe { lance_dataset_count_rows(ds) }, 10);
+    // Schema reflects the dropped column.
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id".to_string(), "label".to_string()]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_multiple() {
+    let (_tmp, uri) = create_large_dataset(5);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let c_value = c_str("value");
+    let c_label = c_str("label");
+    let cols = [c_value.as_ptr(), c_label.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, 0);
+
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id".to_string()]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_bumps_version() {
+    let (_tmp, uri) = create_large_dataset(3);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let v_before = unsafe { lance_dataset_version(ds) };
+    let col = c_str("label");
+    let cols = [col.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, 0);
+    let v_after = unsafe { lance_dataset_version(ds) };
+    assert!(
+        v_after > v_before,
+        "version should increase: before={v_before}, after={v_after}"
+    );
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_preserves_remaining_data() {
+    let (_tmp, uri) = create_large_dataset(4);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let col = c_str("value");
+    let cols = [col.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, 0);
+
+    // Materialize the remaining columns to confirm row data is intact.
+    let indices: [u64; 4] = [0, 1, 2, 3];
+    let mut stream = FFI_ArrowArrayStream::empty();
+    let rc = unsafe {
+        lance_dataset_take(
+            ds,
+            indices.as_ptr(),
+            indices.len(),
+            ptr::null(),
+            &mut stream,
+        )
+    };
+    assert_eq!(rc, 0);
+
+    let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.unwrap();
+    let schema = reader.schema();
+    let names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
+    assert_eq!(names, vec!["id", "label"]);
+
+    // Concatenate the resulting batches and assert the surviving column
+    // values are unchanged — a regression that zeroed surviving data
+    // would slip past a shape-only check.
+    let batches: Vec<_> = reader.into_iter().collect::<Result<Vec<_>, _>>().unwrap();
+    let total: usize = batches.iter().map(|b| b.num_rows()).sum();
+    assert_eq!(total, 4);
+
+    let mut ids: Vec<i32> = Vec::with_capacity(4);
+    let mut labels: Vec<String> = Vec::with_capacity(4);
+    for batch in &batches {
+        let id_col = batch
+            .column_by_name("id")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let label_col = batch
+            .column_by_name("label")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        for i in 0..batch.num_rows() {
+            ids.push(id_col.value(i));
+            labels.push(label_col.value(i).to_string());
+        }
+    }
+    assert_eq!(ids, vec![0, 1, 2, 3]);
+    assert_eq!(labels, vec!["row_0", "row_1", "row_2", "row_3"]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_unknown_column_rejected() {
+    let (_tmp, uri) = create_large_dataset(3);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let col = c_str("no_such_column");
+    let cols = [col.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, -1);
+    // Upstream surfaces this as InvalidInput → InvalidArgument at the FFI
+    // boundary. The dataset is left untouched on the error path.
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id", "value", "label"]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_cannot_drop_all() {
+    let (_tmp, uri) = create_large_dataset(2);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let c_id = c_str("id");
+    let c_value = c_str("value");
+    let c_label = c_str("label");
+    let cols = [c_id.as_ptr(), c_value.as_ptr(), c_label.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    // Schema is unchanged.
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id", "value", "label"]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_null_dataset_rejected() {
+    let col = c_str("value");
+    let cols = [col.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ptr::null_mut(), cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+}
+
+#[test]
+fn test_drop_columns_null_columns_rejected() {
+    let (_tmp, uri) = create_large_dataset(2);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let rc = unsafe { lance_dataset_drop_columns(ds, ptr::null(), 1) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    assert_eq!(unsafe { lance_dataset_count_rows(ds) }, 2);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_zero_count_rejected() {
+    let (_tmp, uri) = create_large_dataset(2);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let v_before = unsafe { lance_dataset_version(ds) };
+    let dummy = c_str("value");
+    let cols = [dummy.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), 0) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    // No version bump on the error path — compare against the pre-call value.
+    assert_eq!(unsafe { lance_dataset_version(ds) }, v_before);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_null_entry_rejected() {
+    let (_tmp, uri) = create_large_dataset(2);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    let c_value = c_str("value");
+    let cols: [*const c_char; 2] = [c_value.as_ptr(), ptr::null()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    // Dataset is unchanged.
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id", "value", "label"]);
+
+    unsafe { lance_dataset_close(ds) };
+}
+
+#[test]
+fn test_drop_columns_empty_entry_rejected() {
+    let (_tmp, uri) = create_large_dataset(2);
+    let c_uri = c_str(&uri);
+    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
+    assert!(!ds.is_null());
+
+    // Empty string is rejected by the `.filter(!empty)` guard at the FFI
+    // boundary, distinct from the NULL-entry path.
+    let empty = c_str("");
+    let cols = [empty.as_ptr()];
+    let rc = unsafe { lance_dataset_drop_columns(ds, cols.as_ptr(), cols.len()) };
+    assert_eq!(rc, -1);
+    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
+    let names = schema_field_names(ds);
+    assert_eq!(names, vec!["id", "value", "label"]);
+
+    unsafe { lance_dataset_close(ds) };
+}
diff --git a/tests/cpp/test_c_api.c b/tests/cpp/test_c_api.c
index 89f64d5..4ff81c8 100644
--- a/tests/cpp/test_c_api.c
+++ b/tests/cpp/test_c_api.c
@@ -345,6 +345,83 @@ static void test_merge_insert(const char *write_uri) {
     printf("OK\n");
 }
 
+/* Re-opens the dataset just written by `test_dataset_write_roundtrip` and
+ * exercises `lance_dataset_drop_columns`. Drops the `name` column so the
+ * dataset is left with `id` only; subsequent tests (`compact_files`,
+ * `delete`) do not reference any dropped column. Must run after
+ * `test_update` / `test_merge_insert`, which both still need `name` / `id`. */
+static void test_drop_columns(const char *write_uri) {
+    printf("  test_drop_columns... ");
+
+    LanceDataset *ds = lance_dataset_open(write_uri, NULL, 0);
+    ASSERT(ds != NULL, "open failed");
+    uint64_t before_rows = lance_dataset_count_rows(ds);
+    CHECK_OK();
+    ASSERT(before_rows > 0, "fixture expected to have rows");
+    uint64_t v_before = lance_dataset_version(ds);
+
+    /* Snapshot the schema so we can confirm the field count decreased
+     * (otherwise a bug that bumped the version without modifying the
+     * schema would pass silently). The ArrowSchema struct owns its
+     * children — release it before any potentially-aborting assert so
+     * we don't leak under sanitizer runs in CI. */
+    struct ArrowSchema schema_before;
+    memset(&schema_before, 0, sizeof(schema_before));
+    int32_t rc = lance_dataset_schema(ds, &schema_before);
+    ASSERT(rc == 0, "schema export failed");
+    int64_t fields_before = schema_before.n_children;
+    if (schema_before.release) schema_before.release(&schema_before);
+
+    const char *cols[] = {"name"};
+    rc = lance_dataset_drop_columns(ds, cols, 1);
+    ASSERT(rc == 0, "drop_columns failed");
+    uint64_t after_rows = lance_dataset_count_rows(ds);
+    CHECK_OK();
+    ASSERT(after_rows == before_rows,
+           "metadata-only drop must not change row count");
+    ASSERT(lance_dataset_version(ds) > v_before,
+           "drop_columns must bump the version");
+
+    /* Schema field count must have decreased by exactly 1. The C-test
+     * fixture has 2 columns (`id`, `name`) — assert `fields_after == 1`
+     * so this self-documents the post-drop expectation and trips if the
+     * fixture ever grows additional columns. Release the exported
+     * schema before any assertion so we never leak it on failure. */
+    struct ArrowSchema schema_after;
+    memset(&schema_after, 0, sizeof(schema_after));
+    rc = lance_dataset_schema(ds, &schema_after);
+    ASSERT(rc == 0, "schema export failed after drop");
+    int64_t fields_after = schema_after.n_children;
+    if (schema_after.release) schema_after.release(&schema_after);
+    ASSERT(fields_after == fields_before - 1,
+           "schema field count must decrease by 1 after drop");
+    ASSERT(fields_after == 1,
+           "C-test fixture should be left with `id` only after dropping `name`");
+
+    /* NULL `columns` and num_columns == 0 must both be rejected. */
+    rc = lance_dataset_drop_columns(ds, NULL, 1);
+    ASSERT(rc == -1, "NULL columns must fail");
+    ASSERT(lance_last_error_code() == LANCE_ERR_INVALID_ARGUMENT,
+           "expected INVALID_ARGUMENT");
+
+    rc = lance_dataset_drop_columns(ds, cols, 0);
+    ASSERT(rc == -1, "num_columns=0 must fail");
+    ASSERT(lance_last_error_code() == LANCE_ERR_INVALID_ARGUMENT,
+           "expected INVALID_ARGUMENT");
+
+    /* Dropping the sole remaining column (`id`) must fail with
+     * INVALID_ARGUMENT — upstream refuses to leave a dataset with zero
+     * fields. */
+    const char *last_col[] = {"id"};
+    rc = lance_dataset_drop_columns(ds, last_col, 1);
+    ASSERT(rc == -1, "dropping last column must fail");
+    ASSERT(lance_last_error_code() == LANCE_ERR_INVALID_ARGUMENT,
+           "expected INVALID_ARGUMENT");
+
+    lance_dataset_close(ds);
+    printf("OK\n");
+}
+
 /* Re-opens the dataset just written by `test_dataset_write_roundtrip` and
  * exercises `lance_dataset_compact_files`. The smoke fixture is a single
  * fragment, so the default planner has nothing to compact — we expect
@@ -423,6 +500,7 @@ int main(int argc, char **argv) {
     test_dataset_write_roundtrip(uri, write_uri);
     test_update(write_uri);
     test_merge_insert(write_uri);
+    test_drop_columns(write_uri);
     test_compact_files(write_uri);
     test_delete(write_uri);
 
diff --git a/tests/cpp/test_cpp_api.cpp b/tests/cpp/test_cpp_api.cpp
index 1a28446..ea1f693 100644
--- a/tests/cpp/test_cpp_api.cpp
+++ b/tests/cpp/test_cpp_api.cpp
@@ -397,6 +397,63 @@ static void test_merge_insert(const std::string& dst_uri) {
     PASS();
 }
 
+// Re-opens the dataset just written by `test_dataset_write_roundtrip` and
+// exercises `Dataset::drop_columns`. Drops the `name` column so the dataset
+// is left with `id` only; subsequent tests (`compact_files`, `delete_rows`)
+// do not reference any dropped column. Must run after `test_update` /
+// `test_merge_insert`.
+static void test_drop_columns(const std::string& dst_uri) {
+    TEST(test_drop_columns);
+
+    auto ds = lance::Dataset::open(dst_uri);
+    uint64_t before_rows = ds.count_rows();
+    uint64_t v_before = ds.version();
+
+    ds.drop_columns({"name"});
+    assert(ds.count_rows() == before_rows
+           && "metadata-only drop must preserve row count");
+    assert(ds.version() > v_before
+           && "drop_columns must bump the version");
+    uint64_t v_after_drop = ds.version();
+
+    // Dropping an unknown column must throw with INVALID_ARGUMENT and
+    // leave the dataset unchanged (no version bump on the error path).
+    bool caught_unknown = false;
+    try {
+        ds.drop_columns({"no_such_column"});
+    } catch (const lance::Error& e) {
+        caught_unknown = true;
+        assert(e.code == LANCE_ERR_INVALID_ARGUMENT);
+    }
+    assert(caught_unknown);
+    assert(ds.version() == v_after_drop
+           && "failed drop must not bump the version");
+
+    // Empty column list must throw with INVALID_ARGUMENT.
+    bool caught_empty = false;
+    try {
+        ds.drop_columns({});
+    } catch (const lance::Error& e) {
+        caught_empty = true;
+        assert(e.code == LANCE_ERR_INVALID_ARGUMENT);
+    }
+    assert(caught_empty);
+
+    // Dropping the sole remaining column (`id`) must throw with
+    // INVALID_ARGUMENT — upstream refuses to leave a dataset with zero
+    // fields.
+    bool caught_last = false;
+    try {
+        ds.drop_columns({"id"});
+    } catch (const lance::Error& e) {
+        caught_last = true;
+        assert(e.code == LANCE_ERR_INVALID_ARGUMENT);
+    }
+    assert(caught_last);
+
+    PASS();
+}
+
 // Re-opens the dataset just written by `test_dataset_write_roundtrip` and
 // exercises `Dataset::compact_files`. The smoke fixture is a single fragment
 // so the default planner has nothing to compact — we expect a no-op (zero
@@ -468,6 +525,7 @@ int main(int argc, char** argv) {
     test_dataset_write_roundtrip(uri, write_uri);
     test_update(write_uri);
     test_merge_insert(write_uri);
+    test_drop_columns(write_uri);
     test_compact_files(write_uri);
     test_delete_rows(write_uri);