From 59552a476817d029f24a1abc60486a6a10807b53 Mon Sep 17 00:00:00 2001 From: "zhanghaobo@kanzhun.com" Date: Fri, 23 Jan 2026 14:55:08 +0800 Subject: [PATCH 1/4] chore: dropTable deletes data when using hive2 namespace and hive3 namespace --- .../main/java/org/lance/namespace/hive2/Hive2Namespace.java | 4 +++- .../main/java/org/lance/namespace/hive3/Hive3Namespace.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java index 1a25d88..6ebe1a2 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java @@ -553,9 +553,11 @@ protected String doDropTable(ObjectIdentifier id) { Hive2Util.validateLanceTable(hmsTable.get()); String location = hmsTable.get().getSd().getLocation(); + final boolean deleteData = true; + final boolean ignoreUnknownTable = true; clientPool.run( client -> { - client.dropTable(db, tableName, false, true); + client.dropTable(db, tableName, deleteData, ignoreUnknownTable); return null; }); diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java index 5a259b4..fcbaf85 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java @@ -596,9 +596,11 @@ protected String doDropTable(ObjectIdentifier id) { Hive3Util.validateLanceTable(hmsTable.get()); String location = hmsTable.get().getSd().getLocation(); + final boolean deleteData = true; + final boolean ignoreUnknownTable = true; clientPool.run( client -> { - client.dropTable(catalog, db, tableName, false, true); + 
client.dropTable(catalog, db, tableName, deleteData, ignoreUnknownTable); return null; }); From be194a650bbc72c5a5ce6cc293fb81e74d318867 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 5 Feb 2026 16:19:58 -0800 Subject: [PATCH 2/4] fix: implement dropTable and fix deregisterTable semantics for hive2/hive3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add dropTable() method that deletes both metadata and data (deleteData=true) - Fix deregisterTable() to only remove metadata without deleting data (deleteData=false) - Update both Java and Python implementations for hive2 and hive3 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../lance/namespace/hive2/Hive2Namespace.java | 33 ++++++++++--------- .../lance/namespace/hive3/Hive3Namespace.java | 25 +++++++++++--- python/src/lance_namespace_impls/hive2.py | 30 +++++++++++++++-- python/src/lance_namespace_impls/hive3.py | 28 ++++++++++++++++ 4 files changed, 93 insertions(+), 23 deletions(-) diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java index 6ebe1a2..6f19192 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java @@ -266,16 +266,6 @@ public void tableExists(TableExistsRequest request) { Hive2Util.validateLanceTable(hmsTable.get()); } - @Override - public DropTableResponse dropTable(DropTableRequest request) { - ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); - String location = doDropTable(tableId); - DropTableResponse response = new DropTableResponse(); - response.setId(request.getId()); - response.setLocation(location); - return response; - } - @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { 
if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { @@ -324,6 +314,21 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { return response; } + @Override + public DropTableResponse dropTable(DropTableRequest request) { + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); + + ValidationUtil.checkArgument( + tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId); + + String location = doDropTable(tableId, true); + + DropTableResponse response = new DropTableResponse(); + response.setId(request.getId()); + response.setLocation(location); + return response; + } + @Override public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); @@ -331,7 +336,7 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ValidationUtil.checkArgument( tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId); - String location = doDropTable(tableId); + String location = doDropTable(tableId, false); DeregisterTableResponse response = new DeregisterTableResponse(); response.setId(request.getId()); @@ -537,7 +542,7 @@ protected List doListTables(String db) { } } - protected String doDropTable(ObjectIdentifier id) { + protected String doDropTable(ObjectIdentifier id, boolean deleteData) { String db = id.levelAtListPos(0).toLowerCase(); String tableName = id.levelAtListPos(1).toLowerCase(); @@ -553,11 +558,9 @@ protected String doDropTable(ObjectIdentifier id) { Hive2Util.validateLanceTable(hmsTable.get()); String location = hmsTable.get().getSd().getLocation(); - final boolean deleteData = true; - final boolean ignoreUnknownTable = true; clientPool.run( client -> { - client.dropTable(db, tableName, deleteData, ignoreUnknownTable); + client.dropTable(db, tableName, deleteData, true /* ignoreUnknownTable */); return null; }); diff --git 
a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java index fcbaf85..15f02bb 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java @@ -35,6 +35,8 @@ import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; +import org.lance.namespace.model.DropTableRequest; +import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -329,6 +331,21 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { return response; } + @Override + public DropTableResponse dropTable(DropTableRequest request) { + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); + + ValidationUtil.checkArgument( + tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId); + + String location = doDropTable(tableId, true); + + DropTableResponse response = new DropTableResponse(); + response.setId(request.getId()); + response.setLocation(location); + return response; + } + @Override public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); @@ -336,7 +353,7 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ValidationUtil.checkArgument( tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId); - String location = doDropTable(tableId); + String location = doDropTable(tableId, false); DeregisterTableResponse response = new DeregisterTableResponse(); response.setId(request.getId()); @@ -581,7 +598,7 @@ 
protected List doListTables(String catalog, String db) { } } - protected String doDropTable(ObjectIdentifier id) { + protected String doDropTable(ObjectIdentifier id, boolean deleteData) { String catalog = id.levelAtListPos(0).toLowerCase(); String db = id.levelAtListPos(1).toLowerCase(); String tableName = id.levelAtListPos(2).toLowerCase(); @@ -596,11 +613,9 @@ protected String doDropTable(ObjectIdentifier id) { Hive3Util.validateLanceTable(hmsTable.get()); String location = hmsTable.get().getSd().getLocation(); - final boolean deleteData = true; - final boolean ignoreUnknownTable = true; clientPool.run( client -> { - client.dropTable(catalog, db, tableName, deleteData, ignoreUnknownTable); + client.dropTable(catalog, db, tableName, deleteData, true /* ignoreUnknownTable */); return null; }); diff --git a/python/src/lance_namespace_impls/hive2.py b/python/src/lance_namespace_impls/hive2.py index 47c9a2d..1e7bdcc 100644 --- a/python/src/lance_namespace_impls/hive2.py +++ b/python/src/lance_namespace_impls/hive2.py @@ -72,6 +72,8 @@ CreateNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, + DropTableRequest, + DropTableResponse, ListTablesRequest, ListTablesResponse, DeclareTableRequest, @@ -397,6 +399,31 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse logger.error(f"Failed to describe table {request.id}: {e}") raise + def drop_table(self, request: DropTableRequest) -> DropTableResponse: + """Drop a table from the Hive Metastore and delete its data.""" + try: + database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + table = client.get_table(database, table_name) + + if not table.parameters: + raise ValueError(f"Table {request.id} is not a Lance table") + table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + location = table.sd.location if table.sd else None + + 
client.drop_table(database, table_name, deleteData=True) + + return DropTableResponse(location=location) + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to drop table {request.id}: {e}") + raise + def deregister_table( self, request: DeregisterTableRequest ) -> DeregisterTableResponse: @@ -405,10 +432,8 @@ def deregister_table( database, table_name = self._normalize_identifier(request.id) with self.client as client: - # Get table to check if it's a Lance table table = client.get_table(database, table_name) - # Check if it's a Lance table (case insensitive) if not table.parameters: raise ValueError(f"Table {request.id} is not a Lance table") table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() @@ -417,7 +442,6 @@ def deregister_table( location = table.sd.location if table.sd else None - # Drop the table metadata only (don't delete data) client.drop_table(database, table_name, deleteData=False) return DeregisterTableResponse(location=location) diff --git a/python/src/lance_namespace_impls/hive3.py b/python/src/lance_namespace_impls/hive3.py index 32c9d6f..2680fc3 100644 --- a/python/src/lance_namespace_impls/hive3.py +++ b/python/src/lance_namespace_impls/hive3.py @@ -74,6 +74,8 @@ CreateNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, + DropTableRequest, + DropTableResponse, ListTablesRequest, ListTablesResponse, DeclareTableRequest, @@ -477,6 +479,32 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse logger.error(f"Failed to describe table {request.id}: {e}") raise + def drop_table(self, request: DropTableRequest) -> DropTableResponse: + """Drop a table and delete its data.""" + try: + catalog, database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + table = client.get_table(database, table_name) + + if not table.parameters: + raise 
ValueError(f"Table {request.id} is not a Lance table") + table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + location = table.sd.location if table.sd else None + + client.drop_table(database, table_name, deleteData=True) + + return DropTableResponse(location=location) + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to drop table {request.id}: {e}") + raise + def deregister_table( self, request: DeregisterTableRequest ) -> DeregisterTableResponse: From aa3bd0d16f6e2b3e9b2dd93a734f733cd30e647d Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 5 Feb 2026 17:05:45 -0800 Subject: [PATCH 3/4] update impl spec --- docs/src/hive2.md | 18 ++++++++++++++++++ docs/src/hive3.md | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/docs/src/hive2.md b/docs/src/hive2.md index 0b116c5..5d21282 100644 --- a/docs/src/hive2.md +++ b/docs/src/hive2.md @@ -164,6 +164,24 @@ If the table is not a Lance table, return error code `13` (InvalidInput). If the HMS connection fails, return error code `17` (ServiceUnavailable). +### DropTable + +Removes a Lance table from HMS and deletes the underlying data. + +The implementation: + +1. Parse the table identifier +2. Retrieve the Table object and validate it is a Lance table +3. Drop the table from HMS with `deleteData=true`, which removes both the metadata and the underlying Lance table data + +**Error Handling:** + +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If the HMS connection fails, return error code `17` (ServiceUnavailable). + ### DeregisterTable Removes a Lance table registration from HMS without deleting the underlying data. 
diff --git a/docs/src/hive3.md b/docs/src/hive3.md index 382acca..58c7bf7 100644 --- a/docs/src/hive3.md +++ b/docs/src/hive3.md @@ -171,6 +171,24 @@ If the table is not a Lance table, return error code `13` (InvalidInput). If the HMS connection fails, return error code `17` (ServiceUnavailable). +### DropTable + +Removes a Lance table from HMS and deletes the underlying data. + +The implementation: + +1. Parse the table identifier +2. Retrieve the Table object and validate it is a Lance table +3. Drop the table from HMS with `deleteData=true`, which removes both the metadata and the underlying Lance table data + +**Error Handling:** + +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If the HMS connection fails, return error code `17` (ServiceUnavailable). + ### DeregisterTable Removes a Lance table registration from HMS without deleting the underlying data. From f36eaf2b459f9238767ec36774be5dedd2b4e3b3 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 5 Feb 2026 17:07:03 -0800 Subject: [PATCH 4/4] fix --- docs/src/hive3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/hive3.md b/docs/src/hive3.md index 58c7bf7..94ad14e 100644 --- a/docs/src/hive3.md +++ b/docs/src/hive3.md @@ -4,7 +4,7 @@ This document describes how the Hive 3.x MetaStore implements the Lance Namespac ## Background -Apache Hive MetaStore (HMS) is a centralized metadata repository for Apache Hive that stores schema and partition information for Hive tables. Hive 3.x introduces a 3-level namespace hierarchy (catalog.database.table) with an additional catalog level. For details on HMS 3.x, see the [HMS AdminManual 3.x](https://hive.apache.org/docs/latest/adminmanual-metastore-3-0-administration_75978150/). +Apache Hive MetaStore (HMS) is a centralized metadata repository for Apache Hive that stores schema and partition information for Hive tables. 
Hive 3.x and later support a 3-level namespace hierarchy (catalog.database.table) with an additional catalog level. For details on HMS 3.x and later, see the [HMS AdminManual 3.x](https://hive.apache.org/docs/latest/adminmanual-metastore-3-0-administration_75978150/). ## Namespace Implementation Configuration Properties