Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/src/hive2.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,24 @@ If the table is not a Lance table, return error code `13` (InvalidInput).

If the HMS connection fails, return error code `17` (ServiceUnavailable).

### DropTable

Removes a Lance table from HMS and deletes the underlying data.

The implementation:

1. Parse the table identifier
2. Retrieve the Table object and validate it is a Lance table
3. Drop the table from HMS with `deleteData=true`, which removes both the metadata and the underlying Lance table data

**Error Handling:**

If the table does not exist, return error code `4` (TableNotFound).

If the table is not a Lance table, return error code `13` (InvalidInput).

If the HMS connection fails, return error code `17` (ServiceUnavailable).

### DeregisterTable

Removes a Lance table registration from HMS without deleting the underlying data.
Expand Down
20 changes: 19 additions & 1 deletion docs/src/hive3.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This document describes how the Hive 3.x MetaStore implements the Lance Namespac

## Background

Apache Hive MetaStore (HMS) is a centralized metadata repository for Apache Hive that stores schema and partition information for Hive tables. Hive 3.x introduces a 3-level namespace hierarchy (catalog.database.table) with an additional catalog level. For details on HMS 3.x, see the [HMS AdminManual 3.x](https://hive.apache.org/docs/latest/adminmanual-metastore-3-0-administration_75978150/).
Apache Hive MetaStore (HMS) is a centralized metadata repository for Apache Hive that stores schema and partition information for Hive tables. Hive 3.x and later introduce a 3-level namespace hierarchy (catalog.database.table) with an additional catalog level. For details on HMS 3.x and later, see the [HMS AdminManual 3.x](https://hive.apache.org/docs/latest/adminmanual-metastore-3-0-administration_75978150/).

## Namespace Implementation Configuration Properties

Expand Down Expand Up @@ -171,6 +171,24 @@ If the table is not a Lance table, return error code `13` (InvalidInput).

If the HMS connection fails, return error code `17` (ServiceUnavailable).

### DropTable

Removes a Lance table from HMS and deletes the underlying data.

The implementation:

1. Parse the table identifier
2. Retrieve the Table object and validate it is a Lance table
3. Drop the table from HMS with `deleteData=true`, which removes both the metadata and the underlying Lance table data

**Error Handling:**

If the table does not exist, return error code `4` (TableNotFound).

If the table is not a Lance table, return error code `13` (InvalidInput).

If the HMS connection fails, return error code `17` (ServiceUnavailable).

### DeregisterTable

Removes a Lance table registration from HMS without deleting the underlying data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,6 @@ public void tableExists(TableExistsRequest request) {
Hive2Util.validateLanceTable(hmsTable.get());
}

/**
 * Drops the table named by the request identifier.
 *
 * <p>The identifier is handed to {@code doDropTable}, and the location it returns is echoed
 * back in the response together with the id from the request.
 *
 * @param request request carrying the table identifier
 * @return response populated with the request id and the location reported by
 *     {@code doDropTable}
 */
@Override
public DropTableResponse dropTable(DropTableRequest request) {
  final String droppedLocation = doDropTable(ObjectIdentifier.of(request.getId()));

  final DropTableResponse result = new DropTableResponse();
  result.setLocation(droppedLocation);
  result.setId(request.getId());
  return result;
}

@Override
public DescribeTableResponse describeTable(DescribeTableRequest request) {
if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) {
Expand Down Expand Up @@ -324,14 +314,29 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
return response;
}

/**
 * Drops a table and asks the metastore to delete its data as well.
 *
 * <p>The identifier must have exactly two levels; {@code doDropTable} reads them as database
 * and table name. The drop is delegated to {@code doDropTable} with {@code deleteData = true}.
 *
 * @param request request carrying the table identifier
 * @return response populated with the request id and the location reported by
 *     {@code doDropTable}
 */
@Override
public DropTableResponse dropTable(DropTableRequest request) {
  final ObjectIdentifier identifier = ObjectIdentifier.of(request.getId());
  ValidationUtil.checkArgument(
      identifier.levels() == 2, "Expect 2-level table identifier but get %s", identifier);

  // true => deleteData: unlike deregisterTable, the underlying data is removed too.
  final String droppedLocation = doDropTable(identifier, true);

  final DropTableResponse result = new DropTableResponse();
  result.setLocation(droppedLocation);
  result.setId(request.getId());
  return result;
}

@Override
public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
ObjectIdentifier tableId = ObjectIdentifier.of(request.getId());

ValidationUtil.checkArgument(
tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId);

String location = doDropTable(tableId);
String location = doDropTable(tableId, false);

DeregisterTableResponse response = new DeregisterTableResponse();
response.setId(request.getId());
Expand Down Expand Up @@ -537,7 +542,7 @@ protected List<String> doListTables(String db) {
}
}

protected String doDropTable(ObjectIdentifier id) {
protected String doDropTable(ObjectIdentifier id, boolean deleteData) {
String db = id.levelAtListPos(0).toLowerCase();
String tableName = id.levelAtListPos(1).toLowerCase();

Expand All @@ -555,7 +560,7 @@ protected String doDropTable(ObjectIdentifier id) {

clientPool.run(
client -> {
client.dropTable(db, tableName, false, true);
client.dropTable(db, tableName, deleteData, true /* ignoreUnknownTable */);
return null;
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import org.lance.namespace.model.DescribeTableResponse;
import org.lance.namespace.model.DropNamespaceRequest;
import org.lance.namespace.model.DropNamespaceResponse;
import org.lance.namespace.model.DropTableRequest;
import org.lance.namespace.model.DropTableResponse;
import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;
import org.lance.namespace.model.ListTablesRequest;
Expand Down Expand Up @@ -329,14 +331,29 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
return response;
}

/**
 * Drops a table and asks the metastore to delete its data as well.
 *
 * <p>The identifier must have exactly three levels; {@code doDropTable} reads them as
 * catalog, database and table name. The drop is delegated to {@code doDropTable} with
 * {@code deleteData = true}.
 *
 * @param request request carrying the table identifier
 * @return response populated with the request id and the location reported by
 *     {@code doDropTable}
 */
@Override
public DropTableResponse dropTable(DropTableRequest request) {
  final ObjectIdentifier identifier = ObjectIdentifier.of(request.getId());
  ValidationUtil.checkArgument(
      identifier.levels() == 3, "Expect 3-level table identifier but get %s", identifier);

  // true => deleteData: unlike deregisterTable, the underlying data is removed too.
  final String droppedLocation = doDropTable(identifier, true);

  final DropTableResponse result = new DropTableResponse();
  result.setLocation(droppedLocation);
  result.setId(request.getId());
  return result;
}

@Override
public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
ObjectIdentifier tableId = ObjectIdentifier.of(request.getId());

ValidationUtil.checkArgument(
tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId);

String location = doDropTable(tableId);
String location = doDropTable(tableId, false);

DeregisterTableResponse response = new DeregisterTableResponse();
response.setId(request.getId());
Expand Down Expand Up @@ -581,7 +598,7 @@ protected List<String> doListTables(String catalog, String db) {
}
}

protected String doDropTable(ObjectIdentifier id) {
protected String doDropTable(ObjectIdentifier id, boolean deleteData) {
String catalog = id.levelAtListPos(0).toLowerCase();
String db = id.levelAtListPos(1).toLowerCase();
String tableName = id.levelAtListPos(2).toLowerCase();
Expand All @@ -598,7 +615,7 @@ protected String doDropTable(ObjectIdentifier id) {

clientPool.run(
client -> {
client.dropTable(catalog, db, tableName, false, true);
client.dropTable(catalog, db, tableName, deleteData, true /* ignoreUnknownTable */);
return null;
});

Expand Down
30 changes: 27 additions & 3 deletions python/src/lance_namespace_impls/hive2.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
CreateNamespaceResponse,
DropNamespaceRequest,
DropNamespaceResponse,
DropTableRequest,
DropTableResponse,
ListTablesRequest,
ListTablesResponse,
DeclareTableRequest,
Expand Down Expand Up @@ -397,6 +399,31 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
logger.error(f"Failed to describe table {request.id}: {e}")
raise

def drop_table(self, request: DropTableRequest) -> DropTableResponse:
    """Drop a Lance table from the Hive Metastore and delete its data.

    The table is fetched first so that it can be verified to be a Lance
    table and so that its storage location can be reported back; it is
    then dropped with ``deleteData=True``.

    Args:
        request: Drop request whose ``id`` identifies the table.

    Returns:
        A ``DropTableResponse`` carrying the table's storage location, or
        ``None`` for the location when the table has no storage descriptor.

    Raises:
        ValueError: If the table does not exist or is not a Lance table.
        Exception: Any other client failure is logged and re-raised as is.
    """
    try:
        database, table_name = self._normalize_identifier(request.id)

        with self.client as client:
            table = client.get_table(database, table_name)

            # Only tables tagged as Lance (case-insensitive) may be dropped here.
            if not table.parameters:
                raise ValueError(f"Table {request.id} is not a Lance table")
            table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower()
            if table_type != LANCE_TABLE_FORMAT:
                raise ValueError(f"Table {request.id} is not a Lance table")

            # Capture the location before the drop; it is returned to the caller.
            location = table.sd.location if table.sd else None

            client.drop_table(database, table_name, deleteData=True)

            return DropTableResponse(location=location)
    except Exception as e:
        # NoSuchObjectException may be falsy here (presumably when the optional
        # Hive client import is unavailable — confirm against the import block).
        if NoSuchObjectException and isinstance(e, NoSuchObjectException):
            # Chain explicitly so the traceback shows the metastore error as
            # the cause rather than as an error raised during handling.
            raise ValueError(f"Table {request.id} does not exist") from e
        logger.error(f"Failed to drop table {request.id}: {e}")
        raise

def deregister_table(
self, request: DeregisterTableRequest
) -> DeregisterTableResponse:
Expand All @@ -405,10 +432,8 @@ def deregister_table(
database, table_name = self._normalize_identifier(request.id)

with self.client as client:
# Get table to check if it's a Lance table
table = client.get_table(database, table_name)

# Check if it's a Lance table (case insensitive)
if not table.parameters:
raise ValueError(f"Table {request.id} is not a Lance table")
table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower()
Expand All @@ -417,7 +442,6 @@ def deregister_table(

location = table.sd.location if table.sd else None

# Drop the table metadata only (don't delete data)
client.drop_table(database, table_name, deleteData=False)

return DeregisterTableResponse(location=location)
Expand Down
28 changes: 28 additions & 0 deletions python/src/lance_namespace_impls/hive3.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
CreateNamespaceResponse,
DropNamespaceRequest,
DropNamespaceResponse,
DropTableRequest,
DropTableResponse,
ListTablesRequest,
ListTablesResponse,
DeclareTableRequest,
Expand Down Expand Up @@ -477,6 +479,32 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
logger.error(f"Failed to describe table {request.id}: {e}")
raise

def drop_table(self, request: DropTableRequest) -> DropTableResponse:
    """Drop a Lance table from the Hive Metastore and delete its data.

    The table is fetched first so that it can be verified to be a Lance
    table and so that its storage location can be reported back; it is
    then dropped with ``deleteData=True``.

    Args:
        request: Drop request whose ``id`` identifies the table.

    Returns:
        A ``DropTableResponse`` carrying the table's storage location, or
        ``None`` for the location when the table has no storage descriptor.

    Raises:
        ValueError: If the table does not exist or is not a Lance table.
        Exception: Any other client failure is logged and re-raised as is.
    """
    try:
        # NOTE(review): ``catalog`` is parsed but not passed to the client
        # calls below, so the lookup and drop are keyed on database and table
        # name only — confirm this is intended for the Hive 3 client.
        catalog, database, table_name = self._normalize_identifier(request.id)

        with self.client as client:
            table = client.get_table(database, table_name)

            # Only tables tagged as Lance (case-insensitive) may be dropped here.
            if not table.parameters:
                raise ValueError(f"Table {request.id} is not a Lance table")
            table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower()
            if table_type != LANCE_TABLE_FORMAT:
                raise ValueError(f"Table {request.id} is not a Lance table")

            # Capture the location before the drop; it is returned to the caller.
            location = table.sd.location if table.sd else None

            client.drop_table(database, table_name, deleteData=True)

            return DropTableResponse(location=location)

    except Exception as e:
        # NoSuchObjectException may be falsy here (presumably when the optional
        # Hive client import is unavailable — confirm against the import block).
        if NoSuchObjectException and isinstance(e, NoSuchObjectException):
            # Chain explicitly so the traceback shows the metastore error as
            # the cause rather than as an error raised during handling.
            raise ValueError(f"Table {request.id} does not exist") from e
        logger.error(f"Failed to drop table {request.id}: {e}")
        raise

def deregister_table(
self, request: DeregisterTableRequest
) -> DeregisterTableResponse:
Expand Down