diff --git a/.github/workflows/java-integ-glue.yml b/.github/workflows/java-integ-glue.yml
index 9d7364a..a5f6efd 100644
--- a/.github/workflows/java-integ-glue.yml
+++ b/.github/workflows/java-integ-glue.yml
@@ -36,6 +36,7 @@ concurrency:
jobs:
integration-test:
+ if: github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository
runs-on: ubuntu-24.04
timeout-minutes: 30
steps:
diff --git a/.github/workflows/java-integ-unity.yml b/.github/workflows/java-integ-unity.yml
index ed4d054..f067c08 100644
--- a/.github/workflows/java-integ-unity.yml
+++ b/.github/workflows/java-integ-unity.yml
@@ -55,8 +55,8 @@ jobs:
run: |
echo "Waiting for Unity Catalog to be ready..."
timeout 120 bash -c '
- until curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null 2>&1; do
- echo "Waiting for Unity Catalog API..."
+ until [ "$(docker inspect --format="{{.State.Health.Status}}" unity-catalog 2>/dev/null)" = "healthy" ]; do
+ echo "Waiting for Unity Catalog container health..."
sleep 5
done
' || {
@@ -64,6 +64,7 @@ jobs:
docker compose -f docker/unity/docker-compose.yml logs
exit 1
}
+ curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null
echo "Unity Catalog is ready"
- name: Create test catalog
run: |
diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml
index 46304e3..07e05a1 100644
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -96,6 +96,17 @@ jobs:
"lance-namespace-glue"
"lance-namespace-hive2"
"lance-namespace-hive3"
+ "lance-namespace-iceberg"
+ "lance-namespace-unity"
+ "lance-namespace-polaris"
+ )
+
+ # Implementation artifacts also publish an attached shaded bundle classifier.
+ BUNDLE_ARTIFACTS=(
+ "lance-namespace-glue"
+ "lance-namespace-hive2"
+ "lance-namespace-hive3"
+ "lance-namespace-iceberg"
"lance-namespace-unity"
"lance-namespace-polaris"
)
@@ -122,6 +133,17 @@ jobs:
fi
done
+ for ARTIFACT_ID in "${BUNDLE_ARTIFACTS[@]}"; do
+ URL="https://repo1.maven.org/maven2/org/lance/${ARTIFACT_ID}/${VERSION}/${ARTIFACT_ID}-${VERSION}-bundle.jar"
+
+ if curl --head --silent --fail "$URL" > /dev/null 2>&1; then
+ echo "OK ${ARTIFACT_ID} bundle is available"
+ else
+ echo "X ${ARTIFACT_ID} bundle is not yet available"
+ ALL_AVAILABLE=false
+ fi
+ done
+
if [ "$ALL_AVAILABLE" = true ]; then
echo ""
echo "All artifacts are now available in Maven Central!"
diff --git a/.github/workflows/python-integ-glue.yml b/.github/workflows/python-integ-glue.yml
index c83f233..ed0f086 100644
--- a/.github/workflows/python-integ-glue.yml
+++ b/.github/workflows/python-integ-glue.yml
@@ -40,6 +40,7 @@ concurrency:
jobs:
integration-test:
+ if: github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository
runs-on: ubuntu-24.04
timeout-minutes: 30
steps:
diff --git a/docker/unity/docker-compose.yml b/docker/unity/docker-compose.yml
index 6e72cfe..322f5a1 100644
--- a/docker/unity/docker-compose.yml
+++ b/docker/unity/docker-compose.yml
@@ -15,7 +15,7 @@ services:
networks:
- unity-network
healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:8080/api/2.1/unity-catalog/catalogs"]
+ test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:8080/api/2.1/unity-catalog/catalogs"]
interval: 10s
timeout: 10s
retries: 10
diff --git a/docs/src/glue.md b/docs/src/glue.md
index aee4da6..eae1fe9 100644
--- a/docs/src/glue.md
+++ b/docs/src/glue.md
@@ -78,7 +78,7 @@ The **table location** is stored in the [`StorageDescriptor.Location`](https://d
## Lance Table Identification
-A table in AWS Glue is identified as a Lance table when it meets the following criteria: the `TableType` is `EXTERNAL_TABLE`, and the `Parameters` map contains a key `table_type` with value `lance` (case insensitive). The `StorageDescriptor.Location` must point to a valid Lance table root directory.
+A table in AWS Glue is identified as a Lance table when it meets the following criteria: the `TableType` is `EXTERNAL_TABLE`, and the `Parameters` map contains a key `table_type` with value `lance` (case insensitive). The `StorageDescriptor.Location` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests.
## Basic Operations
@@ -191,9 +191,10 @@ The implementation:
- `DatabaseName`: the database name
- `TableInput.Name`: the table name
- `TableInput.TableType`: `EXTERNAL_TABLE`
- - `TableInput.Parameters`: include `table_type=lance` and other properties
+ - `TableInput.Parameters`: request `properties` merged with implementation markers such as `table_type=lance`
- `TableInput.StorageDescriptor.Location`: the specified table location
4. POST the CreateTable request to Glue
+5. Return the declared table location, catalog table properties, optional storage options, and `managed_versioning=false`
**Error Handling:**
@@ -215,7 +216,8 @@ The implementation:
2. Verify the namespace exists using [GetDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html)
3. Use [GetTables](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTables.html) with `CatalogId` and `DatabaseName`
4. Filter tables where `Parameters.table_type=lance` (case insensitive)
-5. Sort the results and apply pagination using `NextToken`
+5. If `include_declared=false`, only include catalog entries whose `StorageDescriptor.Location` can be opened as a Lance dataset
+6. Sort the results and apply pagination using `NextToken`
**Error Handling:**
@@ -227,14 +229,15 @@ If the Glue service is unavailable, return error code `17` (ServiceUnavailable).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, catalog table properties, `managed_versioning=false`, and any implementation storage options that should be returned to the caller.
The implementation:
1. Parse the table identifier to extract catalog, database, and table name
2. Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) with `CatalogId`, `DatabaseName`, and `Name`
3. Validate that the table is a Lance table (check `Parameters.table_type=lance`)
-4. Return the table location from `StorageDescriptor.Location` and storage_options from `Parameters`
+4. Return the table location from `StorageDescriptor.Location` and catalog properties from `Parameters`
+5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -255,7 +258,8 @@ The implementation:
1. Parse the table identifier to extract catalog, database, and table name
2. Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) to retrieve and validate the table is a Lance table
3. Use [DeleteTable](https://docs.aws.amazon.com/glue/latest/webapi/API_DeleteTable.html) with `CatalogId`, `DatabaseName`, and `Name`
-4. The underlying Lance table data at `StorageDescriptor.Location` is not deleted
+4. Return the table id, location, and catalog properties
+5. The underlying Lance table data at `StorageDescriptor.Location` is not deleted
**Error Handling:**
diff --git a/docs/src/hive2.md b/docs/src/hive2.md
index 5d21282..8deda59 100644
--- a/docs/src/hive2.md
+++ b/docs/src/hive2.md
@@ -38,7 +38,7 @@ The **table location** is stored in the `location` field of the table's `storage
## Lance Table Identification
-A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` must point to a valid Lance table root directory.
+A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests.
## Basic Operations
@@ -116,8 +116,9 @@ The implementation:
2. Verify the parent namespace exists
3. Create an HMS Table object with `tableType=EXTERNAL_TABLE`
4. Set the storage descriptor with the specified or default location. When location is not specified, it defaults to `{root}/{database}.db/{table}`
-5. Add `table_type=lance` to the table parameters
+5. Merge request `properties` with required table parameters such as `table_type=lance` and `managed_by=storage`
6. Register the table in HMS
+7. Return the declared table location, table parameters, and `managed_versioning=false`
**Error Handling:**
@@ -137,7 +138,8 @@ The implementation:
2. Verify the namespace exists
3. Retrieve all tables in the database
4. Filter tables where `parameters.table_type=lance`
-5. Sort the results and apply pagination
+5. If `include_declared=false`, only include catalog entries whose storage descriptor location can be opened as a Lance dataset
+6. Sort the results and apply pagination
**Error Handling:**
@@ -147,14 +149,15 @@ If the HMS connection fails, return error code `17` (ServiceUnavailable).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location is returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, HMS table parameters as `properties`, and `managed_versioning=false`.
The implementation:
1. Parse the table identifier
2. Retrieve the Table object from HMS
3. Validate that it is a Lance table (check `table_type=lance`)
-4. Return the table location from `storageDescriptor.location`
+4. Return the table location from `storageDescriptor.location` and the table parameters as `properties`
+5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -191,6 +194,7 @@ The implementation:
1. Parse the table identifier
2. Retrieve the Table object and validate it is a Lance table
3. Drop the table from HMS with `deleteData=false`
+4. Return the table id, location, and table parameters
**Error Handling:**
diff --git a/docs/src/hive3.md b/docs/src/hive3.md
index 94ad14e..aa553a6 100644
--- a/docs/src/hive3.md
+++ b/docs/src/hive3.md
@@ -38,7 +38,7 @@ The **table location** is stored in the [`location`](https://github.com/apache/h
## Lance Table Identification
-A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` must point to a valid Lance table root directory.
+A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests.
## Basic Operations
@@ -123,8 +123,9 @@ The implementation:
2. Verify the parent namespace exists
3. Create an HMS Table object with `tableType=EXTERNAL_TABLE`
4. Set the storage descriptor with the specified or default location. When location is not specified, it defaults to `{root}/{database}.db/{table}` for the default `hive` catalog (hive2-compatible), or `{root}/{catalog}/{database}.db/{table}` for other catalogs
-5. Add `table_type=lance` to the table parameters
+5. Merge request `properties` with required table parameters such as `table_type=lance` and `managed_by=storage`
6. Register the table in HMS
+7. Return the declared table location, table parameters, and `managed_versioning=false`
**Error Handling:**
@@ -144,7 +145,8 @@ The implementation:
2. Verify the namespace exists
3. Retrieve all tables in the database
4. Filter tables where `parameters.table_type=lance`
-5. Sort the results and apply pagination
+5. If `include_declared=false`, only include catalog entries whose storage descriptor location can be opened as a Lance dataset
+6. Sort the results and apply pagination
**Error Handling:**
@@ -154,14 +156,15 @@ If the HMS connection fails, return error code `17` (ServiceUnavailable).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location is returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, HMS table parameters as `properties`, and `managed_versioning=false`.
The implementation:
1. Parse the table identifier
2. Retrieve the Table object from HMS
3. Validate that it is a Lance table (check `table_type=lance`)
-4. Return the table location from `storageDescriptor.location`
+4. Return the table location from `storageDescriptor.location` and the table parameters as `properties`
+5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -198,6 +201,7 @@ The implementation:
1. Parse the table identifier
2. Retrieve the Table object and validate it is a Lance table
3. Drop the table from HMS with `deleteData=false`
+4. Return the table id, location, and table parameters
**Error Handling:**
diff --git a/docs/src/iceberg.md b/docs/src/iceberg.md
index 54904d1..8b5697b 100644
--- a/docs/src/iceberg.md
+++ b/docs/src/iceberg.md
@@ -50,7 +50,7 @@ The **table location** is stored in the `location` field of the Iceberg table me
## Lance Table Identification
-A table in Iceberg REST Catalog is identified as a Lance table when the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `location` must point to a valid Lance table root directory. The Iceberg table itself serves as a metadata wrapper, with the actual data stored in Lance format.
+A table in Iceberg REST Catalog is identified as a Lance table when the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `location` may be declared before a Lance dataset exists. The Iceberg table itself serves as a metadata wrapper, with the actual data stored in Lance format once the table is materialized.
## Basic Operations
@@ -146,7 +146,7 @@ The implementation:
- `schema`: a dummy Iceberg schema with a single nullable string column `dummy`
- `properties`: table properties including `table_type=lance`
6. POST to `/v1/{prefix}/namespaces/{namespace}/tables`
-7. Return the declared table location
+7. Return the declared table location, catalog table properties, and `managed_versioning=false`
**Error Handling:**
@@ -167,7 +167,8 @@ The implementation:
3. Extract the namespace path from the remaining elements
4. GET `/v1/{prefix}/namespaces/{namespace}/tables`
5. For each table, load its metadata and filter tables where `properties.table_type=lance`
-6. Extract table names from the response identifiers
+6. If `include_declared=false`, only include catalog entries whose Iceberg metadata location can be opened as a Lance dataset
+7. Extract table names from the response identifiers
**Error Handling:**
@@ -177,7 +178,7 @@ If the server returns an error, return error code `18` (Internal).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Iceberg table properties as `properties`, and `managed_versioning=false`.
The implementation:
@@ -187,7 +188,8 @@ The implementation:
4. Extract the table name from the last element
5. GET `/v1/{prefix}/namespaces/{namespace}/tables/{table}`
6. Verify the table has `table_type=lance` property
-7. Return the table location and storage_options from `properties`
+7. Return the table location and Iceberg table properties
+8. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -207,7 +209,8 @@ The implementation:
2. Resolve the API prefix from the warehouse config cache
3. Extract the namespace path from the middle elements
4. Extract the table name from the last element
-5. DELETE `/v1/{prefix}/namespaces/{namespace}/tables/{table}?purgeRequested=false`
+5. Load the table metadata, then DELETE `/v1/{prefix}/namespaces/{namespace}/tables/{table}?purgeRequested=false`
+6. Return the table id, location, and Iceberg table properties
**Error Handling:**
diff --git a/docs/src/polaris.md b/docs/src/polaris.md
index 9c7bd3f..2f4951c 100644
--- a/docs/src/polaris.md
+++ b/docs/src/polaris.md
@@ -48,7 +48,7 @@ The **table location** is stored in the `base-location` field of the Generic Tab
## Lance Table Identification
-A table in Polaris is identified as a Lance table when it is a Generic Table with `format` set to `lance`. The `base-location` must point to a valid Lance table root directory. The table `properties` should contain `table_type=lance` for consistency with other catalog implementations.
+A table in Polaris is identified as a Lance table when it is a Generic Table with `format` set to `lance`. The `base-location` may be declared before a Lance dataset exists. The table `properties` should contain `table_type=lance` for consistency with other catalog implementations.
## Basic Operations
@@ -140,7 +140,7 @@ The implementation:
- `doc`: optional description from properties
- `properties`: table properties including `table_type=lance`
4. POST to `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables`
-5. Return the created table location and properties
+5. Return the created table location, table properties, and `managed_versioning=false`
**Error Handling:**
@@ -159,7 +159,8 @@ The implementation:
1. Parse the namespace identifier to extract the catalog (first level) and namespace path
2. Validate that at least 2 levels are provided (catalog + namespace)
3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables`
-4. Extract table names from the response identifiers
+4. When `include_declared=true` or unset, extract table names from the response identifiers
+5. When `include_declared=false`, load each generic table and only include entries whose `base-location` can be opened as a Lance dataset
**Error Handling:**
@@ -169,7 +170,7 @@ If the server returns an error, return error code `18` (Internal).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Polaris table properties as `properties`, and `managed_versioning=false`.
The implementation:
@@ -177,7 +178,8 @@ The implementation:
2. Validate that at least 3 levels are provided (catalog + namespace + table)
3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}`
4. Verify the table format is `lance`
-5. Return the table location from `base-location` and storage_options from `properties`
+5. Return the table location from `base-location` and Polaris table properties
+6. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -195,7 +197,8 @@ The implementation:
1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level)
2. Validate that at least 3 levels are provided (catalog + namespace + table)
-3. DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}`
+3. Load the generic table, then DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}`
+4. Return the table id, location, and Polaris table properties
**Error Handling:**
diff --git a/docs/src/unity.md b/docs/src/unity.md
index a8199dd..39bab4e 100644
--- a/docs/src/unity.md
+++ b/docs/src/unity.md
@@ -46,7 +46,7 @@ The **table location** is stored in the `storage_location` field of the Unity Ta
## Lance Table Identification
-A table in Unity Catalog is identified as a Lance table when it meets the following criteria: the `table_type` is `EXTERNAL`, and the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `storage_location` must point to a valid Lance table root directory.
+A table in Unity Catalog is identified as a Lance table when it meets the following criteria: the `table_type` is `EXTERNAL`, and the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `storage_location` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests.
Note: Unity Catalog does not natively recognize the `LANCE` data source format, so `data_source_format` is set to `TEXT` as a generic format for external tables. The actual format is determined by the `table_type=lance` property.
@@ -135,7 +135,7 @@ The implementation:
- `storage_location`: the specified or default location
- `properties`: including `table_type=lance`
3. POST to `/tables` endpoint
-4. Return the created table location and properties
+4. Return the created table location, table properties, and `managed_versioning=false`
**Error Handling:**
@@ -154,7 +154,8 @@ The implementation:
1. Parse the namespace identifier (must be 2-level: catalog.schema)
2. GET `/tables` with catalog_name and schema_name parameters
3. Filter tables where `properties.table_type=lance`
-4. Sort the results
+4. If `include_declared=false`, only include catalog entries whose `storage_location` can be opened as a Lance dataset
+5. Sort the results
**Error Handling:**
@@ -164,14 +165,15 @@ If the server returns an error, return error code `18` (Internal).
### DescribeTable
-Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null.
+Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Unity table properties as `properties`, and `managed_versioning=false`.
The implementation:
1. Parse the table identifier (must be 3-level: catalog.schema.table)
2. GET `/tables/{catalog}.{schema}.{table}`
3. Verify the table is a Lance table (check `properties.table_type=lance`)
-4. Return the table location from `storage_location` and storage_options from `properties`
+4. Return the table location from `storage_location` and Unity table properties
+5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset
**Error Handling:**
@@ -190,6 +192,7 @@ The implementation:
1. Parse the table identifier (must be 3-level: catalog.schema.table)
2. GET the table and verify it is a Lance table
3. DELETE `/tables/{catalog}.{schema}.{table}`
+4. Return the table id, location, and Unity table properties
**Error Handling:**
diff --git a/java/Makefile b/java/Makefile
index ac70fc4..0c178ef 100644
--- a/java/Makefile
+++ b/java/Makefile
@@ -23,6 +23,10 @@ build-glue:
./mvnw spotless:apply -pl lance-namespace-glue -am
./mvnw install -pl lance-namespace-glue -am -DskipTests
+.PHONY: bundle-glue
+bundle-glue:
+ ./mvnw package -pl lance-namespace-glue -am -DskipTests
+
.PHONY: test-glue
test-glue:
./mvnw test -pl lance-namespace-glue -Dtest="!*Integration"
@@ -129,6 +133,10 @@ build:
./mvnw spotless:apply
./mvnw install -DskipTests
+.PHONY: bundle
+bundle:
+ ./mvnw package -DskipTests
+
.PHONY: test
test:
./mvnw test
diff --git a/java/lance-namespace-glue/pom.xml b/java/lance-namespace-glue/pom.xml
index 04cd34d..daecbf2 100644
--- a/java/lance-namespace-glue/pom.xml
+++ b/java/lance-namespace-glue/pom.xml
@@ -22,6 +22,12 @@
+
+ org.lance
+ lance-namespace-impls-core
+ ${project.version}
+
+
software.amazon.awssdk
glue
@@ -93,6 +99,10 @@
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
diff --git a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java
index 82af17e..66a7cc8 100644
--- a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java
+++ b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java
@@ -31,11 +31,15 @@
import org.lance.namespace.model.DescribeTableResponse;
import org.lance.namespace.model.DropNamespaceRequest;
import org.lance.namespace.model.DropNamespaceResponse;
+import org.lance.namespace.model.DropTableRequest;
+import org.lance.namespace.model.DropTableResponse;
import org.lance.namespace.model.JsonArrowSchema;
import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;
import org.lance.namespace.model.ListTablesRequest;
import org.lance.namespace.model.ListTablesResponse;
+import org.lance.namespace.model.TableExistsRequest;
+import org.lance.namespace.util.LanceTableUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
@@ -54,7 +58,6 @@
import software.amazon.awssdk.services.glue.model.GetDatabasesRequest;
import software.amazon.awssdk.services.glue.model.GetDatabasesResponse;
import software.amazon.awssdk.services.glue.model.GetTableRequest;
-import software.amazon.awssdk.services.glue.model.GetTableVersionRequest;
import software.amazon.awssdk.services.glue.model.GetTablesRequest;
import software.amazon.awssdk.services.glue.model.GetTablesResponse;
import software.amazon.awssdk.services.glue.model.GlueException;
@@ -221,7 +224,10 @@ public ListTablesResponse listTables(ListTablesRequest request) {
GetTablesResponse response =
glueClient.getTables(
listRequest.maxResults(fetchSize).nextToken(glueNextToken).build());
- response.tableList().stream().filter(this::isLanceTable).forEach(t -> tables.add(t.name()));
+ response.tableList().stream()
+ .filter(this::isLanceTable)
+ .filter(t -> shouldIncludeTable(t, request.getIncludeDeclared()))
+ .forEach(t -> tables.add(t.name()));
glueNextToken = response.nextToken();
remaining = pageSize - tables.size();
} while (glueNextToken != null && remaining > 0);
@@ -245,7 +251,7 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
String namespaceName = request.getId().get(0);
String tableName = request.getId().get(1);
- Table table = getGlueTableAtVersion(namespaceName, tableName, request.getVersion());
+ Table table = getGlueTable(namespaceName, tableName);
ensureLanceTable(table);
DescribeTableResponse response = new DescribeTableResponse();
@@ -253,9 +259,25 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
response.setLocation(table.storageDescriptor().location());
}
response.setStorageOptions(config.getStorageOptions());
+ response.setProperties(table.parameters());
+ response.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ response.setIsOnlyDeclared(
+ LanceTableUtil.isOnlyDeclared(response.getLocation(), config.getStorageOptions()));
+ }
return response;
}
+ @Override
+ public void tableExists(TableExistsRequest request) {
+ validateTableId(request.getId());
+ String namespaceName = request.getId().get(0);
+ String tableName = request.getId().get(1);
+
+ Table table = getGlueTable(namespaceName, tableName);
+ ensureLanceTable(table);
+ }
+
@Override
public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
validateTableId(request.getId());
@@ -286,6 +308,39 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
}
}
+ @Override
+ public DropTableResponse dropTable(DropTableRequest request) {
+ validateTableId(request.getId());
+ String namespaceName = request.getId().get(0);
+ String tableName = request.getId().get(1);
+
+ try {
+ Table table = getGlueTable(namespaceName, tableName);
+ ensureLanceTable(table);
+ String location =
+ table.storageDescriptor() != null ? table.storageDescriptor().location() : null;
+
+ deleteGlueTable(namespaceName, tableName, true);
+ if (location != null && !location.isEmpty()) {
+ safeDropDataset(location);
+ }
+
+ DropTableResponse response = new DropTableResponse();
+ response.setId(request.getId());
+ response.setLocation(location);
+ if (table.parameters() != null && !table.parameters().isEmpty()) {
+ response.setProperties(table.parameters());
+ }
+ return response;
+ } catch (EntityNotFoundException e) {
+ throw GlueToLanceErrorConverter.notFound(
+ e, "Glue table not found: %s.%s", namespaceName, tableName);
+ } catch (GlueException e) {
+ throw GlueToLanceErrorConverter.serverError(
+ e, "Failed to drop table: %s.%s", namespaceName, tableName);
+ }
+ }
+
@Override
public DeclareTableResponse declareTable(DeclareTableRequest request) {
validateTableId(request.getId());
@@ -298,9 +353,11 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
}
try {
- Map params = Maps.newHashMap();
- params.put(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE);
- params.put(MANAGED_BY_PROP, STORAGE_VALUE);
+ Map params =
+ LanceTableUtil.mergeTableProperties(
+ request.getProperties(),
+ ImmutableMap.of(
+ TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE, MANAGED_BY_PROP, STORAGE_VALUE));
TableInput tableInput =
TableInput.builder()
@@ -320,6 +377,8 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
DeclareTableResponse response = new DeclareTableResponse();
response.setLocation(location);
response.setStorageOptions(config.getStorageOptions());
+ response.setProperties(params);
+ response.setManagedVersioning(false);
return response;
} catch (AlreadyExistsException e) {
throw GlueToLanceErrorConverter.tableConflict(
@@ -522,6 +581,15 @@ private boolean isLanceTable(Table table) {
return LANCE_TABLE_TYPE_VALUE.equalsIgnoreCase(table.parameters().get(TABLE_TYPE_PROP));
}
+ private boolean shouldIncludeTable(Table table, Boolean includeDeclared) {
+ if (LanceTableUtil.includeDeclared(includeDeclared)) {
+ return true;
+ }
+ String location =
+ table.storageDescriptor() != null ? table.storageDescriptor().location() : null;
+ return LanceTableUtil.hasStorageComponents(location, config.getStorageOptions());
+ }
+
private void ensureLanceTable(Table table) {
if (!isLanceTable(table)) {
throw new TableNotFoundException(
@@ -586,37 +654,6 @@ private void deleteGlueTable(String namespaceName, String tableName, boolean fai
}
}
- private Table getGlueTableAtVersion(String namespaceName, String tableName, Long version) {
- try {
- Table table;
- if (version != null) {
- // Get specific table version
- String tableVersion = String.valueOf(version);
- table =
- glueClient
- .getTableVersion(
- GetTableVersionRequest.builder()
- .catalogId(config.catalogId())
- .databaseName(namespaceName)
- .tableName(tableName)
- .versionId(tableVersion)
- .build())
- .tableVersion()
- .table();
- } else {
- // Get current table version
- table = getGlueTable(namespaceName, tableName);
- }
- return table;
- } catch (EntityNotFoundException e) {
- throw GlueToLanceErrorConverter.notFound(
- e, "Glue table not found: %s.%s", namespaceName, tableName);
- } catch (GlueException e) {
- throw GlueToLanceErrorConverter.serverError(
- e, "Failed to get Glue table: %s.%s", namespaceName, tableName);
- }
- }
-
private void validateSchemaNotNull(
JsonArrowSchema schema, String namespaceName, String tableName) {
if (schema == null) {
diff --git a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java
index 0735518..bfb322c 100644
--- a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java
+++ b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java
@@ -24,10 +24,13 @@
import org.lance.namespace.model.DescribeTableRequest;
import org.lance.namespace.model.DescribeTableResponse;
import org.lance.namespace.model.DropNamespaceRequest;
+import org.lance.namespace.model.DropTableRequest;
+import org.lance.namespace.model.DropTableResponse;
import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;
import org.lance.namespace.model.ListTablesRequest;
import org.lance.namespace.model.ListTablesResponse;
+import org.lance.namespace.model.TableExistsRequest;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -57,6 +60,7 @@
import software.amazon.awssdk.services.glue.model.GetDatabasesResponse;
import software.amazon.awssdk.services.glue.model.GetTableRequest;
import software.amazon.awssdk.services.glue.model.GetTableResponse;
+import software.amazon.awssdk.services.glue.model.GetTableVersionRequest;
import software.amazon.awssdk.services.glue.model.GetTablesRequest;
import software.amazon.awssdk.services.glue.model.GetTablesResponse;
import software.amazon.awssdk.services.glue.model.StorageDescriptor;
@@ -565,6 +569,25 @@ public void testDescribeTableBasic() {
assertEquals("s3://bucket/tbl", resp.getLocation());
}
+ @Test
+ public void testDescribeTableWithLanceVersionUsesCurrentGlueTable() {
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE))
+ .build();
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+
+ DescribeTableResponse resp =
+ glueNamespace.describeTable(
+ new DescribeTableRequest().id(ImmutableList.of("ns1", "tbl")).version(42L));
+
+ assertEquals("s3://bucket/tbl", resp.getLocation());
+ verify(glue, never()).getTableVersion(any(GetTableVersionRequest.class));
+ }
+
@Test
public void testDescribeTableNonLanceTable() {
Table tbl =
@@ -602,6 +625,76 @@ public void testDescribeTableWithInvalidId() {
() -> glueNamespace.describeTable(new DescribeTableRequest().id(ImmutableList.of("ns1"))));
}
+ @Test
+ public void testTableExistsBasic() {
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE))
+ .build();
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+
+ glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")));
+ }
+
+ @Test
+ public void testTableExistsWithLanceVersionUsesCurrentGlueTable() {
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE))
+ .build();
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+
+ glueNamespace.tableExists(
+ new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")).version(42L));
+
+ verify(glue, never()).getTableVersion(any(GetTableVersionRequest.class));
+ }
+
+ @Test
+ public void testTableExistsNonLanceTable() {
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .build();
+
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+
+ assertThrows(
+ LanceNamespaceException.class,
+ () ->
+ glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns", "tbl"))));
+ }
+
+ @Test
+ public void testTableExistsNotFound() {
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build());
+
+ assertThrows(
+ LanceNamespaceException.class,
+ () ->
+ glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1", "tbl"))));
+ }
+
+ @Test
+ public void testTableExistsWithInvalidId() {
+ assertThrows(
+ LanceNamespaceException.class,
+ () -> glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1"))));
+ assertThrows(
+ LanceNamespaceException.class, () -> glueNamespace.tableExists(new TableExistsRequest()));
+
+ verify(glue, never()).getTable(any(GetTableRequest.class));
+ }
+
@Test
public void testBasicDeregisterTable() {
List id = ImmutableList.of("ns1", "tbl");
@@ -624,6 +717,68 @@ public void testBasicDeregisterTable() {
assertEquals(ImmutableMap.of("key", "val", "table_type", "lance"), resp.getProperties());
}
+ @Test
+ public void testBasicDropTable() {
+ List id = ImmutableList.of("ns1", "tbl");
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .parameters(ImmutableMap.of("key", "val", TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE))
+ .build();
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+ when(glue.deleteTable(any(DeleteTableRequest.class)))
+ .thenReturn(DeleteTableResponse.builder().build());
+
+ DropTableResponse resp = glueNamespace.dropTable(new DropTableRequest().id(id));
+
+ assertEquals(id, resp.getId());
+ assertEquals("s3://bucket/tbl", resp.getLocation());
+ assertEquals(ImmutableMap.of("key", "val", "table_type", "lance"), resp.getProperties());
+ verify(glue).deleteTable(any(DeleteTableRequest.class));
+ }
+
+ @Test
+ public void testDropTableRejectsNonLanceTable() {
+ Table tbl =
+ Table.builder()
+ .name("tbl")
+ .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build())
+ .build();
+
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenReturn(GetTableResponse.builder().table(tbl).build());
+
+ assertThrows(
+ LanceNamespaceException.class,
+ () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns", "tbl"))));
+ verify(glue, never()).deleteTable(any(DeleteTableRequest.class));
+ }
+
+ @Test
+ public void testDropTableNotFound() {
+ when(glue.getTable(any(GetTableRequest.class)))
+ .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build());
+
+ assertThrows(
+ LanceNamespaceException.class,
+ () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns1", "tbl"))));
+ verify(glue, never()).deleteTable(any(DeleteTableRequest.class));
+ }
+
+ @Test
+ public void testDropTableWithInvalidId() {
+ assertThrows(
+ LanceNamespaceException.class,
+ () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns1"))));
+ assertThrows(
+ LanceNamespaceException.class, () -> glueNamespace.dropTable(new DropTableRequest()));
+
+ verify(glue, never()).getTable(any(GetTableRequest.class));
+ verify(glue, never()).deleteTable(any(DeleteTableRequest.class));
+ }
+
@Test
public void testDeregisterTableRejectsNonLanceTable() {
Table tbl =
diff --git a/java/lance-namespace-hive2/pom.xml b/java/lance-namespace-hive2/pom.xml
index be0e29d..6533a3a 100644
--- a/java/lance-namespace-hive2/pom.xml
+++ b/java/lance-namespace-hive2/pom.xml
@@ -207,7 +207,6 @@
org.lance
lance-namespace-impls-core
${project.version}
- test
org.junit.jupiter
@@ -222,4 +221,13 @@
test
-
\ No newline at end of file
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+
+
diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java
index 6f19192..f61ef01 100644
--- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java
+++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java
@@ -42,6 +42,7 @@
import org.lance.namespace.model.ListTablesResponse;
import org.lance.namespace.model.NamespaceExistsRequest;
import org.lance.namespace.model.TableExistsRequest;
+import org.lance.namespace.util.LanceTableUtil;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@@ -231,7 +232,7 @@ public ListTablesResponse listTables(ListTablesRequest request) {
!nsId.isRoot() && nsId.levels() == 1, "Expect a 1-level namespace but get %s", nsId);
String db = nsId.levelAtListPos(0).toLowerCase();
- List tables = doListTables(db);
+ List tables = doListTables(db, request.getIncludeDeclared());
Collections.sort(tables);
PageUtil.Page page =
@@ -278,17 +279,33 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
ValidationUtil.checkArgument(
tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId);
- Optional location = doDescribeTable(tableId);
+ String db = tableId.levelAtListPos(0).toLowerCase();
+ String table = tableId.levelAtListPos(1).toLowerCase();
+ Optional hmsTable = Hive2Util.getTable(clientPool, db, table);
- if (!location.isPresent()) {
+ if (!hmsTable.isPresent()) {
throw new TableNotFoundException(
String.format("Table does not exist: %s", tableId.stringStyleId()),
TableNotFound.getType(),
tableId.stringStyleId());
}
+ Hive2Util.validateLanceTable(hmsTable.get());
+ String location = hmsTable.get().getSd() != null ? hmsTable.get().getSd().getLocation() : null;
+ if (location == null || location.isEmpty()) {
+ throw new TableNotFoundException(
+ String.format("Table does not have a location: %s", tableId.stringStyleId()),
+ TableNotFound.getType(),
+ tableId.stringStyleId());
+ }
+
DescribeTableResponse response = new DescribeTableResponse();
- response.setLocation(location.get());
+ response.setLocation(location);
+ response.setProperties(hmsTable.get().getParameters());
+ response.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ response.setIsOnlyDeclared(LanceTableUtil.isOnlyDeclared(location, Collections.emptyMap()));
+ }
return response;
}
@@ -306,11 +323,15 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
location = getDefaultTableLocation(tableId.levelAtListPos(0), tableId.levelAtListPos(1));
}
+ Map properties = Hive2Util.createLanceTableParams(request.getProperties());
+
// Create table in metastore without data (pass null for requestData)
- doCreateTable(tableId, null, location, null, null);
+ doCreateTable(tableId, null, location, properties, null);
DeclareTableResponse response = new DeclareTableResponse();
response.setLocation(location);
+ response.setProperties(properties);
+ response.setManagedVersioning(false);
return response;
}
@@ -504,7 +525,7 @@ protected void doCreateTable(
}
}
- protected List doListTables(String db) {
+ protected List doListTables(String db, Boolean includeDeclared) {
try {
// First validate that database exists
Database database = Hive2Util.getDatabaseOrNull(clientPool, db);
@@ -521,7 +542,13 @@ protected List doListTables(String db) {
Optional table = Hive2Util.getTable(clientPool, db, tableName);
if (table.isPresent()) {
Map params = table.get().getParameters();
- if (params != null && "lance".equalsIgnoreCase(params.get("table_type"))) {
+ boolean isLanceTable =
+ params != null && "lance".equalsIgnoreCase(params.get("table_type"));
+ if (isLanceTable
+ && (LanceTableUtil.includeDeclared(includeDeclared)
+ || LanceTableUtil.hasStorageComponents(
+ table.get().getSd() != null ? table.get().getSd().getLocation() : null,
+ Collections.emptyMap()))) {
lanceTables.add(tableName);
}
}
@@ -592,7 +619,7 @@ protected Map doDropNamespace(ObjectIdentifier id, String mode,
// Check if database contains tables (RESTRICT behavior only, not for Cascade)
boolean cascade = "Cascade".equalsIgnoreCase(behavior);
if (!cascade) {
- List tables = doListTables(db);
+ List tables = doListTables(db, true);
if (!tables.isEmpty()) {
throw new InvalidInputException(
String.format(
diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java
index 23c5374..3d82220 100644
--- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java
+++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java
@@ -123,7 +123,7 @@ public static Map createLanceTableParams(Map pro
params.putAll(properties);
}
params.put("table_type", "lance");
- params.putIfAbsent("managed_by", "storage");
+ params.put("managed_by", "storage");
return params;
}
}
diff --git a/java/lance-namespace-hive3/pom.xml b/java/lance-namespace-hive3/pom.xml
index 1523146..0a95bff 100644
--- a/java/lance-namespace-hive3/pom.xml
+++ b/java/lance-namespace-hive3/pom.xml
@@ -122,7 +122,6 @@
org.lance
lance-namespace-impls-core
${project.version}
- test
org.junit.jupiter
@@ -135,4 +134,13 @@
test
-
\ No newline at end of file
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+
+
diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java
index 15f02bb..3a69698 100644
--- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java
+++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java
@@ -43,6 +43,7 @@
import org.lance.namespace.model.ListTablesResponse;
import org.lance.namespace.model.NamespaceExistsRequest;
import org.lance.namespace.model.TableExistsRequest;
+import org.lance.namespace.util.LanceTableUtil;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@@ -270,7 +271,7 @@ public ListTablesResponse listTables(ListTablesRequest request) {
String catalog = nsId.levelAtListPos(0).toLowerCase();
String db = nsId.levelAtListPos(1).toLowerCase();
- List tables = doListTables(catalog, db);
+ List tables = doListTables(catalog, db, request.getIncludeDeclared());
Collections.sort(tables);
PageUtil.Page page =
@@ -295,15 +296,30 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
ValidationUtil.checkArgument(
tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId);
- Optional location = doDescribeTable(tableId);
+ String catalog = tableId.levelAtListPos(0).toLowerCase();
+ String db = tableId.levelAtListPos(1).toLowerCase();
+ String table = tableId.levelAtListPos(2).toLowerCase();
+ Optional hmsTable = Hive3Util.getTable(clientPool, catalog, db, table);
- if (!location.isPresent()) {
+ if (!hmsTable.isPresent()) {
throw new TableNotFoundException(
String.format("Table does not exist: %s", tableId.stringStyleId()));
}
+ Hive3Util.validateLanceTable(hmsTable.get());
+ String location = hmsTable.get().getSd() != null ? hmsTable.get().getSd().getLocation() : null;
+ if (location == null || location.isEmpty()) {
+ throw new TableNotFoundException(
+ String.format("Table does not have a location: %s", tableId.stringStyleId()));
+ }
+
DescribeTableResponse response = new DescribeTableResponse();
- response.setLocation(location.get());
+ response.setLocation(location);
+ response.setProperties(hmsTable.get().getParameters());
+ response.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ response.setIsOnlyDeclared(LanceTableUtil.isOnlyDeclared(location, Collections.emptyMap()));
+ }
return response;
}
@@ -323,11 +339,15 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
tableId.levelAtListPos(0), tableId.levelAtListPos(1), tableId.levelAtListPos(2));
}
- // Create table in metastore without data (pass null for requestData and properties)
- doCreateTable(tableId, null, location, null, null);
+ Map properties = Hive3Util.createLanceTableParams(request.getProperties());
+
+ // Create table in metastore without data (pass null for requestData)
+ doCreateTable(tableId, null, location, properties, null);
DeclareTableResponse response = new DeclareTableResponse();
response.setLocation(location);
+ response.setProperties(properties);
+ response.setManagedVersioning(false);
return response;
}
@@ -556,7 +576,7 @@ protected void doCreateTable(
}
}
- protected List doListTables(String catalog, String db) {
+ protected List doListTables(String catalog, String db, Boolean includeDeclared) {
try {
// First validate that catalog and database exist
Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog);
@@ -578,7 +598,13 @@ protected List doListTables(String catalog, String db) {
Optional table = Hive3Util.getTable(clientPool, catalog, db, tableName);
if (table.isPresent()) {
Map params = table.get().getParameters();
- if (params != null && "lance".equalsIgnoreCase(params.get("table_type"))) {
+ boolean isLanceTable =
+ params != null && "lance".equalsIgnoreCase(params.get("table_type"));
+ if (isLanceTable
+ && (LanceTableUtil.includeDeclared(includeDeclared)
+ || LanceTableUtil.hasStorageComponents(
+ table.get().getSd() != null ? table.get().getSd().getLocation() : null,
+ Collections.emptyMap()))) {
lanceTables.add(tableName);
}
}
@@ -708,7 +734,7 @@ private Map doDropDatabase(
// Check if database contains tables (RESTRICT behavior only, not for Cascade)
boolean cascade = "Cascade".equalsIgnoreCase(behavior);
if (!cascade) {
- List tables = doListTables(catalog, db);
+ List tables = doListTables(catalog, db, true);
if (!tables.isEmpty()) {
throw new InvalidInputException(
String.format(
diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java
index b77eb89..9ffff4c 100644
--- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java
+++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java
@@ -174,6 +174,7 @@ public static Map createLanceTableParams(Map pro
params.putAll(properties);
}
params.put("table_type", "lance");
+ params.put("managed_by", "storage");
return params;
}
}
diff --git a/java/lance-namespace-iceberg/pom.xml b/java/lance-namespace-iceberg/pom.xml
index 3a6ec89..86803a5 100644
--- a/java/lance-namespace-iceberg/pom.xml
+++ b/java/lance-namespace-iceberg/pom.xml
@@ -79,6 +79,10 @@
org.apache.maven.plugins
maven-surefire-plugin
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
diff --git a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java
index ca7038c..33ade28 100644
--- a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java
+++ b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java
@@ -40,6 +40,7 @@
import org.lance.namespace.model.TableExistsRequest;
import org.lance.namespace.rest.RestClient;
import org.lance.namespace.rest.RestClientException;
+import org.lance.namespace.util.LanceTableUtil;
import org.lance.namespace.util.ObjectIdentifier;
import org.lance.namespace.util.ValidationUtil;
@@ -325,7 +326,8 @@ public ListTablesResponse listTables(ListTablesRequest request) {
List tables = new ArrayList<>();
if (response != null && response.getIdentifiers() != null) {
for (IcebergModels.TableIdentifier tableId : response.getIdentifiers()) {
- if (isLanceTable(prefix, namespace, tableId.getName())) {
+ if (shouldIncludeTable(
+ prefix, namespace, tableId.getName(), request.getIncludeDeclared())) {
tables.add(tableId.getName());
}
}
@@ -369,8 +371,9 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
createRequest.setLocation(tablePath);
createRequest.setSchema(IcebergModels.createDummySchema());
- Map properties = new HashMap<>();
- properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE);
+ Map properties =
+ LanceTableUtil.mergeTableProperties(
+ request.getProperties(), Collections.singletonMap(TABLE_TYPE_KEY, TABLE_TYPE_LANCE));
createRequest.setProperties(properties);
String namespacePath = encodeNamespace(namespace);
@@ -383,6 +386,8 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
DeclareTableResponse result = new DeclareTableResponse();
result.setLocation(tablePath);
+ result.setProperties(properties);
+ result.setManagedVersioning(false);
return result;
} catch (RestClientException e) {
if (e.isConflict()) {
@@ -435,7 +440,13 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
DescribeTableResponse result = new DescribeTableResponse();
result.setLocation(response.getMetadata().getLocation());
- result.setStorageOptions(props);
+ result.setProperties(props);
+ result.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ result.setIsOnlyDeclared(
+ LanceTableUtil.isOnlyDeclared(
+ response.getMetadata().getLocation(), Collections.emptyMap()));
+ }
return result;
} catch (RestClientException e) {
if (e.isNotFound()) {
@@ -470,17 +481,27 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName,
IcebergModels.LoadTableResponse.class);
- String location = null;
- if (getResponse != null && getResponse.getMetadata() != null) {
- location = getResponse.getMetadata().getLocation();
+ if (getResponse == null || getResponse.getMetadata() == null) {
+ throw new TableNotFoundException("Table not found: " + tableId.stringStyleId());
+ }
+ Map properties = getResponse.getMetadata().getProperties();
+ if (properties == null
+ || !TABLE_TYPE_LANCE.equalsIgnoreCase(properties.get(TABLE_TYPE_KEY))) {
+ throw new InvalidInputException(
+ String.format(
+ "Table %s is not a Lance table (missing table_type property)",
+ tableId.stringStyleId()));
}
+ String location = getResponse.getMetadata().getLocation();
restClient.delete(
prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName);
LOG.info("Deregistered table: {}", tableId.stringStyleId());
DeregisterTableResponse result = new DeregisterTableResponse();
+ result.setId(request.getId());
result.setLocation(location);
+ result.setProperties(properties);
return result;
} catch (RestClientException e) {
if (e.isNotFound()) {
@@ -513,7 +534,8 @@ private String urlEncode(String s) {
}
}
- private boolean isLanceTable(String prefix, List namespace, String tableName) {
+ private boolean shouldIncludeTable(
+ String prefix, List namespace, String tableName, Boolean includeDeclared) {
try {
String prefixPath = getPrefixPath(prefix);
String namespacePath = encodeNamespace(namespace);
@@ -526,7 +548,12 @@ private boolean isLanceTable(String prefix, List namespace, String table
if (response != null && response.getMetadata() != null) {
Map props = response.getMetadata().getProperties();
- return props != null && TABLE_TYPE_LANCE.equalsIgnoreCase(props.get(TABLE_TYPE_KEY));
+ if (props == null || !TABLE_TYPE_LANCE.equalsIgnoreCase(props.get(TABLE_TYPE_KEY))) {
+ return false;
+ }
+ return LanceTableUtil.includeDeclared(includeDeclared)
+ || LanceTableUtil.hasStorageComponents(
+ response.getMetadata().getLocation(), Collections.emptyMap());
}
} catch (Exception e) {
LOG.debug("Failed to check if table is Lance table: {}", e.getMessage());
diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java
new file mode 100644
index 0000000..eec729d
--- /dev/null
+++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.util;
+
+import org.lance.Dataset;
+import org.lance.ReadOptions;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Utility methods for Lance table metadata shared by namespace implementations. */
+public final class LanceTableUtil {
+
+ private LanceTableUtil() {}
+
+ public static Map mergeTableProperties(
+ Map properties, Map requiredProperties) {
+ Map result = new HashMap<>();
+ if (properties != null) {
+ result.putAll(properties);
+ }
+ if (requiredProperties != null) {
+ result.putAll(requiredProperties);
+ }
+ return result;
+ }
+
+ public static boolean includeDeclared(Boolean includeDeclared) {
+ return includeDeclared == null || includeDeclared;
+ }
+
+ public static boolean isOnlyDeclared(String location, Map storageOptions) {
+ return !hasStorageComponents(location, storageOptions);
+ }
+
+ public static boolean hasStorageComponents(String location, Map storageOptions) {
+ if (location == null || location.isEmpty()) {
+ return false;
+ }
+
+ Map options = storageOptions != null ? storageOptions : Collections.emptyMap();
+ ReadOptions readOptions = new ReadOptions.Builder().setStorageOptions(options).build();
+ try (Dataset ignored = Dataset.open(location, readOptions)) {
+ return true;
+ } catch (RuntimeException | LinkageError e) {
+ return false;
+ }
+ }
+}
diff --git a/java/lance-namespace-polaris/pom.xml b/java/lance-namespace-polaris/pom.xml
index 29052b1..1f51d44 100644
--- a/java/lance-namespace-polaris/pom.xml
+++ b/java/lance-namespace-polaris/pom.xml
@@ -79,4 +79,13 @@
test
-
\ No newline at end of file
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+
+
diff --git a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java
index 21f63a3..b359145 100644
--- a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java
+++ b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java
@@ -40,6 +40,7 @@
import org.lance.namespace.model.TableExistsRequest;
import org.lance.namespace.rest.RestClient;
import org.lance.namespace.rest.RestClientException;
+import org.lance.namespace.util.LanceTableUtil;
import org.lance.namespace.util.ObjectIdentifier;
import org.lance.namespace.util.ValidationUtil;
@@ -50,7 +51,6 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
-import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -286,8 +286,10 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
List namespaceParts = parts.subList(1, parts.size() - 1);
String namespacePath = String.join(".", namespaceParts);
- Map properties = new HashMap<>();
- properties.put(TABLE_TYPE_KEY, TABLE_FORMAT_LANCE);
+ Map properties =
+ LanceTableUtil.mergeTableProperties(
+ request.getProperties(),
+ Collections.singletonMap(TABLE_TYPE_KEY, TABLE_FORMAT_LANCE));
String comment = null;
PolarisModels.CreateGenericTableRequest tableRequest =
@@ -303,7 +305,16 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
LOG.info("Created Lance table: {}.{}.{}", catalog, namespacePath, tableName);
DeclareTableResponse result = new DeclareTableResponse();
- result.setLocation(response.getTable().getBaseLocation());
+ PolarisModels.GenericTable responseTable = response != null ? response.getTable() : null;
+ result.setLocation(
+ responseTable != null && responseTable.getBaseLocation() != null
+ ? responseTable.getBaseLocation()
+ : request.getLocation());
+ result.setProperties(
+ responseTable != null && responseTable.getProperties() != null
+ ? responseTable.getProperties()
+ : properties);
+ result.setManagedVersioning(false);
return result;
} catch (RestClientException e) {
if (e.isConflict()) {
@@ -352,7 +363,12 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
DescribeTableResponse result = new DescribeTableResponse();
result.setLocation(table.getBaseLocation());
- result.setStorageOptions(table.getProperties());
+ result.setProperties(table.getProperties());
+ result.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ result.setIsOnlyDeclared(
+ LanceTableUtil.isOnlyDeclared(table.getBaseLocation(), Collections.emptyMap()));
+ }
return result;
} catch (RestClientException e) {
if (e.isNotFound()) {
@@ -383,7 +399,10 @@ public ListTablesResponse listTables(ListTablesRequest request) {
Set tableNames = new LinkedHashSet<>();
if (response.getIdentifiers() != null) {
for (PolarisModels.TableIdentifier id : response.getIdentifiers()) {
- tableNames.add(id.getName());
+ if (shouldIncludeTable(
+ catalog, namespacePath, id.getName(), request.getIncludeDeclared())) {
+ tableNames.add(id.getName());
+ }
}
}
result.setTables(tableNames);
@@ -419,7 +438,18 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
+ tableName,
PolarisModels.LoadGenericTableResponse.class);
- String location = getResponse.getTable().getBaseLocation();
+ PolarisModels.GenericTable table = getResponse != null ? getResponse.getTable() : null;
+ if (table == null) {
+ throw new TableNotFoundException("Table not found: " + tableId.stringStyleId());
+ }
+ if (!TABLE_FORMAT_LANCE.equalsIgnoreCase(table.getFormat())) {
+ throw new InvalidInputException(
+ String.format(
+ "Table %s is not a Lance table (format: %s)",
+ tableId.stringStyleId(), table.getFormat()));
+ }
+
+ String location = table.getBaseLocation();
restClient.delete(
"/polaris/v1/"
+ catalog
@@ -429,7 +459,9 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
+ tableName);
DeregisterTableResponse result = new DeregisterTableResponse();
+ result.setId(request.getId());
result.setLocation(location);
+ result.setProperties(table.getProperties());
return result;
} catch (RestClientException e) {
if (e.isNotFound()) {
@@ -445,4 +477,31 @@ public void close() throws IOException {
restClient.close();
}
}
+
+ private boolean shouldIncludeTable(
+ String catalog, String namespacePath, String tableName, Boolean includeDeclared) {
+ if (LanceTableUtil.includeDeclared(includeDeclared)) {
+ return true;
+ }
+
+ try {
+ PolarisModels.LoadGenericTableResponse response =
+ restClient.get(
+ "/polaris/v1/"
+ + catalog
+ + "/namespaces/"
+ + namespacePath
+ + "/generic-tables/"
+ + tableName,
+ PolarisModels.LoadGenericTableResponse.class);
+ PolarisModels.GenericTable table = response.getTable();
+ if (table == null || !TABLE_FORMAT_LANCE.equalsIgnoreCase(table.getFormat())) {
+ return false;
+ }
+ return LanceTableUtil.hasStorageComponents(table.getBaseLocation(), Collections.emptyMap());
+ } catch (Exception e) {
+ LOG.debug("Failed to check if table is Lance table: {}", e.getMessage());
+ return false;
+ }
+ }
}
diff --git a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java
index 71f4c30..cab3e9c 100644
--- a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java
+++ b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java
@@ -54,6 +54,7 @@
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -317,6 +318,14 @@ public void testListTables() throws IOException {
assertThat(response.getTables()).hasSize(2);
assertThat(response.getTables()).contains("table1", "table2");
+ verify(restClient, never())
+ .get(
+ eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/table1"),
+ eq(PolarisModels.LoadGenericTableResponse.class));
+ verify(restClient, never())
+ .get(
+ eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/table2"),
+ eq(PolarisModels.LoadGenericTableResponse.class));
}
@Test
diff --git a/java/lance-namespace-unity/pom.xml b/java/lance-namespace-unity/pom.xml
index 1c42bf6..e742972 100644
--- a/java/lance-namespace-unity/pom.xml
+++ b/java/lance-namespace-unity/pom.xml
@@ -83,6 +83,10 @@
org.apache.maven.plugins
maven-surefire-plugin
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
-
\ No newline at end of file
+
diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java
index faba9d9..1dd9cb3 100644
--- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java
+++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java
@@ -40,6 +40,7 @@
import org.lance.namespace.model.TableExistsRequest;
import org.lance.namespace.rest.RestClient;
import org.lance.namespace.rest.RestClientException;
+import org.lance.namespace.util.LanceTableUtil;
import org.lance.namespace.util.ObjectIdentifier;
import org.lance.namespace.util.ValidationUtil;
@@ -55,7 +56,6 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -288,7 +288,7 @@ public ListTablesResponse listTables(ListTablesRequest request) {
if (unityResponse != null && unityResponse.getTables() != null) {
tables =
unityResponse.getTables().stream()
- .filter(this::isLanceTable)
+ .filter(t -> shouldIncludeTable(t, request.getIncludeDeclared()))
.map(UnityModels.TableInfo::getName)
.collect(Collectors.toList());
}
@@ -345,8 +345,9 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
createTable.setColumns(columns);
createTable.setStorageLocation(tablePath);
- Map properties = new HashMap<>();
- properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE);
+ Map properties =
+ LanceTableUtil.mergeTableProperties(
+ request.getProperties(), Collections.singletonMap(TABLE_TYPE_KEY, TABLE_TYPE_LANCE));
createTable.setProperties(properties);
UnityModels.TableInfo tableInfo =
@@ -354,6 +355,11 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) {
DeclareTableResponse response = new DeclareTableResponse();
response.setLocation(tablePath);
+ response.setProperties(
+ tableInfo != null && tableInfo.getProperties() != null
+ ? tableInfo.getProperties()
+ : properties);
+ response.setManagedVersioning(false);
return response;
} catch (RestClientException e) {
@@ -396,7 +402,12 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) {
DescribeTableResponse response = new DescribeTableResponse();
response.setLocation(tableInfo.getStorageLocation());
- response.setStorageOptions(tableInfo.getProperties());
+ response.setProperties(tableInfo.getProperties());
+ response.setManagedVersioning(false);
+ if (Boolean.TRUE.equals(request.getCheckDeclared())) {
+ response.setIsOnlyDeclared(
+ LanceTableUtil.isOnlyDeclared(tableInfo.getStorageLocation(), Collections.emptyMap()));
+ }
return response;
} catch (RestClientException e) {
@@ -440,7 +451,9 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) {
restClient.delete("/tables/" + fullName);
DeregisterTableResponse response = new DeregisterTableResponse();
+ response.setId(request.getId());
response.setLocation(location);
+ response.setProperties(tableInfo.getProperties());
return response;
} catch (RestClientException e) {
@@ -466,6 +479,13 @@ private boolean isLanceTable(UnityModels.TableInfo tableInfo) {
return TABLE_TYPE_LANCE.equalsIgnoreCase(tableType);
}
+ private boolean shouldIncludeTable(UnityModels.TableInfo tableInfo, Boolean includeDeclared) {
+ return isLanceTable(tableInfo)
+ && (LanceTableUtil.includeDeclared(includeDeclared)
+ || LanceTableUtil.hasStorageComponents(
+ tableInfo.getStorageLocation(), Collections.emptyMap()));
+ }
+
private List convertArrowSchemaToUnityColumns(Schema arrowSchema) {
List columns = new ArrayList<>();
for (Field field : arrowSchema.getFields()) {
diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java
index 76343b8..5a6e6ee 100644
--- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java
+++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java
@@ -28,8 +28,8 @@ public class UnityNamespaceConfig {
private static final String ROOT = "root";
private static final String DEFAULT_API_PATH = "/api/2.1/unity-catalog";
- private static final int DEFAULT_CONNECT_TIMEOUT = 10;
- private static final int DEFAULT_READ_TIMEOUT = 60;
+ private static final int DEFAULT_CONNECT_TIMEOUT_MS = 10000;
+ private static final int DEFAULT_READ_TIMEOUT_MS = 60000;
private static final int DEFAULT_MAX_RETRIES = 3;
private final Map properties;
@@ -59,11 +59,13 @@ public UnityNamespaceConfig(Map properties) {
// Inline PropertyUtil.propertyAsInt
String connectTimeoutStr = properties.get(CONNECT_TIMEOUT);
this.connectTimeout =
- connectTimeoutStr != null ? Integer.parseInt(connectTimeoutStr) : DEFAULT_CONNECT_TIMEOUT;
+ connectTimeoutStr != null
+ ? Integer.parseInt(connectTimeoutStr)
+ : DEFAULT_CONNECT_TIMEOUT_MS;
String readTimeoutStr = properties.get(READ_TIMEOUT);
this.readTimeout =
- readTimeoutStr != null ? Integer.parseInt(readTimeoutStr) : DEFAULT_READ_TIMEOUT;
+ readTimeoutStr != null ? Integer.parseInt(readTimeoutStr) : DEFAULT_READ_TIMEOUT_MS;
String maxRetriesStr = properties.get(MAX_RETRIES);
this.maxRetries = maxRetriesStr != null ? Integer.parseInt(maxRetriesStr) : DEFAULT_MAX_RETRIES;
diff --git a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java
new file mode 100644
index 0000000..05ebf89
--- /dev/null
+++ b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.unity;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TestUnityNamespaceConfig {
+
+ @Test
+ public void testDefaultTimeoutsUseMilliseconds() {
+ UnityNamespaceConfig config = new UnityNamespaceConfig(requiredProperties());
+
+ assertThat(config.getConnectTimeout()).isEqualTo(10000);
+ assertThat(config.getReadTimeout()).isEqualTo(60000);
+ }
+
+ @Test
+ public void testCustomTimeoutsUseMilliseconds() {
+ Map properties = requiredProperties();
+ properties.put("connect_timeout", "5000");
+ properties.put("read_timeout", "45000");
+
+ UnityNamespaceConfig config = new UnityNamespaceConfig(properties);
+
+ assertThat(config.getConnectTimeout()).isEqualTo(5000);
+ assertThat(config.getReadTimeout()).isEqualTo(45000);
+ }
+
+ private Map requiredProperties() {
+ Map properties = new HashMap<>();
+ properties.put("endpoint", "http://localhost:8080");
+ properties.put("catalog", "unity");
+ return properties;
+ }
+}
diff --git a/java/pom.xml b/java/pom.xml
index 43de8e7..1297fdb 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -58,6 +58,7 @@
UTF-8
2.0.0
+ 0.7.2
15.0.0
4.1.118.Final
5.8.2
@@ -83,6 +84,7 @@
3.2.5
+ 3.5.3
@@ -96,6 +98,11 @@
+
+ org.lance
+ lance-namespace-core
+ ${lance-namespace.version}
+
org.lance
lance-core
@@ -278,6 +285,54 @@
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ ${maven-shade-plugin.version}
+
+
+ bundle-jar
+ package
+
+ shade
+
+
+ true
+ bundle
+ false
+
+
+ org.lance:lance-core
+ org.lance:lance-namespace-core
+ org.lance:lance-namespace-apache-client
+ org.apache.arrow:*
+ org.questdb:jar-jni
+ com.google.flatbuffers:flatbuffers-java
+ org.eclipse.collections:*
+ io.netty:*
+ org.slf4j:*
+
+
+
+
+
+
+
+
+
+ *:*
+
+ LICENSE
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1ced000..363c3b6 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -12,7 +12,8 @@ requires-python = ">=3.10"
dependencies = [
"pylance>=0.26.0",
- "lance-namespace-urllib3-client>=0.4.2",
+ "lance-namespace>=0.7.2",
+ "lance-namespace-urllib3-client>=0.7.2",
"pyarrow>=15.0.0",
"typing-extensions>=4.5.0",
]
diff --git a/python/src/lance_namespace_impls/__init__.py b/python/src/lance_namespace_impls/__init__.py
index ceacc6f..ddf1dd8 100644
--- a/python/src/lance_namespace_impls/__init__.py
+++ b/python/src/lance_namespace_impls/__init__.py
@@ -18,6 +18,7 @@
- NamespaceException: Base exception for namespace operations
"""
+from lance_namespace import register_namespace_impl
from lance_namespace_impls.glue import GlueNamespace
from lance_namespace_impls.hive2 import Hive2Namespace
from lance_namespace_impls.hive3 import Hive3Namespace
@@ -36,6 +37,13 @@
InternalException,
)
+register_namespace_impl("glue", "lance_namespace_impls.glue.GlueNamespace")
+register_namespace_impl("hive2", "lance_namespace_impls.hive2.Hive2Namespace")
+register_namespace_impl("hive3", "lance_namespace_impls.hive3.Hive3Namespace")
+register_namespace_impl("iceberg", "lance_namespace_impls.iceberg.IcebergNamespace")
+register_namespace_impl("polaris", "lance_namespace_impls.polaris.PolarisNamespace")
+register_namespace_impl("unity", "lance_namespace_impls.unity.UnityNamespace")
+
__all__ = [
"GlueNamespace",
"Hive2Namespace",
diff --git a/python/src/lance_namespace_impls/glue.py b/python/src/lance_namespace_impls/glue.py
index 21c518f..dca7dda 100644
--- a/python/src/lance_namespace_impls/glue.py
+++ b/python/src/lance_namespace_impls/glue.py
@@ -14,7 +14,7 @@
Config = None
HAS_BOTO3 = False
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
ListNamespacesRequest,
ListNamespacesResponse,
@@ -35,6 +35,12 @@
)
from lance_namespace_impls.rest_client import InvalidInputException
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
LANCE_TABLE_TYPE = "LANCE"
TABLE_TYPE = "table_type"
@@ -328,8 +334,9 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
response = self.glue.get_tables(DatabaseName=database_name)
for table in response.get("TableList", []):
- # Only include Lance tables
- if self._is_lance_table(table):
+ if self._should_include_lance_table(
+ table, request.include_declared
+ ):
tables.append(table["Name"])
next_token = response.get("NextToken")
@@ -367,8 +374,15 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
f"Table has no location: {database_name}.{table_name}"
)
+ properties = table.get("Parameters", {})
return DescribeTableResponse(
- location=location, storage_options=self.config.storage_options
+ location=location,
+ storage_options=self.config.storage_options,
+ properties=properties,
+ managed_versioning=False,
+ is_only_declared=is_only_declared(location, self.config.storage_options)
+ if request.check_declared
+ else None,
)
except Exception as e:
error_name = e.__class__.__name__ if hasattr(e, "__class__") else ""
@@ -407,14 +421,20 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
}
]
+ properties = merge_table_properties(
+ request.properties,
+ {
+ TABLE_TYPE: LANCE_TABLE_TYPE,
+ "managed_by": "storage",
+ "empty_table": "true",
+ },
+ )
+
# Create Glue table entry without creating actual Lance dataset
table_input = {
"Name": table_name,
"TableType": EXTERNAL_TABLE,
- "Parameters": {
- TABLE_TYPE: LANCE_TABLE_TYPE,
- "empty_table": "true", # Mark as empty table
- },
+ "Parameters": properties,
"StorageDescriptor": {
"Location": table_location,
"Columns": glue_columns,
@@ -435,7 +455,12 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
)
raise RuntimeError(f"Failed to declare table: {e}")
- return DeclareTableResponse(location=table_location)
+ return DeclareTableResponse(
+ location=table_location,
+ storage_options=self.config.storage_options,
+ properties=properties,
+ managed_versioning=False,
+ )
def deregister_table(
self, request: DeregisterTableRequest
@@ -444,9 +469,22 @@ def deregister_table(
database_name, table_name = self._parse_table_identifier(request.id)
try:
+ table = self.glue.get_table(DatabaseName=database_name, Name=table_name)[
+ "Table"
+ ]
+ if not self._is_lance_table(table):
+ raise RuntimeError(
+ f"Table is not a Lance table: {database_name}.{table_name}"
+ )
+
# Only remove from Glue catalog, don't delete the Lance dataset
self.glue.delete_table(DatabaseName=database_name, Name=table_name)
- return DeregisterTableResponse()
+ location = table.get("StorageDescriptor", {}).get("Location")
+ return DeregisterTableResponse(
+ id=request.id,
+ location=location,
+ properties=table.get("Parameters", {}),
+ )
except Exception as e:
error_name = e.__class__.__name__ if hasattr(e, "__class__") else ""
if error_name == "EntityNotFoundException":
@@ -470,6 +508,17 @@ def _is_lance_table(self, glue_table: Dict[str, Any]) -> bool:
== LANCE_TABLE_TYPE
)
+ def _should_include_lance_table(
+ self, glue_table: Dict[str, Any], include_declared_value: Optional[bool]
+ ) -> bool:
+ """Check if a Glue table is Lance and matches include_declared."""
+ if not self._is_lance_table(glue_table):
+ return False
+ if include_declared(include_declared_value):
+ return True
+ location = glue_table.get("StorageDescriptor", {}).get("Location")
+ return has_storage_components(location, self.config.storage_options)
+
def __getstate__(self):
"""Prepare instance for pickling by excluding unpickleable objects."""
state = self.__dict__.copy()
diff --git a/python/src/lance_namespace_impls/hive2.py b/python/src/lance_namespace_impls/hive2.py
index 1e7bdcc..8eeac27 100644
--- a/python/src/lance_namespace_impls/hive2.py
+++ b/python/src/lance_namespace_impls/hive2.py
@@ -62,7 +62,7 @@
InvalidOperationException = None
MetaException = None
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
ListNamespacesRequest,
ListNamespacesResponse,
@@ -85,6 +85,12 @@
)
from lance_namespace_impls.rest_client import InvalidInputException
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
logger = logging.getLogger(__name__)
@@ -351,8 +357,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
TABLE_TYPE_KEY, ""
).lower()
if table_type == LANCE_TABLE_FORMAT:
- # Return just table name, not full identifier
- tables.append(table_name)
+ location = table.sd.location if table.sd else None
+ if include_declared(
+ request.include_declared
+ ) or has_storage_components(location):
+ tables.append(table_name)
except Exception:
# Skip tables we can't read
continue
@@ -367,7 +376,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
"""Describe a table in the Hive Metastore.
- Only load_detailed_metadata=false is supported. Returns location only.
+ Only load_detailed_metadata=false is supported.
"""
if request.load_detailed_metadata:
raise ValueError(
@@ -392,7 +401,14 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
if not location:
raise ValueError(f"Table {request.id} has no location")
- return DescribeTableResponse(location=location)
+ return DescribeTableResponse(
+ location=location,
+ properties=table.parameters or {},
+ managed_versioning=False,
+ is_only_declared=is_only_declared(location)
+ if request.check_declared
+ else None,
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
raise ValueError(f"Table {request.id} does not exist")
@@ -417,7 +433,10 @@ def drop_table(self, request: DropTableRequest) -> DropTableResponse:
client.drop_table(database, table_name, deleteData=True)
- return DropTableResponse(location=location)
+ return DropTableResponse(
+ location=location,
+ properties=table.parameters or {},
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
raise ValueError(f"Table {request.id} does not exist")
@@ -444,7 +463,11 @@ def deregister_table(
client.drop_table(database, table_name, deleteData=False)
- return DeregisterTableResponse(location=location)
+ return DeregisterTableResponse(
+ id=request.id,
+ location=location,
+ properties=table.parameters or {},
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
raise ValueError(f"Table {request.id} does not exist")
@@ -485,11 +508,14 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
)
# Set table parameters to identify it as Lance table
- parameters = {
- TABLE_TYPE_KEY: "LANCE",
- MANAGED_BY_KEY: "storage",
- "empty_table": "true", # Mark as empty table
- }
+ parameters = merge_table_properties(
+ request.properties,
+ {
+ TABLE_TYPE_KEY: LANCE_TABLE_FORMAT,
+ MANAGED_BY_KEY: "storage",
+ "empty_table": "true",
+ },
+ )
hive_table = HiveTable(
tableName=table_name,
@@ -503,7 +529,11 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
with self.client as client:
client.create_table(hive_table)
- return DeclareTableResponse(location=location)
+ return DeclareTableResponse(
+ location=location,
+ properties=parameters,
+ managed_versioning=False,
+ )
except AlreadyExistsException:
raise ValueError(f"Table {request.id} already exists")
diff --git a/python/src/lance_namespace_impls/hive3.py b/python/src/lance_namespace_impls/hive3.py
index 2680fc3..7395d71 100644
--- a/python/src/lance_namespace_impls/hive3.py
+++ b/python/src/lance_namespace_impls/hive3.py
@@ -64,7 +64,7 @@
InvalidOperationException = None
MetaException = None
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
ListNamespacesRequest,
ListNamespacesResponse,
@@ -87,6 +87,12 @@
)
from lance_namespace_impls.rest_client import InvalidInputException
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
logger = logging.getLogger(__name__)
@@ -433,7 +439,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
TABLE_TYPE_KEY, ""
).lower()
if table_type == LANCE_TABLE_FORMAT:
- tables.append(table_name)
+ location = table.sd.location if table.sd else None
+ if include_declared(
+ request.include_declared
+ ) or has_storage_components(location):
+ tables.append(table_name)
except Exception:
continue
@@ -448,7 +458,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
"""Describe a table.
- Only load_detailed_metadata=false is supported. Returns location only.
+ Only load_detailed_metadata=false is supported.
"""
if request.load_detailed_metadata:
raise ValueError(
@@ -471,7 +481,14 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
if not location:
raise ValueError(f"Table {request.id} has no location")
- return DescribeTableResponse(location=location)
+ return DescribeTableResponse(
+ location=location,
+ properties=table.parameters or {},
+ managed_versioning=False,
+ is_only_declared=is_only_declared(location)
+ if request.check_declared
+ else None,
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
@@ -497,7 +514,10 @@ def drop_table(self, request: DropTableRequest) -> DropTableResponse:
client.drop_table(database, table_name, deleteData=True)
- return DropTableResponse(location=location)
+ return DropTableResponse(
+ location=location,
+ properties=table.parameters or {},
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
@@ -525,7 +545,11 @@ def deregister_table(
client.drop_table(database, table_name, deleteData=False)
- return DeregisterTableResponse(location=location)
+ return DeregisterTableResponse(
+ id=request.id,
+ location=location,
+ properties=table.parameters or {},
+ )
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
@@ -561,11 +585,14 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
),
)
- parameters = {
- TABLE_TYPE_KEY: LANCE_TABLE_FORMAT,
- MANAGED_BY_KEY: "storage",
- "empty_table": "true",
- }
+ parameters = merge_table_properties(
+ request.properties,
+ {
+ TABLE_TYPE_KEY: LANCE_TABLE_FORMAT,
+ MANAGED_BY_KEY: "storage",
+ "empty_table": "true",
+ },
+ )
hive_table = HiveTable(
tableName=table_name,
@@ -578,7 +605,11 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
with self.client as client:
client.create_table(hive_table)
- return DeclareTableResponse(location=location)
+ return DeclareTableResponse(
+ location=location,
+ properties=parameters,
+ managed_versioning=False,
+ )
except AlreadyExistsException:
raise ValueError(f"Table {request.id} already exists")
diff --git a/python/src/lance_namespace_impls/iceberg.py b/python/src/lance_namespace_impls/iceberg.py
index 89eaf12..08c5795 100644
--- a/python/src/lance_namespace_impls/iceberg.py
+++ b/python/src/lance_namespace_impls/iceberg.py
@@ -14,7 +14,7 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
CreateNamespaceRequest,
CreateNamespaceResponse,
@@ -44,6 +44,12 @@
TableAlreadyExistsException,
TableNotFoundException,
)
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
logger = logging.getLogger(__name__)
@@ -378,8 +384,8 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
if response and "identifiers" in response:
for table_id in response["identifiers"]:
table_name = table_id.get("name")
- if table_name and self._is_lance_table(
- prefix, namespace, table_name
+ if table_name and self._should_include_lance_table(
+ prefix, namespace, table_name, request.include_declared
):
tables.append(table_name)
@@ -424,7 +430,10 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}"
)
- properties = {self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE}
+ properties = merge_table_properties(
+ request.properties,
+ {self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE},
+ )
create_request = {
"name": table_name,
@@ -434,13 +443,20 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
}
namespace_path = self._encode_namespace(namespace)
- self.rest_client.post(
+ response = self.rest_client.post(
f"{prefix_path}/namespaces/{namespace_path}/tables", create_request
)
logger.info(f"Declared table: {'.'.join(table_id)}")
- return DeclareTableResponse(location=table_path)
+ response_properties = (
+ response.get("metadata", {}).get("properties") if response else None
+ )
+ return DeclareTableResponse(
+ location=table_path,
+ properties=response_properties or properties,
+ managed_versioning=False,
+ )
except RestClientException as e:
if e.is_conflict():
@@ -507,7 +523,12 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
)
return DescribeTableResponse(
- location=metadata.get("location"), storage_options=props
+ location=metadata.get("location"),
+ properties=props,
+ managed_versioning=False,
+ is_only_declared=is_only_declared(metadata.get("location"))
+ if request.check_declared
+ else None,
)
except RestClientException as e:
@@ -547,9 +568,19 @@ def deregister_table(
f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}"
)
- table_location = None
- if response and "metadata" in response:
- table_location = response["metadata"].get("location")
+ metadata = response.get("metadata") if response else None
+ if not metadata:
+ raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}")
+
+ table_location = metadata.get("location")
+ properties = metadata.get("properties") or {}
+ if (
+ properties.get(self.TABLE_TYPE_KEY, "").lower()
+ != self.TABLE_TYPE_LANCE.lower()
+ ):
+ raise InvalidInputException(
+ f"Table {'.'.join(request.id)} is not a Lance table"
+ )
self.rest_client.delete(
f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}",
@@ -558,7 +589,11 @@ def deregister_table(
logger.info(f"Deregistered table: {'.'.join(table_id)}")
- return DeregisterTableResponse(location=table_location)
+ return DeregisterTableResponse(
+ id=request.id,
+ location=table_location,
+ properties=properties,
+ )
except RestClientException as e:
if e.is_not_found():
@@ -578,10 +613,14 @@ def _parse_identifier(self, identifier: List[str]) -> List[str]:
"""Parse identifier list."""
return identifier if identifier else []
- def _is_lance_table(
- self, prefix: str, namespace: List[str], table_name: str
+ def _should_include_lance_table(
+ self,
+ prefix: str,
+ namespace: List[str],
+ table_name: str,
+ include_declared_value: Optional[bool],
) -> bool:
- """Check if a table is a Lance table."""
+ """Check if a table is Lance and matches include_declared."""
try:
prefix_path = self._get_prefix_path(prefix)
namespace_path = self._encode_namespace(namespace)
@@ -592,11 +631,16 @@ def _is_lance_table(
)
if response and "metadata" in response:
- props = response["metadata"].get("properties", {})
- return (
+ metadata = response["metadata"]
+ props = metadata.get("properties", {})
+ if (
props.get(self.TABLE_TYPE_KEY, "").lower()
- == self.TABLE_TYPE_LANCE.lower()
- )
+ != self.TABLE_TYPE_LANCE.lower()
+ ):
+ return False
+ return include_declared(
+ include_declared_value
+ ) or has_storage_components(metadata.get("location"))
except Exception as e:
logger.debug(f"Failed to check if table is Lance table: {e}")
return False
diff --git a/python/src/lance_namespace_impls/polaris.py b/python/src/lance_namespace_impls/polaris.py
index 4f17c9d..666f4fc 100644
--- a/python/src/lance_namespace_impls/polaris.py
+++ b/python/src/lance_namespace_impls/polaris.py
@@ -6,7 +6,7 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
CreateNamespaceRequest,
CreateNamespaceResponse,
@@ -36,6 +36,12 @@
TableAlreadyExistsException,
TableNotFoundException,
)
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
logger = logging.getLogger(__name__)
@@ -264,7 +270,12 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
if response and "identifiers" in response:
for table_id in response["identifiers"]:
table_name = table_id.get("name")
- if table_name:
+ if table_name and self._should_include_lance_table(
+ catalog,
+ namespace_path,
+ table_name,
+ request.include_declared,
+ ):
tables.append(table_name)
tables = sorted(set(tables))
@@ -302,7 +313,10 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}"
)
- properties = {self.TABLE_TYPE_KEY: self.TABLE_FORMAT_LANCE}
+ properties = merge_table_properties(
+ request.properties,
+ {self.TABLE_TYPE_KEY: self.TABLE_FORMAT_LANCE},
+ )
create_request = {
"name": table_name,
@@ -312,14 +326,21 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
}
namespace_path = ".".join(namespace)
- self.rest_client.post(
+ response = self.rest_client.post(
f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables",
create_request,
)
logger.info(f"Declared table: {'.'.join(table_id)}")
- return DeclareTableResponse(location=table_path)
+ response_properties = (
+ response.get("table", {}).get("properties") if response else None
+ )
+ return DeclareTableResponse(
+ location=table_path,
+ properties=response_properties or properties,
+ managed_versioning=False,
+ )
except RestClientException as e:
if e.is_conflict():
@@ -343,7 +364,7 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
"""Describe a table.
- Only load_detailed_metadata=false is supported. Returns location and storage_options only.
+ Only load_detailed_metadata=false is supported.
"""
if request.load_detailed_metadata:
raise InvalidInputException(
@@ -381,7 +402,11 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
return DescribeTableResponse(
location=table.get("base-location"),
- storage_options=table.get("properties", {}),
+ properties=table.get("properties", {}),
+ managed_versioning=False,
+ is_only_declared=is_only_declared(table.get("base-location"))
+ if request.check_declared
+ else None,
)
except RestClientException as e:
@@ -415,9 +440,17 @@ def deregister_table(
f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}"
)
- table_location = None
- if response and "table" in response:
- table_location = response["table"].get("base-location")
+ table = response.get("table") if response else None
+ if not table:
+ raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}")
+ table_format = table.get("format", "")
+ if table_format.lower() != self.TABLE_FORMAT_LANCE:
+ raise InvalidInputException(
+ f"Table {'.'.join(request.id)} is not a Lance table (format: {table_format})"
+ )
+
+ table_location = table.get("base-location")
+ properties = table.get("properties")
self.rest_client.delete(
f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}"
@@ -425,7 +458,11 @@ def deregister_table(
logger.info(f"Deregistered table: {'.'.join(table_id)}")
- return DeregisterTableResponse(location=table_location)
+ return DeregisterTableResponse(
+ id=request.id,
+ location=table_location,
+ properties=properties,
+ )
except RestClientException as e:
if e.is_not_found():
@@ -444,3 +481,28 @@ def close(self):
def _parse_identifier(self, identifier: List[str]) -> List[str]:
"""Parse identifier list."""
return identifier if identifier else []
+
+ def _should_include_lance_table(
+ self,
+ catalog: str,
+ namespace_path: str,
+ table_name: str,
+ include_declared_value: Optional[bool],
+ ) -> bool:
+ """Check if a Polaris generic table is Lance and matches include_declared."""
+ if include_declared(include_declared_value):
+ return True
+
+ try:
+ response = self.rest_client.get(
+ f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}"
+ )
+ if not response or "table" not in response:
+ return False
+ table = response["table"]
+ if table.get("format", "").lower() != self.TABLE_FORMAT_LANCE:
+ return False
+ return has_storage_components(table.get("base-location"))
+ except Exception as e:
+ logger.debug(f"Failed to check if table is Lance table: {e}")
+ return False
diff --git a/python/src/lance_namespace_impls/table_utils.py b/python/src/lance_namespace_impls/table_utils.py
new file mode 100644
index 0000000..2d35e26
--- /dev/null
+++ b/python/src/lance_namespace_impls/table_utils.py
@@ -0,0 +1,45 @@
+"""
+Shared table metadata helpers for Lance Namespace implementations.
+"""
+
+from typing import Dict, Optional
+
+
+def merge_table_properties(
+ properties: Optional[Dict[str, str]], required_properties: Dict[str, str]
+) -> Dict[str, str]:
+ """Merge caller-provided table properties with implementation-required markers."""
+ merged = dict(properties or {})
+ merged.update(required_properties)
+ return merged
+
+
+def include_declared(include_declared_value: Optional[bool]) -> bool:
+ """Return the 0.7.2 ListTables include_declared default."""
+ return include_declared_value is not False
+
+
+def has_storage_components(
+ location: Optional[str], storage_options: Optional[Dict[str, str]] = None
+) -> bool:
+ """Check whether a catalog table location can be opened as a Lance dataset."""
+ if not location:
+ return False
+
+ try:
+ import lance
+
+ dataset = lance.dataset(location, storage_options=storage_options or {})
+ close = getattr(dataset, "close", None)
+ if close is not None:
+ close()
+ return True
+ except Exception:
+ return False
+
+
+def is_only_declared(
+ location: Optional[str], storage_options: Optional[Dict[str, str]] = None
+) -> bool:
+ """Return true when a catalog table exists but no Lance storage can be opened."""
+ return not has_storage_components(location, storage_options)
diff --git a/python/src/lance_namespace_impls/unity.py b/python/src/lance_namespace_impls/unity.py
index 503472a..f873a83 100644
--- a/python/src/lance_namespace_impls/unity.py
+++ b/python/src/lance_namespace_impls/unity.py
@@ -10,7 +10,7 @@
import pyarrow as pa
import pyarrow.ipc as ipc
-from lance.namespace import LanceNamespace
+from lance_namespace import LanceNamespace
from lance_namespace_urllib3_client.models import (
CreateNamespaceRequest,
CreateNamespaceResponse,
@@ -40,6 +40,12 @@
TableAlreadyExistsException,
TableNotFoundException,
)
+from lance_namespace_impls.table_utils import (
+ has_storage_components,
+ include_declared,
+ is_only_declared,
+ merge_table_properties,
+)
logger = logging.getLogger(__name__)
@@ -410,7 +416,9 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
tables = []
if response and "tables" in response:
for table_data in response["tables"]:
- if self._is_lance_table(table_data):
+ if self._should_include_lance_table(
+ table_data, request.include_declared
+ ):
tables.append(table_data["name"])
tables = sorted(set(tables))
@@ -451,10 +459,13 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
)
]
- properties = {
- self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE,
- self.MANAGED_BY_KEY: "catalog",
- }
+ properties = merge_table_properties(
+ request.properties,
+ {
+ self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE,
+ self.MANAGED_BY_KEY: "catalog",
+ },
+ )
create_table = CreateTable(
name=table,
@@ -467,13 +478,17 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
properties=properties,
)
- self.rest_client.post(
+ table_info = self.rest_client.post(
"/tables", create_table, response_converter=_parse_table_info
)
logger.info(f"Declared table: {catalog}.{schema}.{table}")
- return DeclareTableResponse(location=table_path)
+ return DeclareTableResponse(
+ location=table_path,
+ properties=table_info.properties if table_info else properties,
+ managed_versioning=False,
+ )
except RestClientException as e:
if e.is_conflict():
@@ -517,7 +532,11 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
return DescribeTableResponse(
location=table_info.storage_location,
- storage_options=table_info.properties,
+ properties=table_info.properties,
+ managed_versioning=False,
+ is_only_declared=is_only_declared(table_info.storage_location)
+ if request.check_declared
+ else None,
)
except RestClientException as e:
@@ -568,7 +587,11 @@ def deregister_table(
logger.info(f"Deregistered table: {full_name}")
- return DeregisterTableResponse(location=location)
+ return DeregisterTableResponse(
+ id=request.id,
+ location=location,
+ properties=table_info.properties,
+ )
except RestClientException as e:
if e.is_not_found():
@@ -596,6 +619,16 @@ def _is_lance_table(self, table_data: Dict[str, Any]) -> bool:
table_type = properties.get(self.TABLE_TYPE_KEY)
return table_type and table_type.lower() == self.TABLE_TYPE_LANCE.lower()
+ def _should_include_lance_table(
+ self, table_data: Dict[str, Any], include_declared_value: Optional[bool]
+ ) -> bool:
+ """Check if a Unity table is Lance and matches include_declared."""
+ if not self._is_lance_table(table_data):
+ return False
+ if include_declared(include_declared_value):
+ return True
+ return has_storage_components(table_data.get("storage_location"))
+
def _is_lance_table_info(self, table_info: TableInfo) -> bool:
"""Check if a TableInfo represents a Lance table."""
if not table_info or not table_info.properties:
diff --git a/python/tests/test_glue.py b/python/tests/test_glue.py
index 0893bfb..85155de 100644
--- a/python/tests/test_glue.py
+++ b/python/tests/test_glue.py
@@ -269,12 +269,23 @@ def test_list_tables(self, glue_namespace):
def test_deregister_table(self, glue_namespace):
"""Test deregistering a table (only removes from Glue, keeps Lance dataset)."""
+ glue_namespace.glue.get_table.return_value = {
+ "Table": {
+ "Name": "test_table",
+ "Parameters": {"table_type": "LANCE"},
+ "StorageDescriptor": {"Location": "s3://bucket/table.lance"},
+ }
+ }
+
request = DeregisterTableRequest(id=["test_db", "test_table"])
- glue_namespace.deregister_table(request)
+ response = glue_namespace.deregister_table(request)
glue_namespace.glue.delete_table.assert_called_once_with(
DatabaseName="test_db", Name="test_table"
)
+ assert response.id == ["test_db", "test_table"]
+ assert response.location == "s3://bucket/table.lance"
+ assert response.properties == {"table_type": "LANCE"}
def test_describe_table(self, glue_namespace):
"""Test describing a table."""
diff --git a/python/tests/test_iceberg.py b/python/tests/test_iceberg.py
index b38937d..67fe2b7 100644
--- a/python/tests/test_iceberg.py
+++ b/python/tests/test_iceberg.py
@@ -427,9 +427,8 @@ def test_describe_table(self, mock_rest_client_class):
response = namespace.describe_table(request)
self.assertEqual(response.location, "/data/lance/ns/table")
- self.assertEqual(
- response.storage_options, {"table_type": "lance", "key": "value"}
- )
+ self.assertEqual(response.properties, {"table_type": "lance", "key": "value"})
+ self.assertFalse(response.managed_versioning)
@patch("lance_namespace_impls.iceberg.RestClient")
def test_describe_table_not_lance(self, mock_rest_client_class):
@@ -482,7 +481,12 @@ def test_deregister_table(self, mock_rest_client_class):
mock_client.get.side_effect = [
{"defaults": {"prefix": "warehouse1"}},
- {"metadata": {"location": "/data/lance/ns/table"}},
+ {
+ "metadata": {
+ "location": "/data/lance/ns/table",
+ "properties": {"table_type": "lance"},
+ }
+ },
]
namespace = IcebergNamespace(**self.properties)
diff --git a/python/tests/test_namespace.py b/python/tests/test_namespace.py
index c502e34..f35cb41 100644
--- a/python/tests/test_namespace.py
+++ b/python/tests/test_namespace.py
@@ -58,6 +58,14 @@ def test_connect_non_namespace_class():
assert "does not implement LanceNamespace interface" in str(exc_info.value)
+def test_package_registers_namespace_aliases():
+ import lance_namespace_impls # noqa: F401
+
+ ns = connect("iceberg", {"endpoint": "http://localhost:8181"})
+
+ assert ns.__class__.__name__ == "IcebergNamespace"
+
+
def test_default_methods_raise_unsupported():
from lance_namespace import UnsupportedOperationError
from lance_namespace_urllib3_client.models import (
diff --git a/python/tests/test_polaris.py b/python/tests/test_polaris.py
index 3ed7130..20063bb 100644
--- a/python/tests/test_polaris.py
+++ b/python/tests/test_polaris.py
@@ -282,9 +282,11 @@ def test_list_tables(self, mock_rest_client_class):
response = namespace.list_tables(request)
self.assertEqual(sorted(response.tables), ["table1", "table2", "table3"])
- mock_client.get.assert_called_once_with(
- "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables"
+ self.assertEqual(
+ mock_client.get.call_args.args[0],
+ "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables",
)
+ mock_client.get.assert_called_once()
@patch("lance_namespace_impls.polaris.RestClient")
def test_declare_table(self, mock_rest_client_class):
@@ -374,7 +376,8 @@ def test_describe_table(self, mock_rest_client_class):
response = namespace.describe_table(request)
self.assertEqual(response.location, "/data/lance/ns/table")
- self.assertEqual(response.storage_options, {"key": "value"})
+ self.assertEqual(response.properties, {"key": "value"})
+ self.assertFalse(response.managed_versioning)
mock_client.get.assert_called_once_with(
"/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables/test_table"
)
@@ -426,7 +429,10 @@ def test_deregister_table(self, mock_rest_client_class):
mock_rest_client_class.return_value = mock_client
mock_client.get.return_value = {
- "table": {"base-location": "/data/lance/ns/table"}
+ "table": {
+ "format": "lance",
+ "base-location": "/data/lance/ns/table",
+ }
}
namespace = PolarisNamespace(**self.properties)
diff --git a/python/uv.lock b/python/uv.lock
index 5d832e9..ccc7648 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -200,21 +200,22 @@ wheels = [
[[package]]
name = "lance-namespace"
-version = "0.4.2"
+version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lance-namespace-urllib3-client" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/42/af/a77e7c9bc16ccf8a793df06bb87a559198f5b5dfb7ca03f4f32e1fe9cc15/lance_namespace-0.4.2.tar.gz", hash = "sha256:6830d0fb0f3f6dc0388ace2aa1a29f1b8e22c62f22e592a8b578c5da92980e7b", size = 9828, upload-time = "2025-12-31T08:31:02.488Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/17/04/633687a8e64383058cdf5e36c69c016006225880242804f35ec1241c82cb/lance_namespace-0.7.2.tar.gz", hash = "sha256:43d6e7be6773c4f0b4c8d36602e7245066fc0c06fa334a239a0452f00492768a", size = 10526, upload-time = "2026-04-25T00:01:40.836Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c3/9a/13242c23f932d8a61288e7c3e5bf5929cd509947535175d0f1ff188c6562/lance_namespace-0.4.2-py3-none-any.whl", hash = "sha256:ad0705dc0fdf37494cccc7163272472b773ad08572b146173114167187e5825e", size = 11702, upload-time = "2025-12-31T08:31:05.309Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/4d/6fae405ec2503e14afdf0490cbdb8314db702a804189a5d01f46e5b00b63/lance_namespace-0.7.2-py3-none-any.whl", hash = "sha256:c7f35b99f79cd7c08ebcda3c06fe4d1acdb4db72458cb9e211971c46964db9e1", size = 12356, upload-time = "2026-04-25T00:01:41.558Z" },
]
[[package]]
name = "lance-namespace-impls"
-version = "0.1.0"
+version = "0.2.0"
source = { editable = "." }
dependencies = [
+ { name = "lance-namespace" },
{ name = "lance-namespace-urllib3-client" },
{ name = "pyarrow" },
{ name = "pylance" },
@@ -255,7 +256,8 @@ requires-dist = [
{ name = "hive-metastore-client", marker = "extra == 'all'", specifier = ">=1.0.0" },
{ name = "hive-metastore-client", marker = "extra == 'hive2'", specifier = ">=1.0.0" },
{ name = "hive-metastore-client", marker = "extra == 'hive3'", specifier = ">=1.0.0" },
- { name = "lance-namespace-urllib3-client", specifier = ">=0.4.2" },
+ { name = "lance-namespace", specifier = ">=0.7.2" },
+ { name = "lance-namespace-urllib3-client", specifier = ">=0.7.2" },
{ name = "pyarrow", specifier = ">=15.0.0" },
{ name = "pylance", specifier = ">=0.26.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },
@@ -270,7 +272,7 @@ provides-extras = ["glue", "hive2", "hive3", "iceberg", "polaris", "unity", "all
[[package]]
name = "lance-namespace-urllib3-client"
-version = "0.4.2"
+version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic" },
@@ -278,9 +280,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d0/33/3f533d87b8ad0867181a86cb17517cabed277d6816ca66a676dd98076064/lance_namespace_urllib3_client-0.4.2.tar.gz", hash = "sha256:294bfd2579f640053486008a77c2b7d43b8bf9614217941eda51b6f1c0f42f28", size = 155837, upload-time = "2025-12-31T08:31:04.605Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f6/d7/c7d9ae1a2815e3c846fdd6e49f5882d60ffd76a5ddccd34af5fe8ac8124e/lance_namespace_urllib3_client-0.7.2.tar.gz", hash = "sha256:853dcd7affb14fd6ae3039748d1fff4906d51ec41026e43f787ee56f339e18f6", size = 184709, upload-time = "2026-04-25T00:01:42.301Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c2/79/c67652374a99c14e751fc4c16592b3d20c2f855ffc52c77132b3b48e356c/lance_namespace_urllib3_client-0.4.2-py3-none-any.whl", hash = "sha256:da885fd62b37af8653dba7ed22322bfd0a92a60ce78214eebca76783041af668", size = 262107, upload-time = "2025-12-31T08:31:03.431Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/eb/941780e022d9f7c01256eb5d6a1cc1d097a80b188795212d504723ba8fc8/lance_namespace_urllib3_client-0.7.2-py3-none-any.whl", hash = "sha256:0d0316f1a7d6da61c1f17b8f3dfb3643cf882d67712eaa709619c17b2cd9c880", size = 314775, upload-time = "2026-04-25T00:01:39.679Z" },
]
[[package]]