From c3d9cccd27dc1339bfa0a53f30032114cceaa5a5 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Mon, 27 Apr 2026 16:28:29 -0700 Subject: [PATCH 1/5] feat: upgrade lance namespace to 0.7.2 --- docs/src/glue.md | 16 ++-- docs/src/hive2.md | 14 ++-- docs/src/hive3.md | 14 ++-- docs/src/iceberg.md | 15 ++-- docs/src/polaris.md | 15 ++-- docs/src/unity.md | 13 +-- java/lance-namespace-glue/pom.xml | 6 ++ .../lance/namespace/glue/GlueNamespace.java | 31 ++++++- java/lance-namespace-hive2/pom.xml | 3 +- .../lance/namespace/hive2/Hive2Namespace.java | 43 ++++++++-- .../org/lance/namespace/hive2/Hive2Util.java | 2 +- java/lance-namespace-hive3/pom.xml | 3 +- .../lance/namespace/hive3/Hive3Namespace.java | 44 ++++++++-- .../org/lance/namespace/hive3/Hive3Util.java | 1 + .../namespace/iceberg/IcebergNamespace.java | 45 ++++++++-- .../lance/namespace/util/LanceTableUtil.java | 61 ++++++++++++++ .../namespace/polaris/PolarisNamespace.java | 73 ++++++++++++++-- .../polaris/TestPolarisNamespace.java | 9 ++ .../lance/namespace/unity/UnityNamespace.java | 30 +++++-- java/pom.xml | 6 ++ python/pyproject.toml | 3 +- python/src/lance_namespace_impls/__init__.py | 8 ++ python/src/lance_namespace_impls/glue.py | 69 ++++++++++++--- python/src/lance_namespace_impls/hive2.py | 56 ++++++++++--- python/src/lance_namespace_impls/hive3.py | 55 +++++++++--- python/src/lance_namespace_impls/iceberg.py | 80 ++++++++++++++---- python/src/lance_namespace_impls/polaris.py | 84 ++++++++++++++++--- .../src/lance_namespace_impls/table_utils.py | 45 ++++++++++ python/src/lance_namespace_impls/unity.py | 53 +++++++++--- python/tests/test_glue.py | 13 ++- python/tests/test_iceberg.py | 12 ++- python/tests/test_namespace.py | 8 ++ python/tests/test_polaris.py | 14 +++- python/uv.lock | 18 ++-- 34 files changed, 790 insertions(+), 172 deletions(-) create mode 100644 java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java create mode 100644 python/src/lance_namespace_impls/table_utils.py diff --git a/docs/src/glue.md b/docs/src/glue.md index aee4da6..eae1fe9 100644 --- a/docs/src/glue.md +++ b/docs/src/glue.md @@ -78,7 +78,7 @@ The **table location** is stored in the [`StorageDescriptor.Location`](https://d ## Lance Table Identification -A table in AWS Glue is identified as a Lance table when it meets the following criteria: the `TableType` is `EXTERNAL_TABLE`, and the `Parameters` map contains a key `table_type` with value `lance` (case insensitive). The `StorageDescriptor.Location` must point to a valid Lance table root directory. +A table in AWS Glue is identified as a Lance table when it meets the following criteria: the `TableType` is `EXTERNAL_TABLE`, and the `Parameters` map contains a key `table_type` with value `lance` (case insensitive). The `StorageDescriptor.Location` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests. ## Basic Operations @@ -191,9 +191,10 @@ The implementation: - `DatabaseName`: the database name - `TableInput.Name`: the table name - `TableInput.TableType`: `EXTERNAL_TABLE` - - `TableInput.Parameters`: include `table_type=lance` and other properties + - `TableInput.Parameters`: request `properties` merged with implementation markers such as `table_type=lance` - `TableInput.StorageDescriptor.Location`: the specified table location 4. POST the CreateTable request to Glue +5. Return the declared table location, catalog table properties, optional storage options, and `managed_versioning=false` **Error Handling:** @@ -215,7 +216,8 @@ The implementation: 2. Verify the namespace exists using [GetDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html) 3. Use [GetTables](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTables.html) with `CatalogId` and `DatabaseName` 4. Filter tables where `Parameters.table_type=lance` (case insensitive) -5. Sort the results and apply pagination using `NextToken` +5. If `include_declared=false`, only include catalog entries whose `StorageDescriptor.Location` can be opened as a Lance dataset +6. Sort the results and apply pagination using `NextToken` **Error Handling:** @@ -227,14 +229,15 @@ If the Glue service is unavailable, return error code `17` (ServiceUnavailable). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, catalog table properties, `managed_versioning=false`, and any implementation storage options that should be returned to the caller. The implementation: 1. Parse the table identifier to extract catalog, database, and table name 2. Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) with `CatalogId`, `DatabaseName`, and `Name` 3. Validate that the table is a Lance table (check `Parameters.table_type=lance`) -4. Return the table location from `StorageDescriptor.Location` and storage_options from `Parameters` +4. Return the table location from `StorageDescriptor.Location` and catalog properties from `Parameters` +5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -255,7 +258,8 @@ The implementation: 1. Parse the table identifier to extract catalog, database, and table name 2. Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) to retrieve and validate the table is a Lance table 3. Use [DeleteTable](https://docs.aws.amazon.com/glue/latest/webapi/API_DeleteTable.html) with `CatalogId`, `DatabaseName`, and `Name` -4. The underlying Lance table data at `StorageDescriptor.Location` is not deleted +4. Return the table id, location, and catalog properties +5. The underlying Lance table data at `StorageDescriptor.Location` is not deleted **Error Handling:** diff --git a/docs/src/hive2.md b/docs/src/hive2.md index 5d21282..8deda59 100644 --- a/docs/src/hive2.md +++ b/docs/src/hive2.md @@ -38,7 +38,7 @@ The **table location** is stored in the `location` field of the table's `storage ## Lance Table Identification -A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` must point to a valid Lance table root directory. +A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests. ## Basic Operations @@ -116,8 +116,9 @@ The implementation: 2. Verify the parent namespace exists 3. Create an HMS Table object with `tableType=EXTERNAL_TABLE` 4. Set the storage descriptor with the specified or default location. When location is not specified, it defaults to `{root}/{database}.db/{table}` -5. Add `table_type=lance` to the table parameters +5. Merge request `properties` with required table parameters such as `table_type=lance` and `managed_by=storage` 6. Register the table in HMS +7. Return the declared table location, table parameters, and `managed_versioning=false` **Error Handling:** @@ -137,7 +138,8 @@ The implementation: 2. Verify the namespace exists 3. Retrieve all tables in the database 4. Filter tables where `parameters.table_type=lance` -5. Sort the results and apply pagination +5. If `include_declared=false`, only include catalog entries whose storage descriptor location can be opened as a Lance dataset +6. Sort the results and apply pagination **Error Handling:** @@ -147,14 +149,15 @@ If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location is returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, HMS table parameters as `properties`, and `managed_versioning=false`. The implementation: 1. Parse the table identifier 2. Retrieve the Table object from HMS 3. Validate that it is a Lance table (check `table_type=lance`) -4. Return the table location from `storageDescriptor.location` +4. Return the table location from `storageDescriptor.location` and the table parameters as `properties` +5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -191,6 +194,7 @@ The implementation: 1. Parse the table identifier 2. Retrieve the Table object and validate it is a Lance table 3. Drop the table from HMS with `deleteData=false` +4. Return the table id, location, and table parameters **Error Handling:** diff --git a/docs/src/hive3.md b/docs/src/hive3.md index 94ad14e..aa553a6 100644 --- a/docs/src/hive3.md +++ b/docs/src/hive3.md @@ -38,7 +38,7 @@ The **table location** is stored in the [`location`](https://github.com/apache/h ## Lance Table Identification -A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` must point to a valid Lance table root directory. +A table in HMS is identified as a Lance table when it meets the following criteria: the `tableType` is `EXTERNAL_TABLE`, and the `parameters` map contains a key `table_type` with value `lance` (case insensitive). The `location` in `storageDescriptor` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests. ## Basic Operations @@ -123,8 +123,9 @@ The implementation: 2. Verify the parent namespace exists 3. Create an HMS Table object with `tableType=EXTERNAL_TABLE` 4. Set the storage descriptor with the specified or default location. When location is not specified, it defaults to `{root}/{database}.db/{table}` for the default `hive` catalog (hive2-compatible), or `{root}/{catalog}/{database}.db/{table}` for other catalogs -5. Add `table_type=lance` to the table parameters +5. Merge request `properties` with required table parameters such as `table_type=lance` and `managed_by=storage` 6. Register the table in HMS +7. Return the declared table location, table parameters, and `managed_versioning=false` **Error Handling:** @@ -144,7 +145,8 @@ The implementation: 2. Verify the namespace exists 3. Retrieve all tables in the database 4. Filter tables where `parameters.table_type=lance` -5. Sort the results and apply pagination +5. If `include_declared=false`, only include catalog entries whose storage descriptor location can be opened as a Lance dataset +6. Sort the results and apply pagination **Error Handling:** @@ -154,14 +156,15 @@ If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location is returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, HMS table parameters as `properties`, and `managed_versioning=false`. The implementation: 1. Parse the table identifier 2. Retrieve the Table object from HMS 3. Validate that it is a Lance table (check `table_type=lance`) -4. Return the table location from `storageDescriptor.location` +4. Return the table location from `storageDescriptor.location` and the table parameters as `properties` +5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -198,6 +201,7 @@ The implementation: 1. Parse the table identifier 2. Retrieve the Table object and validate it is a Lance table 3. Drop the table from HMS with `deleteData=false` +4. Return the table id, location, and table parameters **Error Handling:** diff --git a/docs/src/iceberg.md b/docs/src/iceberg.md index 54904d1..8b5697b 100644 --- a/docs/src/iceberg.md +++ b/docs/src/iceberg.md @@ -50,7 +50,7 @@ The **table location** is stored in the `location` field of the Iceberg table me ## Lance Table Identification -A table in Iceberg REST Catalog is identified as a Lance table when the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `location` must point to a valid Lance table root directory. The Iceberg table itself serves as a metadata wrapper, with the actual data stored in Lance format. +A table in Iceberg REST Catalog is identified as a Lance table when the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `location` may be declared before a Lance dataset exists. The Iceberg table itself serves as a metadata wrapper, with the actual data stored in Lance format once the table is materialized. ## Basic Operations @@ -146,7 +146,7 @@ The implementation: - `schema`: a dummy Iceberg schema with a single nullable string column `dummy` - `properties`: table properties including `table_type=lance` 6. POST to `/v1/{prefix}/namespaces/{namespace}/tables` -7. Return the declared table location +7. Return the declared table location, catalog table properties, and `managed_versioning=false` **Error Handling:** @@ -167,7 +167,8 @@ The implementation: 3. Extract the namespace path from the remaining elements 4. GET `/v1/{prefix}/namespaces/{namespace}/tables` 5. For each table, load its metadata and filter tables where `properties.table_type=lance` -6. Extract table names from the response identifiers +6. If `include_declared=false`, only include catalog entries whose Iceberg metadata location can be opened as a Lance dataset +7. Extract table names from the response identifiers **Error Handling:** @@ -177,7 +178,7 @@ If the server returns an error, return error code `18` (Internal). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Iceberg table properties as `properties`, and `managed_versioning=false`. The implementation: @@ -187,7 +188,8 @@ The implementation: 4. Extract the table name from the last element 5. GET `/v1/{prefix}/namespaces/{namespace}/tables/{table}` 6. Verify the table has `table_type=lance` property -7. Return the table location and storage_options from `properties` +7. Return the table location and Iceberg table properties +8. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -207,7 +209,8 @@ The implementation: 2. Resolve the API prefix from the warehouse config cache 3. Extract the namespace path from the middle elements 4. Extract the table name from the last element -5. DELETE `/v1/{prefix}/namespaces/{namespace}/tables/{table}?purgeRequested=false` +5. Load the table metadata, then DELETE `/v1/{prefix}/namespaces/{namespace}/tables/{table}?purgeRequested=false` +6. Return the table id, location, and Iceberg table properties **Error Handling:** diff --git a/docs/src/polaris.md b/docs/src/polaris.md index 9c7bd3f..2f4951c 100644 --- a/docs/src/polaris.md +++ b/docs/src/polaris.md @@ -48,7 +48,7 @@ The **table location** is stored in the `base-location` field of the Generic Tab ## Lance Table Identification -A table in Polaris is identified as a Lance table when it is a Generic Table with `format` set to `lance`. The `base-location` must point to a valid Lance table root directory. The table `properties` should contain `table_type=lance` for consistency with other catalog implementations. +A table in Polaris is identified as a Lance table when it is a Generic Table with `format` set to `lance`. The `base-location` may be declared before a Lance dataset exists. The table `properties` should contain `table_type=lance` for consistency with other catalog implementations. ## Basic Operations @@ -140,7 +140,7 @@ The implementation: - `doc`: optional description from properties - `properties`: table properties including `table_type=lance` 4. POST to `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables` -5. Return the created table location and properties +5. Return the created table location, table properties, and `managed_versioning=false` **Error Handling:** @@ -159,7 +159,8 @@ The implementation: 1. Parse the namespace identifier to extract the catalog (first level) and namespace path 2. Validate that at least 2 levels are provided (catalog + namespace) 3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables` -4. Extract table names from the response identifiers +4. When `include_declared=true` or unset, extract table names from the response identifiers +5. When `include_declared=false`, load each generic table and only include entries whose `base-location` can be opened as a Lance dataset **Error Handling:** @@ -169,7 +170,7 @@ If the server returns an error, return error code `18` (Internal). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Polaris table properties as `properties`, and `managed_versioning=false`. The implementation: @@ -177,7 +178,8 @@ The implementation: 2. Validate that at least 3 levels are provided (catalog + namespace + table) 3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}` 4. Verify the table format is `lance` -5. Return the table location from `base-location` and storage_options from `properties` +5. Return the table location from `base-location` and Polaris table properties +6. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -195,7 +197,8 @@ The implementation: 1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level) 2. Validate that at least 3 levels are provided (catalog + namespace + table) -3. DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}` +3. Load the generic table, then DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}` +4. Return the table id, location, and Polaris table properties **Error Handling:** diff --git a/docs/src/unity.md b/docs/src/unity.md index a8199dd..39bab4e 100644 --- a/docs/src/unity.md +++ b/docs/src/unity.md @@ -46,7 +46,7 @@ The **table location** is stored in the `storage_location` field of the Unity Ta ## Lance Table Identification -A table in Unity Catalog is identified as a Lance table when it meets the following criteria: the `table_type` is `EXTERNAL`, and the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `storage_location` must point to a valid Lance table root directory. +A table in Unity Catalog is identified as a Lance table when it meets the following criteria: the `table_type` is `EXTERNAL`, and the `properties` map contains a key `table_type` with value `lance` (case insensitive). The `storage_location` may be declared before a Lance dataset exists; storage is checked only for `include_declared=false` listing or `check_declared=true` describe requests. Note: Unity Catalog does not natively recognize the `LANCE` data source format, so `data_source_format` is set to `TEXT` as a generic format for external tables. The actual format is determined by the `table_type=lance` property. @@ -135,7 +135,7 @@ The implementation: - `storage_location`: the specified or default location - `properties`: including `table_type=lance` 3. POST to `/tables` endpoint -4. Return the created table location and properties +4. Return the created table location, table properties, and `managed_versioning=false` **Error Handling:** @@ -154,7 +154,8 @@ The implementation: 1. Parse the namespace identifier (must be 2-level: catalog.schema) 2. GET `/tables` with catalog_name and schema_name parameters 3. Filter tables where `properties.table_type=lance` -4. Sort the results +4. If `include_declared=false`, only include catalog entries whose `storage_location` can be opened as a Lance dataset +5. Sort the results **Error Handling:** @@ -164,14 +165,15 @@ If the server returns an error, return error code `18` (Internal). ### DescribeTable -Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. The response includes the table location, Unity table properties as `properties`, and `managed_versioning=false`. The implementation: 1. Parse the table identifier (must be 3-level: catalog.schema.table) 2. GET `/tables/{catalog}.{schema}.{table}` 3. Verify the table is a Lance table (check `properties.table_type=lance`) -4. Return the table location from `storage_location` and storage_options from `properties` +4. Return the table location from `storage_location` and Unity table properties +5. If `check_declared=true`, set `is_only_declared=true` when the location cannot be opened as a Lance dataset **Error Handling:** @@ -190,6 +192,7 @@ The implementation: 1. Parse the table identifier (must be 3-level: catalog.schema.table) 2. GET the table and verify it is a Lance table 3. DELETE `/tables/{catalog}.{schema}.{table}` +4. Return the table id, location, and Unity table properties **Error Handling:** diff --git a/java/lance-namespace-glue/pom.xml b/java/lance-namespace-glue/pom.xml index 04cd34d..22cbfbb 100644 --- a/java/lance-namespace-glue/pom.xml +++ b/java/lance-namespace-glue/pom.xml @@ -22,6 +22,12 @@ + + org.lance + lance-namespace-impls-core + ${project.version} + + software.amazon.awssdk glue diff --git a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java index 82af17e..bb3f7b5 100644 --- a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java +++ b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java @@ -36,6 +36,7 @@ import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; +import org.lance.namespace.util.LanceTableUtil; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -221,7 +222,10 @@ public ListTablesResponse listTables(ListTablesRequest request) { GetTablesResponse response = glueClient.getTables( listRequest.maxResults(fetchSize).nextToken(glueNextToken).build()); - response.tableList().stream().filter(this::isLanceTable).forEach(t -> tables.add(t.name())); + response.tableList().stream() + .filter(this::isLanceTable) + .filter(t -> shouldIncludeTable(t, request.getIncludeDeclared())) + .forEach(t -> tables.add(t.name())); glueNextToken = response.nextToken(); remaining = pageSize - tables.size(); } while (glueNextToken != null && remaining > 0); @@ -253,6 +257,12 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { response.setLocation(table.storageDescriptor().location()); } response.setStorageOptions(config.getStorageOptions()); + response.setProperties(table.parameters()); + response.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + response.setIsOnlyDeclared( + LanceTableUtil.isOnlyDeclared(response.getLocation(), config.getStorageOptions())); + } return response; } @@ -298,9 +308,11 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { } try { - Map params = Maps.newHashMap(); - params.put(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); - params.put(MANAGED_BY_PROP, STORAGE_VALUE); + Map params = + LanceTableUtil.mergeTableProperties( + request.getProperties(), + ImmutableMap.of( + TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE, MANAGED_BY_PROP, STORAGE_VALUE)); TableInput tableInput = TableInput.builder() @@ -320,6 +332,8 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { DeclareTableResponse response = new DeclareTableResponse(); response.setLocation(location); response.setStorageOptions(config.getStorageOptions()); + response.setProperties(params); + response.setManagedVersioning(false); return response; } catch (AlreadyExistsException e) { throw GlueToLanceErrorConverter.tableConflict( @@ -522,6 +536,15 @@ private boolean isLanceTable(Table table) { return LANCE_TABLE_TYPE_VALUE.equalsIgnoreCase(table.parameters().get(TABLE_TYPE_PROP)); } + private boolean shouldIncludeTable(Table table, Boolean includeDeclared) { + if (LanceTableUtil.includeDeclared(includeDeclared)) { + return true; + } + String location = + table.storageDescriptor() != null ? table.storageDescriptor().location() : null; + return LanceTableUtil.hasStorageComponents(location, config.getStorageOptions()); + } + private void ensureLanceTable(Table table) { if (!isLanceTable(table)) { throw new TableNotFoundException( diff --git a/java/lance-namespace-hive2/pom.xml b/java/lance-namespace-hive2/pom.xml index be0e29d..ca717fe 100644 --- a/java/lance-namespace-hive2/pom.xml +++ b/java/lance-namespace-hive2/pom.xml @@ -207,7 +207,6 @@ org.lance lance-namespace-impls-core ${project.version} - test org.junit.jupiter @@ -222,4 +221,4 @@ test - \ No newline at end of file + diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java index 6f19192..f61ef01 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java @@ -42,6 +42,7 @@ import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; +import org.lance.namespace.util.LanceTableUtil; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -231,7 +232,7 @@ public ListTablesResponse listTables(ListTablesRequest request) { !nsId.isRoot() && nsId.levels() == 1, "Expect a 1-level namespace but get %s", nsId); String db = nsId.levelAtListPos(0).toLowerCase(); - List tables = doListTables(db); + List tables = doListTables(db, request.getIncludeDeclared()); Collections.sort(tables); PageUtil.Page page = @@ -278,17 +279,33 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { ValidationUtil.checkArgument( tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId); - Optional location = doDescribeTable(tableId); + String db = tableId.levelAtListPos(0).toLowerCase(); + String table = tableId.levelAtListPos(1).toLowerCase(); + Optional hmsTable = Hive2Util.getTable(clientPool, db, table); - if (!location.isPresent()) { + if (!hmsTable.isPresent()) { throw new TableNotFoundException( String.format("Table does not exist: %s", tableId.stringStyleId()), TableNotFound.getType(), tableId.stringStyleId()); } + Hive2Util.validateLanceTable(hmsTable.get()); + String location = hmsTable.get().getSd() != null ? hmsTable.get().getSd().getLocation() : null; + if (location == null || location.isEmpty()) { + throw new TableNotFoundException( + String.format("Table does not have a location: %s", tableId.stringStyleId()), + TableNotFound.getType(), + tableId.stringStyleId()); + } + DescribeTableResponse response = new DescribeTableResponse(); - response.setLocation(location.get()); + response.setLocation(location); + response.setProperties(hmsTable.get().getParameters()); + response.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + response.setIsOnlyDeclared(LanceTableUtil.isOnlyDeclared(location, Collections.emptyMap())); + } return response; } @@ -306,11 +323,15 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { location = getDefaultTableLocation(tableId.levelAtListPos(0), tableId.levelAtListPos(1)); } + Map properties = Hive2Util.createLanceTableParams(request.getProperties()); + // Create table in metastore without data (pass null for requestData) - doCreateTable(tableId, null, location, null, null); + doCreateTable(tableId, null, location, properties, null); DeclareTableResponse response = new DeclareTableResponse(); response.setLocation(location); + response.setProperties(properties); + response.setManagedVersioning(false); return response; } @@ -504,7 +525,7 @@ protected void doCreateTable( } } - protected List doListTables(String db) { + protected List doListTables(String db, Boolean includeDeclared) { try { // First validate that database exists Database database = Hive2Util.getDatabaseOrNull(clientPool, db); @@ -521,7 +542,13 @@ protected List doListTables(String db) { Optional
table = Hive2Util.getTable(clientPool, db, tableName); if (table.isPresent()) { Map params = table.get().getParameters(); - if (params != null && "lance".equalsIgnoreCase(params.get("table_type"))) { + boolean isLanceTable = + params != null && "lance".equalsIgnoreCase(params.get("table_type")); + if (isLanceTable + && (LanceTableUtil.includeDeclared(includeDeclared) + || LanceTableUtil.hasStorageComponents( + table.get().getSd() != null ? table.get().getSd().getLocation() : null, + Collections.emptyMap()))) { lanceTables.add(tableName); } } @@ -592,7 +619,7 @@ protected Map doDropNamespace(ObjectIdentifier id, String mode, // Check if database contains tables (RESTRICT behavior only, not for Cascade) boolean cascade = "Cascade".equalsIgnoreCase(behavior); if (!cascade) { - List tables = doListTables(db); + List tables = doListTables(db, true); if (!tables.isEmpty()) { throw new InvalidInputException( String.format( diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java index 23c5374..3d82220 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java @@ -123,7 +123,7 @@ public static Map createLanceTableParams(Map pro params.putAll(properties); } params.put("table_type", "lance"); - params.putIfAbsent("managed_by", "storage"); + params.put("managed_by", "storage"); return params; } } diff --git a/java/lance-namespace-hive3/pom.xml b/java/lance-namespace-hive3/pom.xml index 1523146..95627d2 100644 --- a/java/lance-namespace-hive3/pom.xml +++ b/java/lance-namespace-hive3/pom.xml @@ -122,7 +122,6 @@ org.lance lance-namespace-impls-core ${project.version} - test org.junit.jupiter @@ -135,4 +134,4 @@ test - \ No newline at end of file + diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java index 15f02bb..3a69698 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java @@ -43,6 +43,7 @@ import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; +import org.lance.namespace.util.LanceTableUtil; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -270,7 +271,7 @@ public ListTablesResponse listTables(ListTablesRequest request) { String catalog = nsId.levelAtListPos(0).toLowerCase(); String db = nsId.levelAtListPos(1).toLowerCase(); - List tables = doListTables(catalog, db); + List tables = doListTables(catalog, db, request.getIncludeDeclared()); Collections.sort(tables); PageUtil.Page page = @@ -295,15 +296,30 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { ValidationUtil.checkArgument( tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId); - Optional location = doDescribeTable(tableId); + String catalog = tableId.levelAtListPos(0).toLowerCase(); + String db = tableId.levelAtListPos(1).toLowerCase(); + String table = tableId.levelAtListPos(2).toLowerCase(); + Optional
hmsTable = Hive3Util.getTable(clientPool, catalog, db, table); - if (!location.isPresent()) { + if (!hmsTable.isPresent()) { throw new TableNotFoundException( String.format("Table does not exist: %s", tableId.stringStyleId())); } + Hive3Util.validateLanceTable(hmsTable.get()); + String location = hmsTable.get().getSd() != null ? hmsTable.get().getSd().getLocation() : null; + if (location == null || location.isEmpty()) { + throw new TableNotFoundException( + String.format("Table does not have a location: %s", tableId.stringStyleId())); + } + DescribeTableResponse response = new DescribeTableResponse(); - response.setLocation(location.get()); + response.setLocation(location); + response.setProperties(hmsTable.get().getParameters()); + response.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + response.setIsOnlyDeclared(LanceTableUtil.isOnlyDeclared(location, Collections.emptyMap())); + } return response; } @@ -323,11 +339,15 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { tableId.levelAtListPos(0), tableId.levelAtListPos(1), tableId.levelAtListPos(2)); } - // Create table in metastore without data (pass null for requestData and properties) - doCreateTable(tableId, null, location, null, null); + Map properties = Hive3Util.createLanceTableParams(request.getProperties()); + + // Create table in metastore without data (pass null for requestData) + doCreateTable(tableId, null, location, properties, null); DeclareTableResponse response = new DeclareTableResponse(); response.setLocation(location); + response.setProperties(properties); + response.setManagedVersioning(false); return response; } @@ -556,7 +576,7 @@ protected void doCreateTable( } } - protected List doListTables(String catalog, String db) { + protected List doListTables(String catalog, String db, Boolean includeDeclared) { try { // First validate that catalog and database exist Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog); @@ -578,7 +598,13 @@ protected List doListTables(String catalog, String db) { Optional
table = Hive3Util.getTable(clientPool, catalog, db, tableName); if (table.isPresent()) { Map params = table.get().getParameters(); - if (params != null && "lance".equalsIgnoreCase(params.get("table_type"))) { + boolean isLanceTable = + params != null && "lance".equalsIgnoreCase(params.get("table_type")); + if (isLanceTable + && (LanceTableUtil.includeDeclared(includeDeclared) + || LanceTableUtil.hasStorageComponents( + table.get().getSd() != null ? table.get().getSd().getLocation() : null, + Collections.emptyMap()))) { lanceTables.add(tableName); } } @@ -708,7 +734,7 @@ private Map doDropDatabase( // Check if database contains tables (RESTRICT behavior only, not for Cascade) boolean cascade = "Cascade".equalsIgnoreCase(behavior); if (!cascade) { - List tables = doListTables(catalog, db); + List tables = doListTables(catalog, db, true); if (!tables.isEmpty()) { throw new InvalidInputException( String.format( diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java index b77eb89..9ffff4c 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java @@ -174,6 +174,7 @@ public static Map createLanceTableParams(Map pro params.putAll(properties); } params.put("table_type", "lance"); + params.put("managed_by", "storage"); return params; } } diff --git a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java index ca7038c..33ade28 100644 --- a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java +++ b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java @@ -40,6 +40,7 @@ import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.LanceTableUtil; import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; @@ -325,7 +326,8 @@ public ListTablesResponse listTables(ListTablesRequest request) { List tables = new ArrayList<>(); if (response != null && response.getIdentifiers() != null) { for (IcebergModels.TableIdentifier tableId : response.getIdentifiers()) { - if (isLanceTable(prefix, namespace, tableId.getName())) { + if (shouldIncludeTable( + prefix, namespace, tableId.getName(), request.getIncludeDeclared())) { tables.add(tableId.getName()); } } @@ -369,8 +371,9 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { createRequest.setLocation(tablePath); createRequest.setSchema(IcebergModels.createDummySchema()); - Map properties = new HashMap<>(); - properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE); + Map properties = + LanceTableUtil.mergeTableProperties( + request.getProperties(), Collections.singletonMap(TABLE_TYPE_KEY, TABLE_TYPE_LANCE)); createRequest.setProperties(properties); String namespacePath = encodeNamespace(namespace); @@ -383,6 +386,8 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { DeclareTableResponse result = new DeclareTableResponse(); result.setLocation(tablePath); + result.setProperties(properties); + result.setManagedVersioning(false); return result; } catch (RestClientException e) { if (e.isConflict()) { @@ -435,7 +440,13 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { DescribeTableResponse result = new DescribeTableResponse(); result.setLocation(response.getMetadata().getLocation()); - result.setStorageOptions(props); + result.setProperties(props); + result.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + result.setIsOnlyDeclared( + LanceTableUtil.isOnlyDeclared( + response.getMetadata().getLocation(), Collections.emptyMap())); + } return result; } catch (RestClientException e) { if (e.isNotFound()) { @@ -470,17 +481,27 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName, IcebergModels.LoadTableResponse.class); - String location = null; - if (getResponse != null && getResponse.getMetadata() != null) { - location = getResponse.getMetadata().getLocation(); + if (getResponse == null || getResponse.getMetadata() == null) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); + } + Map properties = getResponse.getMetadata().getProperties(); + if (properties == null + || !TABLE_TYPE_LANCE.equalsIgnoreCase(properties.get(TABLE_TYPE_KEY))) { + throw new InvalidInputException( + String.format( + "Table %s is not a Lance table (missing table_type property)", + tableId.stringStyleId())); } + String location = getResponse.getMetadata().getLocation(); restClient.delete( prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName); LOG.info("Deregistered table: {}", tableId.stringStyleId()); DeregisterTableResponse result = new DeregisterTableResponse(); + result.setId(request.getId()); result.setLocation(location); + result.setProperties(properties); return result; } catch (RestClientException e) { if (e.isNotFound()) { @@ -513,7 +534,8 @@ private String urlEncode(String s) { } } - private boolean isLanceTable(String prefix, List namespace, String tableName) { + private boolean shouldIncludeTable( + String prefix, List namespace, String tableName, Boolean includeDeclared) { try { String prefixPath = getPrefixPath(prefix); String namespacePath = encodeNamespace(namespace); @@ -526,7 +548,12 @@ private boolean isLanceTable(String prefix, List namespace, String table if (response != null && response.getMetadata() != null) { Map props = response.getMetadata().getProperties(); - return props != null && TABLE_TYPE_LANCE.equalsIgnoreCase(props.get(TABLE_TYPE_KEY)); + if (props == null || !TABLE_TYPE_LANCE.equalsIgnoreCase(props.get(TABLE_TYPE_KEY))) { + return false; + } + return LanceTableUtil.includeDeclared(includeDeclared) + || LanceTableUtil.hasStorageComponents( + response.getMetadata().getLocation(), Collections.emptyMap()); } } catch (Exception e) { LOG.debug("Failed to check if table is Lance table: {}", e.getMessage()); diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java new file mode 100644 index 0000000..eec729d --- /dev/null +++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/LanceTableUtil.java @@ -0,0 +1,61 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.util; + +import org.lance.Dataset; +import org.lance.ReadOptions; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** Utility methods for Lance table metadata shared by namespace implementations. */ +public final class LanceTableUtil { + + private LanceTableUtil() {} + + public static Map mergeTableProperties( + Map properties, Map requiredProperties) { + Map result = new HashMap<>(); + if (properties != null) { + result.putAll(properties); + } + if (requiredProperties != null) { + result.putAll(requiredProperties); + } + return result; + } + + public static boolean includeDeclared(Boolean includeDeclared) { + return includeDeclared == null || includeDeclared; + } + + public static boolean isOnlyDeclared(String location, Map storageOptions) { + return !hasStorageComponents(location, storageOptions); + } + + public static boolean hasStorageComponents(String location, Map storageOptions) { + if (location == null || location.isEmpty()) { + return false; + } + + Map options = storageOptions != null ? storageOptions : Collections.emptyMap(); + ReadOptions readOptions = new ReadOptions.Builder().setStorageOptions(options).build(); + try (Dataset ignored = Dataset.open(location, readOptions)) { + return true; + } catch (RuntimeException | LinkageError e) { + return false; + } + } +} diff --git a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java index 21f63a3..b359145 100644 --- a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java +++ b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java @@ -40,6 +40,7 @@ import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.LanceTableUtil; import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; @@ -50,7 +51,6 @@ import java.io.Closeable; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -286,8 +286,10 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { List namespaceParts = parts.subList(1, parts.size() - 1); String namespacePath = String.join(".", namespaceParts); - Map properties = new HashMap<>(); - properties.put(TABLE_TYPE_KEY, TABLE_FORMAT_LANCE); + Map properties = + LanceTableUtil.mergeTableProperties( + request.getProperties(), + Collections.singletonMap(TABLE_TYPE_KEY, TABLE_FORMAT_LANCE)); String comment = null; PolarisModels.CreateGenericTableRequest tableRequest = @@ -303,7 +305,16 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { LOG.info("Created Lance table: {}.{}.{}", catalog, namespacePath, tableName); DeclareTableResponse result = new DeclareTableResponse(); - result.setLocation(response.getTable().getBaseLocation()); + PolarisModels.GenericTable responseTable = response != null ? response.getTable() : null; + result.setLocation( + responseTable != null && responseTable.getBaseLocation() != null + ? responseTable.getBaseLocation() + : request.getLocation()); + result.setProperties( + responseTable != null && responseTable.getProperties() != null + ? responseTable.getProperties() + : properties); + result.setManagedVersioning(false); return result; } catch (RestClientException e) { if (e.isConflict()) { @@ -352,7 +363,12 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { DescribeTableResponse result = new DescribeTableResponse(); result.setLocation(table.getBaseLocation()); - result.setStorageOptions(table.getProperties()); + result.setProperties(table.getProperties()); + result.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + result.setIsOnlyDeclared( + LanceTableUtil.isOnlyDeclared(table.getBaseLocation(), Collections.emptyMap())); + } return result; } catch (RestClientException e) { if (e.isNotFound()) { @@ -383,7 +399,10 @@ public ListTablesResponse listTables(ListTablesRequest request) { Set tableNames = new LinkedHashSet<>(); if (response.getIdentifiers() != null) { for (PolarisModels.TableIdentifier id : response.getIdentifiers()) { - tableNames.add(id.getName()); + if (shouldIncludeTable( + catalog, namespacePath, id.getName(), request.getIncludeDeclared())) { + tableNames.add(id.getName()); + } } } result.setTables(tableNames); @@ -419,7 +438,18 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { + tableName, PolarisModels.LoadGenericTableResponse.class); - String location = getResponse.getTable().getBaseLocation(); + PolarisModels.GenericTable table = getResponse != null ? getResponse.getTable() : null; + if (table == null) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); + } + if (!TABLE_FORMAT_LANCE.equalsIgnoreCase(table.getFormat())) { + throw new InvalidInputException( + String.format( + "Table %s is not a Lance table (format: %s)", + tableId.stringStyleId(), table.getFormat())); + } + + String location = table.getBaseLocation(); restClient.delete( "/polaris/v1/" + catalog @@ -429,7 +459,9 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { + tableName); DeregisterTableResponse result = new DeregisterTableResponse(); + result.setId(request.getId()); result.setLocation(location); + result.setProperties(table.getProperties()); return result; } catch (RestClientException e) { if (e.isNotFound()) { @@ -445,4 +477,31 @@ public void close() throws IOException { restClient.close(); } } + + private boolean shouldIncludeTable( + String catalog, String namespacePath, String tableName, Boolean includeDeclared) { + if (LanceTableUtil.includeDeclared(includeDeclared)) { + return true; + } + + try { + PolarisModels.LoadGenericTableResponse response = + restClient.get( + "/polaris/v1/" + + catalog + + "/namespaces/" + + namespacePath + + "/generic-tables/" + + tableName, + PolarisModels.LoadGenericTableResponse.class); + PolarisModels.GenericTable table = response.getTable(); + if (table == null || !TABLE_FORMAT_LANCE.equalsIgnoreCase(table.getFormat())) { + return false; + } + return LanceTableUtil.hasStorageComponents(table.getBaseLocation(), Collections.emptyMap()); + } catch (Exception e) { + LOG.debug("Failed to check if table is Lance table: {}", e.getMessage()); + return false; + } + } } diff --git a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java index 71f4c30..cab3e9c 100644 --- a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java +++ b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java @@ -54,6 +54,7 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -317,6 +318,14 @@ public void testListTables() throws IOException { assertThat(response.getTables()).hasSize(2); assertThat(response.getTables()).contains("table1", "table2"); + verify(restClient, never()) + .get( + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/table1"), + eq(PolarisModels.LoadGenericTableResponse.class)); + verify(restClient, never()) + .get( + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/table2"), + eq(PolarisModels.LoadGenericTableResponse.class)); } @Test diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java index faba9d9..1dd9cb3 100644 --- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java +++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java @@ -40,6 +40,7 @@ import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.LanceTableUtil; import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; @@ -55,7 +56,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -288,7 +288,7 @@ public ListTablesResponse listTables(ListTablesRequest request) { if (unityResponse != null && unityResponse.getTables() != null) { tables = unityResponse.getTables().stream() - .filter(this::isLanceTable) + .filter(t -> shouldIncludeTable(t, request.getIncludeDeclared())) .map(UnityModels.TableInfo::getName) .collect(Collectors.toList()); } @@ -345,8 +345,9 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { createTable.setColumns(columns); createTable.setStorageLocation(tablePath); - Map properties = new HashMap<>(); - properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE); + Map properties = + LanceTableUtil.mergeTableProperties( + request.getProperties(), Collections.singletonMap(TABLE_TYPE_KEY, TABLE_TYPE_LANCE)); createTable.setProperties(properties); UnityModels.TableInfo tableInfo = @@ -354,6 +355,11 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { DeclareTableResponse response = new DeclareTableResponse(); response.setLocation(tablePath); + response.setProperties( + tableInfo != null && tableInfo.getProperties() != null + ? tableInfo.getProperties() + : properties); + response.setManagedVersioning(false); return response; } catch (RestClientException e) { @@ -396,7 +402,12 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { DescribeTableResponse response = new DescribeTableResponse(); response.setLocation(tableInfo.getStorageLocation()); - response.setStorageOptions(tableInfo.getProperties()); + response.setProperties(tableInfo.getProperties()); + response.setManagedVersioning(false); + if (Boolean.TRUE.equals(request.getCheckDeclared())) { + response.setIsOnlyDeclared( + LanceTableUtil.isOnlyDeclared(tableInfo.getStorageLocation(), Collections.emptyMap())); + } return response; } catch (RestClientException e) { @@ -440,7 +451,9 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { restClient.delete("/tables/" + fullName); DeregisterTableResponse response = new DeregisterTableResponse(); + response.setId(request.getId()); response.setLocation(location); + response.setProperties(tableInfo.getProperties()); return response; } catch (RestClientException e) { @@ -466,6 +479,13 @@ private boolean isLanceTable(UnityModels.TableInfo tableInfo) { return TABLE_TYPE_LANCE.equalsIgnoreCase(tableType); } + private boolean shouldIncludeTable(UnityModels.TableInfo tableInfo, Boolean includeDeclared) { + return isLanceTable(tableInfo) + && (LanceTableUtil.includeDeclared(includeDeclared) + || LanceTableUtil.hasStorageComponents( + tableInfo.getStorageLocation(), Collections.emptyMap())); + } + private List convertArrowSchemaToUnityColumns(Schema arrowSchema) { List columns = new ArrayList<>(); for (Field field : arrowSchema.getFields()) { diff --git a/java/pom.xml b/java/pom.xml index 43de8e7..aecd3b9 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -58,6 +58,7 @@ UTF-8 2.0.0 + 0.7.2 15.0.0 4.1.118.Final 5.8.2 @@ -96,6 +97,11 @@ + + org.lance + lance-namespace-core + ${lance-namespace.version} + org.lance lance-core diff --git a/python/pyproject.toml b/python/pyproject.toml index 1ced000..363c3b6 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -12,7 +12,8 @@ requires-python = ">=3.10" dependencies = [ "pylance>=0.26.0", - "lance-namespace-urllib3-client>=0.4.2", + "lance-namespace>=0.7.2", + "lance-namespace-urllib3-client>=0.7.2", "pyarrow>=15.0.0", "typing-extensions>=4.5.0", ] diff --git a/python/src/lance_namespace_impls/__init__.py b/python/src/lance_namespace_impls/__init__.py index ceacc6f..ddf1dd8 100644 --- a/python/src/lance_namespace_impls/__init__.py +++ b/python/src/lance_namespace_impls/__init__.py @@ -18,6 +18,7 @@ - NamespaceException: Base exception for namespace operations """ +from lance_namespace import register_namespace_impl from lance_namespace_impls.glue import GlueNamespace from lance_namespace_impls.hive2 import Hive2Namespace from lance_namespace_impls.hive3 import Hive3Namespace @@ -36,6 +37,13 @@ InternalException, ) +register_namespace_impl("glue", "lance_namespace_impls.glue.GlueNamespace") +register_namespace_impl("hive2", "lance_namespace_impls.hive2.Hive2Namespace") +register_namespace_impl("hive3", "lance_namespace_impls.hive3.Hive3Namespace") +register_namespace_impl("iceberg", "lance_namespace_impls.iceberg.IcebergNamespace") +register_namespace_impl("polaris", "lance_namespace_impls.polaris.PolarisNamespace") +register_namespace_impl("unity", "lance_namespace_impls.unity.UnityNamespace") + __all__ = [ "GlueNamespace", "Hive2Namespace", diff --git a/python/src/lance_namespace_impls/glue.py b/python/src/lance_namespace_impls/glue.py index 21c518f..dca7dda 100644 --- a/python/src/lance_namespace_impls/glue.py +++ b/python/src/lance_namespace_impls/glue.py @@ -14,7 +14,7 @@ Config = None HAS_BOTO3 = False -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, ListNamespacesResponse, @@ -35,6 +35,12 @@ ) from lance_namespace_impls.rest_client import InvalidInputException +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) LANCE_TABLE_TYPE = "LANCE" TABLE_TYPE = "table_type" @@ -328,8 +334,9 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: response = self.glue.get_tables(DatabaseName=database_name) for table in response.get("TableList", []): - # Only include Lance tables - if self._is_lance_table(table): + if self._should_include_lance_table( + table, request.include_declared + ): tables.append(table["Name"]) next_token = response.get("NextToken") @@ -367,8 +374,15 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse f"Table has no location: {database_name}.{table_name}" ) + properties = table.get("Parameters", {}) return DescribeTableResponse( - location=location, storage_options=self.config.storage_options + location=location, + storage_options=self.config.storage_options, + properties=properties, + managed_versioning=False, + is_only_declared=is_only_declared(location, self.config.storage_options) + if request.check_declared + else None, ) except Exception as e: error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" @@ -407,14 +421,20 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: } ] + properties = merge_table_properties( + request.properties, + { + TABLE_TYPE: LANCE_TABLE_TYPE, + "managed_by": "storage", + "empty_table": "true", + }, + ) + # Create Glue table entry without creating actual Lance dataset table_input = { "Name": table_name, "TableType": EXTERNAL_TABLE, - "Parameters": { - TABLE_TYPE: LANCE_TABLE_TYPE, - "empty_table": "true", # Mark as empty table - }, + "Parameters": properties, "StorageDescriptor": { "Location": table_location, "Columns": glue_columns, @@ -435,7 +455,12 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: ) raise RuntimeError(f"Failed to declare table: {e}") - return DeclareTableResponse(location=table_location) + return DeclareTableResponse( + location=table_location, + storage_options=self.config.storage_options, + properties=properties, + managed_versioning=False, + ) def deregister_table( self, request: DeregisterTableRequest @@ -444,9 +469,22 @@ def deregister_table( database_name, table_name = self._parse_table_identifier(request.id) try: + table = self.glue.get_table(DatabaseName=database_name, Name=table_name)[ + "Table" + ] + if not self._is_lance_table(table): + raise RuntimeError( + f"Table is not a Lance table: {database_name}.{table_name}" + ) + # Only remove from Glue catalog, don't delete the Lance dataset self.glue.delete_table(DatabaseName=database_name, Name=table_name) - return DeregisterTableResponse() + location = table.get("StorageDescriptor", {}).get("Location") + return DeregisterTableResponse( + id=request.id, + location=location, + properties=table.get("Parameters", {}), + ) except Exception as e: error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" if error_name == "EntityNotFoundException": @@ -470,6 +508,17 @@ def _is_lance_table(self, glue_table: Dict[str, Any]) -> bool: == LANCE_TABLE_TYPE ) + def _should_include_lance_table( + self, glue_table: Dict[str, Any], include_declared_value: Optional[bool] + ) -> bool: + """Check if a Glue table is Lance and matches include_declared.""" + if not self._is_lance_table(glue_table): + return False + if include_declared(include_declared_value): + return True + location = glue_table.get("StorageDescriptor", {}).get("Location") + return has_storage_components(location, self.config.storage_options) + def __getstate__(self): """Prepare instance for pickling by excluding unpickleable objects.""" state = self.__dict__.copy() diff --git a/python/src/lance_namespace_impls/hive2.py b/python/src/lance_namespace_impls/hive2.py index 1e7bdcc..8eeac27 100644 --- a/python/src/lance_namespace_impls/hive2.py +++ b/python/src/lance_namespace_impls/hive2.py @@ -62,7 +62,7 @@ InvalidOperationException = None MetaException = None -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, ListNamespacesResponse, @@ -85,6 +85,12 @@ ) from lance_namespace_impls.rest_client import InvalidInputException +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) logger = logging.getLogger(__name__) @@ -351,8 +357,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: TABLE_TYPE_KEY, "" ).lower() if table_type == LANCE_TABLE_FORMAT: - # Return just table name, not full identifier - tables.append(table_name) + location = table.sd.location if table.sd else None + if include_declared( + request.include_declared + ) or has_storage_components(location): + tables.append(table_name) except Exception: # Skip tables we can't read continue @@ -367,7 +376,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: """Describe a table in the Hive Metastore. - Only load_detailed_metadata=false is supported. Returns location only. + Only load_detailed_metadata=false is supported. """ if request.load_detailed_metadata: raise ValueError( @@ -392,7 +401,14 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse if not location: raise ValueError(f"Table {request.id} has no location") - return DescribeTableResponse(location=location) + return DescribeTableResponse( + location=location, + properties=table.parameters or {}, + managed_versioning=False, + is_only_declared=is_only_declared(location) + if request.check_declared + else None, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): raise ValueError(f"Table {request.id} does not exist") @@ -417,7 +433,10 @@ def drop_table(self, request: DropTableRequest) -> DropTableResponse: client.drop_table(database, table_name, deleteData=True) - return DropTableResponse(location=location) + return DropTableResponse( + location=location, + properties=table.parameters or {}, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): raise ValueError(f"Table {request.id} does not exist") @@ -444,7 +463,11 @@ def deregister_table( client.drop_table(database, table_name, deleteData=False) - return DeregisterTableResponse(location=location) + return DeregisterTableResponse( + id=request.id, + location=location, + properties=table.parameters or {}, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): raise ValueError(f"Table {request.id} does not exist") @@ -485,11 +508,14 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: ) # Set table parameters to identify it as Lance table - parameters = { - TABLE_TYPE_KEY: "LANCE", - MANAGED_BY_KEY: "storage", - "empty_table": "true", # Mark as empty table - } + parameters = merge_table_properties( + request.properties, + { + TABLE_TYPE_KEY: LANCE_TABLE_FORMAT, + MANAGED_BY_KEY: "storage", + "empty_table": "true", + }, + ) hive_table = HiveTable( tableName=table_name, @@ -503,7 +529,11 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: with self.client as client: client.create_table(hive_table) - return DeclareTableResponse(location=location) + return DeclareTableResponse( + location=location, + properties=parameters, + managed_versioning=False, + ) except AlreadyExistsException: raise ValueError(f"Table {request.id} already exists") diff --git a/python/src/lance_namespace_impls/hive3.py b/python/src/lance_namespace_impls/hive3.py index 2680fc3..7395d71 100644 --- a/python/src/lance_namespace_impls/hive3.py +++ b/python/src/lance_namespace_impls/hive3.py @@ -64,7 +64,7 @@ InvalidOperationException = None MetaException = None -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, ListNamespacesResponse, @@ -87,6 +87,12 @@ ) from lance_namespace_impls.rest_client import InvalidInputException +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) logger = logging.getLogger(__name__) @@ -433,7 +439,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: TABLE_TYPE_KEY, "" ).lower() if table_type == LANCE_TABLE_FORMAT: - tables.append(table_name) + location = table.sd.location if table.sd else None + if include_declared( + request.include_declared + ) or has_storage_components(location): + tables.append(table_name) except Exception: continue @@ -448,7 +458,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: """Describe a table. - Only load_detailed_metadata=false is supported. Returns location only. + Only load_detailed_metadata=false is supported. """ if request.load_detailed_metadata: raise ValueError( @@ -471,7 +481,14 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse if not location: raise ValueError(f"Table {request.id} has no location") - return DescribeTableResponse(location=location) + return DescribeTableResponse( + location=location, + properties=table.parameters or {}, + managed_versioning=False, + is_only_declared=is_only_declared(location) + if request.check_declared + else None, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): @@ -497,7 +514,10 @@ def drop_table(self, request: DropTableRequest) -> DropTableResponse: client.drop_table(database, table_name, deleteData=True) - return DropTableResponse(location=location) + return DropTableResponse( + location=location, + properties=table.parameters or {}, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): @@ -525,7 +545,11 @@ def deregister_table( client.drop_table(database, table_name, deleteData=False) - return DeregisterTableResponse(location=location) + return DeregisterTableResponse( + id=request.id, + location=location, + properties=table.parameters or {}, + ) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): @@ -561,11 +585,14 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: ), ) - parameters = { - TABLE_TYPE_KEY: LANCE_TABLE_FORMAT, - MANAGED_BY_KEY: "storage", - "empty_table": "true", - } + parameters = merge_table_properties( + request.properties, + { + TABLE_TYPE_KEY: LANCE_TABLE_FORMAT, + MANAGED_BY_KEY: "storage", + "empty_table": "true", + }, + ) hive_table = HiveTable( tableName=table_name, @@ -578,7 +605,11 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: with self.client as client: client.create_table(hive_table) - return DeclareTableResponse(location=location) + return DeclareTableResponse( + location=location, + properties=parameters, + managed_versioning=False, + ) except AlreadyExistsException: raise ValueError(f"Table {request.id} already exists") diff --git a/python/src/lance_namespace_impls/iceberg.py b/python/src/lance_namespace_impls/iceberg.py index 89eaf12..08c5795 100644 --- a/python/src/lance_namespace_impls/iceberg.py +++ b/python/src/lance_namespace_impls/iceberg.py @@ -14,7 +14,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( CreateNamespaceRequest, CreateNamespaceResponse, @@ -44,6 +44,12 @@ TableAlreadyExistsException, TableNotFoundException, ) +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) logger = logging.getLogger(__name__) @@ -378,8 +384,8 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: if response and "identifiers" in response: for table_id in response["identifiers"]: table_name = table_id.get("name") - if table_name and self._is_lance_table( - prefix, namespace, table_name + if table_name and self._should_include_lance_table( + prefix, namespace, table_name, request.include_declared ): tables.append(table_name) @@ -424,7 +430,10 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}" ) - properties = {self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE} + properties = merge_table_properties( + request.properties, + {self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE}, + ) create_request = { "name": table_name, @@ -434,13 +443,20 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: } namespace_path = self._encode_namespace(namespace) - self.rest_client.post( + response = self.rest_client.post( f"{prefix_path}/namespaces/{namespace_path}/tables", create_request ) logger.info(f"Declared table: {'.'.join(table_id)}") - return DeclareTableResponse(location=table_path) + response_properties = ( + response.get("metadata", {}).get("properties") if response else None + ) + return DeclareTableResponse( + location=table_path, + properties=response_properties or properties, + managed_versioning=False, + ) except RestClientException as e: if e.is_conflict(): @@ -507,7 +523,12 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse ) return DescribeTableResponse( - location=metadata.get("location"), storage_options=props + location=metadata.get("location"), + properties=props, + managed_versioning=False, + is_only_declared=is_only_declared(metadata.get("location")) + if request.check_declared + else None, ) except RestClientException as e: @@ -547,9 +568,19 @@ def deregister_table( f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}" ) - table_location = None - if response and "metadata" in response: - table_location = response["metadata"].get("location") + metadata = response.get("metadata") if response else None + if not metadata: + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + + table_location = metadata.get("location") + properties = metadata.get("properties") or {} + if ( + properties.get(self.TABLE_TYPE_KEY, "").lower() + != self.TABLE_TYPE_LANCE.lower() + ): + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table" + ) self.rest_client.delete( f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}", @@ -558,7 +589,11 @@ def deregister_table( logger.info(f"Deregistered table: {'.'.join(table_id)}") - return DeregisterTableResponse(location=table_location) + return DeregisterTableResponse( + id=request.id, + location=table_location, + properties=properties, + ) except RestClientException as e: if e.is_not_found(): @@ -578,10 +613,14 @@ def _parse_identifier(self, identifier: List[str]) -> List[str]: """Parse identifier list.""" return identifier if identifier else [] - def _is_lance_table( - self, prefix: str, namespace: List[str], table_name: str + def _should_include_lance_table( + self, + prefix: str, + namespace: List[str], + table_name: str, + include_declared_value: Optional[bool], ) -> bool: - """Check if a table is a Lance table.""" + """Check if a table is Lance and matches include_declared.""" try: prefix_path = self._get_prefix_path(prefix) namespace_path = self._encode_namespace(namespace) @@ -592,11 +631,16 @@ def _is_lance_table( ) if response and "metadata" in response: - props = response["metadata"].get("properties", {}) - return ( + metadata = response["metadata"] + props = metadata.get("properties", {}) + if ( props.get(self.TABLE_TYPE_KEY, "").lower() - == self.TABLE_TYPE_LANCE.lower() - ) + != self.TABLE_TYPE_LANCE.lower() + ): + return False + return include_declared( + include_declared_value + ) or has_storage_components(metadata.get("location")) except Exception as e: logger.debug(f"Failed to check if table is Lance table: {e}") return False diff --git a/python/src/lance_namespace_impls/polaris.py b/python/src/lance_namespace_impls/polaris.py index 4f17c9d..666f4fc 100644 --- a/python/src/lance_namespace_impls/polaris.py +++ b/python/src/lance_namespace_impls/polaris.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from typing import Dict, List, Optional -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( CreateNamespaceRequest, CreateNamespaceResponse, @@ -36,6 +36,12 @@ TableAlreadyExistsException, TableNotFoundException, ) +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) logger = logging.getLogger(__name__) @@ -264,7 +270,12 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: if response and "identifiers" in response: for table_id in response["identifiers"]: table_name = table_id.get("name") - if table_name: + if table_name and self._should_include_lance_table( + catalog, + namespace_path, + table_name, + request.include_declared, + ): tables.append(table_name) tables = sorted(set(tables)) @@ -302,7 +313,10 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}" ) - properties = {self.TABLE_TYPE_KEY: self.TABLE_FORMAT_LANCE} + properties = merge_table_properties( + request.properties, + {self.TABLE_TYPE_KEY: self.TABLE_FORMAT_LANCE}, + ) create_request = { "name": table_name, @@ -312,14 +326,21 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: } namespace_path = ".".join(namespace) - self.rest_client.post( + response = self.rest_client.post( f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables", create_request, ) logger.info(f"Declared table: {'.'.join(table_id)}") - return DeclareTableResponse(location=table_path) + response_properties = ( + response.get("table", {}).get("properties") if response else None + ) + return DeclareTableResponse( + location=table_path, + properties=response_properties or properties, + managed_versioning=False, + ) except RestClientException as e: if e.is_conflict(): @@ -343,7 +364,7 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: """Describe a table. - Only load_detailed_metadata=false is supported. Returns location and storage_options only. + Only load_detailed_metadata=false is supported. """ if request.load_detailed_metadata: raise InvalidInputException( @@ -381,7 +402,11 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse return DescribeTableResponse( location=table.get("base-location"), - storage_options=table.get("properties", {}), + properties=table.get("properties", {}), + managed_versioning=False, + is_only_declared=is_only_declared(table.get("base-location")) + if request.check_declared + else None, ) except RestClientException as e: @@ -415,9 +440,17 @@ def deregister_table( f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" ) - table_location = None - if response and "table" in response: - table_location = response["table"].get("base-location") + table = response.get("table") if response else None + if not table: + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + table_format = table.get("format", "") + if table_format.lower() != self.TABLE_FORMAT_LANCE: + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table (format: {table_format})" + ) + + table_location = table.get("base-location") + properties = table.get("properties") self.rest_client.delete( f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" @@ -425,7 +458,11 @@ def deregister_table( logger.info(f"Deregistered table: {'.'.join(table_id)}") - return DeregisterTableResponse(location=table_location) + return DeregisterTableResponse( + id=request.id, + location=table_location, + properties=properties, + ) except RestClientException as e: if e.is_not_found(): @@ -444,3 +481,28 @@ def close(self): def _parse_identifier(self, identifier: List[str]) -> List[str]: """Parse identifier list.""" return identifier if identifier else [] + + def _should_include_lance_table( + self, + catalog: str, + namespace_path: str, + table_name: str, + include_declared_value: Optional[bool], + ) -> bool: + """Check if a Polaris generic table is Lance and matches include_declared.""" + if include_declared(include_declared_value): + return True + + try: + response = self.rest_client.get( + f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" + ) + if not response or "table" not in response: + return False + table = response["table"] + if table.get("format", "").lower() != self.TABLE_FORMAT_LANCE: + return False + return has_storage_components(table.get("base-location")) + except Exception as e: + logger.debug(f"Failed to check if table is Lance table: {e}") + return False diff --git a/python/src/lance_namespace_impls/table_utils.py b/python/src/lance_namespace_impls/table_utils.py new file mode 100644 index 0000000..2d35e26 --- /dev/null +++ b/python/src/lance_namespace_impls/table_utils.py @@ -0,0 +1,45 @@ +""" +Shared table metadata helpers for Lance Namespace implementations. +""" + +from typing import Dict, Optional + + +def merge_table_properties( + properties: Optional[Dict[str, str]], required_properties: Dict[str, str] +) -> Dict[str, str]: + """Merge caller-provided table properties with implementation-required markers.""" + merged = dict(properties or {}) + merged.update(required_properties) + return merged + + +def include_declared(include_declared_value: Optional[bool]) -> bool: + """Return the 0.7.2 ListTables include_declared default.""" + return include_declared_value is not False + + +def has_storage_components( + location: Optional[str], storage_options: Optional[Dict[str, str]] = None +) -> bool: + """Check whether a catalog table location can be opened as a Lance dataset.""" + if not location: + return False + + try: + import lance + + dataset = lance.dataset(location, storage_options=storage_options or {}) + close = getattr(dataset, "close", None) + if close is not None: + close() + return True + except Exception: + return False + + +def is_only_declared( + location: Optional[str], storage_options: Optional[Dict[str, str]] = None +) -> bool: + """Return true when a catalog table exists but no Lance storage can be opened.""" + return not has_storage_components(location, storage_options) diff --git a/python/src/lance_namespace_impls/unity.py b/python/src/lance_namespace_impls/unity.py index 503472a..f873a83 100644 --- a/python/src/lance_namespace_impls/unity.py +++ b/python/src/lance_namespace_impls/unity.py @@ -10,7 +10,7 @@ import pyarrow as pa import pyarrow.ipc as ipc -from lance.namespace import LanceNamespace +from lance_namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( CreateNamespaceRequest, CreateNamespaceResponse, @@ -40,6 +40,12 @@ TableAlreadyExistsException, TableNotFoundException, ) +from lance_namespace_impls.table_utils import ( + has_storage_components, + include_declared, + is_only_declared, + merge_table_properties, +) logger = logging.getLogger(__name__) @@ -410,7 +416,9 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: tables = [] if response and "tables" in response: for table_data in response["tables"]: - if self._is_lance_table(table_data): + if self._should_include_lance_table( + table_data, request.include_declared + ): tables.append(table_data["name"]) tables = sorted(set(tables)) @@ -451,10 +459,13 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: ) ] - properties = { - self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE, - self.MANAGED_BY_KEY: "catalog", - } + properties = merge_table_properties( + request.properties, + { + self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE, + self.MANAGED_BY_KEY: "catalog", + }, + ) create_table = CreateTable( name=table, @@ -467,13 +478,17 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: properties=properties, ) - self.rest_client.post( + table_info = self.rest_client.post( "/tables", create_table, response_converter=_parse_table_info ) logger.info(f"Declared table: {catalog}.{schema}.{table}") - return DeclareTableResponse(location=table_path) + return DeclareTableResponse( + location=table_path, + properties=table_info.properties if table_info else properties, + managed_versioning=False, + ) except RestClientException as e: if e.is_conflict(): @@ -517,7 +532,11 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse return DescribeTableResponse( location=table_info.storage_location, - storage_options=table_info.properties, + properties=table_info.properties, + managed_versioning=False, + is_only_declared=is_only_declared(table_info.storage_location) + if request.check_declared + else None, ) except RestClientException as e: @@ -568,7 +587,11 @@ def deregister_table( logger.info(f"Deregistered table: {full_name}") - return DeregisterTableResponse(location=location) + return DeregisterTableResponse( + id=request.id, + location=location, + properties=table_info.properties, + ) except RestClientException as e: if e.is_not_found(): @@ -596,6 +619,16 @@ def _is_lance_table(self, table_data: Dict[str, Any]) -> bool: table_type = properties.get(self.TABLE_TYPE_KEY) return table_type and table_type.lower() == self.TABLE_TYPE_LANCE.lower() + def _should_include_lance_table( + self, table_data: Dict[str, Any], include_declared_value: Optional[bool] + ) -> bool: + """Check if a Unity table is Lance and matches include_declared.""" + if not self._is_lance_table(table_data): + return False + if include_declared(include_declared_value): + return True + return has_storage_components(table_data.get("storage_location")) + def _is_lance_table_info(self, table_info: TableInfo) -> bool: """Check if a TableInfo represents a Lance table.""" if not table_info or not table_info.properties: diff --git a/python/tests/test_glue.py b/python/tests/test_glue.py index 0893bfb..85155de 100644 --- a/python/tests/test_glue.py +++ b/python/tests/test_glue.py @@ -269,12 +269,23 @@ def test_list_tables(self, glue_namespace): def test_deregister_table(self, glue_namespace): """Test deregistering a table (only removes from Glue, keeps Lance dataset).""" + glue_namespace.glue.get_table.return_value = { + "Table": { + "Name": "test_table", + "Parameters": {"table_type": "LANCE"}, + "StorageDescriptor": {"Location": "s3://bucket/table.lance"}, + } + } + request = DeregisterTableRequest(id=["test_db", "test_table"]) - glue_namespace.deregister_table(request) + response = glue_namespace.deregister_table(request) glue_namespace.glue.delete_table.assert_called_once_with( DatabaseName="test_db", Name="test_table" ) + assert response.id == ["test_db", "test_table"] + assert response.location == "s3://bucket/table.lance" + assert response.properties == {"table_type": "LANCE"} def test_describe_table(self, glue_namespace): """Test describing a table.""" diff --git a/python/tests/test_iceberg.py b/python/tests/test_iceberg.py index b38937d..67fe2b7 100644 --- a/python/tests/test_iceberg.py +++ b/python/tests/test_iceberg.py @@ -427,9 +427,8 @@ def test_describe_table(self, mock_rest_client_class): response = namespace.describe_table(request) self.assertEqual(response.location, "/data/lance/ns/table") - self.assertEqual( - response.storage_options, {"table_type": "lance", "key": "value"} - ) + self.assertEqual(response.properties, {"table_type": "lance", "key": "value"}) + self.assertFalse(response.managed_versioning) @patch("lance_namespace_impls.iceberg.RestClient") def test_describe_table_not_lance(self, mock_rest_client_class): @@ -482,7 +481,12 @@ def test_deregister_table(self, mock_rest_client_class): mock_client.get.side_effect = [ {"defaults": {"prefix": "warehouse1"}}, - {"metadata": {"location": "/data/lance/ns/table"}}, + { + "metadata": { + "location": "/data/lance/ns/table", + "properties": {"table_type": "lance"}, + } + }, ] namespace = IcebergNamespace(**self.properties) diff --git a/python/tests/test_namespace.py b/python/tests/test_namespace.py index c502e34..f35cb41 100644 --- a/python/tests/test_namespace.py +++ b/python/tests/test_namespace.py @@ -58,6 +58,14 @@ def test_connect_non_namespace_class(): assert "does not implement LanceNamespace interface" in str(exc_info.value) +def test_package_registers_namespace_aliases(): + import lance_namespace_impls # noqa: F401 + + ns = connect("iceberg", {"endpoint": "http://localhost:8181"}) + + assert ns.__class__.__name__ == "IcebergNamespace" + + def test_default_methods_raise_unsupported(): from lance_namespace import UnsupportedOperationError from lance_namespace_urllib3_client.models import ( diff --git a/python/tests/test_polaris.py b/python/tests/test_polaris.py index 3ed7130..20063bb 100644 --- a/python/tests/test_polaris.py +++ b/python/tests/test_polaris.py @@ -282,9 +282,11 @@ def test_list_tables(self, mock_rest_client_class): response = namespace.list_tables(request) self.assertEqual(sorted(response.tables), ["table1", "table2", "table3"]) - mock_client.get.assert_called_once_with( - "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables" + self.assertEqual( + mock_client.get.call_args.args[0], + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables", ) + mock_client.get.assert_called_once() @patch("lance_namespace_impls.polaris.RestClient") def test_declare_table(self, mock_rest_client_class): @@ -374,7 +376,8 @@ def test_describe_table(self, mock_rest_client_class): response = namespace.describe_table(request) self.assertEqual(response.location, "/data/lance/ns/table") - self.assertEqual(response.storage_options, {"key": "value"}) + self.assertEqual(response.properties, {"key": "value"}) + self.assertFalse(response.managed_versioning) mock_client.get.assert_called_once_with( "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables/test_table" ) @@ -426,7 +429,10 @@ def test_deregister_table(self, mock_rest_client_class): mock_rest_client_class.return_value = mock_client mock_client.get.return_value = { - "table": {"base-location": "/data/lance/ns/table"} + "table": { + "format": "lance", + "base-location": "/data/lance/ns/table", + } } namespace = PolarisNamespace(**self.properties) diff --git a/python/uv.lock b/python/uv.lock index 5d832e9..ccc7648 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -200,21 +200,22 @@ wheels = [ [[package]] name = "lance-namespace" -version = "0.4.2" +version = "0.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lance-namespace-urllib3-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/af/a77e7c9bc16ccf8a793df06bb87a559198f5b5dfb7ca03f4f32e1fe9cc15/lance_namespace-0.4.2.tar.gz", hash = "sha256:6830d0fb0f3f6dc0388ace2aa1a29f1b8e22c62f22e592a8b578c5da92980e7b", size = 9828, upload-time = "2025-12-31T08:31:02.488Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/04/633687a8e64383058cdf5e36c69c016006225880242804f35ec1241c82cb/lance_namespace-0.7.2.tar.gz", hash = "sha256:43d6e7be6773c4f0b4c8d36602e7245066fc0c06fa334a239a0452f00492768a", size = 10526, upload-time = "2026-04-25T00:01:40.836Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/9a/13242c23f932d8a61288e7c3e5bf5929cd509947535175d0f1ff188c6562/lance_namespace-0.4.2-py3-none-any.whl", hash = "sha256:ad0705dc0fdf37494cccc7163272472b773ad08572b146173114167187e5825e", size = 11702, upload-time = "2025-12-31T08:31:05.309Z" }, + { url = "https://files.pythonhosted.org/packages/d1/4d/6fae405ec2503e14afdf0490cbdb8314db702a804189a5d01f46e5b00b63/lance_namespace-0.7.2-py3-none-any.whl", hash = "sha256:c7f35b99f79cd7c08ebcda3c06fe4d1acdb4db72458cb9e211971c46964db9e1", size = 12356, upload-time = "2026-04-25T00:01:41.558Z" }, ] [[package]] name = "lance-namespace-impls" -version = "0.1.0" +version = "0.2.0" source = { editable = "." } dependencies = [ + { name = "lance-namespace" }, { name = "lance-namespace-urllib3-client" }, { name = "pyarrow" }, { name = "pylance" }, @@ -255,7 +256,8 @@ requires-dist = [ { name = "hive-metastore-client", marker = "extra == 'all'", specifier = ">=1.0.0" }, { name = "hive-metastore-client", marker = "extra == 'hive2'", specifier = ">=1.0.0" }, { name = "hive-metastore-client", marker = "extra == 'hive3'", specifier = ">=1.0.0" }, - { name = "lance-namespace-urllib3-client", specifier = ">=0.4.2" }, + { name = "lance-namespace", specifier = ">=0.7.2" }, + { name = "lance-namespace-urllib3-client", specifier = ">=0.7.2" }, { name = "pyarrow", specifier = ">=15.0.0" }, { name = "pylance", specifier = ">=0.26.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, @@ -270,7 +272,7 @@ provides-extras = ["glue", "hive2", "hive3", "iceberg", "polaris", "unity", "all [[package]] name = "lance-namespace-urllib3-client" -version = "0.4.2" +version = "0.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, @@ -278,9 +280,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/33/3f533d87b8ad0867181a86cb17517cabed277d6816ca66a676dd98076064/lance_namespace_urllib3_client-0.4.2.tar.gz", hash = "sha256:294bfd2579f640053486008a77c2b7d43b8bf9614217941eda51b6f1c0f42f28", size = 155837, upload-time = "2025-12-31T08:31:04.605Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d7/c7d9ae1a2815e3c846fdd6e49f5882d60ffd76a5ddccd34af5fe8ac8124e/lance_namespace_urllib3_client-0.7.2.tar.gz", hash = "sha256:853dcd7affb14fd6ae3039748d1fff4906d51ec41026e43f787ee56f339e18f6", size = 184709, upload-time = "2026-04-25T00:01:42.301Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/79/c67652374a99c14e751fc4c16592b3d20c2f855ffc52c77132b3b48e356c/lance_namespace_urllib3_client-0.4.2-py3-none-any.whl", hash = "sha256:da885fd62b37af8653dba7ed22322bfd0a92a60ce78214eebca76783041af668", size = 262107, upload-time = "2025-12-31T08:31:03.431Z" }, + { url = "https://files.pythonhosted.org/packages/2f/eb/941780e022d9f7c01256eb5d6a1cc1d097a80b188795212d504723ba8fc8/lance_namespace_urllib3_client-0.7.2-py3-none-any.whl", hash = "sha256:0d0316f1a7d6da61c1f17b8f3dfb3643cf882d67712eaa709619c17b2cd9c880", size = 314775, upload-time = "2026-04-25T00:01:39.679Z" }, ] [[package]] From 554f10739e8915f7d03bc87bb8f4b104a81fc8fd Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Mon, 27 Apr 2026 19:09:50 -0700 Subject: [PATCH 2/5] Add Java bundles and fix Glue namespace support --- .github/workflows/java-integ-glue.yml | 1 + .github/workflows/python-integ-glue.yml | 1 + java/Makefile | 8 + java/lance-namespace-glue/pom.xml | 4 + .../lance/namespace/glue/GlueNamespace.java | 80 +++++---- .../namespace/glue/TestGlueNamespace.java | 155 ++++++++++++++++++ java/lance-namespace-hive2/pom.xml | 9 + java/lance-namespace-hive3/pom.xml | 9 + java/lance-namespace-iceberg/pom.xml | 4 + java/lance-namespace-polaris/pom.xml | 11 +- java/lance-namespace-unity/pom.xml | 6 +- java/pom.xml | 49 ++++++ 12 files changed, 302 insertions(+), 35 deletions(-) diff --git a/.github/workflows/java-integ-glue.yml b/.github/workflows/java-integ-glue.yml index 9d7364a..a5f6efd 100644 --- a/.github/workflows/java-integ-glue.yml +++ b/.github/workflows/java-integ-glue.yml @@ -36,6 +36,7 @@ concurrency: jobs: integration-test: + if: github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository runs-on: ubuntu-24.04 timeout-minutes: 30 steps: diff --git a/.github/workflows/python-integ-glue.yml b/.github/workflows/python-integ-glue.yml index c83f233..ed0f086 100644 --- a/.github/workflows/python-integ-glue.yml +++ b/.github/workflows/python-integ-glue.yml @@ -40,6 +40,7 @@ concurrency: jobs: integration-test: + if: github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository runs-on: ubuntu-24.04 timeout-minutes: 30 steps: diff --git a/java/Makefile b/java/Makefile index ac70fc4..0c178ef 100644 --- a/java/Makefile +++ b/java/Makefile @@ -23,6 +23,10 @@ build-glue: ./mvnw spotless:apply -pl lance-namespace-glue -am ./mvnw install -pl lance-namespace-glue -am -DskipTests +.PHONY: bundle-glue +bundle-glue: + ./mvnw package -pl lance-namespace-glue -am -DskipTests + .PHONY: test-glue test-glue: ./mvnw test -pl lance-namespace-glue -Dtest="!*Integration" @@ -129,6 +133,10 @@ build: ./mvnw spotless:apply ./mvnw install -DskipTests +.PHONY: bundle +bundle: + ./mvnw package -DskipTests + .PHONY: test test: ./mvnw test diff --git a/java/lance-namespace-glue/pom.xml b/java/lance-namespace-glue/pom.xml index 22cbfbb..daecbf2 100644 --- a/java/lance-namespace-glue/pom.xml +++ b/java/lance-namespace-glue/pom.xml @@ -99,6 +99,10 @@ + + org.apache.maven.plugins + maven-shade-plugin + diff --git a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java index bb3f7b5..66a7cc8 100644 --- a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java +++ b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java @@ -31,11 +31,14 @@ import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; +import org.lance.namespace.model.DropTableRequest; +import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; +import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.util.LanceTableUtil; import com.google.common.annotations.VisibleForTesting; @@ -55,7 +58,6 @@ import software.amazon.awssdk.services.glue.model.GetDatabasesRequest; import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; import software.amazon.awssdk.services.glue.model.GetTableRequest; -import software.amazon.awssdk.services.glue.model.GetTableVersionRequest; import software.amazon.awssdk.services.glue.model.GetTablesRequest; import software.amazon.awssdk.services.glue.model.GetTablesResponse; import software.amazon.awssdk.services.glue.model.GlueException; @@ -249,7 +251,7 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { String namespaceName = request.getId().get(0); String tableName = request.getId().get(1); - Table table = getGlueTableAtVersion(namespaceName, tableName, request.getVersion()); + Table table = getGlueTable(namespaceName, tableName); ensureLanceTable(table); DescribeTableResponse response = new DescribeTableResponse(); @@ -266,6 +268,16 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { return response; } + @Override + public void tableExists(TableExistsRequest request) { + validateTableId(request.getId()); + String namespaceName = request.getId().get(0); + String tableName = request.getId().get(1); + + Table table = getGlueTable(namespaceName, tableName); + ensureLanceTable(table); + } + @Override public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { validateTableId(request.getId()); @@ -296,6 +308,39 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { } } + @Override + public DropTableResponse dropTable(DropTableRequest request) { + validateTableId(request.getId()); + String namespaceName = request.getId().get(0); + String tableName = request.getId().get(1); + + try { + Table table = getGlueTable(namespaceName, tableName); + ensureLanceTable(table); + String location = + table.storageDescriptor() != null ? table.storageDescriptor().location() : null; + + deleteGlueTable(namespaceName, tableName, true); + if (location != null && !location.isEmpty()) { + safeDropDataset(location); + } + + DropTableResponse response = new DropTableResponse(); + response.setId(request.getId()); + response.setLocation(location); + if (table.parameters() != null && !table.parameters().isEmpty()) { + response.setProperties(table.parameters()); + } + return response; + } catch (EntityNotFoundException e) { + throw GlueToLanceErrorConverter.notFound( + e, "Glue table not found: %s.%s", namespaceName, tableName); + } catch (GlueException e) { + throw GlueToLanceErrorConverter.serverError( + e, "Failed to drop table: %s.%s", namespaceName, tableName); + } + } + @Override public DeclareTableResponse declareTable(DeclareTableRequest request) { validateTableId(request.getId()); @@ -609,37 +654,6 @@ private void deleteGlueTable(String namespaceName, String tableName, boolean fai } } - private Table getGlueTableAtVersion(String namespaceName, String tableName, Long version) { - try { - Table table; - if (version != null) { - // Get specific table version - String tableVersion = String.valueOf(version); - table = - glueClient - .getTableVersion( - GetTableVersionRequest.builder() - .catalogId(config.catalogId()) - .databaseName(namespaceName) - .tableName(tableName) - .versionId(tableVersion) - .build()) - .tableVersion() - .table(); - } else { - // Get current table version - table = getGlueTable(namespaceName, tableName); - } - return table; - } catch (EntityNotFoundException e) { - throw GlueToLanceErrorConverter.notFound( - e, "Glue table not found: %s.%s", namespaceName, tableName); - } catch (GlueException e) { - throw GlueToLanceErrorConverter.serverError( - e, "Failed to get Glue table: %s.%s", namespaceName, tableName); - } - } - private void validateSchemaNotNull( JsonArrowSchema schema, String namespaceName, String tableName) { if (schema == null) { diff --git a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java index 0735518..bfb322c 100644 --- a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java +++ b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java @@ -24,10 +24,13 @@ import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; +import org.lance.namespace.model.DropTableRequest; +import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; +import org.lance.namespace.model.TableExistsRequest; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -57,6 +60,7 @@ import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; import software.amazon.awssdk.services.glue.model.GetTableRequest; import software.amazon.awssdk.services.glue.model.GetTableResponse; +import software.amazon.awssdk.services.glue.model.GetTableVersionRequest; import software.amazon.awssdk.services.glue.model.GetTablesRequest; import software.amazon.awssdk.services.glue.model.GetTablesResponse; import software.amazon.awssdk.services.glue.model.StorageDescriptor; @@ -565,6 +569,25 @@ public void testDescribeTableBasic() { assertEquals("s3://bucket/tbl", resp.getLocation()); } + @Test + public void testDescribeTableWithLanceVersionUsesCurrentGlueTable() { + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) + .build(); + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + + DescribeTableResponse resp = + glueNamespace.describeTable( + new DescribeTableRequest().id(ImmutableList.of("ns1", "tbl")).version(42L)); + + assertEquals("s3://bucket/tbl", resp.getLocation()); + verify(glue, never()).getTableVersion(any(GetTableVersionRequest.class)); + } + @Test public void testDescribeTableNonLanceTable() { Table tbl = @@ -602,6 +625,76 @@ public void testDescribeTableWithInvalidId() { () -> glueNamespace.describeTable(new DescribeTableRequest().id(ImmutableList.of("ns1")))); } + @Test + public void testTableExistsBasic() { + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) + .build(); + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + + glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1", "tbl"))); + } + + @Test + public void testTableExistsWithLanceVersionUsesCurrentGlueTable() { + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) + .build(); + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + + glueNamespace.tableExists( + new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")).version(42L)); + + verify(glue, never()).getTableVersion(any(GetTableVersionRequest.class)); + } + + @Test + public void testTableExistsNonLanceTable() { + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .build(); + + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + + assertThrows( + LanceNamespaceException.class, + () -> + glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns", "tbl")))); + } + + @Test + public void testTableExistsNotFound() { + when(glue.getTable(any(GetTableRequest.class))) + .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build()); + + assertThrows( + LanceNamespaceException.class, + () -> + glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")))); + } + + @Test + public void testTableExistsWithInvalidId() { + assertThrows( + LanceNamespaceException.class, + () -> glueNamespace.tableExists(new TableExistsRequest().id(ImmutableList.of("ns1")))); + assertThrows( + LanceNamespaceException.class, () -> glueNamespace.tableExists(new TableExistsRequest())); + + verify(glue, never()).getTable(any(GetTableRequest.class)); + } + @Test public void testBasicDeregisterTable() { List id = ImmutableList.of("ns1", "tbl"); @@ -624,6 +717,68 @@ public void testBasicDeregisterTable() { assertEquals(ImmutableMap.of("key", "val", "table_type", "lance"), resp.getProperties()); } + @Test + public void testBasicDropTable() { + List id = ImmutableList.of("ns1", "tbl"); + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .parameters(ImmutableMap.of("key", "val", TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) + .build(); + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + when(glue.deleteTable(any(DeleteTableRequest.class))) + .thenReturn(DeleteTableResponse.builder().build()); + + DropTableResponse resp = glueNamespace.dropTable(new DropTableRequest().id(id)); + + assertEquals(id, resp.getId()); + assertEquals("s3://bucket/tbl", resp.getLocation()); + assertEquals(ImmutableMap.of("key", "val", "table_type", "lance"), resp.getProperties()); + verify(glue).deleteTable(any(DeleteTableRequest.class)); + } + + @Test + public void testDropTableRejectsNonLanceTable() { + Table tbl = + Table.builder() + .name("tbl") + .storageDescriptor(StorageDescriptor.builder().location("s3://bucket/tbl").build()) + .build(); + + when(glue.getTable(any(GetTableRequest.class))) + .thenReturn(GetTableResponse.builder().table(tbl).build()); + + assertThrows( + LanceNamespaceException.class, + () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns", "tbl")))); + verify(glue, never()).deleteTable(any(DeleteTableRequest.class)); + } + + @Test + public void testDropTableNotFound() { + when(glue.getTable(any(GetTableRequest.class))) + .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build()); + + assertThrows( + LanceNamespaceException.class, + () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns1", "tbl")))); + verify(glue, never()).deleteTable(any(DeleteTableRequest.class)); + } + + @Test + public void testDropTableWithInvalidId() { + assertThrows( + LanceNamespaceException.class, + () -> glueNamespace.dropTable(new DropTableRequest().id(ImmutableList.of("ns1")))); + assertThrows( + LanceNamespaceException.class, () -> glueNamespace.dropTable(new DropTableRequest())); + + verify(glue, never()).getTable(any(GetTableRequest.class)); + verify(glue, never()).deleteTable(any(DeleteTableRequest.class)); + } + @Test public void testDeregisterTableRejectsNonLanceTable() { Table tbl = diff --git a/java/lance-namespace-hive2/pom.xml b/java/lance-namespace-hive2/pom.xml index ca717fe..6533a3a 100644 --- a/java/lance-namespace-hive2/pom.xml +++ b/java/lance-namespace-hive2/pom.xml @@ -221,4 +221,13 @@ test + + + + + org.apache.maven.plugins + maven-shade-plugin + + + diff --git a/java/lance-namespace-hive3/pom.xml b/java/lance-namespace-hive3/pom.xml index 95627d2..0a95bff 100644 --- a/java/lance-namespace-hive3/pom.xml +++ b/java/lance-namespace-hive3/pom.xml @@ -134,4 +134,13 @@ test + + + + + org.apache.maven.plugins + maven-shade-plugin + + + diff --git a/java/lance-namespace-iceberg/pom.xml b/java/lance-namespace-iceberg/pom.xml index 3a6ec89..86803a5 100644 --- a/java/lance-namespace-iceberg/pom.xml +++ b/java/lance-namespace-iceberg/pom.xml @@ -79,6 +79,10 @@ org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins + maven-shade-plugin + diff --git a/java/lance-namespace-polaris/pom.xml b/java/lance-namespace-polaris/pom.xml index 29052b1..1f51d44 100644 --- a/java/lance-namespace-polaris/pom.xml +++ b/java/lance-namespace-polaris/pom.xml @@ -79,4 +79,13 @@ test - \ No newline at end of file + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + diff --git a/java/lance-namespace-unity/pom.xml b/java/lance-namespace-unity/pom.xml index 1c42bf6..e742972 100644 --- a/java/lance-namespace-unity/pom.xml +++ b/java/lance-namespace-unity/pom.xml @@ -83,6 +83,10 @@ org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins + maven-shade-plugin + - \ No newline at end of file + diff --git a/java/pom.xml b/java/pom.xml index aecd3b9..1297fdb 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -84,6 +84,7 @@ 3.2.5 + 3.5.3 @@ -284,6 +285,54 @@ + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + bundle-jar + package + + shade + + + true + bundle + false + + + org.lance:lance-core + org.lance:lance-namespace-core + org.lance:lance-namespace-apache-client + org.apache.arrow:* + org.questdb:jar-jni + com.google.flatbuffers:flatbuffers-java + org.eclipse.collections:* + io.netty:* + org.slf4j:* + + + + + + + + + + *:* + + LICENSE + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + From 124c5d66eaa946b3f7a22f7782b938e534063d53 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Mon, 27 Apr 2026 19:18:24 -0700 Subject: [PATCH 3/5] Check Java bundle artifacts after publish --- .github/workflows/java-publish.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml index 46304e3..07e05a1 100644 --- a/.github/workflows/java-publish.yml +++ b/.github/workflows/java-publish.yml @@ -96,6 +96,17 @@ jobs: "lance-namespace-glue" "lance-namespace-hive2" "lance-namespace-hive3" + "lance-namespace-iceberg" + "lance-namespace-unity" + "lance-namespace-polaris" + ) + + # Implementation artifacts also publish an attached shaded bundle classifier. + BUNDLE_ARTIFACTS=( + "lance-namespace-glue" + "lance-namespace-hive2" + "lance-namespace-hive3" + "lance-namespace-iceberg" "lance-namespace-unity" "lance-namespace-polaris" ) @@ -122,6 +133,17 @@ jobs: fi done + for ARTIFACT_ID in "${BUNDLE_ARTIFACTS[@]}"; do + URL="https://repo1.maven.org/maven2/org/lance/${ARTIFACT_ID}/${VERSION}/${ARTIFACT_ID}-${VERSION}-bundle.jar" + + if curl --head --silent --fail "$URL" > /dev/null 2>&1; then + echo "OK ${ARTIFACT_ID} bundle is available" + else + echo "X ${ARTIFACT_ID} bundle is not yet available" + ALL_AVAILABLE=false + fi + done + if [ "$ALL_AVAILABLE" = true ]; then echo "" echo "All artifacts are now available in Maven Central!" From 222c4e91fc624a7622e11d0de1d16aaf17a8f7fe Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Mon, 27 Apr 2026 19:26:37 -0700 Subject: [PATCH 4/5] ci: wait for Unity Catalog health --- .github/workflows/java-integ-unity.yml | 5 +++-- docker/unity/docker-compose.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/java-integ-unity.yml b/.github/workflows/java-integ-unity.yml index ed4d054..f067c08 100644 --- a/.github/workflows/java-integ-unity.yml +++ b/.github/workflows/java-integ-unity.yml @@ -55,8 +55,8 @@ jobs: run: | echo "Waiting for Unity Catalog to be ready..." timeout 120 bash -c ' - until curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null 2>&1; do - echo "Waiting for Unity Catalog API..." + until [ "$(docker inspect --format="{{.State.Health.Status}}" unity-catalog 2>/dev/null)" = "healthy" ]; do + echo "Waiting for Unity Catalog container health..." sleep 5 done ' || { @@ -64,6 +64,7 @@ jobs: docker compose -f docker/unity/docker-compose.yml logs exit 1 } + curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null echo "Unity Catalog is ready" - name: Create test catalog run: | diff --git a/docker/unity/docker-compose.yml b/docker/unity/docker-compose.yml index 6e72cfe..322f5a1 100644 --- a/docker/unity/docker-compose.yml +++ b/docker/unity/docker-compose.yml @@ -15,7 +15,7 @@ services: networks: - unity-network healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/api/2.1/unity-catalog/catalogs"] + test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:8080/api/2.1/unity-catalog/catalogs"] interval: 10s timeout: 10s retries: 10 From 744b067bf51d776aa8960d4b06f971c79b507c54 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Mon, 27 Apr 2026 19:36:47 -0700 Subject: [PATCH 5/5] fix: use millisecond Unity timeouts --- .../namespace/unity/UnityNamespaceConfig.java | 10 ++-- .../unity/TestUnityNamespaceConfig.java | 51 +++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java index 76343b8..5a6e6ee 100644 --- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java +++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java @@ -28,8 +28,8 @@ public class UnityNamespaceConfig { private static final String ROOT = "root"; private static final String DEFAULT_API_PATH = "/api/2.1/unity-catalog"; - private static final int DEFAULT_CONNECT_TIMEOUT = 10; - private static final int DEFAULT_READ_TIMEOUT = 60; + private static final int DEFAULT_CONNECT_TIMEOUT_MS = 10000; + private static final int DEFAULT_READ_TIMEOUT_MS = 60000; private static final int DEFAULT_MAX_RETRIES = 3; private final Map properties; @@ -59,11 +59,13 @@ public UnityNamespaceConfig(Map properties) { // Inline PropertyUtil.propertyAsInt String connectTimeoutStr = properties.get(CONNECT_TIMEOUT); this.connectTimeout = - connectTimeoutStr != null ? Integer.parseInt(connectTimeoutStr) : DEFAULT_CONNECT_TIMEOUT; + connectTimeoutStr != null + ? Integer.parseInt(connectTimeoutStr) + : DEFAULT_CONNECT_TIMEOUT_MS; String readTimeoutStr = properties.get(READ_TIMEOUT); this.readTimeout = - readTimeoutStr != null ? Integer.parseInt(readTimeoutStr) : DEFAULT_READ_TIMEOUT; + readTimeoutStr != null ? Integer.parseInt(readTimeoutStr) : DEFAULT_READ_TIMEOUT_MS; String maxRetriesStr = properties.get(MAX_RETRIES); this.maxRetries = maxRetriesStr != null ? Integer.parseInt(maxRetriesStr) : DEFAULT_MAX_RETRIES; diff --git a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java new file mode 100644 index 0000000..05ebf89 --- /dev/null +++ b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceConfig.java @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.unity; + +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestUnityNamespaceConfig { + + @Test + public void testDefaultTimeoutsUseMilliseconds() { + UnityNamespaceConfig config = new UnityNamespaceConfig(requiredProperties()); + + assertThat(config.getConnectTimeout()).isEqualTo(10000); + assertThat(config.getReadTimeout()).isEqualTo(60000); + } + + @Test + public void testCustomTimeoutsUseMilliseconds() { + Map properties = requiredProperties(); + properties.put("connect_timeout", "5000"); + properties.put("read_timeout", "45000"); + + UnityNamespaceConfig config = new UnityNamespaceConfig(properties); + + assertThat(config.getConnectTimeout()).isEqualTo(5000); + assertThat(config.getReadTimeout()).isEqualTo(45000); + } + + private Map requiredProperties() { + Map properties = new HashMap<>(); + properties.put("endpoint", "http://localhost:8080"); + properties.put("catalog", "unity"); + return properties; + } +}